1 module pegged.examples.xml2;
2 
3 import pegged.grammar;
4 
5 enum XMLgrammar = `
6 XML:
7 
8 Document <- prolog element Misc*
9 
10 Char <- .
11 
12 # RestrictedChar <- [\u0001-\uD7FF\uE000-\uFFFD]
13 #\U00010000-\U0010FFFF]
14 
15 S <: ~('\x20' / '\x09' / '\x0D' / '\x0A')+
16 
17 NameStartChar <- ":" / [A-Z] / "_" / [a-z] / [\xC0-\xD6\xD8-\xF6]
18 
19 # \xF8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD]
20 # \U00010000-\U000EFFFF]
21 
22 NameChar <- NameStartChar / "-" / "." / [0-9] / '\xB7'
23 # / [\u0300-\u036F] / [\x203F-\x2040]
24 
25 Name <~ NameStartChar (NameChar)*
26 
27 Names <- Name (' ' Name)*
28 
29 Nmtoken <~ (NameChar)+
30 
31 nmtokens <- Nmtoken (' ' Nmtoken)*
32 
33 EntityValue <- doublequote (!('%' / '&' / doublequote) Char / PEReference / Reference)* doublequote
34              / quote  (!('%' / '&' / quote) Char / PEReference / Reference)* quote
35 
36 AttValue <- doublequote (!('%' / '&' / doublequote) Char / Reference)* doublequote
37              / quote  (!('%' / '&' / quote) Char / Reference)* quote
38 
39 SystemLiteral <~ doublequote (!doublequote Char)* doublequote
40                / quote (!quote Char)* quote
41 
42 PubidLiteral <~ doublequote PubidChar* doublequote
43               / quote (!quote PubidChar)* quote
44 
45 PubidChar <- [\x20\x0D\x0A] / [a-zA-Z0-9] / [-'()+,./:=?;!*#@$_%]
46 
47 CharData <~ (!('<' / '&' / "]]>" ) Char)*
48 
49 Comment <- "<!--" ~(!'-' Char / '-' !'-' Char)* "-->"
50 
51 PI <- "<?" PITarget (S (!"?>" Char)*)? "?>"
52 
53 PITarget <- !([xX][mM][lL]) Name
54 
55 CDSect <- CDStart CData CDEnd
56 
57 CDStart <- "<![CDATA["
58 
59 CData <- (!"]]>" Char)*
60 
61 CDEnd <- "]]>"
62 
63 prolog <- XMLDecl Misc* (doctypedecl Misc*)?
64 
65 XMLDecl <- "<?xml" VersionInfo EncodingDecl? SDDecl? S? "?>"
66 
67 VersionInfo <- S "version" Eq (quote VersionNum quote / doublequote VersionNum doublequote)
68 
69 Eq <- S? '=' S?
70 
71 VersionNum <- '1.0' / '1.1'
72 
73 Misc <- Comment / PI / S
74 
75 doctypedecl <- "<!DOCTYPE" S Name (S ExternalID)? S? ('[' intSubset ']' S?)? '>'
76 
77 DeclSep <- PEReference / S
78 
79 intSubset <- (markupdecl / DeclSep)*
80 
81 markupdecl <- elementdecl / AttlistDecl / EntityDecl / NotationDecl / PI / Comment
82 
83 extSubset <- TextDecl? extSubsetDecl
84 extSubsetDecl <- (markupdecl / conditionalSect / DeclSep)*
85 
86 
87 SDDecl <- S 'standalone' Eq ( doublequote ("yes"/"no") doublequote
88                             / quote       ("yes"/"no") quote)
89 
90 element <- EmptyElemTag / STag content ETag
91 
92 STag <- "<" Name (S Attribute)* S? ">"
93 Attribute <- Name Eq AttValue
94 
95 ETag <- "</" Name S? ">"
96 
97 content <- CharData? ((element / Reference / CDSect / PI / Comment) CharData?)*
98 
99 EmptyElemTag <- "<" (S Attribute)* S? "/>"
100 
101 elementdecl <- "<!ELEMENT" S Name S contentspec S? ">"
102 contentspec <- "EMPTY" / "ANY" / Mixed / children
103 
104 children <- (choice / seq) ('?' / '*' / '+')?
105 cp <- (Name / choice / seq) ('?' / '*' / '+')?
106 choice <- '(' S? cp ( S? '|' S? cp )+ S? ')'
107 seq <-    '(' S? cp ( S? ',' S? cp )* S? ')'
108 
109 Mixed <- '(' S? "#PCDATA" (S? '|' S? Name)* S? ")*"
110        / '(' S? "#PCDATA" S? ")"
111 
112 AttlistDecl <- "<!ATTLIST" S Name AttDef* S? ">"
113 AttDef <- S Name S AttType S DefaultDecl
114 
115 AttType <- StringType / TokenizedType / EnumeratedType
116 StringType <- "CDATA"
117 TokenizedType <- "IDREFS" / "IDREF" / "ID"
118                / "ENTITIES" / "ENTITY"
119                / "NMTOKENS" / "NMTOKEN"
120 
121 EnumeratedType <- NotationType / Enumeration
122 NotationType <- "NOTATION" S "(" S? Name (S? '|' S? Name)* S? ')'
123 
124 Enumeration <- '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
125 
126 DefaultDecl <- "#REQUIRED" / "#IMPLIED"
127              / (("#FIXED" S)? AttValue)
128 
129 conditionalSect <- includeSect / ignoreSect
130 
131 includeSect <- "<![" S? "INCLUDE" S? "[" extSubsetDecl "]]>"
132 
133 ignoreSect <- "<![" S? "IGNORE"   S? "[" ignoreSectContents* "]]>"
134 
135 ignoreSectContents <- Ignore ("<![" ignoreSectContents "]]>" Ignore)*
136 
137 Ignore <- (!("<![" / "]]>") Char)*
138 
139 CharRef <- "&#"  [0-9]+       ";"
140          / "&#x" [0-9a-fA-F]+ ";"
141 
142 Reference <- EntityRef / CharRef
143 
144 EntityRef <- '&' Name ';'
145 
146 PEReference <- '%' Name ';'
147 
148 EntityDecl <- GEDecl / PEDecl
149 
150 GEDecl <- "<!ENTITY" S Name S EntityDef S? '>'
151 PEDecl <- "<!ENTITY" S '%' S Name S PEDef S? '>'
152 
153 EntityDef <- EntityValue / (ExternalID NDataDecl?)
154 
155 PEDef <- EntityValue / ExternalID
156 
157 ExternalID <- "SYSTEM" S SystemLiteral
158             / "PUBLIC" S PubidLiteral S SystemLiteral
159 
160 NDataDecl <- S "NDATA" S Name
161 
162 TextDecl <- "<?xml" VersionInfo? EncodingDecl S? "?>"
163 
164 extParsedEnt <- (TextDecl? content)
165 
166 EncodingDecl <- S "encoding" Eq ( doublequote EncName doublequote
167                                 / quote EncName quote)
168 EncName <~ [A-Za-z] ([A-Za-z0-9._] / '-')*
169 
170 NotationDecl <- "<!NOTATION" S Name S (ExternalID / PublicID) S? ">"
171 PublicID <- "PUBLIC" S PubidLiteral
172 
173 `;
174 
175 mixin(grammar(XMLgrammar));
176 
177 enum example1 =
178 `<?xml version="1.1" encoding="ISO-8859-1"?>
179 <!-- Edited by XMLSpy® -->
180 <CATALOG>
181     <CD>
182         <TITLE>Empire Burlesque</TITLE>
183         <ARTIST>Bob Dylan</ARTIST>
184         <COUNTRY>USA</COUNTRY>
185         <COMPANY>Columbia</COMPANY>
186         <PRICE>10.90</PRICE>
187         <YEAR>1985</YEAR>
188     </CD>
189     <CD>
190         <TITLE>Hide your heart</TITLE>
191         <ARTIST>Bonnie Tyler</ARTIST>
192         <COUNTRY>UK</COUNTRY>
193         <COMPANY>CBS Records</COMPANY>
194         <PRICE>9.90</PRICE>
195         <YEAR>1988</YEAR>
196     </CD>
197     <CD>
198         <TITLE>Greatest Hits</TITLE>
199         <ARTIST>Dolly Parton</ARTIST>
200         <COUNTRY>USA</COUNTRY>
201         <COMPANY>RCA</COMPANY>
202         <PRICE>9.90</PRICE>
203         <YEAR>1982</YEAR>
204     </CD>
205     <CD>
206         <TITLE>Still got the blues</TITLE>
207         <ARTIST>Gary Moore</ARTIST>
208         <COUNTRY>UK</COUNTRY>
209         <COMPANY>Virgin records</COMPANY>
210         <PRICE>10.20</PRICE>
211         <YEAR>1990</YEAR>
212     </CD>
213     <CD>
214         <TITLE>Eros</TITLE>
215         <ARTIST>Eros Ramazzotti</ARTIST>
216         <COUNTRY>EU</COUNTRY>
217         <COMPANY>BMG</COMPANY>
218         <PRICE>9.90</PRICE>
219         <YEAR>1997</YEAR>
220     </CD>
221     <CD>
222         <TITLE>One night only</TITLE>
223         <ARTIST>Bee Gees</ARTIST>
224         <COUNTRY>UK</COUNTRY>
225         <COMPANY>Polydor</COMPANY>
226         <PRICE>10.90</PRICE>
227         <YEAR>1998</YEAR>
228     </CD>
229     <CD>
230         <TITLE>Sylvias Mother</TITLE>
231         <ARTIST>Dr.Hook</ARTIST>
232         <COUNTRY>UK</COUNTRY>
233         <COMPANY>CBS</COMPANY>
234         <PRICE>8.10</PRICE>
235         <YEAR>1973</YEAR>
236     </CD>
237     <CD>
238         <TITLE>Maggie May</TITLE>
239         <ARTIST>Rod Stewart</ARTIST>
240         <COUNTRY>UK</COUNTRY>
241         <COMPANY>Pickwick</COMPANY>
242         <PRICE>8.50</PRICE>
243         <YEAR>1990</YEAR>
244     </CD>
245     <CD>
246         <TITLE>Romanza</TITLE>
247         <ARTIST>Andrea Bocelli</ARTIST>
248         <COUNTRY>EU</COUNTRY>
249         <COMPANY>Polydor</COMPANY>
250         <PRICE>10.80</PRICE>
251         <YEAR>1996</YEAR>
252     </CD>
253     <CD>
254         <TITLE>When a man loves a woman</TITLE>
255         <ARTIST>Percy Sledge</ARTIST>
256         <COUNTRY>USA</COUNTRY>
257         <COMPANY>Atlantic</COMPANY>
258         <PRICE>8.70</PRICE>
259         <YEAR>1987</YEAR>
260     </CD>
261     <CD>
262         <TITLE>Black angel</TITLE>
263         <ARTIST>Savage Rose</ARTIST>
264         <COUNTRY>EU</COUNTRY>
265         <COMPANY>Mega</COMPANY>
266         <PRICE>10.90</PRICE>
267         <YEAR>1995</YEAR>
268     </CD>
269     <CD>
270         <TITLE>1999 Grammy Nominees</TITLE>
271         <ARTIST>Many</ARTIST>
272         <COUNTRY>USA</COUNTRY>
273         <COMPANY>Grammy</COMPANY>
274         <PRICE>10.20</PRICE>
275         <YEAR>1999</YEAR>
276     </CD>
277     <CD>
278         <TITLE>For the good times</TITLE>
279         <ARTIST>Kenny Rogers</ARTIST>
280         <COUNTRY>UK</COUNTRY>
281         <COMPANY>Mucik Master</COMPANY>
282         <PRICE>8.70</PRICE>
283         <YEAR>1995</YEAR>
284     </CD>
285     <CD>
286         <TITLE>Big Willie style</TITLE>
287         <ARTIST>Will Smith</ARTIST>
288         <COUNTRY>USA</COUNTRY>
289         <COMPANY>Columbia</COMPANY>
290         <PRICE>9.90</PRICE>
291         <YEAR>1997</YEAR>
292     </CD>
293     <CD>
294         <TITLE>Tupelo Honey</TITLE>
295         <ARTIST>Van Morrison</ARTIST>
296         <COUNTRY>UK</COUNTRY>
297         <COMPANY>Polydor</COMPANY>
298         <PRICE>8.20</PRICE>
299         <YEAR>1971</YEAR>
300     </CD>
301     <CD>
302         <TITLE>Soulsville</TITLE>
303         <ARTIST>Jorn Hoel</ARTIST>
304         <COUNTRY>Norway</COUNTRY>
305         <COMPANY>WEA</COMPANY>
306         <PRICE>7.90</PRICE>
307         <YEAR>1996</YEAR>
308     </CD>
309     <CD>
310         <TITLE>The very best of</TITLE>
311         <ARTIST>Cat Stevens</ARTIST>
312         <COUNTRY>UK</COUNTRY>
313         <COMPANY>Island</COMPANY>
314         <PRICE>8.90</PRICE>
315         <YEAR>1990</YEAR>
316     </CD>
317     <CD>
318         <TITLE>Stop</TITLE>
319         <ARTIST>Sam Brown</ARTIST>
320         <COUNTRY>UK</COUNTRY>
321         <COMPANY>A and M</COMPANY>
322         <PRICE>8.90</PRICE>
323         <YEAR>1988</YEAR>
324     </CD>
325     <CD>
326         <TITLE>Bridge of Spies</TITLE>
327         <ARTIST>T'Pau</ARTIST>
328         <COUNTRY>UK</COUNTRY>
329         <COMPANY>Siren</COMPANY>
330         <PRICE>7.90</PRICE>
331         <YEAR>1987</YEAR>
332     </CD>
333     <CD>
334         <TITLE>Private Dancer</TITLE>
335         <ARTIST>Tina Turner</ARTIST>
336         <COUNTRY>UK</COUNTRY>
337         <COMPANY>Capitol</COMPANY>
338         <PRICE>8.90</PRICE>
339         <YEAR>1983</YEAR>
340     </CD>
341     <CD>
342         <TITLE>Midt om natten</TITLE>
343         <ARTIST>Kim Larsen</ARTIST>
344         <COUNTRY>EU</COUNTRY>
345         <COMPANY>Medley</COMPANY>
346         <PRICE>7.80</PRICE>
347         <YEAR>1983</YEAR>
348     </CD>
349     <CD>
350         <TITLE>Pavarotti Gala Concert</TITLE>
351         <ARTIST>Luciano Pavarotti</ARTIST>
352         <COUNTRY>UK</COUNTRY>
353         <COMPANY>DECCA</COMPANY>
354         <PRICE>9.90</PRICE>
355         <YEAR>1991</YEAR>
356     </CD>
357     <CD>
358         <TITLE>The dock of the bay</TITLE>
359         <ARTIST>Otis Redding</ARTIST>
360         <COUNTRY>USA</COUNTRY>
361         <COMPANY>Atlantic</COMPANY>
362         <PRICE>7.90</PRICE>
363         <YEAR>1987</YEAR>
364     </CD>
365     <CD>
366         <TITLE>Picture book</TITLE>
367         <ARTIST>Simply Red</ARTIST>
368         <COUNTRY>EU</COUNTRY>
369         <COMPANY>Elektra</COMPANY>
370         <PRICE>7.20</PRICE>
371         <YEAR>1985</YEAR>
372     </CD>
373     <CD>
374         <TITLE>Red</TITLE>
375         <ARTIST>The Communards</ARTIST>
376         <COUNTRY>UK</COUNTRY>
377         <COMPANY>London</COMPANY>
378         <PRICE>7.80</PRICE>
379         <YEAR>1987</YEAR>
380     </CD>
381     <CD>
382         <TITLE>Unchain my heart</TITLE>
383         <ARTIST>Joe Cocker</ARTIST>
384         <COUNTRY>USA</COUNTRY>
385         <COMPANY>EMI</COMPANY>
386         <PRICE>8.20</PRICE>
387         <YEAR>1987</YEAR>
388     </CD>
389 </CATALOG>
390 `;
391 
392 enum example2 =
393 `<?xml version="1.0"?>
394 <catalog>
395    <book id="bk101">
396       <author>Gambardella, Matthew</author>
397       <title>XML Developer's Guide</title>
398       <genre>Computer</genre>
399       <price>44.95</price>
400       <publish_date>2000-10-01</publish_date>
401       <description>An in-depth look at creating applications
402       with XML.</description>
403    </book>
404    <book id="bk102">
405       <author>Ralls, Kim</author>
406       <title>Midnight Rain</title>
407       <genre>Fantasy</genre>
408       <price>5.95</price>
409       <publish_date>2000-12-16</publish_date>
410       <description>A former architect battles corporate zombies,
411       an evil sorceress, and her own childhood to become queen
412       of the world.</description>
413    </book>
414    <book id="bk103">
415       <author>Corets, Eva</author>
416       <title>Maeve Ascendant</title>
417       <genre>Fantasy</genre>
418       <price>5.95</price>
419       <publish_date>2000-11-17</publish_date>
420       <description>After the collapse of a nanotechnology
421       society in England, the young survivors lay the
422       foundation for a new society.</description>
423    </book>
424    <book id="bk104">
425       <author>Corets, Eva</author>
426       <title>Oberon's Legacy</title>
427       <genre>Fantasy</genre>
428       <price>5.95</price>
429       <publish_date>2001-03-10</publish_date>
430       <description>In post-apocalypse England, the mysterious
431       agent known only as Oberon helps to create a new life
432       for the inhabitants of London. Sequel to Maeve
433       Ascendant.</description>
434    </book>
435    <book id="bk105">
436       <author>Corets, Eva</author>
437       <title>The Sundered Grail</title>
438       <genre>Fantasy</genre>
439       <price>5.95</price>
440       <publish_date>2001-09-10</publish_date>
441       <description>The two daughters of Maeve, half-sisters,
442       battle one another for control of England. Sequel to
443       Oberon's Legacy.</description>
444    </book>
445    <book id="bk106">
446       <author>Randall, Cynthia</author>
447       <title>Lover Birds</title>
448       <genre>Romance</genre>
449       <price>4.95</price>
450       <publish_date>2000-09-02</publish_date>
451       <description>When Carla meets Paul at an ornithology
452       conference, tempers fly as feathers get ruffled.</description>
453    </book>
454    <book id="bk107">
455       <author>Thurman, Paula</author>
456       <title>Splish Splash</title>
457       <genre>Romance</genre>
458       <price>4.95</price>
459       <publish_date>2000-11-02</publish_date>
460       <description>A deep sea diver finds true love twenty
461       thousand leagues beneath the sea.</description>
462    </book>
463    <book id="bk108">
464       <author>Knorr, Stefan</author>
465       <title>Creepy Crawlies</title>
466       <genre>Horror</genre>
467       <price>4.95</price>
468       <publish_date>2000-12-06</publish_date>
469       <description>An anthology of horror stories about roaches,
470       centipedes, scorpions  and other insects.</description>
471    </book>
472    <book id="bk109">
473       <author>Kress, Peter</author>
474       <title>Paradox Lost</title>
475       <genre>Science Fiction</genre>
476       <price>6.95</price>
477       <publish_date>2000-11-02</publish_date>
478       <description>After an inadvertant trip through a Heisenberg
479       Uncertainty Device, James Salway discovers the problems
480       of being quantum.</description>
481    </book>
482    <book id="bk110">
483       <author>O'Brien, Tim</author>
484       <title>Microsoft .NET: The Programming Bible</title>
485       <genre>Computer</genre>
486       <price>36.95</price>
487       <publish_date>2000-12-09</publish_date>
488       <description>Microsoft's .NET initiative is explored in
489       detail in this deep programmer's reference.</description>
490    </book>
491    <book id="bk111">
492       <author>O'Brien, Tim</author>
493       <title>MSXML3: A Comprehensive Guide</title>
494       <genre>Computer</genre>
495       <price>36.95</price>
496       <publish_date>2000-12-01</publish_date>
497       <description>The Microsoft MSXML3 parser is covered in
498       detail, with attention to XML DOM interfaces, XSLT processing,
499       SAX and more.</description>
500    </book>
501    <book id="bk112">
502       <author>Galos, Mike</author>
503       <title>Visual Studio 7: A Comprehensive Guide</title>
504       <genre>Computer</genre>
505       <price>49.95</price>
506       <publish_date>2001-04-16</publish_date>
507       <description>Microsoft Visual Studio 7 is explored in depth,
508       looking at how Visual Basic, Visual C++, C#, and ASP+ are
509       integrated into a comprehensive development
510       environment.</description>
511    </book>
512 </catalog>
513 `;
514 
515 unittest
516 {
517     assert(XML(example1).successful);
518     assert(XML(example2).successful);
519 }