1 module pegged.examples.markdown;
2 
3 enum MarkdownGrammar= `
4 # Markdown grammar.
5 # Adapted from the peg-markdown PEG grammar at
6 # https://github.com/jgm/peg-markdown
7 # and somewhat simplified.
8 
9 Markdown:
10 
11 Doc <- BOM?  TitleBlock? (%Block)*
12 
13 #### Pandoc Extension #### Partially implemented
14 TitleBlock <- :"%" TitleText
15               :"%" Date
16               :"%" Authors
17 TitleText <~ Line (:Spacechar Line)*
18 Date      <- ;Line
19 Authors   <- Author ((:";" (:Newline :Spacechar)? / :Newline :Spacechar) Author)*
20 Author    <- (!";" !Newline %Inlines)+
21 
22 Block <- BlankLine*
23          ( BlockQuote
24          / FootnoteDefinition #### Pandoc Extension
25          / CodeBlock
26          / Verbatim
27          / Note
28          / Reference
29          / HorizontalRule
30          / Heading
31          / Table #### Pandoc Extension
32          / DefinitionList #### Pandoc Extension
33          / OrderedList
34          / BulletList
35          / HtmlBlock
36          / StyleBlock
37          / Para
38          / %Inlines )
39 
40 Para <- :NonindentSpace %Inlines BlankLine+
41 
42 #Plain <~ %Inlines
43 
44 Heading <- SetextHeading / AtxHeading
45 
46 SetextHeading <- SetextHeading1 / SetextHeading2
47 
48 SetextHeading1 <~ &(Line SetextBottom1)
49                   ( !Endline Inline )+ Sp? :Newline
50                   :SetextBottom1
51 
52 SetextHeading2 <~ &(Line SetextBottom2)
53                   ( !Endline Inline)+ Sp? :Newline
54                   :SetextBottom2
55 
56 SetextBottom1 <~ "===" "="* Newline
57 
58 SetextBottom2 <~ "---" "-"* Newline
59 
60 AtxHeading <- AtxStart ~(Sp? AtxInline+ Sp? ("#"* Sp)?  Newline)
61 
62 AtxInline <- !Newline !(Sp? "#"* Sp Newline) Inline
63 
64 AtxStart <- ( "######" / "#####" / "####" / "###" / "##" / "#" )
65 
66 #### Pandoc Extension ####
67 # A semantic function must find the columns, based on the dashes
68 Table <- SimpleTable # For further extension (multiline tables and grid tables)
69 
70 SimpleTable <- TableHeaders
71                TableLine+
72                :(BlankLine / "-"+ Newline BlankLine)
73                TableCaption?
74 
75 TableHeaders <- ;Line?
76                 ~("-"+) ~(Spacechar+) ~("-"+) (~(Spacechar+) ~("-"+))* :Newline
77 
78 # ;Line makes all the inlines disappear. Is that wanted or not?
79 TableLine <- !(BlankLine / "-"+ Newline BlankLine) ;Line
80 
81 TableCaption <- :"Table:" ;Line
82               / :":" ;Line
83 
84 BlockQuote <- ( ">" " "? Line ( !">" !BlankLine Line )* BlankLine* )+
85 
86 NonblankIndentedLine <~ !BlankLine IndentedLine
87 
88 VerbatimChunk <- BlankLine* NonblankIndentedLine+
89 
90 Verbatim <~ VerbatimChunk+
91 
92 HorizontalRule <- NonindentSpace                      # thematic break; may start with up to three spaces
93                  ( "*" Sp "*" Sp "*" (Sp "*")*        # three or more "*", optionally separated by spaces/tabs
94                  / "-" Sp "-" Sp "-" (Sp "-")*        # ... or three or more "-"
95                  / "_" Sp "_" Sp "_" (Sp "_")*)       # ... or three or more "_" (the three kinds cannot be mixed)
96                  Sp Newline BlankLine+                # only spaces/tabs may follow on the line, then at least one blank line
97 
98 BulletList <- &Bullet (%BulletListTight / %BulletListLoose)
99 
100 BulletListTight <- (%BulletListItemTight)+ :BlankLine* !Bullet
101 
102 BulletListItemTight <- Bullet ListBlock
103                        (!BlankLine ListContinuationBlock)*
104                        #!ListContinuationBlock
105 
106 BulletListLoose <- (%BulletListItem :BlankLine*)+
107 
108 BulletListItem <- Bullet ListBlock ListContinuationBlock*
109 
110 Bullet <: !HorizontalRule NonindentSpace ("+" / "*" / "-") Spacechar+   # bullet marker: up to three spaces, one of + * -, then at least one space/tab; refused where a HorizontalRule matches (e.g. "* * *"); the rule is declared with <: so its match is discarded
111 
112 OrderedList <- &Enumerator (OrderedListTight / OrderedListLoose)
113 
114 OrderedListTight <- (%OrderedListItemTight)+ :BlankLine* !Enumerator
115 
116 OrderedListItemTight <- Enumerator ListBlock
117                         (!BlankLine ListContinuationBlock)*
118                         #!ListContinuationBlock # Is it necessary?
119 
120 OrderedListLoose <- (%OrderedListItem :BlankLine*)+
121 
122 OrderedListItem <- Enumerator ListBlock ListContinuationBlock*
123 
124 Enumerator <: NonindentSpace ~[0-9]+ "." Spacechar+   # ordered-list marker: up to three spaces, one or more digits (fused), a dot, then at least one space/tab; the rule is declared with <: so its match is discarded
125 
126 ListBlock <- !BlankLine %Inlines ListBlockLine*
127 
128 ListContinuationBlock <- BlankLine* (Indent ListBlock)+
129 
130 ListBlockLine <- !BlankLine !( Indent? (Bullet / Enumerator)) !HorizontalRule Indent? %Inlines
131 
132 DefinitionList <- Term :(BlankLine?) Definition+
133 
134 Term <- (!Newline .)+ :Newline
135 
136 Definition <- ( Spacechar Spacechar :(":"/"~") Spacechar Spacechar            # marker ":" or "~" after two spaces/tabs, then two more
137               / Spacechar :(":"/"~") Spacechar Spacechar Spacechar            # ... after one, then three more
138               / :(":"/"~") Spacechar Spacechar Spacechar Spacechar)           # ... at the line start, then four more (marker plus padding is always five characters; the marker itself is discarded)
139               Inlines :Newline                                                # definition text, followed by a discarded line break
140               IndentedLine*                                                   # any number of further indented lines belonging to this definition
141 
142 # Parsers for different kinds of block-level HTML content.
143 # This is repetitive due to constraints of PEG grammar.
144 
145 HtmlBlockOpen(Type) <- :"<" :Spnl Type :Spnl HtmlAttribute* :">" :(Spnl*)
146 HtmlBlockClose(Type) <- :"<" :Spnl :"/" Type :Spnl :">" :(Spnl*)
147 HtmlBlockT(Type) <- ;HtmlBlockOpen(Type)
148                     (%HtmlBlockInTags / NonHtml)*
149                     ;HtmlBlockClose(Type)
150 # Hack: NonHtml stops at any closing tag; it should really stop only at an HtmlBlockClose for one of the known block types.
151 NonHtml <- (!("<" Spnl "/") Inline)*
152 
153 HtmlBlockInTags <- HtmlBlockT("address" / "ADDRESS")
154                  / HtmlBlockT("blockquote" / "BLOCKQUOTE")
155                  / HtmlBlockT("center" / "CENTER")
156                  / HtmlBlockT("dir" / "DIR")
157                  / HtmlBlockT("div" / "DIV")
158                  / HtmlBlockT("dl" / "DL")
159                  / HtmlBlockT("fieldset" / "FIELDSET")
160                  / HtmlBlockT("form" / "FORM")
161                  / HtmlBlockT("h1" / "H1")
162                  / HtmlBlockT("h2" / "H2")
163                  / HtmlBlockT("h3" / "H3")
164                  / HtmlBlockT("h4" / "H4")
165                  / HtmlBlockT("h5" / "H5")
166                  / HtmlBlockT("h6" / "H6")
167                  / HtmlBlockT("menu" / "MENU")
168                  / HtmlBlockT("noframes" / "NOFRAMES")
169                  / HtmlBlockT("noscript" / "NOSCRIPT")
170                  / HtmlBlockT("ol" / "OL")
171                  / HtmlBlockT("p" / "P")
172                  / HtmlBlockT("pre" / "PRE")
173                  / HtmlBlockT("table" / "TABLE")
174                  / HtmlBlockT("ul" / "UL")
175                  / HtmlBlockT("dd" / "DD")
176                  / HtmlBlockT("dt" / "DT")
177                  / HtmlBlockT("frameset" / "FRAMESET")
178                  / HtmlBlockT("li" / "LI")
179                  / HtmlBlockT("tbody" / "TBODY")
180                  / HtmlBlockT("td" / "TD")
181                  / HtmlBlockT("tfoot" / "TFOOT")
182                  / HtmlBlockT("th" / "TH")
183                  / HtmlBlockT("thead" / "THEAD")
184                  / HtmlBlockT("tr" / "TR")
185                  / HtmlBlockT("script" / "SCRIPT")
186 
187 HtmlBlock <- (%HtmlBlockInTags / HtmlComment / HtmlBlockSelfClosing) BlankLine+
188 
189 HtmlBlockSelfClosing <- "<" Spnl HtmlBlockType Spnl HtmlAttribute* "/" Spnl ">"
190 
191 HtmlBlockType <- "address" / "blockquote" / "center" / "dir" / "div" / "dl" / "fieldset" / "form" / "h1" / "h2" / "h3" /
192                 "h4" / "h5" / "h6" / "hr" / "isindex" / "menu" / "noframes" / "noscript" / "ol" / "p" / "pre" / "table" /
193                 "ul" / "dd" / "dt" / "frameset" / "li" / "tbody" / "td" / "tfoot" / "th" / "thead" / "tr" / "script" /
194                 "ADDRESS" / "BLOCKQUOTE" / "CENTER" / "DIR" / "DIV" / "DL" / "FIELDSET" / "FORM" / "H1" / "H2" / "H3" /
195                 "H4" / "H5" / "H6" / "HR" / "ISINDEX" / "MENU" / "NOFRAMES" / "NOSCRIPT" / "OL" / "P" / "PRE" / "TABLE" /
196                 "UL" / "DD" / "DT" / "FRAMESET" / "LI" / "TBODY" / "TD" / "TFOOT" / "TH" / "THEAD" / "TR" / "SCRIPT"
197 
198 StyleOpen <- "<" Spnl ("style" / "STYLE") Spnl HtmlAttribute* ">"
199 StyleClose <- "<" Spnl "/" ("style" / "STYLE") Spnl ">"
200 InStyleTags <- StyleOpen (!StyleClose .)* StyleClose
201 StyleBlock <- InStyleTags BlankLine*
202 
203 Inlines <- (!Endline %Inline )+ Endline?
204 
205 Inline <- Str
206         / Endline
207         / UlOrStarLine
208         / Space
209         / Strong
210         / Emph
211         / Strikeout          #### Pandoc Extension
212         / Superscript        #### Pandoc Extension
213         / Subscript          #### Pandoc Extension
214         / Math               #### Pandoc Extension
215         / FootnoteReference  #### Pandoc Extension
216         / Image
217         / Link
218         / NoteReference
219         / InlineNote
220         / Code
221         / RawHtml
222         / Entity
223         / EscapedChar
224         / Smart
225         / Symbol
226 
227 Space <~ Spacechar+
228 
229 Str <~ NormalChar+ StrChunk*
230 
231 StrChunk <~ (NormalChar / "_"+ &Alphanumeric)+ / AposChunk
232 
233 AposChunk <- quote &Alphanumeric
234 
235 EscapedChar <- backslash (backquote / backslash / [-/_*{}[\]()#+.!><])
236 
237 Entity <- HexEntity / DecEntity / CharEntity
238 
239 Endline <~ LineBreak / TerminalEndline / NormalEndline
240 
241 NormalEndline <- Sp Newline !BlankLine !">" !AtxStart          # soft line end: optional spaces/tabs and a line break that is not followed by a blank line, a ">" or an ATX heading start
242                  !(Line ("===" "="* / "---" "-"*) Newline)     # ... nor by a line that is itself underlined with "===..." or "---..." (setext heading, cf. SetextBottom1/SetextBottom2)
243 
244 TerminalEndline <- Sp Newline eoi
245 
246 LineBreak <~ "  " NormalEndline
247 
248 Symbol <~ SpecialChar
249 
250 UlOrStarLine <~ UlLine / StarLine
251 StarLine <- "****" "*"* / Spacechar "*"+ &Spacechar
252 UlLine   <- "____" "_"* / Spacechar "_"+ &Spacechar
253 
254 Emph <~ EmphStar / EmphUl
255 
256 OneStarOpen  <- !StarLine "*" !Spacechar !Newline
257 OneStarClose <- !Spacechar !Newline Inline :"*"
258 
259 EmphStar <- :OneStarOpen
260             ( !OneStarClose Inline )*
261             OneStarClose
262 
263 OneUlOpen  <- !UlLine "_" !Spacechar !Newline
264 OneUlClose <- !Spacechar !Newline Inline :"_" !Alphanumeric
265 
266 EmphUl <- :OneUlOpen
267           ( !OneUlClose Inline )*
268           OneUlClose
269 
270 Strong <~ StrongStar / StrongUl
271 
272 TwoStarOpen <-  !StarLine "**" !Spacechar !Newline
273 TwoStarClose <- !Spacechar !Newline Inline :"**"
274 
275 StrongStar <- :TwoStarOpen
276               ( !TwoStarClose Inline )*
277               TwoStarClose
278 
279 TwoUlOpen <- !UlLine "__" !Spacechar !Newline
280 TwoUlClose <- !Spacechar !Newline Inline :"__" !Alphanumeric
281 
282 StrongUl <- :TwoUlOpen                # "__" opener; discarded
283             ( !TwoUlClose Inline )*   # every Inline before the one that closes the span
284             TwoUlClose                # kept, as in StrongStar and EmphUl: TwoUlClose contains the last Inline of the span and discards only its own "__", so discarding the whole closer would lose that text
285 
286 #### Pandoc Extension ####
287 Strikeout <- :"~~" Inline :"~~"
288 
289 #### Pandoc Extension ####
290 Superscript <- :"^" Inline :"^"
291 
292 #### Pandoc Extension ####
293 Subscript <- :"~" Inline :"~"
294 
295 #### Pandoc Extension #### Inline math: "$", content not starting with a space/tab, closing "$" not preceded by a space/tab. The loop must stop at "$" (the !"$" guard), otherwise "." consumes the closing delimiter and the final "$" can never match. NOTE(review): "$" is not listed in SpecialChar, so Str may consume it before Inline tries this rule - confirm reachability.
296 Math <- :"$" !Spacechar (!(Spacechar "$") !"$" .)* :"$"
297 
298 Image <- "!" ( ExplicitLink / ReferenceLink )
299 
300 Link <-  ExplicitLink / ReferenceLink / AutoLink
301 
302 ReferenceLink <- ReferenceLinkDouble / ReferenceLinkSingle
303 
304 ReferenceLinkDouble <-  Label Spnl !"[]" Label
305 
306 ReferenceLinkSingle <-  Label (Spnl "[]")?
307 
308 ExplicitLink <-  Label Spnl :"(" Sp Source Spnl Title? Sp :")"
309 
310 Source  <- HeaderIdentifier #### Pandoc extension ####
311          / :"<" SourceContents :">"
312          / SourceContents
313 
314 HeaderIdentifier <~ :"#" [a-z][-_.a-z0-9]*
315 
316 SourceContents <~ ( ( !"(" !")" !">" Nonspacechar )+ / :"(" SourceContents :")")*
317 
318 Title <~ (TitleSingle / TitleDouble)
319 
320 TitleSingle <- :quote ( !( quote Sp ( ")" / Newline ) ) . )*  :quote
321 
322 TitleDouble <- :doublequote ( !( doublequote Sp ( ")" / Newline ) ) . )* :doublequote
323 
324 AutoLink <- AutoLinkUrl / AutoLinkEmail
325 
326 AutoLinkUrl <- :"<" ~([A-Za-z]+ "://" ( !Newline !">" . )+) :">"
327 
328 AutoLinkEmail <- :"<" ( "mailto:" )? ~([-A-Za-z0-9+_./!%~$]+ "@" ( !Newline !">" . )+) :">"
329 
330 Reference <- NonindentSpace !"[]" Label ":" Spnl RefSrc RefTitle BlankLine+
331 
332 Label <~ :"[" (!"]" Inline )* :"]"
333 
334 RefSrc <- Nonspacechar+
335 
336 RefTitle <- RefTitleSingle / RefTitleDouble / RefTitleParens / EmptyTitle
337 
338 EmptyTitle <- eps
339 
340 RefTitleSingle <- Spnl quote ( !( quote Sp Newline / Newline ) . )* quote
341 
342 RefTitleDouble <- Spnl doublequote ( !(doublequote Sp Newline / Newline) . )* doublequote
343 
344 RefTitleParens <- Spnl "(" ( !(")" Sp Newline / Newline) .)* ")"
345 
346 References <- ( Reference / SkipBlock )*
347 
348 Ticks1 <- backquote !backquote
349 Ticks2 <- backquote backquote !backquote
350 Ticks3 <- backquote backquote backquote !backquote
351 Ticks4 <- backquote backquote backquote backquote !backquote
352 Ticks5 <- backquote backquote backquote backquote backquote !backquote
353 
354 Tildes <- "~~~" "~"*
355 
356 ### Standard extension. Covers both GitHub Markdown and Pandoc Markdown. Fenced code block: opening fence, optional CodeOptions, line break, a non-empty body fused into one string, then the closing fence and a line break. Fences and line breaks are discarded.
357 CodeBlock <- ( :Ticks5 CodeOptions? :Newline ~(!Ticks5 .)+ :Ticks5 :Newline    # fence of exactly five backquotes; body runs to the next such fence
358              / :Ticks4 CodeOptions? :Newline ~(!Ticks4 .)+ :Ticks4 :Newline    # fence of exactly four backquotes
359              / :Ticks3 CodeOptions? :Newline ~(!Ticks3 .)+ :Ticks3 :Newline    # fence of exactly three backquotes
360              / :Tildes CodeOptions? :Newline ~(!Tildes .)+ :Tildes :Newline)   # fence of three or more tildes; the closing run need not have the same length
361 
362 Code <- ( :Ticks1 ~(!Ticks1 .)+ :Ticks1 CodeOptions?
363         / :Ticks2 ~(!Ticks2 .)+ :Ticks2 CodeOptions?)
364 
365 CodeOptions <- :"{" :Sp (;Option :Sp)* :Sp :"}"
366              / ;Option
367 
368 Option <~ "."? identifier (:"=" (digit+ / identifier))?
369 
370 #### Pandoc Extension #### Partially implemented (multiline footnotes)
371 FootnoteReference <- :"[^" FootnoteName :"]" !":"
372 FootnoteDefinition <- :"[^" FootnoteName :"]:" Line (BlankLine / Indent Line)*
373 FootnoteName <- (digit+ / identifier)
374 
375 RawHtml <- HtmlComment / HtmlBlockT("script" / "SCRIPT") / HtmlTag
376 
377 BlankLine <~ Sp Newline
378 
379 Quoted <- doublequote (!doublequote .)* doublequote / quote (!quote .)* quote
380 HtmlAttribute <- (AlphanumericAscii / "-")+ Spnl ("=" Spnl (Quoted / (!">" Nonspacechar)+))? Spnl
381 HtmlComment <- "<!--" (!"-->" .)* "-->"
382 HtmlTag <- "<" Spnl "/"? AlphanumericAscii+ Spnl HtmlAttribute* "/"? Spnl ">"
383 
384 Spacechar <- " " / "\t"
385 Nonspacechar <- !Spacechar !Newline .
386 Newline <- endOfLine
387 Sp <- Spacechar*
388 Spnl <- Sp (Newline Sp)?
389 
390 SpecialChar <- "*" / "_" / backquote / "&" / "[" / "]" / "(" / ")" / "<" / "!" / "#" / backslash / quote / doublequote / ExtendedSpecialChar
391 NormalChar <-    !( SpecialChar / Spacechar / Newline ) .
392 NonAlphanumeric <- !Alphanumeric . #[\001-\057] / [\072-\100] / [\133-\140] / [\173-\177]
393 Alphanumeric <- [0-9A-Za-z] / "\200" / "\201" / "\202" / "\203" / "\204"
394                / "\205" / "\206" / "\207" / "\210" / "\211" / "\212"
395                / "\213" / "\214" / "\215" / "\216" / "\217" / "\220"
396                / "\221" / "\222" / "\223" / "\224" / "\225" / "\226"
397                / "\227" / "\230" / "\231" / "\232" / "\233" / "\234"
398                / "\235" / "\236" / "\237" / "\240" / "\241" / "\242"
399                / "\243" / "\244" / "\245" / "\246" / "\247" / "\250"
400                / "\251" / "\252" / "\253" / "\254" / "\255" / "\256"
401                / "\257" / "\260" / "\261" / "\262" / "\263" / "\264"
402                / "\265" / "\266" / "\267" / "\270" / "\271" / "\272"
403                / "\273" / "\274" / "\275" / "\276" / "\277" / "\300"
404                / "\301" / "\302" / "\303" / "\304" / "\305" / "\306"
405                / "\307" / "\310" / "\311" / "\312" / "\313" / "\314"
406                / "\315" / "\316" / "\317" / "\320" / "\321" / "\322"
407                / "\323" / "\324" / "\325" / "\326" / "\327" / "\330"
408                / "\331" / "\332" / "\333" / "\334" / "\335" / "\336"
409                / "\337" / "\340" / "\341" / "\342" / "\343" / "\344"
410                / "\345" / "\346" / "\347" / "\350" / "\351" / "\352"
411                / "\353" / "\354" / "\355" / "\356" / "\357" / "\360"
412                / "\361" / "\362" / "\363" / "\364" / "\365" / "\366"
413                / "\367" / "\370" / "\371" / "\372" / "\373" / "\374"
414                / "\375" / "\376" / "\377"
415 
416 AlphanumericAscii <- [A-Za-z0-9]
417 Digit <- [0-9]
418 BOM <- "\357\273\277"
419 
420 HexEntity <-  "&" "#" [Xx] [0-9a-fA-F]+
421 DecEntity <-  "&" "#" [0-9]+
422 CharEntity <- "&" [A-Za-z0-9]+
423 
424 NonindentSpace <: ("   " / "  " / " ")?
425 Indent <- "\t" / "    "
426 IndentedLine <- :Indent Line
427 OptionallyIndentedLine <- Indent? Line
428 
429 Line <~ (!Newline .)* :Newline   # one line of raw text (possibly empty), fused into a single string; the terminating line break is discarded
430       / .+ :eoi                  # or, when no line break remains, the non-empty rest of the input up to its end
431 
432 SkipBlock <- HtmlBlock
433            / ( !"#" !SetextBottom1 !SetextBottom2 !BlankLine Line )+ BlankLine*
434            / BlankLine+
435            / Line
436 
437 ExtendedSpecialChar <- "." / "-" / quote / doublequote / "^"
438 
439 Smart <- Ellipsis / Dash / SingleQuoted / DoubleQuoted / Apostrophe
440 
441 Apostrophe <- quote
442 
443 Ellipsis <- "..." / ". . ."
444 
445 Dash <- EmDash / EnDash
446 
447 EnDash <- "-" &Digit
448 
449 EmDash <- "---" / "--"
450 
451 SingleQuoteStart <- quote !(Spacechar / Newline)
452 
453 SingleQuoteEnd <- quote !Alphanumeric
454 
455 SingleQuoted <- SingleQuoteStart ( !SingleQuoteEnd Inline )+  SingleQuoteEnd
456 
457 DoubleQuoteStart <- doublequote
458 
459 DoubleQuoteEnd <- doublequote
460 
461 DoubleQuoted <-  DoubleQuoteStart ( !DoubleQuoteEnd Inline )+ DoubleQuoteEnd
462 
463 NoteReference <- RawNoteReference
464 
465 RawNoteReference <~ :"[^"  ( !Newline !"]" . )+  :"]" !":"
466 
467 Note <- :NonindentSpace RawNoteReference :":" :Sp
468         RawNoteBlock
469         ( &Indent RawNoteBlock  )*
470 
471 InlineNote <- :"^[" ( !"]" Inline)+ :"]"
472 
473 Notes <- (Note / SkipBlock)*
474 
475 RawNoteBlock <- ( !BlankLine OptionallyIndentedLine )+ BlankLine*
476 `;