module pegged.examples.markdown;

/**
 * Pegged grammar text for Markdown, including several Pandoc extensions
 * (title block, tables, definition lists, footnotes, strikeout,
 * superscript / subscript, inline math) and fenced code blocks.
 *
 * The value is a plain string holding the grammar named "Markdown", whose
 * start rule is Doc. Inside the string, "#" starts a grammar comment that
 * runs to the end of the line.
 *
 * Several string literals in the grammar are sensitive to the exact number
 * of spaces they contain (NonindentSpace, Indent, LineBreak); do not
 * normalise whitespace inside double-quoted literals.
 */
enum MarkdownGrammar = `
# Markdown grammar.
# Taken from the PEG grammar at
# https://github.com/jgm/peg-markdown
# And simplified somewhat.

Markdown:

Doc <- BOM? TitleBlock? (%Block)*

#### Pandoc Extension #### Partially implemented
TitleBlock <- :"%" TitleText
              :"%" Date
              :"%" Authors
TitleText  <~ Line (:Spacechar Line)*
Date       <- ;Line
Authors    <- Author ((:";" (:Newline :Spacechar)? / :Newline :Spacechar) Author)*
Author     <- (!";" !Newline %Inlines)+

Block <- BlankLine*
         ( BlockQuote
         / FootnoteDefinition #### Pandoc Extension
         / CodeBlock
         / Verbatim
         / Note
         / Reference
         / HorizontalRule
         / Heading
         / Table              #### Pandoc Extension
         / DefinitionList     #### Pandoc Extension
         / OrderedList
         / BulletList
         / HtmlBlock
         / StyleBlock
         / Para
         / %Inlines )

Para <- :NonindentSpace %Inlines BlankLine+

#Plain <~ %Inlines

Heading <- SetextHeading / AtxHeading

SetextHeading <- SetextHeading1 / SetextHeading2

SetextHeading1 <~ &(Line SetextBottom1)
                  ( !Endline Inline )+ Sp? :Newline
                  :SetextBottom1

SetextHeading2 <~ &(Line SetextBottom2)
                  ( !Endline Inline)+ Sp? :Newline
                  :SetextBottom2

SetextBottom1 <~ "===" "="* Newline

SetextBottom2 <~ "---" "-"* Newline

AtxHeading <- AtxStart ~(Sp? AtxInline+ Sp? ("#"* Sp)? Newline)

AtxInline <- !Newline !(Sp? "#"* Sp Newline) Inline

AtxStart <- ( "######" / "#####" / "####" / "###" / "##" / "#" )

#### Pandoc Extension ####
# A semantic function must find the columns, based on the dashes
Table <- SimpleTable # For further extension (multiline tables and grid tables)

SimpleTable <- TableHeaders
               TableLine+
               :(BlankLine / "-"+ Newline BlankLine)
               TableCaption?

TableHeaders <- ;Line?
                ~("-"+) ~(Spacechar+) ~("-"+) (~(Spacechar+) ~("-"+))* :Newline

# ;Line makes all the inlines disappear. Is that wanted or not?
TableLine <- !(BlankLine / "-"+ Newline BlankLine) ;Line

TableCaption <- :"Table:" ;Line
              / :":" ;Line

BlockQuote <- ( ">" " "? Line ( !">" !BlankLine Line )* BlankLine* )+

NonblankIndentedLine <~ !BlankLine IndentedLine

VerbatimChunk <- BlankLine* NonblankIndentedLine+

Verbatim <~ VerbatimChunk+

HorizontalRule <- NonindentSpace
                  ( "*" Sp "*" Sp "*" (Sp "*")*
                  / "-" Sp "-" Sp "-" (Sp "-")*
                  / "_" Sp "_" Sp "_" (Sp "_")*)
                  Sp Newline BlankLine+

BulletList <- &Bullet (%BulletListTight / %BulletListLoose)

BulletListTight <- (%BulletListItemTight)+ :BlankLine* !Bullet

BulletListItemTight <- Bullet ListBlock
                       (!BlankLine ListContinuationBlock)*
                       #!ListContinuationBlock

BulletListLoose <- (%BulletListItem :BlankLine*)+

BulletListItem <- Bullet ListBlock ListContinuationBlock*

Bullet <: !HorizontalRule NonindentSpace ("+" / "*" / "-") Spacechar+

OrderedList <- &Enumerator (OrderedListTight / OrderedListLoose)

OrderedListTight <- (%OrderedListItemTight)+ :BlankLine* !Enumerator

OrderedListItemTight <- Enumerator ListBlock
                        (!BlankLine ListContinuationBlock)*
                        #!ListContinuationBlock # Is it necessary?

OrderedListLoose <- (%OrderedListItem :BlankLine*)+

OrderedListItem <- Enumerator ListBlock ListContinuationBlock*

Enumerator <: NonindentSpace ~[0-9]+ "." Spacechar+

ListBlock <- !BlankLine %Inlines ListBlockLine*

ListContinuationBlock <- BlankLine* (Indent ListBlock)+

ListBlockLine <- !BlankLine !( Indent? (Bullet / Enumerator)) !HorizontalRule Indent? %Inlines

DefinitionList <- Term :(BlankLine?) Definition+

Term <- (!Newline .)+ :Newline

Definition <- ( Spacechar Spacechar :(":"/"~") Spacechar Spacechar
              / Spacechar :(":"/"~") Spacechar Spacechar Spacechar
              / :(":"/"~") Spacechar Spacechar Spacechar Spacechar)
              Inlines :Newline
              IndentedLine*

# Parsers for different kinds of block-level HTML content.
# This is repetitive due to constraints of PEG grammar.

HtmlBlockOpen(Type)  <- :"<" :Spnl Type :Spnl HtmlAttribute* :">" :(Spnl*)
HtmlBlockClose(Type) <- :"<" :Spnl :"/" Type :Spnl :">" :(Spnl*)
HtmlBlockT(Type)     <- ;HtmlBlockOpen(Type)
                        (%HtmlBlockInTags / NonHtml)*
                        ;HtmlBlockClose(Type)
# Hack. This should use a HtmlBlockClose(every possibility)
NonHtml <- (!("<" Spnl "/") Inline)*

HtmlBlockInTags <- HtmlBlockT("address" / "ADDRESS")
                 / HtmlBlockT("blockquote" / "BLOCKQUOTE")
                 / HtmlBlockT("center" / "CENTER")
                 / HtmlBlockT("dir" / "DIR")
                 / HtmlBlockT("div" / "DIV")
                 / HtmlBlockT("dl" / "DL")
                 / HtmlBlockT("fieldset" / "FIELDSET")
                 / HtmlBlockT("form" / "FORM")
                 / HtmlBlockT("h1" / "H1")
                 / HtmlBlockT("h2" / "H2")
                 / HtmlBlockT("h3" / "H3")
                 / HtmlBlockT("h4" / "H4")
                 / HtmlBlockT("h5" / "H5")
                 / HtmlBlockT("h6" / "H6")
                 / HtmlBlockT("menu" / "MENU")
                 / HtmlBlockT("noframes" / "NOFRAMES")
                 / HtmlBlockT("noscript" / "NOSCRIPT")
                 / HtmlBlockT("ol" / "OL")
                 / HtmlBlockT("p" / "P")
                 / HtmlBlockT("pre" / "PRE")
                 / HtmlBlockT("table" / "TABLE")
                 / HtmlBlockT("ul" / "UL")
                 / HtmlBlockT("dd" / "DD")
                 / HtmlBlockT("dt" / "DT")
                 / HtmlBlockT("frameset" / "FRAMESET")
                 / HtmlBlockT("li" / "LI")
                 / HtmlBlockT("tbody" / "TBODY")
                 / HtmlBlockT("td" / "TD")
                 / HtmlBlockT("tfoot" / "TFOOT")
                 / HtmlBlockT("th" / "TH")
                 / HtmlBlockT("thead" / "THEAD")
                 / HtmlBlockT("tr" / "TR")
                 / HtmlBlockT("script" / "SCRIPT")

HtmlBlock <- (%HtmlBlockInTags / HtmlComment / HtmlBlockSelfClosing) BlankLine+

HtmlBlockSelfClosing <- "<" Spnl HtmlBlockType Spnl HtmlAttribute* "/" Spnl ">"

HtmlBlockType <- "address" / "blockquote" / "center" / "dir" / "div" / "dl" / "fieldset" / "form" / "h1" / "h2" / "h3" /
                 "h4" / "h5" / "h6" / "hr" / "isindex" / "menu" / "noframes" / "noscript" / "ol" / "p" / "pre" / "table" /
                 "ul" / "dd" / "dt" / "frameset" / "li" / "tbody" / "td" / "tfoot" / "th" / "thead" / "tr" / "script" /
                 "ADDRESS" / "BLOCKQUOTE" / "CENTER" / "DIR" / "DIV" / "DL" / "FIELDSET" / "FORM" / "H1" / "H2" / "H3" /
                 "H4" / "H5" / "H6" / "HR" / "ISINDEX" / "MENU" / "NOFRAMES" / "NOSCRIPT" / "OL" / "P" / "PRE" / "TABLE" /
                 "UL" / "DD" / "DT" / "FRAMESET" / "LI" / "TBODY" / "TD" / "TFOOT" / "TH" / "THEAD" / "TR" / "SCRIPT"

StyleOpen   <- "<" Spnl ("style" / "STYLE") Spnl HtmlAttribute* ">"
StyleClose  <- "<" Spnl "/" ("style" / "STYLE") Spnl ">"
InStyleTags <- StyleOpen (!StyleClose .)* StyleClose
StyleBlock  <- InStyleTags BlankLine*

Inlines <- (!Endline %Inline )+ Endline?

Inline <- Str
        / Endline
        / UlOrStarLine
        / Space
        / Strong
        / Emph
        / Strikeout          #### Pandoc Extension
        / Superscript        #### Pandoc Extension
        / Subscript          #### Pandoc Extension
        / Math               #### Pandoc Extension
        / FootnoteReference  #### Pandoc Extension
        / Image
        / Link
        / NoteReference
        / InlineNote
        / Code
        / RawHtml
        / Entity
        / EscapedChar
        / Smart
        / Symbol

Space <~ Spacechar+

Str <~ NormalChar+ StrChunk*

StrChunk <~ (NormalChar / "_"+ &Alphanumeric)+ / AposChunk

AposChunk <- quote &Alphanumeric

EscapedChar <- backslash (backquote / backslash / [-/_*{}[\]()#+.!><])

Entity <- HexEntity / DecEntity / CharEntity

Endline <~ LineBreak / TerminalEndline / NormalEndline

# A newline is not a normal endline when the following line is underlined
# by a Setext rule, i.e. a run of "=" or "-" as in SetextBottom1 and
# SetextBottom2.
NormalEndline <- Sp Newline !BlankLine !">" !AtxStart
                 !(Line ("===" "="* / "---" "-"*) Newline)

TerminalEndline <- Sp Newline eoi

# Two trailing spaces before the newline force a hard line break.
LineBreak <~ "  " NormalEndline

Symbol <~ SpecialChar

UlOrStarLine <~ UlLine / StarLine
StarLine     <- "****" "*"* / Spacechar "*"+ &Spacechar
UlLine       <- "____" "_"* / Spacechar "_"+ &Spacechar

Emph <~ EmphStar / EmphUl

OneStarOpen  <- !StarLine "*" !Spacechar !Newline
OneStarClose <- !Spacechar !Newline Inline :"*"

EmphStar <- :OneStarOpen
            ( !OneStarClose Inline )*
            OneStarClose

OneUlOpen  <- !UlLine "_" !Spacechar !Newline
OneUlClose <- !Spacechar !Newline Inline :"_" !Alphanumeric

EmphUl <- :OneUlOpen
          ( !OneUlClose Inline )*
          OneUlClose

Strong <~ StrongStar / StrongUl

TwoStarOpen  <- !StarLine "**" !Spacechar !Newline
TwoStarClose <- !Spacechar !Newline Inline :"**"

StrongStar <- :TwoStarOpen
              ( !TwoStarClose Inline )*
              TwoStarClose

TwoUlOpen  <- !UlLine "__" !Spacechar !Newline
TwoUlClose <- !Spacechar !Newline Inline :"__" !Alphanumeric

# The closer is kept rather than discarded: TwoUlClose carries the last
# Inline of the span, exactly like the closers of the three sibling rules.
StrongUl <- :TwoUlOpen
            ( !TwoUlClose Inline )*
            TwoUlClose

#### Pandoc Extension ####
Strikeout <- :"~~" Inline :"~~"

#### Pandoc Extension ####
Superscript <- :"^" Inline :"^"

#### Pandoc Extension ####
Subscript <- :"~" Inline :"~"

#### Pandoc Extension ####
Math <- :"$" !Spacechar (!(Spacechar "$") .)* :"$"

Image <- "!" ( ExplicitLink / ReferenceLink )

Link <- ExplicitLink / ReferenceLink / AutoLink

ReferenceLink <- ReferenceLinkDouble / ReferenceLinkSingle

ReferenceLinkDouble <- Label Spnl !"[]" Label

ReferenceLinkSingle <- Label (Spnl "[]")?

ExplicitLink <- Label Spnl :"(" Sp Source Spnl Title? Sp :")"

Source <- HeaderIdentifier #### Pandoc extension ####
        / :"<" SourceContents :">"
        / SourceContents

HeaderIdentifier <~ :"#" [a-z][-_.a-z0-9]*

SourceContents <~ ( ( !"(" !")" !">" Nonspacechar )+ / :"(" SourceContents :")")*

Title <~ (TitleSingle / TitleDouble)

TitleSingle <- :quote ( !( quote Sp ( ")" / Newline ) ) . )* :quote

TitleDouble <- :doublequote ( !( doublequote Sp ( ")" / Newline ) ) . )* :doublequote

AutoLink <- AutoLinkUrl / AutoLinkEmail

AutoLinkUrl <- :"<" ~([A-Za-z]+ "://" ( !Newline !">" . )+) :">"

AutoLinkEmail <- :"<" ( "mailto:" )? ~([-A-Za-z0-9+_./!%~$]+ "@" ( !Newline !">" . )+) :">"

Reference <- NonindentSpace !"[]" Label ":" Spnl RefSrc RefTitle BlankLine+

Label <~ :"[" (!"]" Inline )* :"]"

RefSrc <- Nonspacechar+

RefTitle <- RefTitleSingle / RefTitleDouble / RefTitleParens / EmptyTitle

EmptyTitle <- eps

RefTitleSingle <- Spnl quote ( !( quote Sp Newline / Newline ) . )* quote

RefTitleDouble <- Spnl doublequote ( !(doublequote Sp Newline / Newline) . )* doublequote

RefTitleParens <- Spnl "(" ( !(")" Sp Newline / Newline) .)* ")"

References <- ( Reference / SkipBlock )*

Ticks1 <- backquote !backquote
Ticks2 <- backquote backquote !backquote
Ticks3 <- backquote backquote backquote !backquote
Ticks4 <- backquote backquote backquote backquote !backquote
Ticks5 <- backquote backquote backquote backquote backquote !backquote

Tildes <- "~~~" "~"*

### Standard extension. Covers both Github Markdown and Pandoc Markdown
CodeBlock <- ( :Ticks5 CodeOptions? :Newline ~(!Ticks5 .)+ :Ticks5 :Newline
             / :Ticks4 CodeOptions? :Newline ~(!Ticks4 .)+ :Ticks4 :Newline
             / :Ticks3 CodeOptions? :Newline ~(!Ticks3 .)+ :Ticks3 :Newline
             / :Tildes CodeOptions? :Newline ~(!Tildes .)+ :Tildes :Newline)

Code <- ( :Ticks1 ~(!Ticks1 .)+ :Ticks1 CodeOptions?
        / :Ticks2 ~(!Ticks2 .)+ :Ticks2 CodeOptions?)

CodeOptions <- :"{" :Sp (;Option :Sp)* :Sp :"}"
             / ;Option

Option <~ "."? identifier (:"=" (digit+ / identifier))?

#### Pandoc Extension #### Partially implemented (multiline footnotes)
FootnoteReference  <- :"[^" FootnoteName :"]" !":"
FootnoteDefinition <- :"[^" FootnoteName :"]:" Line (BlankLine / Indent Line)*
FootnoteName       <- (digit+ / identifier)

RawHtml <- HtmlComment / HtmlBlockT("script" / "SCRIPT") / HtmlTag

BlankLine <~ Sp Newline

Quoted        <- doublequote (!doublequote .)* doublequote / quote (!quote .)* quote
HtmlAttribute <- (AlphanumericAscii / "-")+ Spnl ("=" Spnl (Quoted / (!">" Nonspacechar)+))? Spnl
HtmlComment   <- "<!--" (!"-->" .)* "-->"
HtmlTag       <- "<" Spnl "/"? AlphanumericAscii+ Spnl HtmlAttribute* "/"? Spnl ">"

Spacechar    <- " " / "\t"
Nonspacechar <- !Spacechar !Newline .
Newline      <- endOfLine
Sp           <- Spacechar*
Spnl         <- Sp (Newline Sp)?

SpecialChar <- "*" / "_" / backquote / "&" / "[" / "]" / "(" / ")" / "<" / "!" / "#" / backslash / quote / doublequote / ExtendedSpecialChar
NormalChar <- !( SpecialChar / Spacechar / Newline ) .
NonAlphanumeric <- !Alphanumeric . #[\001-\057] / [\072-\100] / [\133-\140] / [\173-\177]
Alphanumeric <- [0-9A-Za-z] / "\200" / "\201" / "\202" / "\203" / "\204"
              / "\205" / "\206" / "\207" / "\210" / "\211" / "\212"
              / "\213" / "\214" / "\215" / "\216" / "\217" / "\220"
              / "\221" / "\222" / "\223" / "\224" / "\225" / "\226"
              / "\227" / "\230" / "\231" / "\232" / "\233" / "\234"
              / "\235" / "\236" / "\237" / "\240" / "\241" / "\242"
              / "\243" / "\244" / "\245" / "\246" / "\247" / "\250"
              / "\251" / "\252" / "\253" / "\254" / "\255" / "\256"
              / "\257" / "\260" / "\261" / "\262" / "\263" / "\264"
              / "\265" / "\266" / "\267" / "\270" / "\271" / "\272"
              / "\273" / "\274" / "\275" / "\276" / "\277" / "\300"
              / "\301" / "\302" / "\303" / "\304" / "\305" / "\306"
              / "\307" / "\310" / "\311" / "\312" / "\313" / "\314"
              / "\315" / "\316" / "\317" / "\320" / "\321" / "\322"
              / "\323" / "\324" / "\325" / "\326" / "\327" / "\330"
              / "\331" / "\332" / "\333" / "\334" / "\335" / "\336"
              / "\337" / "\340" / "\341" / "\342" / "\343" / "\344"
              / "\345" / "\346" / "\347" / "\350" / "\351" / "\352"
              / "\353" / "\354" / "\355" / "\356" / "\357" / "\360"
              / "\361" / "\362" / "\363" / "\364" / "\365" / "\366"
              / "\367" / "\370" / "\371" / "\372" / "\373" / "\374"
              / "\375" / "\376" / "\377"

AlphanumericAscii <- [A-Za-z0-9]
Digit <- [0-9]
BOM <- "\357\273\277"

HexEntity  <- "&" "#" [Xx] [0-9a-fA-F]+
DecEntity  <- "&" "#" [0-9]+
CharEntity <- "&" [A-Za-z0-9]+

# The widths of the literals below are significant: up to three leading
# spaces do not count as indentation, while four spaces (or one tab) do.
NonindentSpace <: ("   " / "  " / " ")?
Indent <- "\t" / "    "
IndentedLine <- :Indent Line
OptionallyIndentedLine <- Indent? Line

Line <~ (!Newline .)* :Newline
      / .+ :eoi

SkipBlock <- HtmlBlock
           / ( !"#" !SetextBottom1 !SetextBottom2 !BlankLine Line )+ BlankLine*
           / BlankLine+
           / Line

ExtendedSpecialChar <- "." / "-" / quote / doublequote / "^"

Smart <- Ellipsis / Dash / SingleQuoted / DoubleQuoted / Apostrophe

Apostrophe <- quote

Ellipsis <- "..." / ". . ."

Dash <- EmDash / EnDash

EnDash <- "-" &Digit

EmDash <- "---" / "--"

SingleQuoteStart <- quote !(Spacechar / Newline)

SingleQuoteEnd <- quote !Alphanumeric

SingleQuoted <- SingleQuoteStart ( !SingleQuoteEnd Inline )+ SingleQuoteEnd

DoubleQuoteStart <- doublequote

DoubleQuoteEnd <- doublequote

DoubleQuoted <- DoubleQuoteStart ( !DoubleQuoteEnd Inline )+ DoubleQuoteEnd

NoteReference <- RawNoteReference

RawNoteReference <~ :"[^" ( !Newline !"]" . )+ :"]" !":"

Note <- :NonindentSpace RawNoteReference :":" :Sp
        RawNoteBlock
        ( &Indent RawNoteBlock )*

InlineNote <- :"^[" ( !"]" Inline)+ :"]"

Notes <- (Note / SkipBlock)*

RawNoteBlock <- ( !BlankLine OptionallyIndentedLine )+ BlankLine*
`;