/**
 * Pegged example: a PEG grammar for Python source code.
 *
 * The grammar text is kept in `pythonGrammar` and turned into a parser named
 * `Python` at compile time by `mixin(grammar(pythonGrammar))`.
 */
module pegged.examples.python;


import pegged.grammar;

// https://docs.python.org/3/reference/grammar.html
// 3.10.3 Documentation » The Python Language Reference » 10. Full Grammar specification

/**
 * Grammar definition consumed by `pegged.grammar.grammar`.
 *
 * Arrow conventions used below:
 *   `<`   space arrow: whitespace is allowed between the elements of the rule
 *         (used for syntactic rules);
 *   `<-`  plain arrow: elements must be adjacent (used for lexical tokens);
 *   `<~`  fuse arrow: the matched pieces are concatenated into one match.
 *
 * Note: the grammar lives in a backtick (WYSIWYG) string, so no backtick may
 * appear anywhere inside it, including in `#` comments.
 */
enum string pythonGrammar = `
Python:

# PEG grammar for Python

file <- statements? # ENDMARKER
interactive < statement_newline
eval < expressions NEWLINE* # ENDMARKER
func_type < "(" (type_expressions)? ")" "->" expression NEWLINE* # ENDMARKER
fstring < star_expressions

# type_expressions allow */** but ignore them
type_expressions <
    ((expression ("," expression)*) "," "*" expression "," "**" expression)
    / ((expression ("," expression)*) "," "*" expression)
    / ((expression ("," expression)*) "," "**" expression)
    / ("*" expression "," "**" expression)
    / ("*" expression)
    / ("**" expression)
    / (expression ("," expression)*)

statements < statement+
statement < compound_stmt / simple_stmts
statement_newline <
    (compound_stmt NEWLINE)
    / simple_stmts
    / NEWLINE
#   / ENDMARKER
simple_stmts <
    (simple_stmt !";" NEWLINE) # Not needed, there for speedup
    / ((simple_stmt (";" simple_stmt)*) (";")? NEWLINE)
# NOTE: assignment MUST precede expression, else parsing a simple assignment
# will throw a SyntaxError.
simple_stmt <
    assignment
    / star_expressions
    / return_stmt
    / import_stmt
    / raise_stmt
    / "pass"
    / del_stmt
    / yield_stmt
    / assert_stmt
    / "break"
    / "continue"
    / global_stmt
    / nonlocal_stmt
compound_stmt <
    function_def
    / if_stmt
    / class_def
    / with_stmt
    / for_stmt
    / try_stmt
    / while_stmt
    / match_stmt

# NOTE: annotated_rhs may start with "yield"; yield_expr must start with "yield"
assignment <
    (NAME ":" expression ("=" annotated_rhs )?)
    / ((("(" single_target ")")
        / single_subscript_attribute_target) ":" expression ("=" annotated_rhs )?)
    / ((star_targets "=" )+ (yield_expr / star_expressions) !"=" (TYPE_COMMENT)?)
    / (single_target augassign (yield_expr / star_expressions))

augassign <
    "+="
    / "-="
    / "*="
    / "@="
    / "/="
    / "%="
    / "&="
    / "|="
    / "^="
    / "<<="
    / ">>="
    / "**="
    / "//="

global_stmt < "global" (NAME ("," NAME)*)
nonlocal_stmt < "nonlocal" (NAME ("," NAME)*)

yield_stmt < yield_expr

assert_stmt < ("assert" expression ("," expression )?)

del_stmt <
    ("del" del_targets &(";" / NEWLINE))

import_stmt < import_name / import_from
import_name < ("import" dotted_as_names)
# note below: the ("." | "...") is necessary because "..." is tokenized as ELLIPSIS
import_from <
    ("from" ("..." / ".")* dotted_name "import" import_from_targets)
    / ("from" ("..." / ".")+ "import" import_from_targets)
import_from_targets <
    ("(" import_from_as_names (",")? ")")
    / (import_from_as_names !",")
    / "*"
import_from_as_names <
    (import_from_as_name ("," import_from_as_name)*)
import_from_as_name <
    NAME ("as" NAME )?
dotted_as_names <
    (dotted_as_name ("," dotted_as_name)*)
dotted_as_name <
    (dotted_name ("as" NAME )?)
dotted_name <
    (NAME ("." NAME)+)
    / NAME

if_stmt <
    ("if" named_expression ":" block elif_stmt)
    / ("if" named_expression ":" block (else_block)?)
elif_stmt <
    ("elif" named_expression ":" block elif_stmt)
    / ("elif" named_expression ":" block (else_block)?)
else_block <
    ("else" ":" block)

while_stmt <
    ("while" named_expression ":" block (else_block)?)

for_stmt <
    ("for" star_targets "in" star_expressions ":" (TYPE_COMMENT)? block (else_block)?)
    / (ASYNC "for" star_targets "in" star_expressions ":" (TYPE_COMMENT)? block (else_block)?)

with_stmt <
    ("with" "(" (with_item ("," with_item)*) ","? ")" ":" block)
    / ("with" (with_item ("," with_item)*) ":" (TYPE_COMMENT)? block)
    / (ASYNC "with" "(" (with_item ("," with_item)*) ","? ")" ":" block)
    / (ASYNC "with" (with_item ("," with_item)*) ":" (TYPE_COMMENT)? block)

with_item <
    (expression "as" star_target &("," / ")" / ":"))
    / (expression)

try_stmt <
    ("try" ":" block finally_block)
    / ("try" ":" block except_block+ (else_block)? (finally_block)?)
except_block <
    ("except" expression ("as" NAME )? ":" block)
    / ("except" ":" block)
finally_block <
    ("finally" ":" block)

match_stmt <
    ("match" subject_expr ":" NEWLINE INDENT case_block+ DEDENT)
subject_expr <
    (star_named_expression "," star_named_expressions?)
    / (named_expression)
case_block <
    ("case" patterns guard? ":" block)
guard < "if" named_expression

patterns <
    (open_sequence_pattern)
    / (pattern)
pattern <
    (as_pattern)
    / (or_pattern)
as_pattern <
    (or_pattern "as" pattern_capture_target)
or_pattern <
    ((closed_pattern ("|" closed_pattern)*))
closed_pattern <
    (literal_pattern)
    / (capture_pattern)
    / (wildcard_pattern)
    / (value_pattern)
    / (group_pattern)
    / (sequence_pattern)
    / (mapping_pattern)
    / (class_pattern)

# Literal patterns are used for equality and identity constraints
literal_pattern <
    (signed_number !("+" / "-"))
    / (complex_number)
    / (strings)
    / ("None")
    / ("True")
    / ("False")

# Literal expressions are used to restrict permitted mapping pattern keys
literal_expr <
    (signed_number !("+" / "-"))
    / (complex_number)
    / (strings)
    / ("None")
    / ("True")
    / ("False")

complex_number <
    (signed_real_number "+" imaginary_number)
    / (signed_real_number "-" imaginary_number)

signed_number <
    (NUMBER)
    / ("-" NUMBER)

signed_real_number <
    (real_number)
    / ("-" real_number)

real_number <
    (NUMBER)

imaginary_number <
    (NUMBER)

capture_pattern <
    (pattern_capture_target)

pattern_capture_target <
    (!"_" NAME !("." / "(" / "="))

wildcard_pattern <
    ("_")

value_pattern <
    (attr !("." / "(" / "="))
attr <
    (name_or_attr "." NAME)
name_or_attr <
    (attr)
    / (NAME)

group_pattern <
    ("(" pattern ")")

sequence_pattern <
    ("[" maybe_sequence_pattern? "]")
    / ("(" open_sequence_pattern? ")")
open_sequence_pattern <
    (maybe_star_pattern "," maybe_sequence_pattern?)
maybe_sequence_pattern <
    ((maybe_star_pattern ("," maybe_star_pattern)*) ","?)
maybe_star_pattern <
    (star_pattern)
    / (pattern)
star_pattern <
    ("*" pattern_capture_target)
    / ("*" wildcard_pattern)

mapping_pattern <
    ("{" "}")
    / ("{" double_star_pattern ","? "}")
    / ("{" items_pattern "," double_star_pattern ","? "}")
    / ("{" items_pattern ","? "}")
items_pattern <
    ((key_value_pattern ("," key_value_pattern)*))
key_value_pattern <
    ((literal_expr / attr) ":" pattern)
double_star_pattern <
    ("**" pattern_capture_target)

class_pattern <
    (name_or_attr "(" ")")
    / (name_or_attr "(" positional_patterns ","? ")")
    / (name_or_attr "(" keyword_patterns ","? ")")
    / (name_or_attr "(" positional_patterns "," keyword_patterns ","? ")")
positional_patterns <
    ((pattern ("," pattern)*))
keyword_patterns <
    ((keyword_pattern ("," keyword_pattern)*))
keyword_pattern <
    (NAME "=" pattern)

return_stmt <
    ("return" (star_expressions)?)

raise_stmt <
    ("raise" expression ("from" expression )?)
    / ("raise")

function_def <
    (decorators function_def_raw)
    / (function_def_raw)

function_def_raw <
    ("def" NAME "(" (params)? ")" ("->" expression )? ":" (func_type_comment)? block)
    / (ASYNC "def" NAME "(" (params)? ")" ("->" expression )? ":" (func_type_comment)? block)
func_type_comment <
    (NEWLINE TYPE_COMMENT &(NEWLINE INDENT)) # Must be followed by indented block
    / (TYPE_COMMENT)

params <
    (parameters)

parameters <
    (slash_no_default param_no_default* param_with_default* (star_etc)?)
    / (slash_with_default param_with_default* (star_etc)?)
    / (param_no_default+ param_with_default* (star_etc)?)
    / (param_with_default+ (star_etc)?)
    / (star_etc)

# Some duplication here because we can't write ("," / &")"),
# which is because we don't support empty alternatives (yet).
#
slash_no_default <
    (param_no_default+ "/" ",")
    / (param_no_default+ "/" &")")
slash_with_default <
    (param_no_default* param_with_default+ "/" ",")
    / (param_no_default* param_with_default+ "/" &")")

star_etc <
    ("*" param_no_default param_maybe_default* (kwds)?)
    / ("*" "," param_maybe_default+ (kwds)?)
    / (kwds)

kwds < "**" param_no_default

# One parameter. This *includes* a following comma and type comment.
#
# There are three styles:
# - No default
# - With default
# - Maybe with default
#
# There are two alternative forms of each, to deal with type comments:
# - Ends in a comma followed by an optional type comment
# - No comma, optional type comment, must be followed by close paren
# The latter form is for a final parameter without trailing comma.
#
param_no_default <
    (param "," TYPE_COMMENT?)
    / (param TYPE_COMMENT? &")")
param_with_default <
    (param default_expr "," TYPE_COMMENT?)
    / (param default_expr TYPE_COMMENT? &")")
param_maybe_default <
    (param default_expr? "," TYPE_COMMENT?)
    / (param default_expr? TYPE_COMMENT? &")")
param < NAME annotation?

annotation < ":" expression
default_expr < "=" expression

decorators < ("@" named_expression NEWLINE )+

class_def <
    (decorators ClassDeclaration)
    / (ClassDeclaration)

ClassDeclaration <
    ("class" NAME ("(" arguments? ")")? ":" block)

block <
    (NEWLINE INDENT statements DEDENT)
    / (simple_stmts)

star_expressions <
    (star_expression ("," star_expression )+ (",")?)
    / (star_expression ",")
    / (star_expression)
star_expression <
    ("*" bitwise_or)
    / (expression)

star_named_expressions < (star_named_expression ("," star_named_expression)*) (",")?
star_named_expression <
    ("*" bitwise_or)
    / (named_expression)


assignment_expression <
    (NAME ":=" expression)

named_expression <
    (assignment_expression)
    / (expression !":=")

annotated_rhs < yield_expr / star_expressions

expressions <
    (expression ("," expression )+ (",")?)
    / (expression ",")
    / (expression)
expression <
    (disjunction "if" disjunction "else" expression)
    / (disjunction)
    / (lambdef)

lambdef <
    ("lambda" (lambda_params)? ":" expression)

lambda_params <
    (lambda_parameters)

# lambda_parameters etc. duplicates parameters but without annotations
# or type comments, and if there's no comma after a parameter, we expect
# a colon, not a close parenthesis. (For more, see parameters above.)
#
lambda_parameters <
    (lambda_slash_no_default lambda_param_no_default* lambda_param_with_default* (lambda_star_etc)?)
    / (lambda_slash_with_default lambda_param_with_default* (lambda_star_etc)?)
    / (lambda_param_no_default+ lambda_param_with_default* (lambda_star_etc)?)
    / (lambda_param_with_default+ (lambda_star_etc)?)
    / (lambda_star_etc)

lambda_slash_no_default <
    (lambda_param_no_default+ "/" ",")
    / (lambda_param_no_default+ "/" &":")
lambda_slash_with_default <
    (lambda_param_no_default* lambda_param_with_default+ "/" ",")
    / (lambda_param_no_default* lambda_param_with_default+ "/" &":")

lambda_star_etc <
    ("*" lambda_param_no_default lambda_param_maybe_default* (lambda_kwds)?)
    / ("*" "," lambda_param_maybe_default+ (lambda_kwds)?)
    / (lambda_kwds)

lambda_kwds < "**" lambda_param_no_default

lambda_param_no_default <
    (lambda_param ",")
    / (lambda_param &":")
lambda_param_with_default <
    (lambda_param default_expr ",")
    / (lambda_param default_expr &":")
lambda_param_maybe_default <
    (lambda_param default_expr? ",")
    / (lambda_param default_expr? &":")
lambda_param < NAME

disjunction <
    (conjunction ("or" conjunction )+)
    / (conjunction)
conjunction <
    (inversion ("and" inversion )+)
    / (inversion)
inversion <
    ("not" inversion)
    / (comparison)
comparison <
    (bitwise_or compare_op_bitwise_or_pair+)
    / (bitwise_or)
compare_op_bitwise_or_pair <
    (eq_bitwise_or)
    / (noteq_bitwise_or)
    / (lte_bitwise_or)
    / (lt_bitwise_or)
    / (gte_bitwise_or)
    / (gt_bitwise_or)
    / (notin_bitwise_or)
    / (in_bitwise_or)
    / (isnot_bitwise_or)
    / (is_bitwise_or)
eq_bitwise_or < "==" bitwise_or
noteq_bitwise_or <
    (("!=" ) bitwise_or)
lte_bitwise_or < "<=" bitwise_or
lt_bitwise_or < "<" bitwise_or
gte_bitwise_or < ">=" bitwise_or
gt_bitwise_or < ">" bitwise_or
notin_bitwise_or < "not" "in" bitwise_or
in_bitwise_or < "in" bitwise_or
isnot_bitwise_or < "is" "not" bitwise_or
is_bitwise_or < "is" bitwise_or

bitwise_or <
    (bitwise_or "|" bitwise_xor)
    / (bitwise_xor)
bitwise_xor <
    (bitwise_xor "^" bitwise_and)
    / (bitwise_and)
bitwise_and <
    (bitwise_and "&" shift_expr)
    / (shift_expr)
shift_expr <
    (shift_expr "<<" sum)
    / (shift_expr ">>" sum)
    / (sum)

sum <
    (sum "+" term)
    / (sum "-" term)
    / (term)
term <
    (term "*" factor)
    / (term "/" factor)
    / (term "//" factor)
    / (term "%" factor)
    / (term "@" factor)
    / (factor)
factor <
    ("+" factor)
    / ("-" factor)
    / ("~" factor)
    / (power)
power <
    (await_primary "**" factor)
    / (await_primary)
await_primary <
    ("await" primary)
    / (primary)
primary <
    (primary "." NAME)
    / (primary genexp)
    / (primary "(" (arguments)? ")")
    / (primary "[" slices "]")
    / (atom)

slices <
    (slice !",")
    / ((slice ("," slice)*) (",")?)
slice <
    ((expression)? ":" (expression)? (":" expression? )?)
    / (named_expression)
atom <
    (NAME)
    / ("True")
    / ("False")
    / ("None")
    / (strings)
    / (NUMBER)
    / ((tuple_expr / group / genexp))
    / ((list / listcomp))
    / ((dict / set / dictcomp / setcomp))
    / ("...")

strings < STRING+
list <
    ("[" (star_named_expressions)? "]")
listcomp <
    ("[" named_expression for_if_clauses "]")
tuple_expr <
    "(" (star_named_expression "," star_named_expressions? )? ")"
group <
    ("(" (yield_expr / named_expression) ")")
genexp <
    ("(" ( assignment_expression / (expression !":=")) for_if_clauses ")")
set < "{" star_named_expressions "}"
setcomp <
    ("{" named_expression for_if_clauses "}")
dict <
    ("{" (double_starred_kvpairs)? "}")

dictcomp <
    ("{" kvpair for_if_clauses "}")
double_starred_kvpairs < (double_starred_kvpair ("," double_starred_kvpair)*) (",")?
double_starred_kvpair <
    ("**" bitwise_or)
    / (kvpair)
kvpair < expression ":" expression
for_if_clauses <
    (for_if_clause+)
for_if_clause <
    (ASYNC "for" star_targets "in" disjunction ("if" disjunction )*)
    / ("for" star_targets "in" disjunction ("if" disjunction )*)

yield_expr <
    ("yield" "from" expression)
    / ("yield" (star_expressions)?)

arguments <
    (args (",")? &")")
args <
    ((args_helper ("," args_helper)*) ("," kwargs )?)
    / (kwargs)

args_helper < ((starred_expression / ( (assignment_expression / expression) !":=")) !"=")
kwargs <
    ((kwarg_or_starred ("," kwarg_or_starred)*) "," (kwarg_or_double_starred ("," kwarg_or_double_starred)*))
    / ((kwarg_or_starred ("," kwarg_or_starred)*))
    / ((kwarg_or_double_starred ("," kwarg_or_double_starred)*))
starred_expression <
    ("*" expression)
kwarg_or_starred <
    (NAME "=" expression)
    / (starred_expression)
kwarg_or_double_starred <
    (NAME "=" expression)
    / ("**" expression)

# NOTE: star_targets may contain *bitwise_or, targets may not.
star_targets <
    (star_target !",")
    / (star_target ("," star_target )* (",")?)
star_targets_list_seq < (star_target ("," star_target)*) (",")?
star_targets_tuple_seq <
    (star_target ("," star_target )+ (",")?)
    / (star_target ",")
star_target <
    ("*" (!"*" star_target))
    / (target_with_star_atom)
target_with_star_atom <
    (t_primary "." NAME !t_lookahead)
    / (t_primary "[" slices "]" !t_lookahead)
    / (star_atom)
star_atom <
    (NAME)
    / ("(" target_with_star_atom ")")
    / ("(" (star_targets_tuple_seq)? ")")
    / ("[" (star_targets_list_seq)? "]")

single_target <
    (single_subscript_attribute_target)
    / (NAME)
    / ("(" single_target ")")
single_subscript_attribute_target <
    (t_primary "." NAME !t_lookahead)
    / (t_primary "[" slices "]" !t_lookahead)

del_targets < (del_target ("," del_target)*) (",")?
del_target <
    (t_primary "." NAME !t_lookahead)
    / (t_primary "[" slices "]" !t_lookahead)
    / (del_t_atom)
del_t_atom <
    (NAME)
    / ("(" del_target ")")
    / ("(" (del_targets)? ")")
    / ("[" (del_targets)? "]")

t_primary <
    (t_primary "." NAME &t_lookahead)
    / (t_primary "[" slices "]" &t_lookahead)
    / (t_primary genexp &t_lookahead)
    / (t_primary "(" (arguments)? ")" &t_lookahead)
    / (atom &t_lookahead)
t_lookahead < "(" / "[" / "."

STRING <- doublequote (DQChar)* doublequote StringPostfix?

DQChar <- EscapeSequence
    / (!doublequote .)

EscapeSequence <- backslash ( quote
    / doublequote
    / backslash
    / [abfnrtv]
    / ('x' HexDigit HexDigit)
    / ('u' HexDigit HexDigit HexDigit HexDigit)
    / ('U' HexDigit HexDigit HexDigit HexDigit HexDigit HexDigit HexDigit HexDigit)
    )

StringPostfix < "c" / "w" / "d"


ASYNC < "async"

# An identifier is rejected only when it IS a keyword, i.e. when the keyword is
# not followed by another identifier character. A bare !Keyword would also
# reject names that merely start with one (format, index, is_valid, order...).
NAME <~ (!(Keyword ![a-zA-Z0-9_]) [a-zA-Z_][a-zA-Z0-9_]*)
# https://docs.python.org/3/reference/lexical_analysis.html#keywords
# Hard keywords only; soft keywords (match, case, type, _) stay usable as names.
# Plain arrow: the boundary check in NAME must see the character right after
# the keyword. Ordered choice commits to the first alternative that matches,
# so "assert" and "async" must be listed before their prefix "as".
Keyword <-
    "False" / "await" / "else" / "import" / "pass"
    / "None" / "break" / "except" / "in" / "raise"
    / "True" / "class" / "finally" / "is" / "return"
    / "and" / "continue" / "for" / "lambda" / "try"
    / "assert" / "async" / "as" / "def" / "from"
    / "nonlocal" / "while" / "del" / "global" / "not"
    / "with" / "elif" / "if" / "or" / "yield"

# Ordered choice: the longer literal forms must be tried first, otherwise
# "1.5" stops after "1" and "0x1F" stops after "0".
NUMBER < FloatLiteral / IntegerLiteral

IntegerLiteral <- BinaryInteger
    / HexadecimalInteger
    / DecimalInteger

# Lexical tokens use the plain arrow so that no whitespace is accepted
# inside a single literal.
DecimalInteger <- Integer IntegerSuffix?

Integer <- digit (digit/"_")*

IntegerSuffix <- "Lu" / "LU" / "uL" / "UL"
    / "L" / "u" / "U"

BinaryInteger <- ("0b" / "0B") [01] ([01] / "_")*

HexadecimalInteger <- ("0x"/"0X") HexDigit (HexDigit / "_")*

digit <- [0-9]
HexDigit <- [0-9a-fA-F]

FloatLiteral <- Sign? Integer "." Integer? (("e" / "E") Sign? Integer)?

Sign <- ("-" / "+")?

INDENT < ("\t" / " ")+
DEDENT < endOfLine
NEWLINE <- endOfLine # Comment?

TYPE_COMMENT < "#" "type" ":" NAME endOfLine

Comment <-
    LineComment
LineComment <~ :'#' (!endOfLine .)* :endOfLine

`;


// Generate the parser (named Python, after the grammar header) at compile time.
mixin(grammar(pythonGrammar));

unittest
{
    import std.stdio;
    writeln(Python(`m = "Hello";`));

    // Names that merely begin with a keyword are identifiers; bare keywords are not.
    assert(Python.NAME("format").successful);
    assert(Python.NAME("assertion").successful);
    assert(Python.NAME("type").successful);
    assert(!Python.NAME("for").successful);
    assert(!Python.NAME("assert").successful);

    // Prefixed and fractional literals must be consumed as a whole.
    assert(Python.NUMBER("0x1F").end == 4);
    assert(Python.NUMBER("0b101").end == 5);
    assert(Python.NUMBER("1.5").end == 3);
}