1 module pegged.examples.python;
2 
3 
4 import pegged.grammar;
5 
6 // https://docs.python.org/3/reference/grammar.html
7 // 3.10.3 Documentation » The Python Language Reference » 10. Full Grammar specification
8 enum string pythonGrammar = `
9 Python:
10 
# PEG grammar for Python
#
# Entry rules.  "file" is the first rule listed; "interactive", "eval",
# "func_type" and "fstring" are not referenced by any other rule in this
# grammar text, so they only matter when invoked directly.
# ENDMARKER appears only inside trailing comments: no rule here tests for
# end of input.

file <- statements? # ENDMARKER
interactive <  statement_newline
eval <  expressions NEWLINE* # ENDMARKER
func_type <  "(" (type_expressions)? ")" "->" expression NEWLINE* # ENDMARKER
fstring <  star_expressions
18 
# type_expressions allow */** but ignore them
# The alternatives run from the fullest form (expression list, then a "*"
# entry, then a "**" entry) down to a plain comma-separated list, so the
# ordered choice tries the most specific shape first.
type_expressions < 
      ((expression ("," expression)*) "," "*" expression "," "**" expression)
    / ((expression ("," expression)*) "," "*" expression)
    / ((expression ("," expression)*) "," "**" expression)
    / ("*" expression "," "**" expression)
    / ("*" expression)
    / ("**" expression)
    / (expression ("," expression)*)
28 
# Statement dispatch: a statement is either a compound statement or a line
# of one or more ";"-separated simple statements.
statements <  statement+
statement <  compound_stmt  / simple_stmts
# Variant used by the "interactive" entry rule; the ENDMARKER alternative is
# commented out.
statement_newline <
      (compound_stmt NEWLINE)
    / simple_stmts
    / NEWLINE
#   / ENDMARKER
# Both alternatives require a terminating NEWLINE.
simple_stmts <
      (simple_stmt !";" NEWLINE)  # Not needed, there for speedup
    / ((simple_stmt (";" simple_stmt)*) (";")? NEWLINE)
# NOTE: assignment MUST precede star_expressions, else parsing a simple
# assignment will throw a SyntaxError.
simple_stmt < 
      assignment
    / star_expressions
    / return_stmt
    / import_stmt
    / raise_stmt
    / "pass"
    / del_stmt
    / yield_stmt
    / assert_stmt
    / "break"
    / "continue"
    / global_stmt
    / nonlocal_stmt
# Compound statements are tried in the order listed; match_stmt comes last.
compound_stmt < 
      function_def
    / if_stmt
    / class_def
    / with_stmt
    / for_stmt
    / try_stmt
    / while_stmt
    / match_stmt
64 
# NOTE: annotated_rhs may start with "yield"; yield_expr must start with "yield"
# Four assignment shapes, tried in order:
#   1. annotated name:            NAME ":" expression, optional "=" value
#   2. annotated target:          parenthesised single_target or
#                                 attribute/subscript target, then ":" ...
#   3. plain (possibly chained):  one or more  star_targets "="  prefixes
#                                 followed by a value that is not itself
#                                 followed by "="
#   4. augmented:                 single_target augassign value
assignment < 
      (NAME ":" expression ("=" annotated_rhs )?)
    / ((("(" single_target ")")
         / single_subscript_attribute_target) ":" expression ("=" annotated_rhs )?)
    / ((star_targets "=" )+ (yield_expr / star_expressions) !"=" (TYPE_COMMENT)?)
    / (single_target augassign   (yield_expr / star_expressions))

# Augmented-assignment operators.
augassign < 
      "+="
    / "-="
    / "*="
    / "@="
    / "/="
    / "%="
    / "&="
    / "|="
    / "^="
    / "<<="
    / ">>="
    / "**="
    / "//="
87 
# global / nonlocal take a comma-separated list of one or more names.
global_stmt <  "global" (NAME ("," NAME)*)
nonlocal_stmt <  "nonlocal" (NAME ("," NAME)*)

yield_stmt <  yield_expr

# assert with an optional second (message) expression.
assert_stmt <  ("assert" expression ("," expression )?)

# The lookahead requires the target list to be followed by ";" or NEWLINE.
del_stmt < 
      ("del" del_targets &(";" / NEWLINE))

import_stmt <  import_name / import_from
import_name <  ("import" dotted_as_names)
# NOTE: the ("..." / ".") choice below mirrors the reference grammar, whose
# tokenizer emits "..." as a single ELLIPSIS token.
# First alternative: optional leading dots plus a dotted module name.
# Second alternative: dots only (relative import from the package itself).
import_from < 
      ("from" ("..." / ".")* dotted_name "import" import_from_targets)
    / ("from" ("..." / ".")+ "import" import_from_targets)
# Imported names: parenthesised list (trailing comma allowed), bare list
# (no trailing comma), or "*".
import_from_targets < 
      ("(" import_from_as_names (",")? ")")
    / (import_from_as_names !",")
    / "*"
import_from_as_names < 
      (import_from_as_name ("," import_from_as_name)*)
import_from_as_name < 
      NAME ("as" NAME )?
dotted_as_names < 
      (dotted_as_name ("," dotted_as_name)*)
dotted_as_name < 
      (dotted_name ("as" NAME )?)
# The multi-part form is tried before the single NAME.
dotted_name < 
      (NAME ("." NAME)+)
    / NAME
119 
# if / elif chains: each rule first tries to continue with another elif_stmt
# and otherwise falls back to an optional else_block.
if_stmt < 
      ("if" named_expression ":" block elif_stmt)
    / ("if" named_expression ":" block (else_block)?)
elif_stmt < 
      ("elif" named_expression ":" block elif_stmt)
    / ("elif" named_expression ":" block (else_block)?)
else_block < 
      ("else" ":" block)

while_stmt < 
      ("while" named_expression ":" block (else_block)?)

# Plain and ASYNC-prefixed for loops; both accept an optional TYPE_COMMENT
# after the colon and an optional else_block.
for_stmt < 
      ("for" star_targets "in"   star_expressions ":" (TYPE_COMMENT)? block (else_block)?)
    / (ASYNC "for" star_targets "in"   star_expressions ":" (TYPE_COMMENT)? block (else_block)?)

# with statements: parenthesised item list (trailing comma allowed) or bare
# item list, each also available with an ASYNC prefix.  Only the bare forms
# accept a TYPE_COMMENT.
with_stmt < 
      ("with" "(" (with_item ("," with_item)*) ","? ")" ":" block)
    / ("with" (with_item ("," with_item)*) ":" (TYPE_COMMENT)? block)
    / (ASYNC "with" "(" (with_item ("," with_item)*) ","? ")" ":" block)
    / (ASYNC "with" (with_item ("," with_item)*) ":" (TYPE_COMMENT)? block)

# The "as" form is accepted only when followed by ",", ")" or ":".
with_item < 
      (expression "as" star_target &("," / ")" / ":"))
    / (expression)

# try/finally, or try with at least one except_block and optional
# else_block / finally_block.
try_stmt < 
      ("try" ":" block finally_block)
    / ("try" ":" block except_block+ (else_block)? (finally_block)?)
except_block < 
      ("except" expression ("as" NAME )? ":" block)
    / ("except" ":" block)
finally_block < 
      ("finally" ":" block)
154 
# Structural pattern matching: "match" subject ":" followed by an indented
# run of one or more case blocks.
match_stmt < 
      ("match" subject_expr ":" NEWLINE INDENT case_block+ DEDENT)
# A subject is either a tuple-like list (first element followed by ",") or a
# single named_expression.
subject_expr < 
      (star_named_expression "," star_named_expressions?)
    / (named_expression)
case_block < 
      ("case" patterns guard? ":" block)
guard <  "if" named_expression

# Top-level patterns: an unparenthesised sequence is tried before a single
# pattern.
patterns < 
      (open_sequence_pattern)
    / (pattern)
pattern < 
      (as_pattern)
    / (or_pattern)
as_pattern < 
      (or_pattern "as" pattern_capture_target)
or_pattern < 
      ((closed_pattern ("|" closed_pattern)*))
# Closed patterns are tried in the order listed.
closed_pattern < 
      (literal_pattern)
    / (capture_pattern)
    / (wildcard_pattern)
    / (value_pattern)
    / (group_pattern)
    / (sequence_pattern)
    / (mapping_pattern)
    / (class_pattern)

# Literal patterns are used for equality and identity constraints
literal_pattern < 
      (signed_number !("+" / "-"))
    / (complex_number)
    / (strings)
    / ("None")
    / ("True")
    / ("False")

# Literal expressions are used to restrict permitted mapping pattern keys
# (same alternatives as literal_pattern).
literal_expr < 
      (signed_number !("+" / "-"))
    / (complex_number)
    / (strings)
    / ("None")
    / ("True")
    / ("False")

# real part, "+" or "-", imaginary part.
complex_number < 
      (signed_real_number "+" imaginary_number)
    / (signed_real_number "-" imaginary_number)

signed_number < 
      (NUMBER)
    / ("-" NUMBER)

signed_real_number < 
      (real_number)
    / ("-" real_number)

# real_number and imaginary_number are both plain NUMBER here; the grammar
# does not distinguish them lexically.
real_number < 
      (NUMBER)

imaginary_number < 
      (NUMBER)
219 
capture_pattern < 
      (pattern_capture_target)

# A capture target is any NAME except the bare wildcard "_".  The lookahead
# uses wildcard_pattern (which only matches a lone underscore) so that names
# such as "_tmp" or "_x1" are still accepted; a plain !"_" would reject every
# name that merely starts with an underscore.
pattern_capture_target < 
      (!wildcard_pattern NAME !("." / "(" / "="))

# The wildcard is a single "_" that is NOT followed by another identifier
# character.  This rule deliberately uses the non-spacing arrow: with the
# space arrow, whitespace would be skipped between "_" and the boundary
# check, and "case _ if cond:" would be rejected.  Callers are space-arrow
# rules and take care of surrounding whitespace themselves.
wildcard_pattern <- 
      "_" ![a-zA-Z0-9_]
228 
# A value pattern is a dotted name that is not followed by ".", "(" or "=".
value_pattern < 
      (attr !("." / "(" / "="))
# attr and name_or_attr are mutually (left-)recursive: together they accept
# NAME followed by one or more ".NAME" parts.
attr < 
      (name_or_attr "." NAME)
name_or_attr < 
      (attr)
    / (NAME)

group_pattern < 
      ("(" pattern ")")

# Sequence patterns: bracketed or parenthesised; the contents are optional.
sequence_pattern < 
      ("[" maybe_sequence_pattern? "]")
    / ("(" open_sequence_pattern? ")")
# An open sequence needs a "," after its first element.
open_sequence_pattern < 
      (maybe_star_pattern "," maybe_sequence_pattern?)
maybe_sequence_pattern < 
      ((maybe_star_pattern ("," maybe_star_pattern)*) ","?)
maybe_star_pattern < 
      (star_pattern)
    / (pattern)
star_pattern < 
      ("*" pattern_capture_target)
    / ("*" wildcard_pattern)

# Mapping patterns: empty, "**rest" only, items followed by "**rest", or
# items only.  A trailing comma is allowed in the non-empty forms.
mapping_pattern < 
      ("{" "}")
    / ("{" double_star_pattern ","? "}")
    / ("{" items_pattern "," double_star_pattern ","? "}")
    / ("{" items_pattern ","? "}")
items_pattern < 
      ((key_value_pattern ("," key_value_pattern)*))
# Keys are restricted to literal expressions or dotted names.
key_value_pattern < 
      ((literal_expr / attr) ":" pattern)
double_star_pattern < 
      ("**" pattern_capture_target)

# Class patterns: no arguments, positional only, keyword only, or positional
# followed by keyword arguments.
class_pattern < 
      (name_or_attr "(" ")")
    / (name_or_attr "(" positional_patterns ","? ")")
    / (name_or_attr "(" keyword_patterns ","? ")")
    / (name_or_attr "(" positional_patterns "," keyword_patterns ","? ")")
positional_patterns < 
      ((pattern ("," pattern)*))
keyword_patterns < 
      ((keyword_pattern ("," keyword_pattern)*))
keyword_pattern < 
      (NAME "=" pattern)
277 
# return with an optional value list.
return_stmt < 
      ("return" (star_expressions)?)

# raise with an exception (and optional "from" cause), or bare raise.
raise_stmt < 
      ("raise" expression ("from" expression )?)
    / ("raise")

# A function definition, optionally preceded by decorators.
function_def < 
      (decorators function_def_raw)
    / (function_def_raw)

# Plain and ASYNC-prefixed "def"; both take optional parameters, an optional
# "->" return annotation and an optional type comment before the block.
function_def_raw < 
      ("def" NAME "(" (params)? ")" ("->" expression )? ":" (func_type_comment)? block)
    / (ASYNC "def" NAME "(" (params)? ")" ("->" expression )? ":" (func_type_comment)? block)
func_type_comment < 
      (NEWLINE TYPE_COMMENT &(NEWLINE INDENT))   # Must be followed by indented block
    / (TYPE_COMMENT)

params < 
      (parameters)

# Parameter list shapes, tried in order: positional-only prefix without
# defaults, positional-only prefix with defaults, plain parameters,
# defaulted parameters only, and star/double-star forms only.
parameters < 
      (slash_no_default param_no_default* param_with_default* (star_etc)?)
    / (slash_with_default param_with_default* (star_etc)?)
    / (param_no_default+ param_with_default* (star_etc)?)
    / (param_with_default+ (star_etc)?)
    / (star_etc)

# Some duplication here because we can't write ("," / &")"),
# which is because we don't support empty alternatives (yet).
#
slash_no_default < 
      (param_no_default+ "/" ",")
    / (param_no_default+ "/" &")")
slash_with_default < 
      (param_no_default* param_with_default+ "/" ",")
    / (param_no_default* param_with_default+ "/" &")")

# "*name ...", bare "*," followed by at least one parameter, or "**name".
star_etc < 
      ("*" param_no_default param_maybe_default* (kwds)?)
    / ("*" "," param_maybe_default+ (kwds)?)
    / (kwds)

kwds <  "**" param_no_default

# One parameter.  This *includes* a following comma and type comment.
#
# There are three styles:
# - No default
# - With default
# - Maybe with default
#
# There are two alternative forms of each, to deal with type comments:
# - Ends in a comma followed by an optional type comment
# - No comma, optional type comment, must be followed by close paren
# The latter form is for a final parameter without trailing comma.
#
param_no_default < 
      (param "," TYPE_COMMENT?)
    / (param TYPE_COMMENT? &")")
param_with_default < 
      (param default_expr "," TYPE_COMMENT?)
    / (param default_expr TYPE_COMMENT? &")")
param_maybe_default < 
      (param default_expr? "," TYPE_COMMENT?)
    / (param default_expr? TYPE_COMMENT? &")")
param <  NAME annotation?

annotation <  ":" expression
default_expr <  "=" expression

# One or more "@expression" lines, each terminated by NEWLINE.
decorators <  ("@" named_expression NEWLINE )+
350 
# A class definition, optionally preceded by decorators.
class_def < 
      (decorators ClassDeclaration)
    / (ClassDeclaration)

# The parenthesised argument list (bases/keywords) is optional, and may be
# empty when present.
ClassDeclaration < 
      ("class" NAME ("(" arguments? ")")? ":" block)

# A block is either an indented run of statements on following lines or
# simple statements on the same line.
block < 
      (NEWLINE INDENT statements DEDENT)
    / (simple_stmts)
361 
# Comma-separated expression lists in which elements may be starred.
# Alternatives: two or more elements (optional trailing comma), one element
# with a trailing comma, or a single element.
star_expressions < 
      (star_expression ("," star_expression )+ (",")?)
    / (star_expression ",")
    / (star_expression)
star_expression < 
      ("*" bitwise_or)
    / (expression)

# Same idea, but elements may be named (":=") expressions.
star_named_expressions <  (star_named_expression ("," star_named_expression)*) (",")?
star_named_expression < 
      ("*" bitwise_or)
    / (named_expression)


# NAME ":=" expression.
assignment_expression < 
      (NAME ":="   expression)

# A plain expression is accepted only when it is not followed by ":=".
named_expression < 
      (assignment_expression)
    / (expression !":=")

annotated_rhs <  yield_expr / star_expressions

# Un-starred counterpart of star_expressions.
expressions < 
      (expression ("," expression )+ (",")?)
    / (expression ",")
    / (expression)
# Conditional expression first, then a plain disjunction, then lambda.
expression < 
      (disjunction "if" disjunction "else" expression)
    / (disjunction)
    / (lambdef)
393 
# "lambda" with optional parameters, ":" and a body expression.
lambdef < 
      ("lambda" (lambda_params)? ":" expression)

lambda_params < 
      (lambda_parameters)

# lambda_parameters etc. duplicates parameters but without annotations
# or type comments, and if there's no comma after a parameter, we expect
# a colon, not a close parenthesis.  (For more, see parameters above.)
#
lambda_parameters < 
      (lambda_slash_no_default lambda_param_no_default* lambda_param_with_default* (lambda_star_etc)?)
    / (lambda_slash_with_default lambda_param_with_default* (lambda_star_etc)?)
    / (lambda_param_no_default+ lambda_param_with_default* (lambda_star_etc)?)
    / (lambda_param_with_default+ (lambda_star_etc)?)
    / (lambda_star_etc)

# Positional-only prefix ("/"), ended by "," or by the ":" that closes the
# parameter list.
lambda_slash_no_default < 
      (lambda_param_no_default+ "/" ",")
    / (lambda_param_no_default+ "/" &":")
lambda_slash_with_default < 
      (lambda_param_no_default* lambda_param_with_default+ "/" ",")
    / (lambda_param_no_default* lambda_param_with_default+ "/" &":")

lambda_star_etc < 
      ("*" lambda_param_no_default lambda_param_maybe_default* (lambda_kwds)?)
    / ("*" "," lambda_param_maybe_default+ (lambda_kwds)?)
    / (lambda_kwds)

lambda_kwds <  "**" lambda_param_no_default

# Each parameter form either consumes its trailing "," or must be directly
# followed by ":" (checked with a lookahead, not consumed).
lambda_param_no_default < 
      (lambda_param ",")
    / (lambda_param &":")
lambda_param_with_default < 
      (lambda_param default_expr ",")
    / (lambda_param default_expr &":")
lambda_param_maybe_default < 
      (lambda_param default_expr? ",")
    / (lambda_param default_expr? &":")
lambda_param <  NAME
435 
# Boolean operators, loosest binding first: "or", then "and", then "not".
disjunction < 
      (conjunction ("or" conjunction )+)
    / (conjunction)
conjunction < 
      (inversion ("and" inversion )+)
    / (inversion)
inversion < 
      ("not" inversion)
    / (comparison)
# A comparison is a bitwise_or followed by one or more operator/operand
# pairs (which allows chains such as a < b <= c), or a lone bitwise_or.
comparison < 
      (bitwise_or compare_op_bitwise_or_pair+)
    / (bitwise_or)
# Two-character operators are listed before their one-character prefixes
# ("<=" before "<", ">=" before ">"), and "not in" / "is not" before
# "in" / "is".
compare_op_bitwise_or_pair < 
      (eq_bitwise_or)
    / (noteq_bitwise_or)
    / (lte_bitwise_or)
    / (lt_bitwise_or)
    / (gte_bitwise_or)
    / (gt_bitwise_or)
    / (notin_bitwise_or)
    / (in_bitwise_or)
    / (isnot_bitwise_or)
    / (is_bitwise_or)
eq_bitwise_or <  "==" bitwise_or
noteq_bitwise_or < 
      (("!=" ) bitwise_or)
lte_bitwise_or <  "<=" bitwise_or
lt_bitwise_or <  "<" bitwise_or
gte_bitwise_or <  ">=" bitwise_or
gt_bitwise_or <  ">" bitwise_or
notin_bitwise_or <  "not" "in" bitwise_or
in_bitwise_or <  "in" bitwise_or
isnot_bitwise_or <  "is" "not" bitwise_or
is_bitwise_or <  "is" bitwise_or
470 
# Binary operators, loosest binding first.  Every rule from bitwise_or down
# to term is written left-recursively (the rule names itself as its first
# element), with the next-tighter rule as the non-recursive fallback.
bitwise_or < 
      (bitwise_or "|" bitwise_xor)
    / (bitwise_xor)
bitwise_xor < 
      (bitwise_xor "^" bitwise_and)
    / (bitwise_and)
bitwise_and < 
      (bitwise_and "&" shift_expr)
    / (shift_expr)
shift_expr < 
      (shift_expr "<<" sum)
    / (shift_expr ">>" sum)
    / (sum)

sum < 
      (sum "+" term)
    / (sum "-" term)
    / (term)
term < 
      (term "*" factor)
    / (term "/" factor)
    / (term "//" factor)
    / (term "%" factor)
    / (term "@" factor)
    / (factor)
# Unary prefix operators.
factor < 
      ("+" factor)
    / ("-" factor)
    / ("~" factor)
    / (power)
# The right operand of "**" is a factor, so it may carry a unary operator.
power < 
      (await_primary "**" factor)
    / (await_primary)
await_primary <
      ("await" primary)
    / (primary)
# Postfix forms (attribute access, call with a generator expression, call
# with arguments, subscript) applied left-recursively to an atom.
primary <
      (primary "." NAME)
    / (primary genexp)
    / (primary "(" (arguments)? ")")
    / (primary "[" slices "]")
    / (atom)
513 
# Subscript contents: a single slice not followed by ",", or a
# comma-separated list with optional trailing comma.
slices < 
      (slice !",")
    / ((slice ("," slice)*) (",")?)
# lower ":" upper with an optional ":" step (every part optional), or a
# plain named_expression.
slice < 
      ((expression)? ":" (expression)? (":"  expression? )?)
    / (named_expression)
# Atoms are tried in the order listed; NAME comes first.
atom < 
      (NAME)
    / ("True")
    / ("False")
    / ("None")
    / (strings)
    / (NUMBER)
    / ((tuple_expr / group / genexp))
    / ((list / listcomp))
    / ((dict / set / dictcomp / setcomp))
    / ("...")

# One or more adjacent STRING tokens.
strings <  STRING+
list < 
      ("[" (star_named_expressions)? "]")
listcomp < 
      ("[" named_expression for_if_clauses "]")
# Empty parentheses, or a first element followed by "," and optional
# further elements.
tuple_expr < 
      "(" (star_named_expression ","  star_named_expressions?  )? ")"
group < 
      ("(" (yield_expr / named_expression) ")")
genexp < 
      ("(" ( assignment_expression / (expression !":=")) for_if_clauses ")")
set <  "{" star_named_expressions "}"
setcomp < 
      ("{" named_expression for_if_clauses "}")
dict < 
      ("{" (double_starred_kvpairs)? "}")

dictcomp < 
      ("{" kvpair for_if_clauses "}")
double_starred_kvpairs <  (double_starred_kvpair ("," double_starred_kvpair)*) (",")?
double_starred_kvpair < 
      ("**" bitwise_or)
    / (kvpair)
kvpair <  expression ":" expression
# Comprehension tails: one or more "for ... in ..." clauses, each with any
# number of "if" filters; the ASYNC-prefixed form is tried first.
for_if_clauses < 
      (for_if_clause+)
for_if_clause < 
      (ASYNC "for" star_targets "in"   disjunction ("if" disjunction )*)
    / ("for" star_targets "in"   disjunction ("if" disjunction )*)

# "yield from" is tried before plain "yield" with an optional value list.
yield_expr < 
      ("yield" "from" expression)
    / ("yield" (star_expressions)?)
565 
# Call arguments: args with an optional trailing comma; the lookahead
# requires the closing ")" to follow.
arguments < 
      (args (",")? &")")
# Positional-style arguments optionally followed by keyword arguments, or
# keyword arguments only.
args < 
      ((args_helper ("," args_helper)*) ("," kwargs )?)
    / (kwargs)

# One positional-style argument: a starred expression, an assignment
# expression, or a plain expression; it must not be followed by "=", so
# "name=value" is left for kwargs.
args_helper < ((starred_expression / ( (assignment_expression / expression) !":=")) !"=")
# Keyword section: entries that allow "*" first, then entries that allow
# "**"; either group may also appear on its own.
kwargs < 
      ((kwarg_or_starred ("," kwarg_or_starred)*) "," (kwarg_or_double_starred ("," kwarg_or_double_starred)*))
    / ((kwarg_or_starred ("," kwarg_or_starred)*))
    / ((kwarg_or_double_starred ("," kwarg_or_double_starred)*))
starred_expression < 
      ("*" expression)
kwarg_or_starred < 
      (NAME "=" expression)
    / (starred_expression)
kwarg_or_double_starred < 
      (NAME "=" expression)
    / ("**" expression)
585 
# NOTE: star_targets may contain *bitwise_or, targets may not.
# A single target not followed by ",", or a comma-separated list with an
# optional trailing comma.
star_targets < 
      (star_target !",")
    / (star_target ("," star_target )* (",")?)
star_targets_list_seq <  (star_target ("," star_target)*) (",")?
# Tuple form needs at least one comma.
star_targets_tuple_seq < 
      (star_target ("," star_target )+ (",")?)
    / (star_target ",")
# At most one leading "*": the inner lookahead rejects "**".
star_target < 
      ("*" (!"*" star_target))
    / (target_with_star_atom)
# Attribute or subscript target (the trailing !t_lookahead makes sure this
# is the last accessor in the chain), or a star_atom.
target_with_star_atom < 
      (t_primary "." NAME !t_lookahead)
    / (t_primary "[" slices "]" !t_lookahead)
    / (star_atom)
star_atom < 
      (NAME)
    / ("(" target_with_star_atom ")")
    / ("(" (star_targets_tuple_seq)? ")")
    / ("[" (star_targets_list_seq)? "]")

# Targets for annotated and augmented assignment.
single_target < 
      (single_subscript_attribute_target)
    / (NAME)
    / ("(" single_target ")")
single_subscript_attribute_target < 
      (t_primary "." NAME !t_lookahead)
    / (t_primary "[" slices "]" !t_lookahead)

# Targets of the del statement.
del_targets <  (del_target ("," del_target)*) (",")?
del_target < 
      (t_primary "." NAME !t_lookahead)
    / (t_primary "[" slices "]" !t_lookahead)
    / (del_t_atom)
del_t_atom < 
      (NAME)
    / ("(" del_target ")")
    / ("(" (del_targets)? ")")
    / ("[" (del_targets)? "]")

# t_primary is the prefix of a target: every alternative ends with
# &t_lookahead, i.e. it must be followed by another "(", "[" or ".".
# The rule is left-recursive.
t_primary < 
      (t_primary "." NAME &t_lookahead)
    / (t_primary "[" slices "]" &t_lookahead)
    / (t_primary genexp &t_lookahead)
    / (t_primary "(" (arguments)? ")" &t_lookahead)
    / (atom &t_lookahead)
t_lookahead <  "(" / "[" / "."
633 
# String literal token: double-quoted or single-quoted, with backslash
# escapes.  Prefixes (r, b, f, ...) and triple-quoted strings containing
# quote characters are not handled here.
#
# The D-style postfix (c / w / d) is intentionally no longer part of STRING:
# Python has no such suffix, and because StringPostfix is a space-arrow rule
# it skipped whitespace and could swallow the first letter of whatever
# followed the string (for example the "d" of a "def" on the next line).
STRING <- doublequote (DQChar)* doublequote
        / quote (SQChar)* quote

# One character inside a double-quoted string.
DQChar <- EscapeSequence
        / (!doublequote .)

# One character inside a single-quoted string.
SQChar <- EscapeSequence
        / (!quote .)

# Recognised escapes.  A backslash followed by anything else is still
# accepted by DQChar / SQChar as two ordinary characters.
EscapeSequence <- backslash ( quote
    / doublequote
    / backslash
    / [abfnrtv]
    / ('x' HexDigit HexDigit)
    / ('u' HexDigit HexDigit HexDigit HexDigit)
    / ('U' HexDigit HexDigit HexDigit HexDigit HexDigit HexDigit HexDigit HexDigit)
    )

# Not referenced by STRING any more; kept so that existing references to
# this rule name keep compiling.
StringPostfix < "c" / "w" / "d"
649 
650 
ASYNC < "async"

# An identifier: a letter or underscore followed by letters, digits or
# underscores, provided the text at this position is not a reserved word.
NAME <~  (!Keyword [a-zA-Z_][a-zA-Z0-9_]*)
# https://docs.python.org/3/reference/lexical_analysis.html#keywords
#
# A keyword only counts when it is a whole word, hence the trailing
# ![a-zA-Z0-9_].  Without it, !Keyword rejected every identifier that merely
# starts with a keyword ("index", "format", "is_ok", "order", ...).
#
# The rule uses the non-spacing arrow so that no whitespace is skipped
# between the keyword text and the boundary check.
#
# Ordering matters: PEG choice commits to the first alternative that
# matches, so "assert" and "async" must be tried before their prefix "as".
#
# "type" is not listed: it is not a reserved word in Python 3.10, and
# listing it made the builtin name "type" unusable as a NAME.
Keyword <- 
    ( "False"   / "await"    / "else"    / "import"   / "pass"
    / "None"    / "break"    / "except"  / "in"       / "raise"
    / "True"    / "class"    / "finally" / "is"       / "return"
    / "and"     / "continue" / "for"     / "lambda"   / "try"
    / "assert"  / "def"      / "from"    / "nonlocal" / "while"
    / "async"   / "del"      / "global"  / "not"      / "with"
    / "as"      / "elif"     / "if"      / "or"       / "yield"
    ) ![a-zA-Z0-9_]
664 
# Numeric literal token.
#
# Ordering matters because PEG choice commits to the first alternative that
# matches:
#   - FloatLiteral is tried before IntegerLiteral, otherwise "1.5" is cut
#     short after the integer "1".
#   - The "0b" / "0x" forms are tried before DecimalInteger, otherwise
#     "0x1F" is cut short after the decimal "0".
#
# All rules below NUMBER use the non-spacing arrow so that a token cannot
# contain whitespace (with the space arrow, "1 2" was read as one Integer and
# a suffix letter could be picked up from the following line).
NUMBER < FloatLiteral / IntegerLiteral

IntegerLiteral <- BinaryInteger
                / HexadecimalInteger
                / DecimalInteger

DecimalInteger <- Integer IntegerSuffix?

# Digits with optional "_" separators after the first digit.
Integer <- digit (digit/"_")*

# Suffix letters, accepted only when directly attached to the digits.
IntegerSuffix <- "Lu" / "LU" / "uL" / "UL"
               / "L" / "u" / "U"

BinaryInteger <- ("0b" / "0B") [01] ([01] / "_")*

HexadecimalInteger <- ("0x"/"0X") HexDigit (HexDigit / "_")*

digit <- [0-9]
HexDigit <- [0-9a-fA-F]

# Optional sign, integer part, ".", optional fraction, optional exponent.
FloatLiteral <- Sign? Integer "." Integer? (("e" / "E") Sign? Integer)?

Sign <- ("-" / "+")?
688 
# Layout tokens.  INDENT matches any non-empty run of tabs/spaces and DEDENT
# matches a line ending; neither tracks indentation depth.
# NOTE(review): most rules in this grammar use the space arrow, which skips
# whitespace around each element.  If the Spacing rule in effect also
# consumes line endings, NEWLINE / INDENT / DEDENT may never get to see them
# - confirm against the Pegged documentation.
INDENT < ("\t" / " ")+
DEDENT < endOfLine
NEWLINE <- endOfLine  # Comment?

# A "# type: NAME" comment terminated by a line ending.
TYPE_COMMENT < "#" "type" ":" NAME endOfLine

# Comment and LineComment are not referenced by any other rule in this
# grammar text.  LineComment fuses its matches and discards the leading "#"
# and the terminating line ending.
Comment <- 
           LineComment
LineComment <~ :'#' (!endOfLine .)* :endOfLine
698 
699 `;
700 
701 
// Pass the grammar text to Pegged's grammar() and mix the resulting code into
// this module at compile time; the unittest in this file calls the result as
// Python(...).
mixin(grammar(pythonGrammar));
703 
// Smoke test: feed a one-line assignment to the generated parser and print
// whatever Python(...) returns. Nothing is asserted about the result.
unittest
{
    import std.stdio : writeln;

    string sample = `m = "Hello";`;
    auto result = Python(sample);
    writeln(result);
}