1 /**
2 This module contains function to inspect a Pegged grammar.
3 */
4 module pegged.introspection;
5 
6 import std.algorithm : equal;
7 import std.typecons;
8 
9 import pegged.parser;
10 
11 /**
12 The different kinds of recursion for a rule.
13 'direct' means the rule name appears in its own definition. 'indirect' means the rule calls itself through another rule (the call chain can be long).
14 */
15 enum Recursive { no, direct, indirect }
16 
17 /**
18 Left-recursion diagnostic for a rule. A rule is left-recursive when its own name appears at the beginning of its definition or behind possibly-null-matching rules (see below for null matches).
19 For example A <- A 'a' is left-recursive, whereas A <- 'a' A is not. *But* A <- 'a'? A is left-recursive, since if the input does not begin
20 with 'a', then the parsing will continue by invoking A again, at the same position.
21 
22 'direct' means the rule invokes itself in the first position of its definition (A <- A ...). 'hidden' means the rule names appears after a possibly-null other rule (A <- 'a'? A ...). 'indirect' means the rule calls itself trough another rule.
23 */
24 enum LeftRecursive { no, direct, hidden, indirect }
25 
26 /**
27 NullMatch.yes means a rule can succeed while consuming no input. For example e? or e*, for all expressions e.
28 Nullmatch.no means a rule will always consume at least a token while succeeding.
29 Nullmatch.indeterminate means the algorithm could not converge.
30 */
31 enum NullMatch { no, yes, indeterminate }
32 
33 /**
34 InfiniteLoop.yes means a rule can loop indefinitely while consuming nothing.
35 InfiniteLoop.no means a rule cannot loop indefinitely.
36 InfiniteLoop.indeterminate means the algorithm could not converge.
37 */
38 enum InfiniteLoop { no, yes, indeterminate }
39 
40 /**
41 Struct holding the introspection info on a rule.
42 */
43 struct RuleInfo
44 {
45     Recursive recursion; /// Is the rule recursive?
46     LeftRecursive leftRecursion; /// Is the rule left-recursive?
47     NullMatch nullMatch; /// Can the rule succeed while consuming nothing?
48     InfiniteLoop infiniteLoop; /// Can the rule loop indefinitely, while consuming nothing?
49     string[] leftRecursiveCycle; /// The path of rules traversed before indirect left-recursion recurses.
50 }
51 
52 /**
53 Struct holding the introspection info on a grammar.
54 */
55 struct GrammarInfo
56 {
57     RuleInfo[string] ruleInfo;
58     immutable(string[])[] leftRecursiveCycles;
59 }
60 
61 pure void appendCycleIfUnique(ref immutable(string[])[] cycles, const string[] cycle)
62 {
63     bool exists()
64     {
65         import std.algorithm : countUntil;
66         foreach (c; cycles)
67         {
68             if (c.length != cycle.length)
69                 continue;
70             auto pos = countUntil(c, cycle[0]);
71             if (pos < 0)
72                 continue;
73             if (equal!equal(c[pos .. $] ~ c[0 .. pos], cycle))
74                 return true;
75         }
76         return false;
77     }
78     if (!exists())
79         cycles ~= cycle.idup;
80 }
81 
82 /**
83 Returns for all grammar rules:
84 
85 - the recursion type (no recursion, direct or indirect recursion).
86 - the left-recursion type (no left-recursion, direct left-recursion, hidden, or indirect)
87 - the null-match for a grammar's rules: whether the rule can succeed while consuming nothing.
88 - the possibility of an infinite loop (if 'e' can null-match, then 'e*' can enter an infinite loop).
89 
90 This kind of potential problem can be detected statically and should be transmitted to the grammar designer.
91 */
92 pure GrammarInfo grammarInfo(ParseTree p)
93 {
94     if (p.name == "Pegged")
95         return grammarInfo(p.children[0]);
96     assert(p.name == "Pegged.Grammar");
97 
98     GrammarInfo result;
99     ParseTree[string] rules;
100 
101     /**
102     Returns the call graph of a grammar: the list of rules directly called by each rule of the grammar.
103     The graph is represented as a bool[string][string] associative array, the string holding
104     the rules names. graph["ruleName"] contains all rules called by ruleName, as a set (a bool[string] AA).
105 
106     graph.keys thus gives the grammar's rules names.
107 
108     If a rule calls itself, its own name will appear in the called set. If a rule calls an external rule, it will
109     also appear in the call graph when the rule has a name: hence, calls to predefined rules like 'identifier' or
110     'digit' will appear, but not a call to '[0-9]+', considered here as an anonymous rule.
111     */
112     bool[string][string] callGraph(ParseTree p)
113     {
114         bool[string] findIdentifiers(ParseTree p)
115         {
116             bool[string] idList;
117             if (p.name == "Pegged.Identifier")
118                 idList[p.matches[0]] = true;
119             else
120                 foreach(child; p.children)
121                     foreach(name; findIdentifiers(child).keys)
122                         idList[name] = true;
123 
124             return idList;
125         }
126 
127         bool[string][string] graph;
128 
129         foreach(definition; p.children)
130             if (definition.name == "Pegged.Definition")
131             {
132                 auto ids = findIdentifiers(definition.children[2]);
133                 graph[definition.matches[0]] = ids;
134                 foreach(id, _; ids) // getting possible external calls
135                     if (id !in graph)
136                         graph[id] = (bool[string]).init;
137             }
138 
139         return graph;
140     }
141 
142     /**
143     The transitive closure of a call graph.
144     It will propagate the calls to find all rules called by a given rule,
145     directly (already in the call graph) or indirectly (through another rule).
146     */
147     bool[string][string] closure(bool[string][string] graph)
148     {
149         bool[string][string] path;
150         foreach(rule, children; graph) // deep-dupping, to avoid children aliasing
151             path[rule] = children.dup;
152 
153         bool changed = true;
154 
155         while(changed)
156         {
157             changed = false;
158             foreach(rule1; graph.keys)
159                 foreach(rule2; graph.keys)
160                     if (rule2 in path[rule1])
161                         foreach(rule3; graph.keys)
162                             if (rule3 in path[rule2] && rule3 !in path[rule1])
163                             {
164                                 path[rule1][rule3] = true;
165                                 changed = true;
166                             }
167         }
168 
169         return path;
170     }
171 
172     Recursive[string] recursions(bool[string][string] graph)
173     {
174         bool[string][string] path = closure(graph);
175 
176         Recursive[string] result;
177         foreach(rule, children; path)
178         {
179             result[rule] = Recursive.no;
180             if (rule in children)
181             {
182                 if (rule in graph[rule])
183                     result[rule] = Recursive.direct;
184                 else
185                     result[rule] = Recursive.indirect;
186             }
187         }
188 
189         return result;
190     }
191 
192     NullMatch nullMatching(ParseTree p)
193     {
194         switch (p.name)
195         {
196             case "Pegged.Expression":
197                 return nullMatching(p.children[0]);
198             case "Pegged.FirstExpression",
199                  "Pegged.LongestExpression": // choice expressions null-match whenever one of their components can null-match
200                 foreach(seq; p.children)
201                     if (nullMatching(seq) == NullMatch.yes)
202                         return NullMatch.yes;
203                 foreach(seq; p.children)
204                     if (nullMatching(seq) == NullMatch.indeterminate)
205                         return NullMatch.indeterminate;
206                 return NullMatch.no;
207             case "Pegged.Sequence": // sequence expressions can null-match when all their components can null-match
208                 foreach(pref; p.children)
209                     if (nullMatching(pref) == NullMatch.no)
210                         return NullMatch.no;
211                 foreach(pref; p.children)
212                     if (nullMatching(pref) == NullMatch.indeterminate)
213                         return NullMatch.indeterminate;
214                 return NullMatch.yes;
215             case "Pegged.Prefix":
216                 foreach(pref; p.children[0..$-1])
217                     if (pref.name == "Pegged.POS" || pref.name == "Pegged.NEG")
218                         return NullMatch.yes;
219                 return nullMatching(p.children[$-1]);
220             case "Pegged.Suffix":
221                 foreach(suff; p.children[1..$])
222                     if (suff.name == "Pegged.ZEROORMORE" || suff.name == "Pegged.OPTION")
223                         return NullMatch.yes;
224                 return nullMatching(p.children[0]);
225             case "Pegged.Primary":
226                 return nullMatching(p.children[0]);
227             case "Pegged.RhsName":
228                 if (p.matches[0] in result.ruleInfo)
229                     if (result.ruleInfo[p.matches[0]].nullMatch != NullMatch.indeterminate)
230                         return result.ruleInfo[p.matches[0]].nullMatch;
231                 return nullMatching(p.children[0]);
232             case "Pegged.Identifier":
233                 if (p.matches[0] == "eps" ||
234                     p.matches[0] == "eoi")
235                     return NullMatch.yes;
236                 return NullMatch.indeterminate;
237             case "Pegged.Literal":
238                 if (p.matches[0].length == 0) // Empty literal, '' or ""
239                     return NullMatch.yes;
240                 else
241                     return NullMatch.no;
242             case "Pegged.CharClass":
243             case "Pegged.ANY":
244                 return NullMatch.no;
245             case "eps":
246             case "eoi":
247                 return NullMatch.yes;
248             default:
249                 return NullMatch.indeterminate;
250         }
251     }
252 
253     InfiniteLoop infiniteLooping(ParseTree p)
254     {
255         switch (p.name)
256         {
257             case "Pegged.Expression": // choice expressions loop whenever one of their components can loop
258             case "Pegged.Sequence": // sequence expressions can loop when one of their components can loop
259                 foreach(seq; p.children)
260                 {
261                     auto nm = infiniteLooping(seq);
262                     if (nm == InfiniteLoop.yes)
263                         return InfiniteLoop.yes;
264                     if (nm == InfiniteLoop.indeterminate)
265                         return InfiniteLoop.indeterminate;
266                 }
267                 return InfiniteLoop.no;
268             case "Pegged.Prefix":
269                 return infiniteLooping(p.children[$-1]);
270             case "Pegged.Suffix":
271                 foreach(pref; p.children[1..$])
272                     if ((  pref.name == "Pegged.ZEROORMORE" || pref.name == "Pegged.ONEORMORE")
273                         && p.matches[0] in result.ruleInfo
274                         && result.ruleInfo[p.matches[0]].nullMatch == NullMatch.yes)
275                         return InfiniteLoop.yes;
276                 return infiniteLooping(p.children[0]);
277             case "Pegged.Primary":
278                 return infiniteLooping(p.children[0]);
279             case "Pegged.RhsName":
280                 if (p.matches[0] in result.ruleInfo)
281                     return result.ruleInfo[p.matches[0]].infiniteLoop;
282                 else
283                     return infiniteLooping(p.children[0]);
284             case "Pegged.Literal":
285             case "Pegged.CharClass":
286             case "Pegged.ANY":
287             case "eps":
288             case "eoi":
289                 return InfiniteLoop.no;
290             default:
291                 return InfiniteLoop.indeterminate;
292         }
293     }
294 
295     LeftRecursive leftRecursion(ParseTree p, ref string[] cycle)
296     {
297         import std.algorithm.searching: countUntil;
298         switch (p.name)
299         {
300             case "Pegged.Expression":
301                 return leftRecursion(p.children[0], cycle);
302             case "Pegged.FirstExpression",
303                  "Pegged.LongestExpression": // Choices are left-recursive if any choice is left-recursive
304                 // Because memoized left-recursion handling needs to know about all left-recursive cycles,
305                 // we consider all choices, not just one.
306                 auto any_lr = LeftRecursive.no;
307                 size_t current = cycle.length;
308                 foreach(seq; p.children)
309                 {
310                     cycle = cycle[0..current];
311                     auto lr = leftRecursion(seq, cycle);
312                     if (lr != LeftRecursive.no && any_lr == LeftRecursive.no)
313                         any_lr = lr;
314                 }
315                 cycle = cycle[0..current];
316                 return any_lr;
317             case "Pegged.Sequence": // Sequences are left-recursive when the leftmost member is left-recursive
318                                     // or behind null-matching members
319                 size_t current = cycle.length;
320                 foreach(i, seq; p.children)
321                 {
322                     auto lr = leftRecursion(seq, cycle);
323                     if (lr == LeftRecursive.direct)
324                         return (i == 0 ? LeftRecursive.direct : LeftRecursive.hidden);
325                     if (lr == LeftRecursive.hidden || lr == LeftRecursive.indirect)
326                         return lr;
327                     if (nullMatching(seq) == NullMatch.yes)
328                         continue;
329                     else
330                     {
331                         cycle = cycle[0..current];
332                         return LeftRecursive.no;
333                     }
334                 }
335                 cycle = cycle[0..current];
336                 return LeftRecursive.no; // found only null-matching rules!
337             case "Pegged.Prefix":
338                 return leftRecursion(p.children[$-1], cycle);
339             case "Pegged.Suffix":
340             case "Pegged.Primary":
341                 return leftRecursion(p.children[0], cycle);
342             case "Pegged.RhsName":
343                 auto dejavu = cycle.countUntil(p.matches[0]);
344                 if (dejavu >= 0)
345                 {
346                     if (cycle.length == 1)
347                     {
348                         result.leftRecursiveCycles.appendCycleIfUnique(cycle);
349                         return LeftRecursive.direct;
350                     }
351                     // Strictly spoken, the rules before dejavu are not actively left-recursive, but since they
352                     // can call into a left-recursive cycle, they would still left-recurse if left-recursion
353                     // wasn't handled.
354                     result.leftRecursiveCycles.appendCycleIfUnique(cycle[dejavu..$]);
355                     return LeftRecursive.indirect;
356                 }
357                 size_t current = cycle.length;
358                 cycle ~= p.matches[0];
359                 if ((p.matches[0] in rules) && (leftRecursion(rules[p.matches[0]], cycle) != LeftRecursive.no))
360                     return LeftRecursive.indirect;
361                 cycle = cycle[0..current];
362                 return LeftRecursive.no;
363             default:
364                 return LeftRecursive.no;
365         }
366     }
367 
368     // Initialize rules and result.
369     foreach(definition; p.children)
370         if (definition.name == "Pegged.Definition")
371         {
372             rules[definition.matches[0]] = definition.children[2];
373             result.ruleInfo[definition.matches[0]] = RuleInfo(Recursive.no, LeftRecursive.no,
374                                                               NullMatch.indeterminate, InfiniteLoop.indeterminate);
375         }
376 
377     // Detect recursions.
378     foreach(rule, recursionType; recursions(callGraph(p)))
379         if (rule in result.ruleInfo) // external rules are in recursions, but not in result
380             result.ruleInfo[rule].recursion = recursionType;
381 
382     // Detect left-recursions.
383     foreach(name, tree; rules)
384         if (result.ruleInfo[name].recursion != Recursive.no)
385         {
386             result.ruleInfo[name].leftRecursiveCycle ~= name;
387             result.ruleInfo[name].leftRecursion = leftRecursion(tree, result.ruleInfo[name].leftRecursiveCycle);
388         }
389 
390     // Detect null matches.
391     bool changed = true;
392     while(changed) // while something new happened, the process is not over
393     {
394         changed = false;
395         foreach(name, tree; rules)
396             if (result.ruleInfo[name].nullMatch == NullMatch.indeterminate) // not done yet
397             {
398                 result.ruleInfo[name].nullMatch = nullMatching(tree); // try to find if it's null-matching
399                 if (result.ruleInfo[name].nullMatch != NullMatch.indeterminate)
400                     changed = true;
401             }
402     }
403 
404     // Detect infinite loops.
405     changed = true;
406     while(changed) // while something new happened, the process is not over
407     {
408         changed = false;
409         foreach(name, tree; rules)
410             if (result.ruleInfo[name].infiniteLoop == InfiniteLoop.indeterminate) // not done yet
411             {
412                 result.ruleInfo[name].infiniteLoop = infiniteLooping(tree); // try to find if it's looping
413                 if (result.ruleInfo[name].infiniteLoop != InfiniteLoop.indeterminate)
414                     changed = true;
415             }
416     }
417 
418     return result;
419 }
420 
421 /**
422 Returns for all grammar rules:
423 
424 - the recursion type (no recursion, direct or indirect recursion).
425 - the left-recursion type (no left-recursion, direct left-recursion, hidden, or indirect)
426 - the null-match for a grammar's rules: whether the rule can succeed while consuming nothing.
427 - the possibility of an infinite loop (if 'e' can null-match, then 'e*' can enter an infinite loop).
428 
429 This kind of potential problem can be detected statically and should be transmitted to the grammar designer.
430 */
431 pure RuleInfo[string] ruleInfo(ParseTree p)
432 {
433     return grammarInfo(p).ruleInfo;
434 }
435 
436 /** ditto */
437 RuleInfo[string] ruleInfo(string grammar)
438 {
439     return ruleInfo(Pegged(grammar).children[0]);
440 }
441 
442 unittest
443 {
444     auto info = ruleInfo(`
445         Test:
446             A <- A 'a'
447     `);
448     assert(info["A"].leftRecursion == LeftRecursive.direct);
449 
450     info = ruleInfo(`
451         Test:
452             A <- B? A 'a'
453             B <- 'b'
454     `);
455     assert(info["A"].leftRecursion == LeftRecursive.hidden);
456 
457     info = ruleInfo(`
458         Test:
459             A <- B 'a'
460             B <- A
461     `);
462     assert(info["A"].leftRecursion == LeftRecursive.indirect);
463 }
464 
465 // Test against infinite recursion in detection of indirect left-recursion.
466 unittest
467 {
468     auto info = ruleInfo(`
469         Test:
470             A <- B / C 'a'
471             B <- A
472             C <- A
473     `);
474     assert(info["A"].leftRecursion == LeftRecursive.indirect);
475 }
476 
477 // Test against compile-time infinite recursion.
478 unittest // Mutual left-recursion
479 {
480     enum ct = ruleInfo(`
481       Left:
482         A <- L
483         L <- P
484         P <- P / L
485     `);
486     static assert(ct["A"].leftRecursion == LeftRecursive.no);
487     static assert(ct["L"].leftRecursion == LeftRecursive.indirect);
488     static assert(ct["P"].leftRecursion == LeftRecursive.direct);
489 
490     auto rt = ruleInfo(`
491         Left:
492           A <- L
493           L <- P
494           P <- P / L
495     `);
496     assert(rt["A"].leftRecursion == LeftRecursive.no);
497     assert(rt["L"].leftRecursion == LeftRecursive.indirect);
498     assert(rt["P"].leftRecursion == LeftRecursive.direct);
499 }
500 
501 unittest // Intersecting cycles of left-recursion
502 {
503     enum ct = ruleInfo(`
504       Left:
505         C <- A
506         A <- B* C
507         B <- A
508     `);
509     static assert(ct["C"].leftRecursion == LeftRecursive.indirect);
510     static assert(ct["A"].leftRecursion == LeftRecursive.indirect);
511     static assert(ct["B"].leftRecursion == LeftRecursive.indirect);
512     auto rt = ruleInfo(`
513       Left:
514         C <- A
515         A <- B* C
516         B <- A
517     `);
518     assert(rt["C"].leftRecursion == LeftRecursive.indirect);
519     assert(rt["A"].leftRecursion == LeftRecursive.indirect);
520     assert(rt["B"].leftRecursion == LeftRecursive.indirect);
521 }
522 
523 unittest // Null-matching
524 {
525     enum ct = ruleInfo(`
526       NM:
527         NMM <- NML eoi
528         NML <- 'x'?
529     `);
530     static assert(ct["NML"].nullMatch == NullMatch.yes);
531     static assert(ct["NMM"].nullMatch == NullMatch.yes);
532     auto rt = ruleInfo(`
533       NM:
534         NMM <- NML eoi
535         NML <- 'x'?
536     `);
537     assert(rt["NML"].nullMatch == NullMatch.yes);
538     assert(rt["NMM"].nullMatch == NullMatch.yes);
539 }
540 
541 unittest // Not null-matching
542 {
543     enum ct = ruleInfo(`
544       Left:
545         M <- L eoi
546         L <- P '.x' / 'x'
547         P <- P '(n)' / L
548     `);
549     static assert(ct["M"].nullMatch == NullMatch.no);
550     static assert(ct["L"].nullMatch == NullMatch.no);
551     static assert(ct["P"].nullMatch == NullMatch.no);
552     auto rt = ruleInfo(`
553       Left:
554         M <- L eoi
555         L <- P '.x' / 'x'
556         P <- P '(n)' / L
557     `);
558     assert(rt["M"].nullMatch == NullMatch.no);
559     assert(rt["L"].nullMatch == NullMatch.no);
560     assert(rt["P"].nullMatch == NullMatch.no);
561 }
562 
563 unittest // Left-recursive null-matching
564 {
565     enum ct = ruleInfo(`
566       Left:
567         M <- L eoi
568         L <- P? '.x'? / 'x'
569         P <- P '(n)' / L
570     `);
571     static assert(ct["M"].nullMatch == NullMatch.yes);
572     static assert(ct["L"].nullMatch == NullMatch.yes);
573     static assert(ct["P"].nullMatch == NullMatch.yes);
574     auto rt = ruleInfo(`
575       Left:
576         M <- L eoi
577         L <- P? '.x'? / 'x'
578         P <- P '(n)' / L
579     `);
580     assert(rt["M"].nullMatch == NullMatch.yes);
581     assert(rt["L"].nullMatch == NullMatch.yes);
582     assert(rt["P"].nullMatch == NullMatch.yes);
583 }
584 
585 
586 /**
587 Act on rules parse tree as produced by pegged.parser.
588 Replace every occurence of child in parent by child's parse tree
589 */
590 ParseTree replaceInto(ParseTree parent, ParseTree child)
591 {
592     if (parent.name == "Pegged.RhsName" && parent.matches[0] == child.matches[0])
593         return ParseTree("Pegged.Named", true, child.matches[0..1], "",0,0,
594                        [child.children[2],
595                         ParseTree("Pegged.Identifier", true, child.matches[0..1])]);
596     else
597         foreach(ref branch; parent.children)
598             branch = replaceInto(branch, child);
599     return parent;
600 }