1 /**
2 This module contains function to inspect a Pegged grammar.
3 */
4 module pegged.dev.introspection;
5 
6 import std.typecons;
7 
8 import pegged.grammar;
9 import pegged.parser;
10 
11 /**
12 The different kinds of recursion for a rule.
13 'direct' means the rule name appears in its own definition. 'indirect' means the rule calls itself through another rule (the call chain can be long).
14 */
15 enum Recursive { no, direct, indirect }
16 
17 /**
18 Left-recursion diagnostic for a rule. A rule is left-recursive when its own name appears at the beginning of its definition or behind possibly-null-matching rules (see below for null matches).
19 For example A <- A 'a' is left-recursive, whereas A <- 'a' A is not. *But* A <- 'a'? A is left-recursive, since if the input does not begin
20 with 'a', then the parsing will continue by invoking A again, at the same position.
21 
22 'direct' means the rule invokes itself in the first position of its definition (A <- A ...). 'hidden' means the rule names appears after a possibly-null other rule (A <- 'a'? A ...). 'indirect' means the rule calls itself trough another rule.
23 */
24 enum LeftRecursive { no, direct, hidden, indirect }
25 
26 /**
27 NullMatch.yes means a rule can succeed while consuming no input. For example e? or e*, for all expressions e.
28 Nullmatch.no means a rule will always consume at least a token while succeeding.
29 Nullmatch.indeterminate means the algorithm could not converge.
30 */
31 enum NullMatch { no, yes, indeterminate }
32 
33 /**
34 InfiniteLoop.yes means a rule can loop indefinitely while consuming nothing.
35 InfiniteLoop.no means a rule cannot loop indefinitely.
36 InfiniteLoop.indeterminate means the algorithm could not converge.
37 */
38 enum InfiniteLoop { no, yes, indeterminate }
39 
40 /**
41 Struct holding the introspection info on a rule.
42 */
43 struct RuleInfo
44 {
45     Recursive recursion; /// Is the rule recursive?
46     LeftRecursive leftRecursion; /// Is the rule left-recursive?
47     NullMatch nullMatch; /// Can the rule succeed while consuming nothing?
48     InfiniteLoop infiniteLoop; /// Can the rule loop while indefinitely, while consuming nothing?
49 }
50 
51 /**
52 Returns for all grammar rule:
53 
54 - the recursion type (no recursion, direct or indirect recursion).
55 - the left-recursion type (no left-recursion, direct left-recursion, hidden, or indirect)
56 - the null-match for a grammar's rules: whether the rule can succeed while consuming nothing.
57 - the possibility of an infinite loop (if 'e' can null-match, then 'e*' can enter an infinite loop).
58 
59 This kind of potential problem can be detected statically and should be transmitted to the grammar designer.
60 */
61 RuleInfo[string] ruleInfo(string grammar)
62 {
63     RuleInfo[string] result;
64     ParseTree[string] rules;
65 
66     /**
67     Returns the call graph of a grammar: the list of rules directly called by each rule of the grammar.
68     The graph is represented as a bool[string][string] associative array, the string holding
69     the rules names. graph["ruleName"] contains all rules called by ruleName, as a set (a bool[string] AA).
70 
71     graph.keys thus gives the grammar's rules names.
72 
73     If a rule calls itself, its own name will appear in the called set. If a rule calls an external rule, it will
74     also appear in the call graph when the rule has a name: hence, calls to predefined rules like 'identifier' or
75     'digit' will appear, but not a call to '[0-9]+', considered here as an anonymous rule.
76     */
77     bool[string][string] callGraph(ParseTree p)
78     {
79         bool[string] findIdentifiers(ParseTree p)
80         {
81             bool[string] idList;
82             if (p.name == "Pegged.Identifier")
83                 idList[p.matches[0]] = true;
84             else
85                 foreach(child; p.children)
86                     foreach(name; findIdentifiers(child).keys)
87                         idList[name] = true;
88 
89             return idList;
90         }
91 
92         bool[string][string] graph;
93 
94         foreach(definition; p.children)
95             if (definition.name == "Pegged.Definition")
96             {
97                 auto ids = findIdentifiers(definition.children[2]);
98                 graph[definition.matches[0]] = ids;
99                 foreach(id, _; ids) // getting possible external calls
100                     if (id !in graph)
101                         graph[id] = (bool[string]).init;
102             }
103 
104         return graph;
105     }
106 
107     /**
108     The transitive closure of a call graph.
109     It will propagate the calls to find all rules called by a given rule,
110     directly (already in the call graph) or indirectly (through another rule).
111     */
112     bool[string][string] closure(bool[string][string] graph)
113     {
114         bool[string][string] path;
115         foreach(rule, children; graph) // deep-dupping, to avoid children aliasing
116             path[rule] = children.dup;
117 
118         bool changed = true;
119 
120         while(changed)
121         {
122             changed = false;
123             foreach(rule1; graph.keys)
124                 foreach(rule2; graph.keys)
125                     if (rule2 in path[rule1])
126                         foreach(rule3; graph.keys)
127                             if (rule3 in path[rule2] && rule3 !in path[rule1])
128                             {
129                                 path[rule1][rule3] = true;
130                                 changed = true;
131                             }
132         }
133 
134         return path;
135     }
136 
137     Recursive[string] recursions(bool[string][string] graph)
138     {
139         bool[string][string] path = closure(graph);
140 
141         Recursive[string] result;
142         foreach(rule, children; path)
143         {
144             result[rule] = Recursive.no;
145             if (rule in children)
146             {
147                 if (rule in graph[rule])
148                     result[rule] = Recursive.direct;
149                 else
150                     result[rule] = Recursive.indirect;
151             }
152         }
153 
154         return result;
155     }
156 
157     NullMatch nullMatching(ParseTree p)
158     {
159         switch (p.name)
160         {
161             case "Pegged.Expression": // choice expressions null-match whenever one of their components can null-match
162                 foreach(seq; p.children)
163                 {
164                     auto nm = nullMatching(seq);
165                     if (nm == NullMatch.yes)
166                         return NullMatch.yes;
167                     if (nm == NullMatch.indeterminate)
168                         return NullMatch.indeterminate;
169                 }
170                 return NullMatch.no;
171             case "Pegged.Sequence": // sequence expressions can null-match when all their components can null-match
172                 foreach(seq; p.children)
173                 {
174                     auto nm = nullMatching(seq);
175                     if (nm == NullMatch.indeterminate)
176                         return NullMatch.indeterminate;
177                     if (nm == NullMatch.no)
178                         return NullMatch.no;
179                 }
180                 return NullMatch.yes;
181             case "Pegged.Prefix":
182                 foreach(pref; p.children[0..$-1])
183                     if (pref.name == "Pegged.POS" || pref.name == "Pegged.NEG")
184                         return NullMatch.yes;
185                 return nullMatching(p.children[$-1]);
186             case "Pegged.Suffix":
187                 foreach(pref; p.children[1..$])
188                     if (pref.name == "Pegged.ZEROORMORE" || pref.name == "Pegged.OPTION")
189                         return NullMatch.yes;
190                 return nullMatching(p.children[0]);
191             case "Pegged.Primary":
192                 return nullMatching(p.children[0]);
193             case "Pegged.RhsName":
194                 if (p.matches[0] in result)
195                     return result[p.matches[0]].nullMatch;
196                 else
197                     return nullMatching(p.children[0]);
198             case "Pegged.Literal":
199                 if (p.matches[0].length == 0) // Empty literal, '' or ""
200                     return NullMatch.yes;
201                 else
202                     return NullMatch.no;
203             case "Pegged.CharClass":
204                 return NullMatch.no;
205             case "Pegged.ANY":
206                 return NullMatch.no;
207             case "eps":
208                 return NullMatch.yes;
209             case "eoi":
210                 return NullMatch.yes;
211             default:
212                 return NullMatch.indeterminate;
213         }
214     }
215 
216     InfiniteLoop infiniteLooping(ParseTree p)
217     {
218         switch (p.name)
219         {
220             case "Pegged.Expression": // choice expressions loop whenever one of their components can loop
221                 foreach(seq; p.children)
222                 {
223                     auto nm = infiniteLooping(seq);
224                     if (nm == InfiniteLoop.yes)
225                         return InfiniteLoop.yes;
226                     if (nm == InfiniteLoop.indeterminate)
227                         return InfiniteLoop.indeterminate;
228                 }
229                 return InfiniteLoop.no;
230             case "Pegged.Sequence": // sequence expressions can loop when one of their components can loop
231                 foreach(seq; p.children)
232                 {
233                     auto nm = infiniteLooping(seq);
234                     if (nm == InfiniteLoop.yes)
235                         return InfiniteLoop.yes;
236                     if (nm == InfiniteLoop.indeterminate)
237                         return InfiniteLoop.indeterminate;
238                 }
239                 return InfiniteLoop.no;
240             case "Pegged.Prefix":
241                 return infiniteLooping(p.children[$-1]);
242             case "Pegged.Suffix":
243                 foreach(pref; p.children[1..$])
244                     if ((  pref.name == "Pegged.ZEROORMORE" || pref.name == "Pegged.ONEORMORE")
245                         && p.matches[0] in result
246                         && result[p.matches[0]].nullMatch == NullMatch.yes)
247                         return InfiniteLoop.yes;
248                 return infiniteLooping(p.children[0]);
249             case "Pegged.Primary":
250                 return infiniteLooping(p.children[0]);
251             case "Pegged.RhsName":
252                 if (p.matches[0] in result)
253                     return result[p.matches[0]].infiniteLoop;
254                 else
255                     return infiniteLooping(p.children[0]);
256             case "Pegged.Literal":
257                 return InfiniteLoop.no;
258             case "Pegged.CharClass":
259                 return InfiniteLoop.no;
260             case "Pegged.ANY":
261                 return InfiniteLoop.no;
262             case "eps":
263                 return InfiniteLoop.no;
264             case "eoi":
265                 return InfiniteLoop.no;
266             default:
267                 return InfiniteLoop.indeterminate;
268         }
269     }
270 
271     LeftRecursive leftRecursion(ParseTree p, string target)
272     {
273         switch (p.name)
274         {
275             case "Pegged.Expression": // Choices are left-recursive is any choice is left-recursive
276                 foreach(seq; p.children)
277                 {
278                     auto lr = leftRecursion(seq, target);
279                     if (lr != LeftRecursive.no)
280                         return lr;
281                 }
282                 return LeftRecursive.no;
283             case "Pegged.Sequence": // Sequences are left-recursive when the leftmost member is left-recursive
284                                     // or behind null-matching members
285                 foreach(i, seq; p.children)
286                 {
287                     auto lr = leftRecursion(seq, target);
288                     if (lr == LeftRecursive.direct)
289                         return (i == 0 ? LeftRecursive.direct : LeftRecursive.hidden);
290                     else if (lr == LeftRecursive.hidden || lr == LeftRecursive.indirect)
291                         return lr;
292                     else if (nullMatching(seq) == NullMatch.yes)
293                         continue;
294                     else
295                         return LeftRecursive.no;
296                 }
297                 return LeftRecursive.no; // found only null-matching rules!
298             case "Pegged.Prefix":
299                 return leftRecursion(p.children[$-1], target);
300             case "Pegged.Suffix":
301                 return leftRecursion(p.children[0], target);
302             case "Pegged.Primary":
303                 return leftRecursion(p.children[0], target);
304             case "Pegged.RhsName":
305                 if (p.matches[0] == target) // ?? Or generateCode(p) ?
306                     return LeftRecursive.direct;
307                 else if ((p.matches[0] in rules) && (leftRecursion(rules[p.matches[0]], target) != LeftRecursive.no))
308                     return LeftRecursive.hidden;
309                 else
310                     return LeftRecursive.no;
311             case "Pegged.Literal":
312                 return LeftRecursive.no;
313             case "Pegged.CharClass":
314                 return LeftRecursive.no;
315             case "Pegged.ANY":
316                 return LeftRecursive.no;
317             case "eps":
318                 return LeftRecursive.no;
319             case "eoi":
320                 return LeftRecursive.no;
321             default:
322                 return LeftRecursive.no;
323         }
324     }
325 
326     ParseTree p = Pegged(grammar).children[0];
327     foreach(definition; p.children)
328         if (definition.name == "Pegged.Definition")
329         {
330             rules[definition.matches[0]] = definition.children[2];
331             result[definition.matches[0]] = RuleInfo(Recursive.no, LeftRecursive.no,
332                                                      NullMatch.indeterminate,InfiniteLoop.indeterminate);
333         }
334 
335     auto rec = recursions(callGraph(p));
336     foreach(rule, recursionType; rec)
337         if (rule in result) // external rules are in rec, but not in result
338             result[rule].recursion = recursionType;
339 
340     foreach(name, tree; rules)
341     {
342         if (result[name].recursion != Recursive.no)
343         {
344             result[name].leftRecursion = leftRecursion(tree, name);
345         }
346     }
347 
348     bool changed = true;
349 
350     while(changed) // while something new happened, the process is not over
351     {
352         changed = false;
353         foreach(name, tree; rules)
354             if (result[name].nullMatch == NullMatch.indeterminate) // not done yet
355             {
356                 result [name].nullMatch = nullMatching(tree); // try to find if it's null-matching
357                 if (result[name].nullMatch != NullMatch.indeterminate)
358                     changed = true;
359             }
360     }
361 
362     changed = true;
363 
364     while(changed) // while something new happened, the process is not over
365     {
366         changed = false;
367         foreach(name, tree; rules)
368             if (result[name].infiniteLoop == InfiniteLoop.indeterminate) // not done yet
369             {
370                 result [name].infiniteLoop = infiniteLooping(tree); // try to find if it's looping
371                 if (result[name].infiniteLoop != InfiniteLoop.indeterminate)
372                     changed = true;
373             }
374     }
375 
376     return result;
377 }
378 
379 /**
380 Act on rules parse tree as produced by pegged.parser.
381 Replace every occurence of child in parent by child's parse tree
382 */
383 ParseTree replaceInto(ParseTree parent, ParseTree child)
384 {
385     if (parent.name == "Pegged.RhsName" && parent.matches[0] == child.matches[0])
386         return ParseTree("Pegged.Named", true, child.matches[0..1], "",0,0,
387                        [child.children[2],
388                         ParseTree("Pegged.Identifier", true, child.matches[0..1])]);
389     else
390         foreach(ref branch; parent.children)
391             branch = replaceInto(branch, child);
392     return parent;
393 }