1 /** 2 This module contains function to inspect a Pegged grammar. 3 */ 4 module pegged.dev.introspection; 5 6 import std.typecons; 7 8 import pegged.grammar; 9 import pegged.parser; 10 11 /** 12 The different kinds of recursion for a rule. 13 'direct' means the rule name appears in its own definition. 'indirect' means the rule calls itself through another rule (the call chain can be long). 14 */ 15 enum Recursive { no, direct, indirect } 16 17 /** 18 Left-recursion diagnostic for a rule. A rule is left-recursive when its own name appears at the beginning of its definition or behind possibly-null-matching rules (see below for null matches). 19 For example A <- A 'a' is left-recursive, whereas A <- 'a' A is not. *But* A <- 'a'? A is left-recursive, since if the input does not begin 20 with 'a', then the parsing will continue by invoking A again, at the same position. 21 22 'direct' means the rule invokes itself in the first position of its definition (A <- A ...). 'hidden' means the rule names appears after a possibly-null other rule (A <- 'a'? A ...). 'indirect' means the rule calls itself trough another rule. 23 */ 24 enum LeftRecursive { no, direct, hidden, indirect } 25 26 /** 27 NullMatch.yes means a rule can succeed while consuming no input. For example e? or e*, for all expressions e. 28 Nullmatch.no means a rule will always consume at least a token while succeeding. 29 Nullmatch.indeterminate means the algorithm could not converge. 30 */ 31 enum NullMatch { no, yes, indeterminate } 32 33 /** 34 InfiniteLoop.yes means a rule can loop indefinitely while consuming nothing. 35 InfiniteLoop.no means a rule cannot loop indefinitely. 36 InfiniteLoop.indeterminate means the algorithm could not converge. 37 */ 38 enum InfiniteLoop { no, yes, indeterminate } 39 40 /** 41 Struct holding the introspection info on a rule. 42 */ 43 struct RuleInfo 44 { 45 Recursive recursion; /// Is the rule recursive? 46 LeftRecursive leftRecursion; /// Is the rule left-recursive? 47 NullMatch nullMatch; /// Can the rule succeed while consuming nothing? 48 InfiniteLoop infiniteLoop; /// Can the rule loop while indefinitely, while consuming nothing? 49 } 50 51 /** 52 Returns for all grammar rule: 53 54 - the recursion type (no recursion, direct or indirect recursion). 55 - the left-recursion type (no left-recursion, direct left-recursion, hidden, or indirect) 56 - the null-match for a grammar's rules: whether the rule can succeed while consuming nothing. 57 - the possibility of an infinite loop (if 'e' can null-match, then 'e*' can enter an infinite loop). 58 59 This kind of potential problem can be detected statically and should be transmitted to the grammar designer. 60 */ 61 RuleInfo[string] ruleInfo(string grammar) 62 { 63 RuleInfo[string] result; 64 ParseTree[string] rules; 65 66 /** 67 Returns the call graph of a grammar: the list of rules directly called by each rule of the grammar. 68 The graph is represented as a bool[string][string] associative array, the string holding 69 the rules names. graph["ruleName"] contains all rules called by ruleName, as a set (a bool[string] AA). 70 71 graph.keys thus gives the grammar's rules names. 72 73 If a rule calls itself, its own name will appear in the called set. If a rule calls an external rule, it will 74 also appear in the call graph when the rule has a name: hence, calls to predefined rules like 'identifier' or 75 'digit' will appear, but not a call to '[0-9]+', considered here as an anonymous rule. 76 */ 77 bool[string][string] callGraph(ParseTree p) 78 { 79 bool[string] findIdentifiers(ParseTree p) 80 { 81 bool[string] idList; 82 if (p.name == "Pegged.Identifier") 83 idList[p.matches[0]] = true; 84 else 85 foreach(child; p.children) 86 foreach(name; findIdentifiers(child).keys) 87 idList[name] = true; 88 89 return idList; 90 } 91 92 bool[string][string] graph; 93 94 foreach(definition; p.children) 95 if (definition.name == "Pegged.Definition") 96 { 97 auto ids = findIdentifiers(definition.children[2]); 98 graph[definition.matches[0]] = ids; 99 foreach(id, _; ids) // getting possible external calls 100 if (id !in graph) 101 graph[id] = (bool[string]).init; 102 } 103 104 return graph; 105 } 106 107 /** 108 The transitive closure of a call graph. 109 It will propagate the calls to find all rules called by a given rule, 110 directly (already in the call graph) or indirectly (through another rule). 111 */ 112 bool[string][string] closure(bool[string][string] graph) 113 { 114 bool[string][string] path; 115 foreach(rule, children; graph) // deep-dupping, to avoid children aliasing 116 path[rule] = children.dup; 117 118 bool changed = true; 119 120 while(changed) 121 { 122 changed = false; 123 foreach(rule1; graph.keys) 124 foreach(rule2; graph.keys) 125 if (rule2 in path[rule1]) 126 foreach(rule3; graph.keys) 127 if (rule3 in path[rule2] && rule3 !in path[rule1]) 128 { 129 path[rule1][rule3] = true; 130 changed = true; 131 } 132 } 133 134 return path; 135 } 136 137 Recursive[string] recursions(bool[string][string] graph) 138 { 139 bool[string][string] path = closure(graph); 140 141 Recursive[string] result; 142 foreach(rule, children; path) 143 { 144 result[rule] = Recursive.no; 145 if (rule in children) 146 { 147 if (rule in graph[rule]) 148 result[rule] = Recursive.direct; 149 else 150 result[rule] = Recursive.indirect; 151 } 152 } 153 154 return result; 155 } 156 157 NullMatch nullMatching(ParseTree p) 158 { 159 switch (p.name) 160 { 161 case "Pegged.Expression": // choice expressions null-match whenever one of their components can null-match 162 foreach(seq; p.children) 163 { 164 auto nm = nullMatching(seq); 165 if (nm == NullMatch.yes) 166 return NullMatch.yes; 167 if (nm == NullMatch.indeterminate) 168 return NullMatch.indeterminate; 169 } 170 return NullMatch.no; 171 case "Pegged.Sequence": // sequence expressions can null-match when all their components can null-match 172 foreach(seq; p.children) 173 { 174 auto nm = nullMatching(seq); 175 if (nm == NullMatch.indeterminate) 176 return NullMatch.indeterminate; 177 if (nm == NullMatch.no) 178 return NullMatch.no; 179 } 180 return NullMatch.yes; 181 case "Pegged.Prefix": 182 foreach(pref; p.children[0..$-1]) 183 if (pref.name == "Pegged.POS" || pref.name == "Pegged.NEG") 184 return NullMatch.yes; 185 return nullMatching(p.children[$-1]); 186 case "Pegged.Suffix": 187 foreach(pref; p.children[1..$]) 188 if (pref.name == "Pegged.ZEROORMORE" || pref.name == "Pegged.OPTION") 189 return NullMatch.yes; 190 return nullMatching(p.children[0]); 191 case "Pegged.Primary": 192 return nullMatching(p.children[0]); 193 case "Pegged.RhsName": 194 if (p.matches[0] in result) 195 return result[p.matches[0]].nullMatch; 196 else 197 return nullMatching(p.children[0]); 198 case "Pegged.Literal": 199 if (p.matches[0].length == 0) // Empty literal, '' or "" 200 return NullMatch.yes; 201 else 202 return NullMatch.no; 203 case "Pegged.CharClass": 204 return NullMatch.no; 205 case "Pegged.ANY": 206 return NullMatch.no; 207 case "eps": 208 return NullMatch.yes; 209 case "eoi": 210 return NullMatch.yes; 211 default: 212 return NullMatch.indeterminate; 213 } 214 } 215 216 InfiniteLoop infiniteLooping(ParseTree p) 217 { 218 switch (p.name) 219 { 220 case "Pegged.Expression": // choice expressions loop whenever one of their components can loop 221 foreach(seq; p.children) 222 { 223 auto nm = infiniteLooping(seq); 224 if (nm == InfiniteLoop.yes) 225 return InfiniteLoop.yes; 226 if (nm == InfiniteLoop.indeterminate) 227 return InfiniteLoop.indeterminate; 228 } 229 return InfiniteLoop.no; 230 case "Pegged.Sequence": // sequence expressions can loop when one of their components can loop 231 foreach(seq; p.children) 232 { 233 auto nm = infiniteLooping(seq); 234 if (nm == InfiniteLoop.yes) 235 return InfiniteLoop.yes; 236 if (nm == InfiniteLoop.indeterminate) 237 return InfiniteLoop.indeterminate; 238 } 239 return InfiniteLoop.no; 240 case "Pegged.Prefix": 241 return infiniteLooping(p.children[$-1]); 242 case "Pegged.Suffix": 243 foreach(pref; p.children[1..$]) 244 if (( pref.name == "Pegged.ZEROORMORE" || pref.name == "Pegged.ONEORMORE") 245 && p.matches[0] in result 246 && result[p.matches[0]].nullMatch == NullMatch.yes) 247 return InfiniteLoop.yes; 248 return infiniteLooping(p.children[0]); 249 case "Pegged.Primary": 250 return infiniteLooping(p.children[0]); 251 case "Pegged.RhsName": 252 if (p.matches[0] in result) 253 return result[p.matches[0]].infiniteLoop; 254 else 255 return infiniteLooping(p.children[0]); 256 case "Pegged.Literal": 257 return InfiniteLoop.no; 258 case "Pegged.CharClass": 259 return InfiniteLoop.no; 260 case "Pegged.ANY": 261 return InfiniteLoop.no; 262 case "eps": 263 return InfiniteLoop.no; 264 case "eoi": 265 return InfiniteLoop.no; 266 default: 267 return InfiniteLoop.indeterminate; 268 } 269 } 270 271 LeftRecursive leftRecursion(ParseTree p, string target) 272 { 273 switch (p.name) 274 { 275 case "Pegged.Expression": // Choices are left-recursive is any choice is left-recursive 276 foreach(seq; p.children) 277 { 278 auto lr = leftRecursion(seq, target); 279 if (lr != LeftRecursive.no) 280 return lr; 281 } 282 return LeftRecursive.no; 283 case "Pegged.Sequence": // Sequences are left-recursive when the leftmost member is left-recursive 284 // or behind null-matching members 285 foreach(i, seq; p.children) 286 { 287 auto lr = leftRecursion(seq, target); 288 if (lr == LeftRecursive.direct) 289 return (i == 0 ? LeftRecursive.direct : LeftRecursive.hidden); 290 else if (lr == LeftRecursive.hidden || lr == LeftRecursive.indirect) 291 return lr; 292 else if (nullMatching(seq) == NullMatch.yes) 293 continue; 294 else 295 return LeftRecursive.no; 296 } 297 return LeftRecursive.no; // found only null-matching rules! 298 case "Pegged.Prefix": 299 return leftRecursion(p.children[$-1], target); 300 case "Pegged.Suffix": 301 return leftRecursion(p.children[0], target); 302 case "Pegged.Primary": 303 return leftRecursion(p.children[0], target); 304 case "Pegged.RhsName": 305 if (p.matches[0] == target) // ?? Or generateCode(p) ? 306 return LeftRecursive.direct; 307 else if ((p.matches[0] in rules) && (leftRecursion(rules[p.matches[0]], target) != LeftRecursive.no)) 308 return LeftRecursive.hidden; 309 else 310 return LeftRecursive.no; 311 case "Pegged.Literal": 312 return LeftRecursive.no; 313 case "Pegged.CharClass": 314 return LeftRecursive.no; 315 case "Pegged.ANY": 316 return LeftRecursive.no; 317 case "eps": 318 return LeftRecursive.no; 319 case "eoi": 320 return LeftRecursive.no; 321 default: 322 return LeftRecursive.no; 323 } 324 } 325 326 ParseTree p = Pegged(grammar).children[0]; 327 foreach(definition; p.children) 328 if (definition.name == "Pegged.Definition") 329 { 330 rules[definition.matches[0]] = definition.children[2]; 331 result[definition.matches[0]] = RuleInfo(Recursive.no, LeftRecursive.no, 332 NullMatch.indeterminate,InfiniteLoop.indeterminate); 333 } 334 335 auto rec = recursions(callGraph(p)); 336 foreach(rule, recursionType; rec) 337 if (rule in result) // external rules are in rec, but not in result 338 result[rule].recursion = recursionType; 339 340 foreach(name, tree; rules) 341 { 342 if (result[name].recursion != Recursive.no) 343 { 344 result[name].leftRecursion = leftRecursion(tree, name); 345 } 346 } 347 348 bool changed = true; 349 350 while(changed) // while something new happened, the process is not over 351 { 352 changed = false; 353 foreach(name, tree; rules) 354 if (result[name].nullMatch == NullMatch.indeterminate) // not done yet 355 { 356 result [name].nullMatch = nullMatching(tree); // try to find if it's null-matching 357 if (result[name].nullMatch != NullMatch.indeterminate) 358 changed = true; 359 } 360 } 361 362 changed = true; 363 364 while(changed) // while something new happened, the process is not over 365 { 366 changed = false; 367 foreach(name, tree; rules) 368 if (result[name].infiniteLoop == InfiniteLoop.indeterminate) // not done yet 369 { 370 result [name].infiniteLoop = infiniteLooping(tree); // try to find if it's looping 371 if (result[name].infiniteLoop != InfiniteLoop.indeterminate) 372 changed = true; 373 } 374 } 375 376 return result; 377 } 378 379 /** 380 Act on rules parse tree as produced by pegged.parser. 381 Replace every occurence of child in parent by child's parse tree 382 */ 383 ParseTree replaceInto(ParseTree parent, ParseTree child) 384 { 385 if (parent.name == "Pegged.RhsName" && parent.matches[0] == child.matches[0]) 386 return ParseTree("Pegged.Named", true, child.matches[0..1], "",0,0, 387 [child.children[2], 388 ParseTree("Pegged.Identifier", true, child.matches[0..1])]); 389 else 390 foreach(ref branch; parent.children) 391 branch = replaceInto(branch, child); 392 return parent; 393 }