1 /** 2 This module contains function to inspect a Pegged grammar. 3 */ 4 module pegged.introspection; 5 6 import std.algorithm : equal; 7 import std.typecons; 8 9 import pegged.parser; 10 11 /** 12 The different kinds of recursion for a rule. 13 'direct' means the rule name appears in its own definition. 'indirect' means the rule calls itself through another rule (the call chain can be long). 14 */ 15 enum Recursive { no, direct, indirect } 16 17 /** 18 Left-recursion diagnostic for a rule. A rule is left-recursive when its own name appears at the beginning of its definition or behind possibly-null-matching rules (see below for null matches). 19 For example A <- A 'a' is left-recursive, whereas A <- 'a' A is not. *But* A <- 'a'? A is left-recursive, since if the input does not begin 20 with 'a', then the parsing will continue by invoking A again, at the same position. 21 22 'direct' means the rule invokes itself in the first position of its definition (A <- A ...). 'hidden' means the rule names appears after a possibly-null other rule (A <- 'a'? A ...). 'indirect' means the rule calls itself trough another rule. 23 */ 24 enum LeftRecursive { no, direct, hidden, indirect } 25 26 /** 27 NullMatch.yes means a rule can succeed while consuming no input. For example e? or e*, for all expressions e. 28 Nullmatch.no means a rule will always consume at least a token while succeeding. 29 Nullmatch.indeterminate means the algorithm could not converge. 30 */ 31 enum NullMatch { no, yes, indeterminate } 32 33 /** 34 InfiniteLoop.yes means a rule can loop indefinitely while consuming nothing. 35 InfiniteLoop.no means a rule cannot loop indefinitely. 36 InfiniteLoop.indeterminate means the algorithm could not converge. 37 */ 38 enum InfiniteLoop { no, yes, indeterminate } 39 40 /** 41 Struct holding the introspection info on a rule. 42 */ 43 struct RuleInfo 44 { 45 Recursive recursion; /// Is the rule recursive? 46 LeftRecursive leftRecursion; /// Is the rule left-recursive? 47 NullMatch nullMatch; /// Can the rule succeed while consuming nothing? 48 InfiniteLoop infiniteLoop; /// Can the rule loop indefinitely, while consuming nothing? 49 string[] leftRecursiveCycle; /// The path of rules traversed before indirect left-recursion recurses. 50 } 51 52 /** 53 Struct holding the introspection info on a grammar. 54 */ 55 struct GrammarInfo 56 { 57 RuleInfo[string] ruleInfo; 58 immutable(string[])[] leftRecursiveCycles; 59 } 60 61 pure void appendCycleIfUnique(ref immutable(string[])[] cycles, const string[] cycle) 62 { 63 bool exists() 64 { 65 import std.algorithm : countUntil; 66 foreach (c; cycles) 67 { 68 if (c.length != cycle.length) 69 continue; 70 auto pos = countUntil(c, cycle[0]); 71 if (pos < 0) 72 continue; 73 if (equal!equal(c[pos .. $] ~ c[0 .. pos], cycle)) 74 return true; 75 } 76 return false; 77 } 78 if (!exists()) 79 cycles ~= cycle.idup; 80 } 81 82 /** 83 Returns for all grammar rules: 84 85 - the recursion type (no recursion, direct or indirect recursion). 86 - the left-recursion type (no left-recursion, direct left-recursion, hidden, or indirect) 87 - the null-match for a grammar's rules: whether the rule can succeed while consuming nothing. 88 - the possibility of an infinite loop (if 'e' can null-match, then 'e*' can enter an infinite loop). 89 90 This kind of potential problem can be detected statically and should be transmitted to the grammar designer. 91 */ 92 pure GrammarInfo grammarInfo(ParseTree p) 93 { 94 if (p.name == "Pegged") 95 return grammarInfo(p.children[0]); 96 assert(p.name == "Pegged.Grammar"); 97 98 GrammarInfo result; 99 ParseTree[string] rules; 100 101 /** 102 Returns the call graph of a grammar: the list of rules directly called by each rule of the grammar. 103 The graph is represented as a bool[string][string] associative array, the string holding 104 the rules names. graph["ruleName"] contains all rules called by ruleName, as a set (a bool[string] AA). 105 106 graph.keys thus gives the grammar's rules names. 107 108 If a rule calls itself, its own name will appear in the called set. If a rule calls an external rule, it will 109 also appear in the call graph when the rule has a name: hence, calls to predefined rules like 'identifier' or 110 'digit' will appear, but not a call to '[0-9]+', considered here as an anonymous rule. 111 */ 112 bool[string][string] callGraph(ParseTree p) 113 { 114 bool[string] findIdentifiers(ParseTree p) 115 { 116 bool[string] idList; 117 if (p.name == "Pegged.Identifier") 118 idList[p.matches[0]] = true; 119 else 120 foreach(child; p.children) 121 foreach(name; findIdentifiers(child).keys) 122 idList[name] = true; 123 124 return idList; 125 } 126 127 bool[string][string] graph; 128 129 foreach(definition; p.children) 130 if (definition.name == "Pegged.Definition") 131 { 132 auto ids = findIdentifiers(definition.children[2]); 133 graph[definition.matches[0]] = ids; 134 foreach(id, _; ids) // getting possible external calls 135 if (id !in graph) 136 graph[id] = (bool[string]).init; 137 } 138 139 return graph; 140 } 141 142 /** 143 The transitive closure of a call graph. 144 It will propagate the calls to find all rules called by a given rule, 145 directly (already in the call graph) or indirectly (through another rule). 146 */ 147 bool[string][string] closure(bool[string][string] graph) 148 { 149 bool[string][string] path; 150 foreach(rule, children; graph) // deep-dupping, to avoid children aliasing 151 path[rule] = children.dup; 152 153 bool changed = true; 154 155 while(changed) 156 { 157 changed = false; 158 foreach(rule1; graph.keys) 159 foreach(rule2; graph.keys) 160 if (rule2 in path[rule1]) 161 foreach(rule3; graph.keys) 162 if (rule3 in path[rule2] && rule3 !in path[rule1]) 163 { 164 path[rule1][rule3] = true; 165 changed = true; 166 } 167 } 168 169 return path; 170 } 171 172 Recursive[string] recursions(bool[string][string] graph) 173 { 174 bool[string][string] path = closure(graph); 175 176 Recursive[string] result; 177 foreach(rule, children; path) 178 { 179 result[rule] = Recursive.no; 180 if (rule in children) 181 { 182 if (rule in graph[rule]) 183 result[rule] = Recursive.direct; 184 else 185 result[rule] = Recursive.indirect; 186 } 187 } 188 189 return result; 190 } 191 192 NullMatch nullMatching(ParseTree p) 193 { 194 switch (p.name) 195 { 196 case "Pegged.Expression": 197 return nullMatching(p.children[0]); 198 case "Pegged.FirstExpression", 199 "Pegged.LongestExpression": // choice expressions null-match whenever one of their components can null-match 200 foreach(seq; p.children) 201 if (nullMatching(seq) == NullMatch.yes) 202 return NullMatch.yes; 203 foreach(seq; p.children) 204 if (nullMatching(seq) == NullMatch.indeterminate) 205 return NullMatch.indeterminate; 206 return NullMatch.no; 207 case "Pegged.Sequence": // sequence expressions can null-match when all their components can null-match 208 foreach(pref; p.children) 209 if (nullMatching(pref) == NullMatch.no) 210 return NullMatch.no; 211 foreach(pref; p.children) 212 if (nullMatching(pref) == NullMatch.indeterminate) 213 return NullMatch.indeterminate; 214 return NullMatch.yes; 215 case "Pegged.Prefix": 216 foreach(pref; p.children[0..$-1]) 217 if (pref.name == "Pegged.POS" || pref.name == "Pegged.NEG") 218 return NullMatch.yes; 219 return nullMatching(p.children[$-1]); 220 case "Pegged.Suffix": 221 foreach(suff; p.children[1..$]) 222 if (suff.name == "Pegged.ZEROORMORE" || suff.name == "Pegged.OPTION") 223 return NullMatch.yes; 224 return nullMatching(p.children[0]); 225 case "Pegged.Primary": 226 return nullMatching(p.children[0]); 227 case "Pegged.RhsName": 228 if (p.matches[0] in result.ruleInfo) 229 if (result.ruleInfo[p.matches[0]].nullMatch != NullMatch.indeterminate) 230 return result.ruleInfo[p.matches[0]].nullMatch; 231 return nullMatching(p.children[0]); 232 case "Pegged.Identifier": 233 if (p.matches[0] == "eps" || 234 p.matches[0] == "eoi") 235 return NullMatch.yes; 236 return NullMatch.indeterminate; 237 case "Pegged.Literal": 238 if (p.matches[0].length == 0) // Empty literal, '' or "" 239 return NullMatch.yes; 240 else 241 return NullMatch.no; 242 case "Pegged.CharClass": 243 case "Pegged.ANY": 244 return NullMatch.no; 245 case "eps": 246 case "eoi": 247 return NullMatch.yes; 248 default: 249 return NullMatch.indeterminate; 250 } 251 } 252 253 InfiniteLoop infiniteLooping(ParseTree p) 254 { 255 switch (p.name) 256 { 257 case "Pegged.Expression": // choice expressions loop whenever one of their components can loop 258 case "Pegged.Sequence": // sequence expressions can loop when one of their components can loop 259 foreach(seq; p.children) 260 { 261 auto nm = infiniteLooping(seq); 262 if (nm == InfiniteLoop.yes) 263 return InfiniteLoop.yes; 264 if (nm == InfiniteLoop.indeterminate) 265 return InfiniteLoop.indeterminate; 266 } 267 return InfiniteLoop.no; 268 case "Pegged.Prefix": 269 return infiniteLooping(p.children[$-1]); 270 case "Pegged.Suffix": 271 foreach(pref; p.children[1..$]) 272 if (( pref.name == "Pegged.ZEROORMORE" || pref.name == "Pegged.ONEORMORE") 273 && p.matches[0] in result.ruleInfo 274 && result.ruleInfo[p.matches[0]].nullMatch == NullMatch.yes) 275 return InfiniteLoop.yes; 276 return infiniteLooping(p.children[0]); 277 case "Pegged.Primary": 278 return infiniteLooping(p.children[0]); 279 case "Pegged.RhsName": 280 if (p.matches[0] in result.ruleInfo) 281 return result.ruleInfo[p.matches[0]].infiniteLoop; 282 else 283 return infiniteLooping(p.children[0]); 284 case "Pegged.Literal": 285 case "Pegged.CharClass": 286 case "Pegged.ANY": 287 case "eps": 288 case "eoi": 289 return InfiniteLoop.no; 290 default: 291 return InfiniteLoop.indeterminate; 292 } 293 } 294 295 LeftRecursive leftRecursion(ParseTree p, ref string[] cycle) 296 { 297 import std.algorithm.searching: countUntil; 298 switch (p.name) 299 { 300 case "Pegged.Expression": 301 return leftRecursion(p.children[0], cycle); 302 case "Pegged.FirstExpression", 303 "Pegged.LongestExpression": // Choices are left-recursive if any choice is left-recursive 304 // Because memoized left-recursion handling needs to know about all left-recursive cycles, 305 // we consider all choices, not just one. 306 auto any_lr = LeftRecursive.no; 307 size_t current = cycle.length; 308 foreach(seq; p.children) 309 { 310 cycle = cycle[0..current]; 311 auto lr = leftRecursion(seq, cycle); 312 if (lr != LeftRecursive.no && any_lr == LeftRecursive.no) 313 any_lr = lr; 314 } 315 cycle = cycle[0..current]; 316 return any_lr; 317 case "Pegged.Sequence": // Sequences are left-recursive when the leftmost member is left-recursive 318 // or behind null-matching members 319 size_t current = cycle.length; 320 foreach(i, seq; p.children) 321 { 322 auto lr = leftRecursion(seq, cycle); 323 if (lr == LeftRecursive.direct) 324 return (i == 0 ? LeftRecursive.direct : LeftRecursive.hidden); 325 if (lr == LeftRecursive.hidden || lr == LeftRecursive.indirect) 326 return lr; 327 if (nullMatching(seq) == NullMatch.yes) 328 continue; 329 else 330 { 331 cycle = cycle[0..current]; 332 return LeftRecursive.no; 333 } 334 } 335 cycle = cycle[0..current]; 336 return LeftRecursive.no; // found only null-matching rules! 337 case "Pegged.Prefix": 338 return leftRecursion(p.children[$-1], cycle); 339 case "Pegged.Suffix": 340 case "Pegged.Primary": 341 return leftRecursion(p.children[0], cycle); 342 case "Pegged.RhsName": 343 auto dejavu = cycle.countUntil(p.matches[0]); 344 if (dejavu >= 0) 345 { 346 if (cycle.length == 1) 347 { 348 result.leftRecursiveCycles.appendCycleIfUnique(cycle); 349 return LeftRecursive.direct; 350 } 351 // Strictly spoken, the rules before dejavu are not actively left-recursive, but since they 352 // can call into a left-recursive cycle, they would still left-recurse if left-recursion 353 // wasn't handled. 354 result.leftRecursiveCycles.appendCycleIfUnique(cycle[dejavu..$]); 355 return LeftRecursive.indirect; 356 } 357 size_t current = cycle.length; 358 cycle ~= p.matches[0]; 359 if ((p.matches[0] in rules) && (leftRecursion(rules[p.matches[0]], cycle) != LeftRecursive.no)) 360 return LeftRecursive.indirect; 361 cycle = cycle[0..current]; 362 return LeftRecursive.no; 363 default: 364 return LeftRecursive.no; 365 } 366 } 367 368 // Initialize rules and result. 369 foreach(definition; p.children) 370 if (definition.name == "Pegged.Definition") 371 { 372 rules[definition.matches[0]] = definition.children[2]; 373 result.ruleInfo[definition.matches[0]] = RuleInfo(Recursive.no, LeftRecursive.no, 374 NullMatch.indeterminate, InfiniteLoop.indeterminate); 375 } 376 377 // Detect recursions. 378 foreach(rule, recursionType; recursions(callGraph(p))) 379 if (rule in result.ruleInfo) // external rules are in recursions, but not in result 380 result.ruleInfo[rule].recursion = recursionType; 381 382 // Detect left-recursions. 383 foreach(name, tree; rules) 384 if (result.ruleInfo[name].recursion != Recursive.no) 385 { 386 result.ruleInfo[name].leftRecursiveCycle ~= name; 387 result.ruleInfo[name].leftRecursion = leftRecursion(tree, result.ruleInfo[name].leftRecursiveCycle); 388 } 389 390 // Detect null matches. 391 bool changed = true; 392 while(changed) // while something new happened, the process is not over 393 { 394 changed = false; 395 foreach(name, tree; rules) 396 if (result.ruleInfo[name].nullMatch == NullMatch.indeterminate) // not done yet 397 { 398 result.ruleInfo[name].nullMatch = nullMatching(tree); // try to find if it's null-matching 399 if (result.ruleInfo[name].nullMatch != NullMatch.indeterminate) 400 changed = true; 401 } 402 } 403 404 // Detect infinite loops. 405 changed = true; 406 while(changed) // while something new happened, the process is not over 407 { 408 changed = false; 409 foreach(name, tree; rules) 410 if (result.ruleInfo[name].infiniteLoop == InfiniteLoop.indeterminate) // not done yet 411 { 412 result.ruleInfo[name].infiniteLoop = infiniteLooping(tree); // try to find if it's looping 413 if (result.ruleInfo[name].infiniteLoop != InfiniteLoop.indeterminate) 414 changed = true; 415 } 416 } 417 418 return result; 419 } 420 421 /** 422 Returns for all grammar rules: 423 424 - the recursion type (no recursion, direct or indirect recursion). 425 - the left-recursion type (no left-recursion, direct left-recursion, hidden, or indirect) 426 - the null-match for a grammar's rules: whether the rule can succeed while consuming nothing. 427 - the possibility of an infinite loop (if 'e' can null-match, then 'e*' can enter an infinite loop). 428 429 This kind of potential problem can be detected statically and should be transmitted to the grammar designer. 430 */ 431 pure RuleInfo[string] ruleInfo(ParseTree p) 432 { 433 return grammarInfo(p).ruleInfo; 434 } 435 436 /** ditto */ 437 RuleInfo[string] ruleInfo(string grammar) 438 { 439 return ruleInfo(Pegged(grammar).children[0]); 440 } 441 442 unittest 443 { 444 auto info = ruleInfo(` 445 Test: 446 A <- A 'a' 447 `); 448 assert(info["A"].leftRecursion == LeftRecursive.direct); 449 450 info = ruleInfo(` 451 Test: 452 A <- B? A 'a' 453 B <- 'b' 454 `); 455 assert(info["A"].leftRecursion == LeftRecursive.hidden); 456 457 info = ruleInfo(` 458 Test: 459 A <- B 'a' 460 B <- A 461 `); 462 assert(info["A"].leftRecursion == LeftRecursive.indirect); 463 } 464 465 // Test against infinite recursion in detection of indirect left-recursion. 466 unittest 467 { 468 auto info = ruleInfo(` 469 Test: 470 A <- B / C 'a' 471 B <- A 472 C <- A 473 `); 474 assert(info["A"].leftRecursion == LeftRecursive.indirect); 475 } 476 477 // Test against compile-time infinite recursion. 478 unittest // Mutual left-recursion 479 { 480 enum ct = ruleInfo(` 481 Left: 482 A <- L 483 L <- P 484 P <- P / L 485 `); 486 static assert(ct["A"].leftRecursion == LeftRecursive.no); 487 static assert(ct["L"].leftRecursion == LeftRecursive.indirect); 488 static assert(ct["P"].leftRecursion == LeftRecursive.direct); 489 490 auto rt = ruleInfo(` 491 Left: 492 A <- L 493 L <- P 494 P <- P / L 495 `); 496 assert(rt["A"].leftRecursion == LeftRecursive.no); 497 assert(rt["L"].leftRecursion == LeftRecursive.indirect); 498 assert(rt["P"].leftRecursion == LeftRecursive.direct); 499 } 500 501 unittest // Intersecting cycles of left-recursion 502 { 503 enum ct = ruleInfo(` 504 Left: 505 C <- A 506 A <- B* C 507 B <- A 508 `); 509 static assert(ct["C"].leftRecursion == LeftRecursive.indirect); 510 static assert(ct["A"].leftRecursion == LeftRecursive.indirect); 511 static assert(ct["B"].leftRecursion == LeftRecursive.indirect); 512 auto rt = ruleInfo(` 513 Left: 514 C <- A 515 A <- B* C 516 B <- A 517 `); 518 assert(rt["C"].leftRecursion == LeftRecursive.indirect); 519 assert(rt["A"].leftRecursion == LeftRecursive.indirect); 520 assert(rt["B"].leftRecursion == LeftRecursive.indirect); 521 } 522 523 unittest // Null-matching 524 { 525 enum ct = ruleInfo(` 526 NM: 527 NMM <- NML eoi 528 NML <- 'x'? 529 `); 530 static assert(ct["NML"].nullMatch == NullMatch.yes); 531 static assert(ct["NMM"].nullMatch == NullMatch.yes); 532 auto rt = ruleInfo(` 533 NM: 534 NMM <- NML eoi 535 NML <- 'x'? 536 `); 537 assert(rt["NML"].nullMatch == NullMatch.yes); 538 assert(rt["NMM"].nullMatch == NullMatch.yes); 539 } 540 541 unittest // Not null-matching 542 { 543 enum ct = ruleInfo(` 544 Left: 545 M <- L eoi 546 L <- P '.x' / 'x' 547 P <- P '(n)' / L 548 `); 549 static assert(ct["M"].nullMatch == NullMatch.no); 550 static assert(ct["L"].nullMatch == NullMatch.no); 551 static assert(ct["P"].nullMatch == NullMatch.no); 552 auto rt = ruleInfo(` 553 Left: 554 M <- L eoi 555 L <- P '.x' / 'x' 556 P <- P '(n)' / L 557 `); 558 assert(rt["M"].nullMatch == NullMatch.no); 559 assert(rt["L"].nullMatch == NullMatch.no); 560 assert(rt["P"].nullMatch == NullMatch.no); 561 } 562 563 unittest // Left-recursive null-matching 564 { 565 enum ct = ruleInfo(` 566 Left: 567 M <- L eoi 568 L <- P? '.x'? / 'x' 569 P <- P '(n)' / L 570 `); 571 static assert(ct["M"].nullMatch == NullMatch.yes); 572 static assert(ct["L"].nullMatch == NullMatch.yes); 573 static assert(ct["P"].nullMatch == NullMatch.yes); 574 auto rt = ruleInfo(` 575 Left: 576 M <- L eoi 577 L <- P? '.x'? / 'x' 578 P <- P '(n)' / L 579 `); 580 assert(rt["M"].nullMatch == NullMatch.yes); 581 assert(rt["L"].nullMatch == NullMatch.yes); 582 assert(rt["P"].nullMatch == NullMatch.yes); 583 } 584 585 586 /** 587 Act on rules parse tree as produced by pegged.parser. 588 Replace every occurence of child in parent by child's parse tree 589 */ 590 ParseTree replaceInto(ParseTree parent, ParseTree child) 591 { 592 if (parent.name == "Pegged.RhsName" && parent.matches[0] == child.matches[0]) 593 return ParseTree("Pegged.Named", true, child.matches[0..1], "",0,0, 594 [child.children[2], 595 ParseTree("Pegged.Identifier", true, child.matches[0..1])]); 596 else 597 foreach(ref branch; parent.children) 598 branch = replaceInto(branch, child); 599 return parent; 600 }