1 /** 2 This module contains function to inspect a Pegged grammar. 3 */ 4 module pegged.introspection; 5 6 import std.algorithm : equal; 7 import std.typecons; 8 9 import pegged.parser; 10 11 @safe: 12 13 /** 14 The different kinds of recursion for a rule. 15 'direct' means the rule name appears in its own definition. 'indirect' means the rule calls itself through another rule (the call chain can be long). 16 */ 17 enum Recursive { no, direct, indirect } 18 19 /** 20 Left-recursion diagnostic for a rule. A rule is left-recursive when its own name appears at the beginning of its definition or behind possibly-null-matching rules (see below for null matches). 21 For example A <- A 'a' is left-recursive, whereas A <- 'a' A is not. *But* A <- 'a'? A is left-recursive, since if the input does not begin 22 with 'a', then the parsing will continue by invoking A again, at the same position. 23 24 'direct' means the rule invokes itself in the first position of its definition (A <- A ...). 'hidden' means the rule names appears after a possibly-null other rule (A <- 'a'? A ...). 'indirect' means the rule calls itself trough another rule. 25 */ 26 enum LeftRecursive { no, direct, hidden, indirect } 27 28 /** 29 NullMatch.yes means a rule can succeed while consuming no input. For example e? or e*, for all expressions e. 30 Nullmatch.no means a rule will always consume at least a token while succeeding. 31 Nullmatch.indeterminate means the algorithm could not converge. 32 */ 33 enum NullMatch { no, yes, indeterminate } 34 35 /** 36 InfiniteLoop.yes means a rule can loop indefinitely while consuming nothing. 37 InfiniteLoop.no means a rule cannot loop indefinitely. 38 InfiniteLoop.indeterminate means the algorithm could not converge. 39 */ 40 enum InfiniteLoop { no, yes, indeterminate } 41 42 /** 43 Struct holding the introspection info on a rule. 44 */ 45 struct RuleInfo 46 { 47 Recursive recursion; /// Is the rule recursive? 48 LeftRecursive leftRecursion; /// Is the rule left-recursive? 49 NullMatch nullMatch; /// Can the rule succeed while consuming nothing? 50 InfiniteLoop infiniteLoop; /// Can the rule loop indefinitely, while consuming nothing? 51 string[] leftRecursiveCycle; /// The path of rules traversed before indirect left-recursion recurses. 52 } 53 54 /** 55 Struct holding the introspection info on a grammar. 56 */ 57 struct GrammarInfo 58 { 59 RuleInfo[string] ruleInfo; 60 immutable(string[])[] leftRecursiveCycles; 61 } 62 63 pure void appendCycleIfUnique(ref immutable(string[])[] cycles, const string[] cycle) 64 { 65 bool exists() 66 { 67 import std.algorithm : countUntil; 68 foreach (c; cycles) 69 { 70 if (c.length != cycle.length) 71 continue; 72 auto pos = countUntil(c, cycle[0]); 73 if (pos < 0) 74 continue; 75 if (equal!equal(c[pos .. $] ~ c[0 .. pos], cycle)) 76 return true; 77 } 78 return false; 79 } 80 if (!exists()) 81 cycles ~= cycle.idup; 82 } 83 84 /** 85 Returns for all grammar rules: 86 87 - the recursion type (no recursion, direct or indirect recursion). 88 - the left-recursion type (no left-recursion, direct left-recursion, hidden, or indirect) 89 - the null-match for a grammar's rules: whether the rule can succeed while consuming nothing. 90 - the possibility of an infinite loop (if 'e' can null-match, then 'e*' can enter an infinite loop). 91 92 This kind of potential problem can be detected statically and should be transmitted to the grammar designer. 93 */ 94 pure GrammarInfo grammarInfo(ParseTree p) 95 { 96 if (p.name == "Pegged") 97 return grammarInfo(p.children[0]); 98 assert(p.name == "Pegged.Grammar"); 99 100 GrammarInfo result; 101 ParseTree[string] rules; 102 103 /** 104 Returns the call graph of a grammar: the list of rules directly called by each rule of the grammar. 105 The graph is represented as a bool[string][string] associative array, the string holding 106 the rules names. graph["ruleName"] contains all rules called by ruleName, as a set (a bool[string] AA). 107 108 graph.keys thus gives the grammar's rules names. 109 110 If a rule calls itself, its own name will appear in the called set. If a rule calls an external rule, it will 111 also appear in the call graph when the rule has a name: hence, calls to predefined rules like 'identifier' or 112 'digit' will appear, but not a call to '[0-9]+', considered here as an anonymous rule. 113 */ 114 bool[string][string] callGraph(ParseTree p) @safe 115 { 116 bool[string] findIdentifiers(ParseTree p) @safe 117 { 118 bool[string] idList; 119 if (p.name == "Pegged.Identifier") 120 idList[p.matches[0]] = true; 121 else 122 foreach(child; p.children) 123 { 124 auto ids = findIdentifiers(child); 125 static if (hasSystemAAKeys) 126 auto keys = () @trusted { return ids.keys; } (); 127 else 128 auto keys = ids.keys; 129 foreach(name; keys) 130 idList[name] = true; 131 } 132 133 return idList; 134 } 135 136 bool[string][string] graph; 137 138 foreach(definition; p.children) 139 if (definition.name == "Pegged.Definition") 140 { 141 auto ids = findIdentifiers(definition.children[2]); 142 graph[definition.matches[0]] = ids; 143 foreach(id, _; ids) // getting possible external calls 144 if (id !in graph) 145 graph[id] = (bool[string]).init; 146 } 147 148 return graph; 149 } 150 151 /** 152 The transitive closure of a call graph. 153 It will propagate the calls to find all rules called by a given rule, 154 directly (already in the call graph) or indirectly (through another rule). 155 */ 156 bool[string][string] closure(bool[string][string] graph) @safe 157 { 158 bool[string][string] path; 159 foreach(rule, children; graph) // deep-dupping, to avoid children aliasing 160 path[rule] = children.dup; 161 162 bool changed = true; 163 164 while(changed) 165 { 166 changed = false; 167 static if (hasSystemAAKeys) 168 auto keys = () @trusted { return graph.keys; } (); 169 else 170 auto keys = graph.keys; 171 foreach(rule1; keys) 172 foreach(rule2; keys) 173 if (rule2 in path[rule1]) 174 foreach(rule3; keys) 175 if (rule3 in path[rule2] && rule3 !in path[rule1]) 176 { 177 path[rule1][rule3] = true; 178 changed = true; 179 } 180 } 181 182 return path; 183 } 184 185 Recursive[string] recursions(bool[string][string] graph) @safe 186 { 187 bool[string][string] path = closure(graph); 188 189 Recursive[string] result; 190 foreach(rule, children; path) 191 { 192 result[rule] = Recursive.no; 193 if (rule in children) 194 { 195 if (rule in graph[rule]) 196 result[rule] = Recursive.direct; 197 else 198 result[rule] = Recursive.indirect; 199 } 200 } 201 202 return result; 203 } 204 205 NullMatch nullMatching(ParseTree p) @safe 206 { 207 switch (p.name) 208 { 209 case "Pegged.Expression": 210 return nullMatching(p.children[0]); 211 case "Pegged.FirstExpression", 212 "Pegged.LongestExpression": // choice expressions null-match whenever one of their components can null-match 213 foreach(seq; p.children) 214 if (nullMatching(seq) == NullMatch.yes) 215 return NullMatch.yes; 216 foreach(seq; p.children) 217 if (nullMatching(seq) == NullMatch.indeterminate) 218 return NullMatch.indeterminate; 219 return NullMatch.no; 220 case "Pegged.Sequence": // sequence expressions can null-match when all their components can null-match 221 foreach(pref; p.children) 222 if (nullMatching(pref) == NullMatch.no) 223 return NullMatch.no; 224 foreach(pref; p.children) 225 if (nullMatching(pref) == NullMatch.indeterminate) 226 return NullMatch.indeterminate; 227 return NullMatch.yes; 228 case "Pegged.Prefix": 229 foreach(pref; p.children[0..$-1]) 230 if (pref.name == "Pegged.POS" || pref.name == "Pegged.NEG") 231 return NullMatch.yes; 232 return nullMatching(p.children[$-1]); 233 case "Pegged.Suffix": 234 foreach(suff; p.children[1..$]) 235 if (suff.name == "Pegged.ZEROORMORE" || suff.name == "Pegged.OPTION") 236 return NullMatch.yes; 237 return nullMatching(p.children[0]); 238 case "Pegged.Primary": 239 return nullMatching(p.children[0]); 240 case "Pegged.RhsName": 241 if (p.matches[0] in result.ruleInfo) 242 if (result.ruleInfo[p.matches[0]].nullMatch != NullMatch.indeterminate) 243 return result.ruleInfo[p.matches[0]].nullMatch; 244 return nullMatching(p.children[0]); 245 case "Pegged.Identifier": 246 if (p.matches[0] == "eps" || 247 p.matches[0] == "eoi") 248 return NullMatch.yes; 249 return NullMatch.indeterminate; 250 case "Pegged.Literal": 251 if (p.matches[0].length == 0) // Empty literal, '' or "" 252 return NullMatch.yes; 253 else 254 return NullMatch.no; 255 case "Pegged.CharClass": 256 case "Pegged.ANY": 257 return NullMatch.no; 258 case "eps": 259 case "eoi": 260 return NullMatch.yes; 261 default: 262 return NullMatch.indeterminate; 263 } 264 } 265 266 InfiniteLoop infiniteLooping(ParseTree p) @safe 267 { 268 switch (p.name) 269 { 270 case "Pegged.Expression": // choice expressions loop whenever one of their components can loop 271 case "Pegged.Sequence": // sequence expressions can loop when one of their components can loop 272 foreach(seq; p.children) 273 { 274 auto nm = infiniteLooping(seq); 275 if (nm == InfiniteLoop.yes) 276 return InfiniteLoop.yes; 277 if (nm == InfiniteLoop.indeterminate) 278 return InfiniteLoop.indeterminate; 279 } 280 return InfiniteLoop.no; 281 case "Pegged.Prefix": 282 return infiniteLooping(p.children[$-1]); 283 case "Pegged.Suffix": 284 foreach(pref; p.children[1..$]) 285 if (( pref.name == "Pegged.ZEROORMORE" || pref.name == "Pegged.ONEORMORE") 286 && p.matches[0] in result.ruleInfo 287 && result.ruleInfo[p.matches[0]].nullMatch == NullMatch.yes) 288 return InfiniteLoop.yes; 289 return infiniteLooping(p.children[0]); 290 case "Pegged.Primary": 291 return infiniteLooping(p.children[0]); 292 case "Pegged.RhsName": 293 if (p.matches[0] in result.ruleInfo) 294 return result.ruleInfo[p.matches[0]].infiniteLoop; 295 else 296 return infiniteLooping(p.children[0]); 297 case "Pegged.Literal": 298 case "Pegged.CharClass": 299 case "Pegged.ANY": 300 case "eps": 301 case "eoi": 302 return InfiniteLoop.no; 303 default: 304 return InfiniteLoop.indeterminate; 305 } 306 } 307 308 LeftRecursive leftRecursion(ParseTree p, ref string[] cycle) @safe 309 { 310 import std.algorithm.searching: countUntil; 311 switch (p.name) 312 { 313 case "Pegged.Expression": 314 return leftRecursion(p.children[0], cycle); 315 case "Pegged.FirstExpression", 316 "Pegged.LongestExpression": // Choices are left-recursive if any choice is left-recursive 317 // Because memoized left-recursion handling needs to know about all left-recursive cycles, 318 // we consider all choices, not just one. 319 auto any_lr = LeftRecursive.no; 320 size_t current = cycle.length; 321 foreach(seq; p.children) 322 { 323 cycle = cycle[0..current]; 324 auto lr = leftRecursion(seq, cycle); 325 if (lr != LeftRecursive.no && any_lr == LeftRecursive.no) 326 any_lr = lr; 327 } 328 cycle = cycle[0..current]; 329 return any_lr; 330 case "Pegged.Sequence": // Sequences are left-recursive when the leftmost member is left-recursive 331 // or behind null-matching members 332 size_t current = cycle.length; 333 foreach(i, seq; p.children) 334 { 335 auto lr = leftRecursion(seq, cycle); 336 if (lr == LeftRecursive.direct) 337 return (i == 0 ? LeftRecursive.direct : LeftRecursive.hidden); 338 if (lr == LeftRecursive.hidden || lr == LeftRecursive.indirect) 339 return lr; 340 if (nullMatching(seq) == NullMatch.yes) 341 continue; 342 else 343 { 344 cycle = cycle[0..current]; 345 return LeftRecursive.no; 346 } 347 } 348 cycle = cycle[0..current]; 349 return LeftRecursive.no; // found only null-matching rules! 350 case "Pegged.Prefix": 351 return leftRecursion(p.children[$-1], cycle); 352 case "Pegged.Suffix": 353 case "Pegged.Primary": 354 return leftRecursion(p.children[0], cycle); 355 case "Pegged.RhsName": 356 auto dejavu = cycle.countUntil(p.matches[0]); 357 if (dejavu >= 0) 358 { 359 if (cycle.length == 1) 360 { 361 result.leftRecursiveCycles.appendCycleIfUnique(cycle); 362 return LeftRecursive.direct; 363 } 364 // Strictly spoken, the rules before dejavu are not actively left-recursive, but since they 365 // can call into a left-recursive cycle, they would still left-recurse if left-recursion 366 // wasn't handled. 367 result.leftRecursiveCycles.appendCycleIfUnique(cycle[dejavu..$]); 368 return LeftRecursive.indirect; 369 } 370 size_t current = cycle.length; 371 cycle ~= p.matches[0]; 372 if ((p.matches[0] in rules) && (leftRecursion(rules[p.matches[0]], cycle) != LeftRecursive.no)) 373 return LeftRecursive.indirect; 374 cycle = cycle[0..current]; 375 return LeftRecursive.no; 376 default: 377 return LeftRecursive.no; 378 } 379 } 380 381 // Initialize rules and result. 382 foreach(definition; p.children) 383 if (definition.name == "Pegged.Definition") 384 { 385 rules[definition.matches[0]] = definition.children[2]; 386 result.ruleInfo[definition.matches[0]] = RuleInfo(Recursive.no, LeftRecursive.no, 387 NullMatch.indeterminate, InfiniteLoop.indeterminate); 388 } 389 390 // Detect recursions. 391 foreach(rule, recursionType; recursions(callGraph(p))) 392 if (rule in result.ruleInfo) // external rules are in recursions, but not in result 393 result.ruleInfo[rule].recursion = recursionType; 394 395 // Detect left-recursions. 396 foreach(name, tree; rules) 397 if (result.ruleInfo[name].recursion != Recursive.no) 398 { 399 result.ruleInfo[name].leftRecursiveCycle ~= name; 400 result.ruleInfo[name].leftRecursion = leftRecursion(tree, result.ruleInfo[name].leftRecursiveCycle); 401 } 402 403 // Detect null matches. 404 bool changed = true; 405 while(changed) // while something new happened, the process is not over 406 { 407 changed = false; 408 foreach(name, tree; rules) 409 if (result.ruleInfo[name].nullMatch == NullMatch.indeterminate) // not done yet 410 { 411 result.ruleInfo[name].nullMatch = nullMatching(tree); // try to find if it's null-matching 412 if (result.ruleInfo[name].nullMatch != NullMatch.indeterminate) 413 changed = true; 414 } 415 } 416 417 // Detect infinite loops. 418 changed = true; 419 while(changed) // while something new happened, the process is not over 420 { 421 changed = false; 422 foreach(name, tree; rules) 423 if (result.ruleInfo[name].infiniteLoop == InfiniteLoop.indeterminate) // not done yet 424 { 425 result.ruleInfo[name].infiniteLoop = infiniteLooping(tree); // try to find if it's looping 426 if (result.ruleInfo[name].infiniteLoop != InfiniteLoop.indeterminate) 427 changed = true; 428 } 429 } 430 431 return result; 432 } 433 434 /** 435 Returns for all grammar rules: 436 437 - the recursion type (no recursion, direct or indirect recursion). 438 - the left-recursion type (no left-recursion, direct left-recursion, hidden, or indirect) 439 - the null-match for a grammar's rules: whether the rule can succeed while consuming nothing. 440 - the possibility of an infinite loop (if 'e' can null-match, then 'e*' can enter an infinite loop). 441 442 This kind of potential problem can be detected statically and should be transmitted to the grammar designer. 443 */ 444 pure RuleInfo[string] ruleInfo(ParseTree p) 445 { 446 return grammarInfo(p).ruleInfo; 447 } 448 449 /** ditto */ 450 RuleInfo[string] ruleInfo(string grammar) 451 { 452 return ruleInfo(Pegged(grammar).children[0]); 453 } 454 455 unittest 456 { 457 auto info = ruleInfo(` 458 Test: 459 A <- A 'a' 460 `); 461 assert(info["A"].leftRecursion == LeftRecursive.direct); 462 463 info = ruleInfo(` 464 Test: 465 A <- B? A 'a' 466 B <- 'b' 467 `); 468 assert(info["A"].leftRecursion == LeftRecursive.hidden); 469 470 info = ruleInfo(` 471 Test: 472 A <- B 'a' 473 B <- A 474 `); 475 assert(info["A"].leftRecursion == LeftRecursive.indirect); 476 } 477 478 // Test against infinite recursion in detection of indirect left-recursion. 479 unittest 480 { 481 auto info = ruleInfo(` 482 Test: 483 A <- B / C 'a' 484 B <- A 485 C <- A 486 `); 487 assert(info["A"].leftRecursion == LeftRecursive.indirect); 488 } 489 490 // Test against compile-time infinite recursion. 491 unittest // Mutual left-recursion 492 { 493 enum ct = ruleInfo(` 494 Left: 495 A <- L 496 L <- P 497 P <- P / L 498 `); 499 static assert(ct["A"].leftRecursion == LeftRecursive.no); 500 static assert(ct["L"].leftRecursion == LeftRecursive.indirect); 501 static assert(ct["P"].leftRecursion == LeftRecursive.direct); 502 503 auto rt = ruleInfo(` 504 Left: 505 A <- L 506 L <- P 507 P <- P / L 508 `); 509 assert(rt["A"].leftRecursion == LeftRecursive.no); 510 assert(rt["L"].leftRecursion == LeftRecursive.indirect); 511 assert(rt["P"].leftRecursion == LeftRecursive.direct); 512 } 513 514 unittest // Intersecting cycles of left-recursion 515 { 516 enum ct = ruleInfo(` 517 Left: 518 C <- A 519 A <- B* C 520 B <- A 521 `); 522 static assert(ct["C"].leftRecursion == LeftRecursive.indirect); 523 static assert(ct["A"].leftRecursion == LeftRecursive.indirect); 524 static assert(ct["B"].leftRecursion == LeftRecursive.indirect); 525 auto rt = ruleInfo(` 526 Left: 527 C <- A 528 A <- B* C 529 B <- A 530 `); 531 assert(rt["C"].leftRecursion == LeftRecursive.indirect); 532 assert(rt["A"].leftRecursion == LeftRecursive.indirect); 533 assert(rt["B"].leftRecursion == LeftRecursive.indirect); 534 } 535 536 unittest // Null-matching 537 { 538 enum ct = ruleInfo(` 539 NM: 540 NMM <- NML eoi 541 NML <- 'x'? 542 `); 543 static assert(ct["NML"].nullMatch == NullMatch.yes); 544 static assert(ct["NMM"].nullMatch == NullMatch.yes); 545 auto rt = ruleInfo(` 546 NM: 547 NMM <- NML eoi 548 NML <- 'x'? 549 `); 550 assert(rt["NML"].nullMatch == NullMatch.yes); 551 assert(rt["NMM"].nullMatch == NullMatch.yes); 552 } 553 554 unittest // Not null-matching 555 { 556 enum ct = ruleInfo(` 557 Left: 558 M <- L eoi 559 L <- P '.x' / 'x' 560 P <- P '(n)' / L 561 `); 562 static assert(ct["M"].nullMatch == NullMatch.no); 563 static assert(ct["L"].nullMatch == NullMatch.no); 564 static assert(ct["P"].nullMatch == NullMatch.no); 565 auto rt = ruleInfo(` 566 Left: 567 M <- L eoi 568 L <- P '.x' / 'x' 569 P <- P '(n)' / L 570 `); 571 assert(rt["M"].nullMatch == NullMatch.no); 572 assert(rt["L"].nullMatch == NullMatch.no); 573 assert(rt["P"].nullMatch == NullMatch.no); 574 } 575 576 unittest // Left-recursive null-matching 577 { 578 enum ct = ruleInfo(` 579 Left: 580 M <- L eoi 581 L <- P? '.x'? / 'x' 582 P <- P '(n)' / L 583 `); 584 static assert(ct["M"].nullMatch == NullMatch.yes); 585 static assert(ct["L"].nullMatch == NullMatch.yes); 586 static assert(ct["P"].nullMatch == NullMatch.yes); 587 auto rt = ruleInfo(` 588 Left: 589 M <- L eoi 590 L <- P? '.x'? / 'x' 591 P <- P '(n)' / L 592 `); 593 assert(rt["M"].nullMatch == NullMatch.yes); 594 assert(rt["L"].nullMatch == NullMatch.yes); 595 assert(rt["P"].nullMatch == NullMatch.yes); 596 } 597 598 599 /** 600 Act on rules parse tree as produced by pegged.parser. 601 Replace every occurence of child in parent by child's parse tree 602 */ 603 ParseTree replaceInto(ParseTree parent, ParseTree child) 604 { 605 if (parent.name == "Pegged.RhsName" && parent.matches[0] == child.matches[0]) 606 return ParseTree("Pegged.Named", true, child.matches[0..1], "",0,0, 607 [child.children[2], 608 ParseTree("Pegged.Identifier", true, child.matches[0..1])]); 609 else 610 foreach(ref branch; parent.children) 611 branch = replaceInto(branch, child); 612 return parent; 613 } 614 615 /* .keys is @safe after compiler version >= 2.098.0. 616 * 617 * See: 618 * https://dlang.org/changelog/2.098.0.html#bugfix-list and 619 * https://issues.dlang.org/show_bug.cgi?id=14439 */ 620 enum bool hasSystemAAKeys = __VERSION__ < 2098;