1 /** 2 Parser generation module for Pegged. 3 The Pegged parser itself is in pegged.parser, generated from pegged.examples.peggedgrammar. 4 5 The documentation is in the /docs directory. 6 */ 7 module pegged.grammar; 8 9 import std.algorithm: startsWith; 10 import std.conv: to; 11 import std.functional: toDelegate; 12 import std.stdio; 13 14 public import pegged.peg; 15 import pegged.parser; 16 17 18 19 /** 20 Option enum to get internal memoization (parse results storing). 21 */ 22 enum Memoization { no, yes } 23 24 /** 25 This function takes a (future) module name, a (future) file name and a grammar as a string or a file. 26 It writes the corresponding parser inside a module with the given name. 27 */ 28 void asModule(Memoization withMemo = Memoization.yes)(string moduleName, string fileName, string grammarString, string optHeader = "") 29 { 30 import std.stdio; 31 auto f = File(fileName ~ ".d","w"); 32 33 f.write("/++\nThis module was automatically generated from the following grammar:\n\n"); 34 f.write(grammarString); 35 f.write("\n\n+/\n"); 36 37 f.write("module " ~ moduleName ~ ";\n\n"); 38 39 if (optHeader.length > 0) 40 f.write(optHeader ~ "\n\n"); 41 42 f.write("public import pegged.peg;\n"); 43 f.write("import std.algorithm: startsWith;\n"); 44 f.write("import std.functional: toDelegate;\n\n"); 45 f.write(grammar!(withMemo)(grammarString)); 46 } 47 48 /// ditto 49 void asModule(Memoization withMemo = Memoization.yes)(string moduleName, File file, string optHeader = "") 50 { 51 string grammarDefinition; 52 foreach(line; file.byLine) 53 { 54 grammarDefinition ~= line ~ '\n'; 55 } 56 asModule!(withMemo)(moduleName, grammarDefinition, optHeader); 57 } 58 59 // Helper to insert 'Spacing' before and after Primaries 60 ParseTree spaceArrow(ParseTree input) 61 { 62 ParseTree wrapInSpaces(ParseTree p) 63 { 64 ParseTree spacer = 65 ParseTree("Pegged.Prefix", true, null, null, 0,0, [ 66 ParseTree("Pegged.Suffix", true, null, null, 0, 0, [ 67 ParseTree("Pegged.Primary", true, null, null, 0, 0, [ 68 ParseTree("Pegged.RhsName", true, null, null, 0,0, [ 69 ParseTree("Pegged.Identifier", true, ["Spacing"]) 70 ]) 71 ]) 72 ]) 73 ]); 74 ParseTree result = ParseTree("Pegged.WrapAround", true, p.matches, p.input, p.begin, p.end, p.children); 75 result.children = spacer ~ result.children ~ spacer; 76 return result; 77 } 78 return modify!( p => p.name == "Pegged.Primary", 79 wrapInSpaces)(input); 80 } 81 82 83 /** 84 Generate a parser from a PEG definition. 85 The parser is a string containing D code, to be mixed in or written in a file. 86 87 ---- 88 enum string def = " 89 Gram: 90 A <- 'a' B* 91 B <- 'b' / 'c' 92 "; 93 94 mixin(grammar(def)); 95 96 ParseTree p = Gram("abcbccbcd"); 97 ---- 98 */ 99 string grammar(Memoization withMemo = Memoization.yes)(string definition) 100 { 101 ParseTree defAsParseTree = Pegged(definition); 102 103 if (!defAsParseTree.successful) 104 { 105 // To work around a strange bug with ParseTree printing at compile time 106 string result = "static assert(false, `" ~ defAsParseTree.toString("") ~ "`);"; 107 return result; 108 } 109 return grammar!(withMemo)(defAsParseTree); 110 } 111 /// ditto 112 string grammar(Memoization withMemo = Memoization.yes)(ParseTree defAsParseTree) 113 { 114 string[] composedGrammars; 115 116 // Grammar analysis in support of left-recursion. 117 import pegged.introspection; 118 import std.algorithm : canFind; 119 GrammarInfo grammarInfo = grammarInfo(defAsParseTree.children[0]); 120 string[] stoppers; // Keys are the rules that stop left-recursion and the 121 // values are arrays of strings containing the corresponding 122 // rules for which memoization needs to be blocked. 123 124 /* 125 I once considered that if two left-recursive cycles intersect, unbounded left-recursion 126 would be prevented in both cycles if only the intersection rule would be a stopper. Although 127 true, it causes other problems, as documented in the "Mutual left-recursion" unittest below. 128 Therefore, we simply make the first rule in every left-recursive cycle a stopper. 129 Also, one might think that it suffices to prevent ordinary memoization in just the rules 130 that are part of the cycle. However, some larger input files for pegged/examples/extended_pascal 131 would fail to parse. So memoization for all left-recursive rules is disabled during 132 left-recursion. 133 */ 134 string[] allLeftRecursiveRules; 135 foreach (cycle; grammarInfo.leftRecursiveCycles) 136 foreach (rule; cycle) 137 if (!canFind(allLeftRecursiveRules, rule)) 138 allLeftRecursiveRules ~= rule; 139 foreach (cycle; grammarInfo.leftRecursiveCycles) 140 if (!stoppers.canFind(cycle[0])) 141 stoppers ~= cycle[0]; 142 143 // Prints comment showing detected left-recursive cycles. 144 string printLeftRecursiveCycles() 145 { 146 string result; 147 foreach (cycle; grammarInfo.leftRecursiveCycles) 148 { 149 result ~= cycle[0]; 150 foreach (rule; cycle[1..$]) 151 result ~= " <- " ~ rule; 152 result ~= "\n"; 153 } 154 return result.length > 0 ? "/** Left-recursive cycles:\n" ~ result ~ "*/\n\n" : ""; 155 } 156 157 // Prints comment showing rules that stop left-recursive cycles and rules for which memoization is blocked during recursion. 158 string printLeftRecursionStoppers() 159 { 160 import std.array: join; 161 string result; 162 foreach (stopper; stoppers) 163 result ~= stopper ~ ": " ~ allLeftRecursiveRules.join(", ") ~ "\n"; 164 return result.length > 0 ? 165 "/** Rules that stop left-recursive cycles, followed by rules for which\n" 166 ~ " * memoization is blocked during recursion:\n" ~ result ~ "*/\n\n" : ""; 167 } 168 // Analysis completed. 169 170 string generateForgetMemo() 171 { 172 string result; 173 result ~= " 174 static void forgetMemo() 175 {"; 176 if (withMemo == Memoization.yes) 177 result ~= " 178 memo = null;"; 179 if (composedGrammars.length > 0) 180 result ~= " 181 import std.traits;"; 182 foreach (composed; composedGrammars) 183 result ~= " 184 static if (is(typeof(" ~ composed ~ ".forgetMemo))) 185 " ~ composed ~ ".forgetMemo();"; 186 result ~= " 187 }\n"; 188 return result; 189 } 190 191 string generateCode(ParseTree p, string propagatedName = "") 192 { 193 string result; 194 195 switch (p.name) 196 { 197 case "Pegged": 198 result = generateCode(p.children[0]); 199 break; 200 case "Pegged.Grammar": 201 string grammarName = generateCode(p.children[0]); 202 string shortGrammarName = p.children[0].matches[0]; 203 //string invokedGrammarName = generateCode(transformName(p.children[0])); 204 string firstRuleName = generateCode(p.children[1].children[0]); 205 206 result = 207 "struct Generic" ~ shortGrammarName ~ "(TParseTree) 208 { 209 import std.functional : toDelegate; 210 import pegged.dynamic.grammar; 211 static import pegged.peg; 212 struct " ~ grammarName ~ "\n { 213 enum name = \"" ~ shortGrammarName ~ "\"; 214 static ParseTree delegate(ParseTree)[string] before; 215 static ParseTree delegate(ParseTree)[string] after; 216 static ParseTree delegate(ParseTree)[string] rules;"; 217 218 if (withMemo == Memoization.yes) { 219 result ~= " 220 import std.typecons:Tuple, tuple; 221 static TParseTree[Tuple!(string, size_t)] memo;"; 222 if (grammarInfo.leftRecursiveCycles.length > 0) 223 result ~= " 224 import std.algorithm: canFind, countUntil, remove; 225 static size_t[] blockMemoAtPos;"; 226 } 227 228 result ~= " 229 static this()\n {\n"; 230 231 ParseTree[] definitions = p.children[1 .. $]; 232 bool userDefinedSpacing; 233 foreach(i,def; definitions) 234 { 235 if (def.children[0].children.length == 1) // Non-parameterized ruleName 236 result ~= " rules[\"" ~ def.matches[0] ~ "\"] = toDelegate(&" ~ def.matches[0] ~ ");\n"; 237 if (def.matches[0] == "Spacing") // user-defined spacing 238 { 239 userDefinedSpacing = true; 240 break; 241 } 242 } 243 if(!userDefinedSpacing) 244 result ~= " rules[\"Spacing\"] = toDelegate(&Spacing);\n"; 245 246 result ~= 247 " } 248 249 template hooked(alias r, string name) 250 { 251 static ParseTree hooked(ParseTree p) 252 { 253 ParseTree result; 254 255 if (name in before) 256 { 257 result = before[name](p); 258 if (result.successful) 259 return result; 260 } 261 262 result = r(p); 263 if (result.successful || name !in after) 264 return result; 265 266 result = after[name](p); 267 return result; 268 } 269 270 static ParseTree hooked(string input) 271 { 272 return hooked!(r, name)(ParseTree(\"\",false,[],input)); 273 } 274 } 275 276 static void addRuleBefore(string parentRule, string ruleSyntax) 277 { 278 // enum name is the current grammar name 279 DynamicGrammar dg = pegged.dynamic.grammar.grammar(name ~ \": \" ~ ruleSyntax, rules); 280 foreach(ruleName,rule; dg.rules) 281 if (ruleName != \"Spacing\") // Keep the local Spacing rule, do not overwrite it 282 rules[ruleName] = rule; 283 before[parentRule] = rules[dg.startingRule]; 284 } 285 286 static void addRuleAfter(string parentRule, string ruleSyntax) 287 { 288 // enum name is the current grammar named 289 DynamicGrammar dg = pegged.dynamic.grammar.grammar(name ~ \": \" ~ ruleSyntax, rules); 290 foreach(name,rule; dg.rules) 291 { 292 if (name != \"Spacing\") 293 rules[name] = rule; 294 } 295 after[parentRule] = rules[dg.startingRule]; 296 } 297 298 static bool isRule(string s) 299 { 300 import std.algorithm : startsWith; 301 return s.startsWith(\"" ~ shortGrammarName ~ ".\"); 302 } 303 "; 304 305 306 /+ 307 ~ " switch(s)\n" 308 ~ " {\n"; 309 310 bool[string] ruleNames; // to avoid duplicates, when using parameterized rules 311 string parameterizedRulesSpecialCode; // because param rules need to be put in the 'default' part of the switch 312 313 string paramRuleHandler(string target) 314 { 315 return "if (s.length >= "~to!string(shortGrammarName.length + target.length + 3) 316 ~" && s[0.."~to!string(shortGrammarName.length + target.length + 3)~"] == \"" 317 ~shortGrammarName ~ "." ~ target~"!(\") return true;"; 318 } 319 320 foreach(i,def; definitions) 321 { 322 /* 323 if (def.matches[0] !in ruleNames) 324 { 325 ruleNames[def.matches[0]] = true; 326 327 if (def.children[0].children.length > 1) // Parameterized rule 328 parameterizedRulesSpecialCode ~= " " ~ paramRuleHandler(def.matches[0])~ "\n"; 329 else 330 result ~= " case \"" ~ shortGrammarName ~ "." ~ def.matches[0] ~ "\":\n"; 331 } 332 */ 333 if (def.matches[0] == "Spacing") // user-defined spacing 334 { 335 userDefinedSpacing = true; 336 break; 337 } 338 } 339 result ~= " return true;\n" 340 ~ " default:\n" 341 ~ parameterizedRulesSpecialCode 342 ~ " return false;\n }\n }\n"; 343 +/ 344 result ~= " mixin decimateTree;\n\n"; 345 346 // If the grammar provides a Spacing rule, then this will be used. 347 // else, the predefined 'spacing' rule is used. 348 result ~= userDefinedSpacing ? "" : " alias spacing Spacing;\n\n"; 349 350 // Creating the inner functions, each corresponding to a grammar rule 351 foreach(def; definitions) 352 result ~= generateCode(def, shortGrammarName); 353 354 // if the first rule is parameterized (a template), it's impossible to get an opCall 355 // because we don't know with which template arguments it should be called. 356 // So no opCall is generated in this case. 357 if (p.children[1].children[0].children.length == 1) 358 { 359 // General calling interface 360 result ~= " static TParseTree opCall(TParseTree p)\n" 361 ~ " {\n" 362 ~ " TParseTree result = decimateTree(" ~ firstRuleName ~ "(p));\n" 363 ~ " result.children = [result];\n" 364 ~ " result.name = \"" ~ shortGrammarName ~ "\";\n" 365 ~ " return result;\n" 366 ~ " }\n\n" 367 ~ " static TParseTree opCall(string input)\n" 368 ~ " {\n"; 369 370 if (withMemo == Memoization.no) 371 result ~= " forgetMemo();\n" 372 ~ " return " ~ shortGrammarName ~ "(TParseTree(``, false, [], input, 0, 0));\n" 373 ~ " }\n"; 374 else 375 result ~= " if(__ctfe)\n" 376 ~ " {\n" 377 ~ " return " ~ shortGrammarName ~ "(TParseTree(``, false, [], input, 0, 0));\n" 378 ~ " }\n" 379 ~ " else\n" 380 ~ " {\n" 381 ~ " forgetMemo();\n" 382 ~ " return " ~ shortGrammarName ~ "(TParseTree(``, false, [], input, 0, 0));\n" 383 ~ " }\n" 384 ~ " }\n"; 385 386 result ~= " static string opCall(GetName g)\n" 387 ~ " {\n" 388 ~ " return \"" ~ shortGrammarName ~ "\";\n" 389 ~ " }\n\n"; 390 } 391 result ~= generateForgetMemo(); 392 result ~= " }\n" // end of grammar struct definition 393 ~ "}\n\n" // end of template definition 394 ~ "alias Generic" ~ shortGrammarName ~ "!(ParseTree)." 395 ~ shortGrammarName ~ " " ~ shortGrammarName ~ ";\n\n"; 396 break; 397 case "Pegged.Definition": 398 // children[0]: name 399 // children[1]: arrow (arrow type as first child) 400 // children[2]: description 401 402 string code; 403 404 switch(p.children[1].children[0].name) 405 { 406 case "Pegged.LEFTARROW": 407 code ~= generateCode(p.children[2]); 408 break; 409 case "Pegged.FUSEARROW": 410 code ~= "pegged.peg.fuse!(" ~ generateCode(p.children[2]) ~ ")"; 411 break; 412 case "Pegged.DISCARDARROW": 413 code ~= "pegged.peg.discard!(" ~ generateCode(p.children[2]) ~ ")"; 414 break; 415 case "Pegged.KEEPARROW": 416 code ~= "pegged.peg.keep!("~ generateCode(p.children[2]) ~ ")"; 417 break; 418 case "Pegged.DROPARROW": 419 code ~= "pegged.peg.drop!("~ generateCode(p.children[2]) ~ ")"; 420 break; 421 case "Pegged.PROPAGATEARROW": 422 code ~= "pegged.peg.propagate!("~ generateCode(p.children[2]) ~ ")"; 423 break; 424 case "Pegged.SPACEARROW": 425 ParseTree modified = spaceArrow(p.children[2]); 426 code ~= generateCode(modified); 427 break; 428 case "Pegged.ACTIONARROW": 429 auto actionResult = generateCode(p.children[2]); 430 foreach(action; p.children[1].matches[1..$]) 431 actionResult = "pegged.peg.action!(" ~ actionResult ~ ", " ~ action ~ ")"; 432 code ~= actionResult; 433 break; 434 default: 435 break; 436 } 437 438 bool parameterizedRule = p.children[0].children.length > 1; 439 string completeName = generateCode(p.children[0]); 440 string shortName = p.matches[0]; 441 string innerName; 442 string hookedName = p.matches[0]; 443 444 if (parameterizedRule) 445 { 446 result = " template " ~ completeName ~ "\n" 447 ~ " {\n"; 448 innerName ~= "\"" ~ shortName ~ "!(\" ~ "; 449 hookedName ~= "_" ~ to!string(p.children[0].children[1].children.length); 450 foreach(i,param; p.children[0].children[1].children) 451 innerName ~= "pegged.peg.getName!("~ param.children[0].matches[0] 452 ~ (i<p.children[0].children[1].children.length-1 ? ")() ~ \", \" ~ " 453 : ")"); 454 innerName ~= " ~ \")\""; 455 } 456 else 457 { 458 innerName ~= "`" ~ completeName ~ "`"; 459 } 460 461 string ctfeCode = " pegged.peg.defined!(" ~ code ~ ", \"" ~ propagatedName ~ "." ~ innerName[1..$-1] ~ "\")"; 462 code = "hooked!(pegged.peg.defined!(" ~ code ~ ", \"" ~ propagatedName ~ "." ~ innerName[1..$-1] ~ "\"), \"" ~ hookedName ~ "\")"; 463 464 if (withMemo == Memoization.no) 465 result ~= " static TParseTree " ~ shortName ~ "(TParseTree p)\n" 466 ~ " {\n" 467 ~ " if(__ctfe)\n" 468 ~ " {\n" 469 ~ (stoppers.canFind(shortName) ? 470 " assert(false, \"" ~ shortName ~ " is left-recursive, which is not supported " 471 ~ "at compile-time. Consider using asModule().\");\n" 472 : 473 " return " ~ ctfeCode ~ "(p);\n" 474 ) 475 ~ " }\n" 476 ~ " else\n" 477 ~ " {\n" 478 ~ (stoppers.canFind(shortName) ? 479 // This rule needs to prevent infinite left-recursion. 480 " static TParseTree[size_t /*position*/] seed;\n" 481 ~ " if (auto s = p.end in seed)\n" 482 ~ " return *s;\n" 483 ~ " auto current = fail(p);\n" 484 ~ " seed[p.end] = current;\n" 485 ~ " while(true)\n" 486 ~ " {\n" 487 ~ " auto result = " ~ code ~ "(p);\n" 488 ~ (grammarInfo.ruleInfo[shortName].nullMatch == NullMatch.no ? 489 " if (result.end > current.end)\n" 490 : 491 " if (result.end > current.end ||\n" 492 ~ " (!current.successful && result.successful) /* null-match */)\n" 493 ) 494 ~ " {\n" 495 ~ " current = result;\n" 496 ~ " seed[p.end] = current;\n" 497 ~ " } else {\n" 498 ~ " seed.remove(p.end);\n" 499 ~ " return current;\n" 500 ~ " }\n" 501 ~ " }\n" 502 : 503 // Possibly left-recursive rule, but infinite recursion is already prevented by another rule in the same cycle. 504 " return " ~ code ~ "(p);\n" 505 ) 506 ~ " }\n" 507 ~ " }\n" 508 ~ " static TParseTree " ~ shortName ~ "(string s)\n" 509 ~ " {\n" 510 ~ " if(__ctfe)\n" 511 ~ " return " ~ ctfeCode ~ "(TParseTree(\"\", false,[], s));\n" 512 ~ " else\n" 513 ~ " {\n" 514 ~ " forgetMemo();\n" 515 ~ " return " ~ code ~ "(TParseTree(\"\", false,[], s));\n" 516 ~ " }\n" 517 ~ " }\n"; 518 else // Memoization.yes 519 result ~= " static TParseTree " ~ shortName ~ "(TParseTree p)\n" 520 ~ " {\n" 521 ~ " if(__ctfe)\n" 522 ~ " {\n" 523 ~ (stoppers.canFind(shortName) ? 524 " assert(false, \"" ~ shortName ~ " is left-recursive, which is not supported " 525 ~ "at compile-time. Consider using asModule().\");\n" 526 : 527 " return " ~ ctfeCode ~ "(p);\n" 528 ) 529 ~ " }\n" 530 ~ " else\n" 531 ~ " {\n" 532 ~ (stoppers.canFind(shortName) ? 533 // This rule needs to prevent infinite left-recursion. 534 " static TParseTree[size_t /*position*/] seed;\n" 535 ~ " if (auto s = p.end in seed)\n" 536 ~ " return *s;\n" 537 ~ " if (!blockMemoAtPos.canFind(p.end))\n" 538 ~ " if (auto m = tuple(" ~ innerName ~ ", p.end) in memo)\n" 539 ~ " return *m;\n" 540 ~ " auto current = fail(p);\n" 541 ~ " seed[p.end] = current;\n" 542 ~ " blockMemoAtPos ~= p.end;\n" 543 ~ " while (true)\n" 544 ~ " {\n" 545 ~ " auto result = " ~ code ~ "(p);\n" 546 ~ (grammarInfo.ruleInfo[shortName].nullMatch == NullMatch.no ? 547 " if (result.end > current.end)\n" 548 : 549 " if (result.end > current.end ||\n" 550 ~ " (!current.successful && result.successful) /* null-match */)\n" 551 ) 552 ~ " {\n" 553 ~ " current = result;\n" 554 ~ " seed[p.end] = current;\n" 555 ~ " } else {\n" 556 // Since seed is local, it cannot be reset between parses the way memo is reset. 557 // We can get away with this by removing elements from it, done below, so that 558 // seed is empty again when left-recursion has ended and all recursive calls have 559 // returned. The advantage of a local seed is that it remains small and fast. The 560 // disadvantage is that seed doesn't memoize the final result, so that must be taken 561 // care of by memo. Note that p.end remains constant for the course of recursion, 562 // and the length of seed only grows when nested recursion occurs. 563 ~ " seed.remove(p.end);\n" 564 // TODO investigate if p.end is always the last element of blockMemoAtPos. 565 ~ " assert(blockMemoAtPos.canFind(p.end));\n" 566 ~ " blockMemoAtPos = blockMemoAtPos.remove(countUntil(blockMemoAtPos, p.end));\n" 567 ~ " memo[tuple(" ~ innerName ~ ", p.end)] = current;\n" 568 ~ " return current;\n" 569 ~ " }\n" 570 ~ " }\n" 571 : 572 // Possibly left-recursive rule, but infinite recursion is already prevented by another rule in the same cycle. 573 (allLeftRecursiveRules.canFind(shortName) ? 574 " if (blockMemoAtPos.canFind(p.end))\n" 575 ~ " return " ~ code ~ "(p);\n" 576 : "" 577 ) 578 ~ " if (auto m = tuple(" ~ innerName ~ ", p.end) in memo)\n" 579 ~ " return *m;\n" 580 ~ " else\n" 581 ~ " {\n" 582 ~ " TParseTree result = " ~ code ~ "(p);\n" 583 ~ " memo[tuple(" ~ innerName ~ ", p.end)] = result;\n" 584 ~ " return result;\n" 585 ~ " }\n" 586 ) 587 ~ " }\n" 588 ~ " }\n\n" 589 ~ " static TParseTree " ~ shortName ~ "(string s)\n" 590 ~ " {\n" 591 ~ " if(__ctfe)\n" 592 ~ " {\n" 593 ~ " return " ~ ctfeCode ~ "(TParseTree(\"\", false,[], s));\n" 594 ~ " }\n" 595 ~ " else\n" 596 ~ " {\n" 597 ~ " forgetMemo();\n" 598 ~ " return " ~ code ~ "(TParseTree(\"\", false,[], s));\n" 599 ~ " }\n" 600 ~ " }\n"; 601 602 result ~= " static string " ~ shortName ~ "(GetName g)\n" 603 ~ " {\n" 604 ~ " return \"" ~ propagatedName ~ "." ~ innerName[1..$-1] ~ "\";\n" 605 ~ " }\n\n"; 606 607 if (parameterizedRule) 608 result ~= " }\n"; 609 610 break; 611 case "Pegged.GrammarName": 612 result = generateCode(p.children[0]); 613 if (p.children.length == 2) 614 result ~= generateCode(p.children[1]); 615 break; 616 case "Pegged.LhsName": 617 result = generateCode(p.children[0]); 618 if (p.children.length == 2) 619 result ~= generateCode(p.children[1]); 620 break; 621 case "Pegged.ParamList": 622 result = "("; 623 foreach(i,child; p.children) 624 result ~= generateCode(child) ~ ", "; 625 result = result[0..$-2] ~ ")"; 626 break; 627 case "Pegged.Param": 628 result = "alias " ~ generateCode(p.children[0]); 629 break; 630 case "Pegged.SingleParam": 631 result = p.matches[0]; 632 break; 633 case "Pegged.DefaultParam": 634 result = p.matches[0] ~ " = " ~ generateCode(p.children[1]); 635 break; 636 case "Pegged.Expression": 637 result ~= generateCode(p.children[0]); 638 break; 639 case "Pegged.FirstExpression", 640 "Pegged.LongestExpression": 641 if (p.children.length > 1) // [LONGEST_]OR expression 642 { 643 // Keyword list detection: "abstract"/"alias"/... 644 bool isLiteral(ParseTree p) 645 { 646 return ( p.name == "Pegged.Sequence" 647 && p.children.length == 1 648 && p.children[0].children.length == 1 649 && p.children[0].children[0].children.length == 1 650 && p.children[0].children[0].children[0].children.length == 1 651 && p.children[0].children[0].children[0].children[0].name == "Pegged.Literal"); 652 } 653 bool keywordList = true; 654 foreach(child;p.children) 655 if (!isLiteral(child)) 656 { 657 keywordList = false; 658 break; 659 } 660 661 if (keywordList) 662 { 663 result = "pegged.peg.keywords!("; 664 foreach(seq; p.children) 665 result ~= "\"" ~ (seq.matches.length == 3 ? seq.matches[1] : "") ~ "\", "; 666 result = result[0..$-2] ~ ")"; 667 } 668 else 669 { 670 result = p.name == "Pegged.FirstExpression" ? "pegged.peg.or!(" : "pegged.peg.longest_match!("; 671 foreach(seq; p.children) 672 result ~= generateCode(seq) ~ ", "; 673 result = result[0..$-2] ~ ")"; 674 } 675 } 676 else // One child -> just a sequence, no need for an or!( , ) 677 { 678 result = generateCode(p.children[0]); 679 } 680 break; 681 case "Pegged.Sequence": 682 if (p.children.length > 1) // real sequence 683 { 684 result = "pegged.peg.and!("; 685 foreach(seq; p.children) 686 { 687 string elementCode = generateCode(seq); 688 // flattening inner sequences 689 if (elementCode.length > 6 && elementCode[0..5] == "pegged.peg.and!(") 690 elementCode = elementCode[5..$-1]; // cutting 'and!(' and ')' 691 result ~= elementCode ~ ", "; 692 } 693 result = result[0..$-2] ~ ")"; 694 } 695 else // One child -> just a Suffix, no need for a and!( , ) 696 { 697 result = generateCode(p.children[0]); 698 } 699 break; 700 case "Pegged.Prefix": 701 result = generateCode(p.children[$-1]); 702 foreach(child; p.children[0..$-1]) 703 result = generateCode(child) ~ result ~ ")"; 704 break; 705 case "Pegged.Suffix": 706 result = generateCode(p.children[0]); 707 foreach(child; p.children[1..$]) 708 { 709 switch (child.name) 710 { 711 case "Pegged.OPTION": 712 result = "pegged.peg.option!(" ~ result ~ ")"; 713 break; 714 case "Pegged.ZEROORMORE": 715 result = "pegged.peg.zeroOrMore!(" ~ result ~ ")"; 716 break; 717 case "Pegged.ONEORMORE": 718 result = "pegged.peg.oneOrMore!(" ~ result ~ ")"; 719 break; 720 case "Pegged.Action": 721 foreach(action; child.matches) 722 result = "pegged.peg.action!(" ~ result ~ ", " ~ action ~ ")"; 723 break; 724 default: 725 break; 726 } 727 } 728 break; 729 case "Pegged.Primary": 730 result = generateCode(p.children[0]); 731 break; 732 case "Pegged.RhsName": 733 result = ""; 734 string composedGrammar = ""; 735 foreach(child; p.children) 736 { 737 result ~= generateCode(child); 738 if (child.name == "Pegged.NAMESEP") 739 composedGrammar = p.input[p.begin .. child.begin]; 740 } 741 if (composedGrammar.length > 0 && !composedGrammars.canFind(composedGrammar)) 742 composedGrammars ~= composedGrammar; 743 break; 744 case "Pegged.ArgList": 745 result = "!("; 746 foreach(child; p.children) 747 result ~= generateCode(child) ~ ", "; // Allow A <- List('A'*,',') 748 result = result[0..$-2] ~ ")"; 749 break; 750 case "Pegged.Identifier": 751 result = p.matches[0]; 752 break; 753 case "Pegged.NAMESEP": 754 result = "."; 755 break; 756 case "Pegged.Literal": 757 if(p.matches.length == 3) // standard case 758 result = "pegged.peg.literal!(\"" ~ p.matches[1] ~ "\")"; 759 else // only two children -> empty literal 760 result = "pegged.peg.literal!(``)"; 761 break; 762 case "Pegged.CILiteral": 763 if(p.matches.length == 3) // standard case 764 result = "pegged.peg.caseInsensitiveLiteral!(\"" ~ p.matches[1] ~ "\")"; 765 else // only two children -> empty literal 766 result = "pegged.peg.caseInsensitiveLiteral!(``)"; 767 break; 768 case "Pegged.CharClass": 769 if (p.children.length > 1) 770 { 771 result = "pegged.peg.or!("; 772 foreach(seq; p.children) 773 result ~= generateCode(seq) ~ ", "; 774 result = result[0..$-2] ~ ")"; 775 } 776 else // One child -> just a sequence, no need for an or!( , ) 777 { 778 result = generateCode(p.children[0]); 779 } 780 break; 781 case "Pegged.CharRange": 782 /// Make the generation at the Char level: directly what is needed, be it `` or "" or whatever 783 if (p.children.length > 1) // a-b range 784 { 785 result = "pegged.peg.charRange!('" ~ generateCode(p.children[0]) 786 ~ "', '" 787 ~ generateCode(p.children[1]) 788 ~ "')"; 789 } 790 else // lone char 791 { 792 result = "pegged.peg.literal!("; 793 string ch = p.matches[0]; 794 switch (ch) 795 { 796 case "\\[": 797 case "\\]": 798 case "\\-": 799 result ~= "\"" ~ ch[1..$] ~ "\")"; 800 break; 801 case "\\\'": 802 result ~= "\"'\")"; 803 break; 804 case "\\`": 805 result ~= q{"`")}; 806 break; 807 case "\\": 808 case "\\\\": 809 result ~= "`\\`)"; 810 break; 811 case "\"": 812 case "\\\"": 813 result ~= "`\"`)"; 814 break; 815 case "\n": 816 case "\r": 817 case "\t": 818 result ~= "\"" ~ to!string(to!dchar(ch)) ~ "\")"; 819 break; 820 default: 821 result ~= "\"" ~ ch ~ "\")"; 822 } 823 } 824 break; 825 case "Pegged.Char": 826 string ch = p.matches[0]; 827 switch (ch) 828 { 829 case "\\[": 830 case "\\]": 831 case "\\-": 832 833 case "\\\'": 834 case "\\\"": 835 case "\\`": 836 case "\\\\": 837 result = ch[1..$]; 838 break; 839 case "\n": 840 case "\r": 841 case "\t": 842 result = to!string(to!dchar(ch)); 843 break; 844 default: 845 result = ch; 846 } 847 break; 848 case "Pegged.POS": 849 result = "pegged.peg.posLookahead!("; 850 break; 851 case "Pegged.NEG": 852 result = "pegged.peg.negLookahead!("; 853 break; 854 case "Pegged.FUSE": 855 result = "pegged.peg.fuse!("; 856 break; 857 case "Pegged.DISCARD": 858 result = "pegged.peg.discard!("; 859 break; 860 //case "Pegged.CUT": 861 // result = "discardChildren!("; 862 // break; 863 case "Pegged.KEEP": 864 result = "pegged.peg.keep!("; 865 break; 866 case "Pegged.DROP": 867 result = "pegged.peg.drop!("; 868 break; 869 case "Pegged.PROPAGATE": 870 result = "pegged.peg.propagate!("; 871 break; 872 case "Pegged.OPTION": 873 result = "pegged.peg.option!("; 874 break; 875 case "Pegged.ZEROORMORE": 876 result = "pegged.peg.zeroOrMore!("; 877 break; 878 case "Pegged.ONEORMORE": 879 result = "pegged.peg.oneOrMore!("; 880 break; 881 case "Pegged.Action": 882 result = generateCode(p.children[0]); 883 foreach(action; p.matches[1..$]) 884 result = "pegged.peg.action!(" ~ result ~ ", " ~ action ~ ")"; 885 break; 886 case "Pegged.ANY": 887 result = "pegged.peg.any"; 888 break; 889 case "Pegged.WrapAround": 890 result = "pegged.peg.wrapAround!(" ~ generateCode(p.children[0]) ~ ", " 891 ~ generateCode(p.children[1]) ~ ", " 892 ~ generateCode(p.children[2]) ~ ")"; 893 break; 894 default: 895 result = "Bad tree: " ~ p.toString(); 896 break; 897 } 898 return result; 899 } 900 901 902 903 return printLeftRecursiveCycles() ~ printLeftRecursionStoppers() ~ generateCode(defAsParseTree); 904 } 905 906 /** 907 Mixin to get what a failed rule expected as input. 908 Not used by Pegged yet. 909 */ 910 mixin template expected() 911 { 912 string expected(ParseTree p) 913 { 914 915 switch(p.name) 916 { 917 case "Pegged.Expression": 918 string expectation; 919 foreach(i, child; p.children) 920 expectation ~= "(" ~ expected(child) ~ ")" ~ (i < p.children.length -1 ? " or " : ""); 921 return expectation; 922 case "Pegged.Sequence": 923 string expectation; 924 foreach(i, expr; p.children) 925 expectation ~= "(" ~ expected(expr) ~ ")" ~ (i < p.children.length -1 ? " followed by " : ""); 926 return expectation; 927 case "Pegged.Prefix": 928 return expected(p.children[$-1]); 929 case "Pegged.Suffix": 930 string expectation; 931 string end; 932 foreach(prefix; p.children[1..$]) 933 switch(prefix.name) 934 { 935 case "Pegged.ZEROORMORE": 936 expectation ~= "zero or more times ("; 937 end ~= ")"; 938 break; 939 case "Pegged.ONEORMORE": 940 expectation ~= "one or more times ("; 941 end ~= ")"; 942 break; 943 case "Pegged.OPTION": 944 expectation ~= "optionally ("; 945 end ~= ")"; 946 break; 947 case "Pegged.Action": 948 break; 949 default: 950 break; 951 } 952 return expectation ~ expected(p.children[0]) ~ end; 953 case "Pegged.Primary": 954 return expected(p.children[0]); 955 //case "Pegged.RhsName": 956 // return "RhsName, not implemented."; 957 case "Pegged.Literal": 958 return "the literal `" ~ p.matches[0] ~ "`"; 959 case "Pegged.CharClass": 960 string expectation; 961 foreach(i, child; p.children) 962 expectation ~= expected(child) ~ (i < p.children.length -1 ? " or " : ""); 963 return expectation; 964 case "Pegged.CharRange": 965 if (p.children.length == 1) 966 return expected(p.children[0]); 967 else 968 return "any character between '" ~ p.matches[0] ~ "' and '" ~ p.matches[2] ~ "'"; 969 case "Pegged.Char": 970 return "the character '" ~ p.matches[0] ~ "'"; 971 case "Pegged.ANY": 972 return "any character"; 973 default: 974 return "unknow rule (" ~ p.matches[0] ~ ")"; 975 } 976 } 977 } 978 979 unittest // 'grammar' unit test: low-level functionalities 980 { 981 mixin(grammar(` 982 Test1: 983 Rule1 <- 'a' 984 Rule2 <- 'b' 985 `)); 986 987 assert(is(Test1 == struct), "A struct name Test1 is created."); 988 assert(is(typeof(Test1("a"))), "Test1 is callable with a string arg"); 989 assert(__traits(hasMember, Test1, "Rule1"), "Test1 has a member named Rule1."); 990 assert(__traits(hasMember, Test1, "Rule2"), "Test1 has a member named Rule2."); 991 assert(is(typeof(Test1.Rule1("a"))), "Test1.Rule1 is callable with a string arg"); 992 assert(is(typeof(Test1.Rule2("a"))), "Test1.Rule2 is callable with a string arg"); 993 994 assert(__traits(hasMember, Test1, "decimateTree"), "Test1 has a member named decimateTree."); 995 assert(__traits(hasMember, Test1, "name"), "Test1 has a member named name."); 996 assert(__traits(hasMember, Test1, "isRule"), "Test1 has a member named isRule."); 997 } 998 999 unittest // 'grammar' unit test: PEG syntax 1000 { 1001 // Here we do not test pegged.peg.*, just the grammar transformations 1002 // From a PEG to a Pegged expression template. 1003 1004 mixin(grammar(` 1005 Terminals: 1006 Literal1 <- "abc" 1007 Literal2 <- 'abc' 1008 EmptyLiteral1 <- "" 1009 EmptyLiteral2 <- '' 1010 1011 Any <- . 1012 Eps <- eps 1013 Letter <- [a-z] 1014 Digit <- [0-9] 1015 ABC <- [abc] 1016 Alpha1 <- [a-zA-Z_] 1017 Alpha2 <- [_a-zA-Z] 1018 Chars1 <- [\0-\127] 1019 Chars2 <- [\x00-\xFF] 1020 Chars3 <- [\u0000-\u00FF] 1021 Chars4 <- [\U00000000-\U000000FF] 1022 `)); 1023 1024 ParseTree result = Terminals("abc"); 1025 1026 assert(result.name == "Terminals", "Grammar name test."); 1027 assert(result.children[0].name == "Terminals.Literal1", "First rule name test."); 1028 assert(result.begin == 0); 1029 assert(result.end == 3); 1030 assert(result.matches == ["abc"]); 1031 1032 ParseTree reference = Terminals.decimateTree(Terminals.Literal1("abc")); 1033 1034 assert(result.children[0] == reference, "Invoking a grammar is like invoking its first rule."); 1035 1036 assert(Terminals.Literal1("abc").successful, "Standard terminal test. Double quote syntax."); 1037 assert(Terminals.Literal2("abc").successful, "Standard terminal test. Simple quote syntax."); 1038 assert(Terminals.EmptyLiteral1("").successful , "Standard terminal test. Double quote syntax."); 1039 assert(Terminals.EmptyLiteral2("").successful, "Standard terminal test. Simple quote syntax."); 1040 1041 foreach(char c; char.min .. char.max) 1042 assert(Terminals.Any(""~c).successful, "Any terminal ('.') test."); 1043 1044 assert(Terminals.Eps("").successful, "Eps test."); 1045 assert(Terminals.Eps("abc").successful, "Eps test."); 1046 1047 string lower = "abcdefghijklmnopqrstuvwxyz"; 1048 string upper = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"; 1049 string under = "_"; 1050 string digits = "0123456789"; 1051 string others = "?./,;:!*&()[]<>"; 1052 1053 foreach(dchar dc; lower) 1054 assert(Terminals.Letter(to!string(dc)).successful); 1055 foreach(dchar dc; upper) 1056 assert(!Terminals.Letter(to!string(dc)).successful); 1057 foreach(dchar dc; digits) 1058 assert(!Terminals.Letter(to!string(dc)).successful); 1059 foreach(dchar dc; others) 1060 assert(!Terminals.Letter(to!string(dc)).successful); 1061 1062 foreach(dchar dc; lower) 1063 assert(!Terminals.Digit(to!string(dc)).successful); 1064 foreach(dchar dc; upper) 1065 assert(!Terminals.Digit(to!string(dc)).successful); 1066 foreach(dchar dc; digits) 1067 assert(Terminals.Digit(to!string(dc)).successful); 1068 foreach(dchar dc; others) 1069 assert(!Terminals.Letter(to!string(dc)).successful); 1070 1071 foreach(dchar dc; lower ~ upper ~ under) 1072 assert(Terminals.Alpha1(to!string(dc)).successful); 1073 foreach(dchar dc; digits ~ others) 1074 assert(!Terminals.Alpha1(to!string(dc)).successful); 1075 1076 foreach(dchar dc; lower ~ upper ~ under) 1077 assert(Terminals.Alpha2(to!string(dc)).successful); 1078 foreach(dchar dc; digits ~ others) 1079 assert(!Terminals.Alpha2(to!string(dc)).successful); 1080 1081 foreach(size_t index, dchar dc; lower ~ upper ~ under) 1082 assert( (index < 3 && Terminals.ABC(to!string(dc)).successful) 1083 || (index >= 3 && !Terminals.ABC(to!string(dc)).successful)); 1084 1085 foreach(dchar dc; 0..256) 1086 { 1087 string s = to!string(dc); 1088 if (dc <= '\127') 1089 assert(Terminals.Chars1(s).successful); 1090 else 1091 assert(!Terminals.Chars1(s).successful); 1092 1093 assert(Terminals.Chars2(s).successful); 1094 assert(Terminals.Chars3(s).successful); 1095 assert(Terminals.Chars4(s).successful); 1096 } 1097 1098 mixin(grammar(` 1099 Structure: 1100 Rule1 <- Rule2 / Rule3 / Rule4 # Or test 1101 Rule2 <- Rule3 Rule4 # And test 1102 Rule3 <- "abc" 1103 Rule4 <- "def" 1104 1105 Rule5 <- (Rule2 / Rule4) Rule3 # parenthesis test 1106 Rule6 <- Rule2 / Rule4 Rule3 1107 Rule7 <- Rule2 / (Rule4 Rule3) 1108 `)); 1109 1110 // Invoking Rule2 (and hence, Rule3 Rule4) 1111 result = Structure("abcdef"); 1112 1113 assert(result.successful, "Calling Rule2."); 1114 assert(result.name == "Structure", "Grammar name test."); 1115 assert(result.children[0].name == "Structure.Rule1", "First rule name test"); 1116 assert(result.children[0].children.length == 1); 1117 assert(result.children[0].children[0].name == "Structure.Rule2"); 1118 assert(result.children[0].children[0].children[0].name == "Structure.Rule3"); 1119 assert(result.children[0].children[0].children[1].name == "Structure.Rule4"); 1120 assert(result.matches == ["abc", "def"]); 1121 assert(result.begin ==0); 1122 assert(result.end == 6); 1123 1124 // Invoking Rule3 1125 result = Structure("abc"); 1126 1127 assert(result.successful, "Calling Rule3."); 1128 assert(result.name == "Structure", "Grammar name test."); 1129 assert(result.children[0].name == "Structure.Rule1", "First rule name test"); 1130 assert(result.children[0].children.length == 1); 1131 assert(result.children[0].children[0].name == "Structure.Rule3"); 1132 assert(result.children[0].children[0].children.length == 0); 1133 assert(result.matches == ["abc"]); 1134 assert(result.begin ==0); 1135 assert(result.end == 3); 1136 1137 // Invoking Rule4 1138 result = Structure("def"); 1139 1140 assert(result.successful, "Calling Rule2."); 1141 assert(result.name == "Structure", "Grammar name test."); 1142 assert(result.children[0].name == "Structure.Rule1", "First rule name test"); 1143 assert(result.children[0].children.length == 1); 1144 assert(result.children[0].children[0].name == "Structure.Rule4"); 1145 assert(result.children[0].children[0].children.length == 0); 1146 assert(result.matches == ["def"]); 1147 assert(result.begin ==0); 1148 assert(result.end == 3); 1149 1150 // Failure 1151 result =Structure("ab_def"); 1152 assert(!result.successful); 1153 assert(result.name == "Structure", "Grammar name test."); 1154 assert(result.begin == 0); 1155 assert(result.end == 0); 1156 1157 // Parenthesis test 1158 // Rule5 <- (Rule2 / Rule4) Rule3 1159 result = Structure.decimateTree(Structure.Rule5("abcdefabc")); 1160 assert(result.successful); 1161 assert(result.children.length == 2, "Two children: (Rule2 / Rule4), followed by Rule3."); 1162 assert(result.children[0].name == "Structure.Rule2"); 1163 assert(result.children[1].name == "Structure.Rule3"); 1164 1165 result = Structure.decimateTree(Structure.Rule5("defabc")); 1166 assert(result.successful); 1167 assert(result.children.length == 2, "Two children: (Rule2 / Rule4), followed by Rule3."); 1168 assert(result.children[0].name == "Structure.Rule4"); 1169 assert(result.children[1].name == "Structure.Rule3"); 1170 1171 // Rule6 <- Rule2 / Rule4 Rule3 1172 result = Structure.decimateTree(Structure.Rule6("abcdef")); 1173 assert(result.successful); 1174 assert(result.children.length == 1, "One child: Rule2."); 1175 assert(result.children[0].name == "Structure.Rule2"); 1176 1177 result = Structure.decimateTree(Structure.Rule6("defabc")); 1178 assert(result.successful); 1179 assert(result.children.length == 2, "Two children: Rule4, followed by Rule3."); 1180 assert(result.children[0].name == "Structure.Rule4"); 1181 assert(result.children[1].name == "Structure.Rule3"); 1182 1183 // Rule7 <- Rule2 / (Rule4 Rule3) 1184 // That is, like Rule6 1185 result = Structure.decimateTree(Structure.Rule7("abcdef")); 1186 assert(result.successful); 1187 assert(result.children.length == 1, "One child: Rule2."); 1188 assert(result.children[0].name == "Structure.Rule2"); 1189 1190 result = Structure.decimateTree(Structure.Rule7("defabc")); 1191 assert(result.successful); 1192 assert(result.children.length == 2, "Two children: Rule4, followed by Rule3."); 1193 assert(result.children[0].name == "Structure.Rule4"); 1194 assert(result.children[1].name == "Structure.Rule3"); 1195 1196 // Prefixes and Suffixes 1197 mixin(grammar(` 1198 PrefixSuffix: 1199 Rule1 <- &"abc" 1200 Rule2 <- !"abc" 1201 Rule3 <- "abc"? 1202 Rule4 <- "abc"* 1203 Rule5 <- "abc"+ 1204 `)); 1205 1206 // Verifying &"abc" creates a positive look-ahead construct 1207 result = PrefixSuffix.Rule1("abc"); 1208 reference = posLookahead!(literal!"abc")("abc"); 1209 1210 assert(result.matches == reference.matches); 1211 assert(result.begin == reference.begin); 1212 assert(result.end == reference.end); 1213 assert(result.children[0].children == reference.children); 1214 1215 result = PrefixSuffix.Rule1("def"); 1216 reference = posLookahead!(literal!"abc")("def"); 1217 1218 assert(result.matches == reference.matches); 1219 assert(result.begin == reference.begin); 1220 assert(result.end == reference.end); 1221 assert(result.children[0].children == reference.children); 1222 1223 1224 // Verifying !"abc" creates a negative look-ahead construct 1225 result = PrefixSuffix.Rule2("abc"); 1226 reference = negLookahead!(literal!"abc")("abc"); 1227 1228 assert(result.matches == reference.matches); 1229 assert(result.begin == reference.begin); 1230 assert(result.end == reference.end); 1231 assert(result.children[0].children == reference.children); 1232 1233 result = PrefixSuffix.Rule2("def"); 1234 reference = negLookahead!(literal!"abc")("def"); 1235 1236 assert(result.matches == reference.matches); 1237 assert(result.begin == reference.begin); 1238 assert(result.end == reference.end); 1239 assert(result.children[0].children == reference.children); 1240 1241 // Verifying "abc"? creates an optional construct 1242 result = PrefixSuffix.Rule3("abc"); 1243 reference = option!(literal!"abc")("abc"); 1244 1245 assert(result.matches == reference.matches); 1246 assert(result.begin == reference.begin); 1247 assert(result.end == reference.end); 1248 assert(result.children[0].children == reference.children); 1249 1250 result = PrefixSuffix.Rule3("def"); 1251 reference = option!(literal!"abc")("def"); 1252 1253 assert(result.matches == reference.matches); 1254 assert(result.begin == reference.begin); 1255 assert(result.end == reference.end); 1256 assert(result.children[0].children == reference.children); 1257 1258 // Verifying "abc"* creates a zero or more construct 1259 result = PrefixSuffix.Rule4(""); 1260 reference = zeroOrMore!(literal!"abc")(""); 1261 1262 assert(result.matches == reference.matches); 1263 assert(result.begin == reference.begin); 1264 assert(result.end == reference.end); 1265 assert(result.children[0].children == reference.children); 1266 1267 result = PrefixSuffix.Rule4("abc"); 1268 reference = zeroOrMore!(literal!"abc")("abc"); 1269 1270 assert(result.matches == reference.matches); 1271 assert(result.begin == reference.begin); 1272 assert(result.end == reference.end); 1273 assert(result.children[0].children == reference.children); 1274 1275 result = PrefixSuffix.Rule4("abcabc"); 1276 reference = zeroOrMore!(literal!"abc")("abcabc"); 1277 1278 assert(result.matches == reference.matches); 1279 assert(result.begin == reference.begin); 1280 assert(result.end == reference.end); 1281 assert(result.children[0].children == reference.children); 1282 1283 // Verifying "abc"+ creates a one or more construct 1284 result = PrefixSuffix.Rule5(""); 1285 reference = oneOrMore!(literal!"abc")(""); 1286 1287 assert(result.matches == reference.matches); 1288 assert(result.begin == reference.begin); 1289 assert(result.end == reference.end); 1290 assert(result.children[0].children == reference.children); 1291 1292 result = PrefixSuffix.Rule5("abc"); 1293 reference = oneOrMore!(literal!"abc")("abc"); 1294 1295 assert(result.matches == reference.matches); 1296 assert(result.begin == reference.begin); 1297 assert(result.end == reference.end); 1298 assert(result.children[0].children == reference.children); 1299 1300 result = PrefixSuffix.Rule5("abcabc"); 1301 reference = oneOrMore!(literal!"abc")("abcabc"); 1302 1303 assert(result.matches == reference.matches); 1304 assert(result.begin == reference.begin); 1305 assert(result.end == reference.end); 1306 assert(result.children[0].children == reference.children); 1307 1308 // Verifying that the case insensitive literal i suffix does not clash with a rule named i. 1309 mixin(grammar(` 1310 CaseInsensitive: 1311 S <- CI i 1312 CI <- "abc"i 1313 i <- "i" 1314 `)); 1315 assert(CaseInsensitive("aBci").successful); 1316 1317 // Verifying that ordinary literals are case sensitive. 1318 mixin(grammar(` 1319 CaseSensitive: 1320 S <- CS i 1321 CS <- "abc" 1322 i <- "i" 1323 `)); 1324 assert(!CaseSensitive("aBci").successful); 1325 assert(CaseSensitive("abci").successful); 1326 } 1327 1328 unittest // Multilines rules 1329 { 1330 mixin(grammar(` 1331 Indentation: 1332 Rule1 < 1333 'a' 1334 'b' 1335 'c' 1336 Rule2 1337 <- 1338 'd' 1339 Rule3 1340 < 1341 'e' 1342 Rule4 <- 'f' Rule5 # Rule4 ends with 'f', then it's Rule5 1343 <- 'g' 1344 1345 1346 1347 1348 'h' 1349 `)); 1350 1351 1352 assert(Indentation("abc").successful); 1353 assert(Indentation.Rule2("d").successful); 1354 assert(Indentation.Rule3("e").successful); 1355 assert(Indentation.Rule4("f").successful); 1356 assert(Indentation.Rule5("gh").successful); 1357 } 1358 1359 unittest // Parsing at compile-time 1360 { 1361 mixin(grammar(` 1362 Test: 1363 Rule1 <- 'a' Rule2('b') 1364 Rule2(B) <- B 1365 `)); 1366 1367 // Equality on success 1368 ParseTree result = Test("ab"); 1369 1370 enum CTsuccess = Test("ab"); 1371 1372 assert(CTsuccess == result, "Compile-time parsing is equal to runtime parsing on success."); 1373 1374 // Equality on failure 1375 result = Test("ac"); 1376 enum CTfailure = Test("ac"); 1377 1378 assert(CTfailure == result, "Compile-time parsing is equal to runtime parsing on failure."); 1379 } 1380 1381 unittest // PEG extensions (arrows, prefixes, suffixes) 1382 { 1383 mixin(grammar(` 1384 Arrows: 1385 Rule1 <- ABC DEF # Standard arrow 1386 Rule2 < ABC DEF # Space arrow 1387 Rule3 < ABC DEF* # Space arrow 1388 Rule4 < ABC+ DEF # Space arrow 1389 1390 Rule5 <- ABC* 1391 Rule6 <~ ABC* # Fuse arrow 1392 1393 Rule7 <: ABC DEF # Discard arrow 1394 Rule8 <^ ABC DEF # Keep arrow 1395 Rule9 <; ABC DEF # Drop arrow 1396 Rule10 <% ABC Rule1 DEF # Propagate arrow 1397 1398 ABC <- "abc" 1399 DEF <- "def" 1400 `)); 1401 1402 // Comparing <- ABC DEF and < ABC DEF 1403 ParseTree result = Arrows.decimateTree(Arrows.Rule1("abcdef")); 1404 assert(result.successful); 1405 assert(result.begin == 0); 1406 assert(result.end ==6); 1407 assert(result.matches == ["abc", "def"]); 1408 assert(result.children.length == 2); 1409 assert(result.children[0].name == "Arrows.ABC"); 1410 assert(result.children[1].name == "Arrows.DEF"); 1411 1412 result = Arrows.decimateTree(Arrows.Rule1("abc def")); 1413 assert(!result.successful); 1414 1415 result = Arrows.decimateTree(Arrows.Rule2("abcdef")); 1416 assert(result.successful); 1417 assert(result.begin == 0); 1418 assert(result.end == 6); 1419 assert(result.matches == ["abc", "def"]); 1420 assert(result.children.length == 2); 1421 assert(result.children[0].name == "Arrows.ABC"); 1422 assert(result.children[1].name == "Arrows.DEF"); 1423 1424 result = Arrows.decimateTree(Arrows.Rule2("abc def ")); 1425 assert(result.successful, "space arrows consume spaces."); 1426 assert(result.begin == 0); 1427 assert(result.end == "abc def ".length, "The entire input is parsed."); 1428 assert(result.matches == ["abc", "def"]); 1429 assert(result.children.length == 2); 1430 assert(result.children[0].name == "Arrows.ABC"); 1431 assert(result.children[1].name == "Arrows.DEF"); 1432 1433 result = Arrows.decimateTree(Arrows.Rule3("abcdefdef")); 1434 assert(result.successful); 1435 assert(result.begin == 0); 1436 assert(result.end == "abcdefdef".length); 1437 assert(result.matches == ["abc", "def", "def"]); 1438 assert(result.children.length == 3); 1439 assert(result.children[0].name == "Arrows.ABC"); 1440 assert(result.children[1].name == "Arrows.DEF"); 1441 assert(result.children[2].name == "Arrows.DEF"); 1442 1443 result = Arrows.decimateTree(Arrows.Rule3("abc def defdef")); 1444 assert(result.successful, "space arrows consume spaces."); 1445 assert(result.begin == 0); 1446 assert(result.end == "abc def defdef".length, "The entire input is parsed."); 1447 assert(result.matches == ["abc", "def", "def", "def"]); 1448 assert(result.children.length == 4); 1449 assert(result.children[0].name == "Arrows.ABC"); 1450 assert(result.children[1].name == "Arrows.DEF"); 1451 assert(result.children[2].name == "Arrows.DEF"); 1452 assert(result.children[3].name == "Arrows.DEF"); 1453 1454 result = Arrows.decimateTree(Arrows.Rule4("abcabcdef")); 1455 assert(result.successful); 1456 assert(result.begin == 0); 1457 assert(result.end == "abcabcdef".length); 1458 assert(result.matches == ["abc", "abc", "def"]); 1459 assert(result.children.length == 3); 1460 assert(result.children[0].name == "Arrows.ABC"); 1461 assert(result.children[1].name == "Arrows.ABC"); 1462 assert(result.children[2].name == "Arrows.DEF"); 1463 1464 result = Arrows.decimateTree(Arrows.Rule4(" abc abcabc def ")); 1465 assert(result.successful, "space arrows consume spaces."); 1466 assert(result.begin == 0); 1467 assert(result.end == " abc abcabc def ".length, "The entire input is parsed."); 1468 assert(result.matches == ["abc", "abc", "abc", "def"]); 1469 assert(result.children.length == 4); 1470 assert(result.children[0].name == "Arrows.ABC"); 1471 assert(result.children[1].name == "Arrows.ABC"); 1472 assert(result.children[2].name == "Arrows.ABC"); 1473 assert(result.children[3].name == "Arrows.DEF"); 1474 1475 //Comparing <- ABC* and <~ ABC* 1476 result = Arrows.decimateTree(Arrows.Rule5("abcabcabc")); 1477 assert(result.successful); 1478 assert(result.begin == 0); 1479 assert(result.end == "abcabcabc".length, "The entire input is parsed."); 1480 assert(result.matches == ["abc", "abc", "abc"]); 1481 assert(result.children.length == 3, "With the * operator, all children are kept."); 1482 assert(result.children[0].name == "Arrows.ABC"); 1483 assert(result.children[1].name == "Arrows.ABC"); 1484 assert(result.children[2].name == "Arrows.ABC"); 1485 1486 result = Arrows.decimateTree(Arrows.Rule6("abcabcabc")); 1487 assert(result.successful); 1488 assert(result.begin == 0); 1489 assert(result.end == "abcabcabc".length, "The entire input is parsed."); 1490 assert(result.matches == ["abcabcabc"], "Matches are fused."); 1491 assert(result.children.length == 0, "The <~ arrow cuts children."); 1492 1493 // Comparing <- ABC DEF and <: ABC DEF 1494 result = Arrows.decimateTree(Arrows.Rule7("abcdef")); 1495 assert(result.successful); 1496 assert(result.begin == 0); 1497 assert(result.end == "abcdef".length, "The entire input is parsed."); 1498 assert(result.matches is null, "No match with the discard arrow."); 1499 assert(result.children.length == 0, "No children with the discard arrow."); 1500 1501 // Comparing <- ABC DEF and <^ ABC DEF 1502 //But <^ is not very useful anyways. It does not distribute ^ among the subrules. 1503 result = Arrows.decimateTree(Arrows.Rule8("abcdef")); 1504 assert(result.successful); 1505 assert(result.begin == 0); 1506 assert(result.end == "abcdef".length, "The entire input is parsed."); 1507 assert(result.matches == ["abc", "def"]); 1508 assert(result.children[0].children.length == 2); 1509 1510 // Comparing <- ABC DEF and <; ABC DEF 1511 result = Arrows.decimateTree(Arrows.Rule9("abcdef")); 1512 assert(result.successful); 1513 assert(result.begin == 0); 1514 assert(result.end == "abcdef".length, "The entire input is parsed."); 1515 assert(result.matches == ["abc", "def"], "The drop arrow keeps the matches."); 1516 assert(result.children.length == 0, "The drop arrow drops the children."); 1517 1518 // Comparing <- ABC DEF and <% ABC Rule1 DEF 1519 //But <% is not very useful anyways. It does not distribute % among the subrules. 1520 result = Arrows.decimateTree(Arrows.Rule10("abcabcdefdef")); 1521 assert(result.successful); 1522 assert(result.begin == 0); 1523 assert(result.end == "abcabcdefdef".length, "The entire input is parsed."); 1524 assert(result.matches == ["abc", "abc", "def", "def"]); 1525 assert(result.children.length == 3); 1526 assert(result.children[0].name == "Arrows.ABC"); 1527 assert(result.children[1].name == "Arrows.Rule1", "No rule replacement by its own children. See % for that."); 1528 assert(result.children[2].name == "Arrows.DEF"); 1529 } 1530 1531 unittest //More space arrow tests 1532 { 1533 mixin(grammar(` 1534 Spaces: 1535 Rule1 < A (B C)+ 1536 A <- 'a' 1537 B <- 'b' 1538 C <- 'c' 1539 `)); 1540 1541 ParseTree result = Spaces.decimateTree(Spaces.Rule1("abcbc")); 1542 1543 assert(result.successful); 1544 assert(result.begin == 0); 1545 assert(result.end == "abcbc".length); 1546 assert(result.children.length == 5); 1547 assert(result.children[0].name == "Spaces.A"); 1548 assert(result.children[1].name == "Spaces.B"); 1549 assert(result.children[2].name == "Spaces.C"); 1550 assert(result.children[3].name == "Spaces.B"); 1551 assert(result.children[4].name == "Spaces.C"); 1552 1553 result = Spaces.decimateTree(Spaces.Rule1(" a bc b c ")); 1554 1555 assert(result.successful); 1556 assert(result.begin == 0); 1557 assert(result.end == " a bc b c ".length); 1558 assert(result.children.length == 5); 1559 assert(result.children[0].name == "Spaces.A"); 1560 assert(result.children[1].name == "Spaces.B"); 1561 assert(result.children[2].name == "Spaces.C"); 1562 assert(result.children[3].name == "Spaces.B"); 1563 assert(result.children[4].name == "Spaces.C"); 1564 } 1565 1566 unittest // Prefix and suffix tests 1567 { 1568 mixin(grammar(` 1569 PrefixSuffix: 1570 # Reference 1571 Rule1 <- ABC DEF 1572 Rule2 <- "abc" "def" 1573 Rule3 <- ABC* 1574 Rule4 <- "abc"* 1575 1576 # Discard operator 1577 Rule5 <- :ABC DEF 1578 Rule6 <- ABC :DEF 1579 Rule7 <- :"abc" "def" 1580 Rule8 <- "abc" :"def" 1581 1582 # Drop operator 1583 Rule9 <- ;ABC DEF 1584 Rule10 <- ABC ;DEF 1585 Rule11 <- ;"abc" "def" 1586 Rule12 <- "abc" ;"def" 1587 1588 # Fuse operator 1589 1590 Rule13 <- ~( ABC* ) 1591 Rule14 <- ~("abc"*) 1592 1593 # Keep operator 1594 Rule15 <- ^"abc" ^"def" 1595 1596 # Propagate operator 1597 Rule16 <- ABC Rule1 DEF 1598 Rule17 <- ABC %Rule1 DEF 1599 1600 1601 ABC <- "abc" 1602 DEF <- "def" 1603 `)); 1604 1605 1606 // Comparing standard and discarded rules 1607 auto result = PrefixSuffix.decimateTree(PrefixSuffix.Rule1("abcdef")); 1608 1609 assert(result.successful); 1610 assert(result.begin == 0); 1611 assert(result.end == 6); 1612 assert(result.matches == ["abc", "def"]); 1613 assert(result.children.length == 2); 1614 assert(result.children[0].name == "PrefixSuffix.ABC"); 1615 assert(result.children[1].name == "PrefixSuffix.DEF"); 1616 1617 result = PrefixSuffix.decimateTree(PrefixSuffix.Rule5("abcdef")); 1618 1619 assert(result.successful); 1620 assert(result.begin == 0); 1621 assert(result.end == 6); 1622 assert(result.matches == ["def"]); 1623 assert(result.children.length == 1, "The first child is discarded."); 1624 assert(result.children[0].name == "PrefixSuffix.DEF"); 1625 1626 result = PrefixSuffix.decimateTree(PrefixSuffix.Rule6("abcdef")); 1627 1628 assert(result.successful); 1629 assert(result.begin == 0); 1630 assert(result.end == 6); 1631 assert(result.matches == ["abc"]); 1632 assert(result.children.length == 1, "The second child is discarded."); 1633 assert(result.children[0].name == "PrefixSuffix.ABC"); 1634 1635 1636 result = PrefixSuffix.decimateTree(PrefixSuffix.Rule2("abcdef")); 1637 1638 assert(result.successful); 1639 assert(result.begin == 0); 1640 assert(result.end == 6); 1641 assert(result.matches == ["abc", "def"]); 1642 assert(result.children.length == 0, "Literals do not create children."); 1643 1644 result = PrefixSuffix.decimateTree(PrefixSuffix.Rule7("abcdef")); 1645 1646 assert(result.successful); 1647 assert(result.begin == 0); 1648 assert(result.end == 6); 1649 assert(result.matches == ["def"]); 1650 assert(result.children.length == 0); 1651 1652 result = PrefixSuffix.decimateTree(PrefixSuffix.Rule8("abcdef")); 1653 1654 assert(result.successful); 1655 assert(result.begin == 0); 1656 assert(result.end == 6); 1657 assert(result.matches == ["abc"]); 1658 assert(result.children.length == 0); 1659 1660 // Comparing standard and dropped rules 1661 1662 result = PrefixSuffix.decimateTree(PrefixSuffix.Rule9("abcdef")); 1663 1664 assert(result.successful); 1665 assert(result.begin == 0); 1666 assert(result.end == 6); 1667 assert(result.matches == ["abc", "def"], "All matches are there."); 1668 assert(result.children.length == 1, "The first child is discarded."); 1669 assert(result.children[0].name == "PrefixSuffix.DEF"); 1670 1671 result = PrefixSuffix.decimateTree(PrefixSuffix.Rule10("abcdef")); 1672 1673 assert(result.successful); 1674 assert(result.begin == 0); 1675 assert(result.end == 6); 1676 assert(result.matches == ["abc", "def"], "All matches are there."); 1677 assert(result.children.length == 1, "The second child is discarded."); 1678 assert(result.children[0].name == "PrefixSuffix.ABC"); 1679 1680 result = PrefixSuffix.decimateTree(PrefixSuffix.Rule11("abcdef")); 1681 1682 assert(result.successful); 1683 assert(result.begin == 0); 1684 assert(result.end == 6); 1685 assert(result.matches == ["abc", "def"], "All matches are there."); 1686 assert(result.children.length == 0); 1687 1688 result = PrefixSuffix.decimateTree(PrefixSuffix.Rule12("abcdef")); 1689 1690 assert(result.successful); 1691 assert(result.begin == 0); 1692 assert(result.end == 6); 1693 assert(result.matches == ["abc", "def"], "All matches are there."); 1694 assert(result.children.length == 0); 1695 1696 1697 // Comparing standard and fused rules 1698 1699 result = PrefixSuffix.decimateTree(PrefixSuffix.Rule3("abcabcabc")); 1700 1701 assert(result.successful); 1702 assert(result.begin == 0); 1703 assert(result.end == "abcabcabc".length); 1704 assert(result.matches == ["abc", "abc", "abc"]); 1705 assert(result.children.length == 3, "Standard '*': 3 children."); 1706 assert(result.children[0].name == "PrefixSuffix.ABC"); 1707 assert(result.children[1].name == "PrefixSuffix.ABC"); 1708 assert(result.children[2].name == "PrefixSuffix.ABC"); 1709 1710 result = PrefixSuffix.decimateTree(PrefixSuffix.Rule4("abcabcabc")); 1711 1712 assert(result.successful); 1713 assert(result.begin == 0); 1714 assert(result.end == "abcabcabc".length); 1715 assert(result.matches == ["abc", "abc", "abc"]); 1716 assert(result.children.length == 0, "All literals are discarded by the tree decimation."); 1717 1718 result = PrefixSuffix.decimateTree(PrefixSuffix.Rule13("abcabcabc")); 1719 1720 assert(result.successful); 1721 assert(result.begin == 0); 1722 assert(result.end == "abcabcabc".length); 1723 assert(result.matches == ["abcabcabc"], "All matches are fused."); 1724 assert(result.children.length == 0, "Children are discarded by '~'."); 1725 1726 result = PrefixSuffix.decimateTree(PrefixSuffix.Rule14("abcabcabc")); 1727 1728 assert(result.successful); 1729 assert(result.begin == 0); 1730 assert(result.end == "abcabcabc".length); 1731 assert(result.matches == ["abcabcabc"], "All matches are there."); 1732 assert(result.children.length == 0, "Children are discarded by '~'."); 1733 1734 // Testing the keep (^) operator 1735 1736 result = PrefixSuffix.decimateTree(PrefixSuffix.Rule15("abcdef")); 1737 1738 assert(result.successful); 1739 assert(result.begin == 0); 1740 assert(result.end == "abcdef".length); 1741 assert(result.matches == ["abc", "def"], "All matches are there."); 1742 assert(result.children.length == 2, "Both children are kept by '^'."); 1743 assert(result.children[0].name == `literal!("abc")`, 1744 `literal!("abc") is kept even though it's not part of the grammar rules.`); 1745 assert(result.children[1].name == `literal!("def")`, 1746 `literal!("def") is kept even though it's not part of the grammar rules.`); 1747 1748 // Comparing standard and propagated (%) rules. 1749 result = PrefixSuffix.decimateTree(PrefixSuffix.Rule16("abcabcdefdef")); 1750 1751 assert(result.successful); 1752 assert(result.begin == 0); 1753 assert(result.end == "abcabcdefdef".length); 1754 assert(result.matches == ["abc", "abc", "def", "def"], "All matches are there."); 1755 assert(result.children.length == 3, "Standard rule: three children."); 1756 assert(result.children[0].name == "PrefixSuffix.ABC"); 1757 assert(result.children[1].name == "PrefixSuffix.Rule1"); 1758 assert(result.children[1].children.length == 2, "Rule1 creates two children."); 1759 assert(result.children[1].children[0].name, "PrefixSuffix.ABC"); 1760 assert(result.children[1].children[1].name, "PrefixSuffix.DEF"); 1761 assert(result.children[2].name == "PrefixSuffix.DEF"); 1762 1763 result = PrefixSuffix.decimateTree(PrefixSuffix.Rule17("abcabcdefdef")); 1764 1765 // From (ABC, Rule1(ABC,DEF), DEF) to (ABC,ABC,DEF,DEF) 1766 assert(result.successful); 1767 assert(result.begin == 0); 1768 assert(result.end == "abcabcdefdef".length); 1769 assert(result.matches == ["abc", "abc", "def", "def"], "All matches are there."); 1770 assert(result.children.length == 4, "%-affected rule: four children."); 1771 assert(result.children[0].name == "PrefixSuffix.ABC"); 1772 assert(result.children[1].name == "PrefixSuffix.ABC"); 1773 assert(result.children[2].name == "PrefixSuffix.DEF"); 1774 assert(result.children[2].name == "PrefixSuffix.DEF"); 1775 1776 // Testing % and < together 1777 mixin(grammar(` 1778 PropTest: 1779 Rule1 < B C+ 1780 Rule2 <- B (%C)+ 1781 Rule3 < B (%C)+ 1782 Rule4 < B %(D E)+ 1783 1784 B <- 'b' 1785 C <- D E 1786 D <- 'd' 1787 E <- 'e' 1788 `)); 1789 1790 result = PropTest.decimateTree(PropTest.Rule1("bdedede")); 1791 assert(result.successful); 1792 assert(result.begin == 0); 1793 assert(result.end == "bdedede".length); 1794 assert(result.matches == ["b", "d", "e", "d", "e", "d", "e"]); 1795 assert(result.children.length == 4, "b and de, de, de"); 1796 assert(result.children[0].name == "PropTest.B"); 1797 assert(result.children[1].name == "PropTest.C"); 1798 assert(result.children[2].name == "PropTest.C"); 1799 assert(result.children[3].name == "PropTest.C"); 1800 1801 result = PropTest.decimateTree(PropTest.Rule2("bdedede")); 1802 assert(result.successful); 1803 assert(result.begin == 0); 1804 assert(result.end == "bdedede".length); 1805 assert(result.matches == ["b", "d", "e", "d", "e", "d", "e"]); 1806 assert(result.children.length == 7, "b and (d and e), thrice."); 1807 assert(result.children[0].name == "PropTest.B"); 1808 assert(result.children[1].name == "PropTest.D"); 1809 assert(result.children[2].name == "PropTest.E"); 1810 assert(result.children[3].name == "PropTest.D"); 1811 assert(result.children[4].name == "PropTest.E"); 1812 assert(result.children[5].name == "PropTest.D"); 1813 assert(result.children[6].name == "PropTest.E"); 1814 1815 result = PropTest.decimateTree(PropTest.Rule3("bdedede")); 1816 assert(result.successful); 1817 assert(result.begin == 0); 1818 assert(result.end == "bdedede".length); 1819 assert(result.matches == ["b", "d", "e", "d", "e", "d", "e"]); 1820 assert(result.children.length == 7, "b and (d and e), thrice."); 1821 assert(result.children[0].name == "PropTest.B"); 1822 assert(result.children[1].name == "PropTest.D"); 1823 assert(result.children[2].name == "PropTest.E"); 1824 assert(result.children[3].name == "PropTest.D"); 1825 assert(result.children[4].name == "PropTest.E"); 1826 assert(result.children[5].name == "PropTest.D"); 1827 assert(result.children[6].name == "PropTest.E"); 1828 1829 result = PropTest.decimateTree(PropTest.Rule3(" b de de de ")); 1830 assert(result.successful); 1831 assert(result.begin == 0); 1832 assert(result.end == " b de de de ".length); 1833 assert(result.matches == ["b", "d", "e", "d", "e", "d", "e"]); 1834 assert(result.children.length == 7, "b and (d and e), thrice."); 1835 assert(result.children[0].name == "PropTest.B"); 1836 assert(result.children[1].name == "PropTest.D"); 1837 assert(result.children[2].name == "PropTest.E"); 1838 assert(result.children[3].name == "PropTest.D"); 1839 assert(result.children[4].name == "PropTest.E"); 1840 assert(result.children[5].name == "PropTest.D"); 1841 assert(result.children[6].name == "PropTest.E"); 1842 1843 result = PropTest.decimateTree(PropTest.Rule4("bdedede")); 1844 assert(result.successful); 1845 assert(result.begin == 0); 1846 assert(result.end == "bdedede".length); 1847 assert(result.matches == ["b", "d", "e", "d", "e", "d", "e"]); 1848 assert(result.children.length == 7, "b and (d and e), thrice."); 1849 assert(result.children[0].name == "PropTest.B"); 1850 assert(result.children[1].name == "PropTest.D"); 1851 assert(result.children[2].name == "PropTest.E"); 1852 assert(result.children[3].name == "PropTest.D"); 1853 assert(result.children[4].name == "PropTest.E"); 1854 assert(result.children[5].name == "PropTest.D"); 1855 assert(result.children[6].name == "PropTest.E"); 1856 1857 result = PropTest.decimateTree(PropTest.Rule4(" b de de de ")); 1858 assert(result.successful); 1859 assert(result.begin == 0); 1860 assert(result.end == " b de de de ".length); 1861 assert(result.matches == ["b", "d", "e", "d", "e", "d", "e"]); 1862 assert(result.children.length == 7, "b and (d and e), thrice."); 1863 assert(result.children[0].name == "PropTest.B"); 1864 assert(result.children[1].name == "PropTest.D"); 1865 assert(result.children[2].name == "PropTest.E"); 1866 assert(result.children[3].name == "PropTest.D"); 1867 assert(result.children[4].name == "PropTest.E"); 1868 assert(result.children[5].name == "PropTest.D"); 1869 assert(result.children[6].name == "PropTest.E"); 1870 1871 // More than one prefix, more than one suffixes 1872 mixin(grammar(` 1873 MoreThanOne: 1874 Rule1 <- ~:("abc"*) # Two prefixes (nothing left for ~, after :) 1875 Rule2 <- :~("abc"*) # Two prefixes (: will discard everything ~ did) 1876 Rule3 <- ;:~"abc" # Many prefixes 1877 Rule4 <- ~~~("abc"*) # Many fuses (no global effect) 1878 1879 Rule5 <- "abc"+* # Many suffixes 1880 Rule6 <- "abc"+? # Many suffixes 1881 1882 Rule7 <- !!"abc" # Double negation, equivalent to '&' 1883 Rule8 <- &"abc" 1884 1885 Rule9 <- ^^"abc"+* # Many suffixes and prefixes 1886 `)); 1887 1888 assert(is(MoreThanOne), "This compiles all right."); 1889 1890 result = MoreThanOne.decimateTree(MoreThanOne.Rule1("abcabcabc")); 1891 assert(result.successful); 1892 assert(result.begin == 0); 1893 assert(result.end == "abcabcabc".length); 1894 assert(result.matches is null); 1895 assert(result.children.length == 0); 1896 1897 result = MoreThanOne.decimateTree(MoreThanOne.Rule2("abcabcabc")); 1898 assert(result.successful); 1899 assert(result.begin == 0); 1900 assert(result.end == "abcabcabc".length); 1901 assert(result.matches is null); 1902 assert(result.children.length == 0); 1903 1904 result = MoreThanOne.decimateTree(MoreThanOne.Rule3("abcabcabc")); 1905 assert(result.successful); 1906 assert(result.begin == 0); 1907 assert(result.end == "abc".length); 1908 assert(result.matches is null); 1909 assert(result.children.length == 0); 1910 1911 result = MoreThanOne.decimateTree(MoreThanOne.Rule4("abcabcabc")); 1912 assert(result.successful); 1913 assert(result.begin == 0); 1914 assert(result.end == "abcabcabc".length); 1915 assert(result.matches == ["abcabcabc"]); 1916 assert(result.children.length == 0); 1917 1918 // +* and +? 1919 result = MoreThanOne.decimateTree(MoreThanOne.Rule5("abcabcabc")); 1920 assert(result.successful); 1921 assert(result.begin == 0); 1922 assert(result.end == "abcabcabc".length); 1923 assert(result.matches == ["abc", "abc", "abc"]); 1924 assert(result.children.length == 0); 1925 1926 result = MoreThanOne.decimateTree(MoreThanOne.Rule6("abcabcabc")); 1927 assert(result.successful); 1928 assert(result.begin == 0); 1929 assert(result.end == "abcabcabc".length); 1930 assert(result.matches == ["abc", "abc", "abc"]); 1931 assert(result.children.length == 0); 1932 1933 // !! is equivalent to & 1934 result = MoreThanOne.decimateTree(MoreThanOne.Rule7("abc")); 1935 assert(result.successful); 1936 assert(result.begin == 0); 1937 assert(result.end == 0); 1938 assert(result.matches is null); 1939 assert(result.children.length == 0); 1940 1941 result = MoreThanOne.decimateTree(MoreThanOne.Rule8("abc")); 1942 assert(result.successful); 1943 assert(result.begin == 0); 1944 assert(result.end == 0); 1945 assert(result.matches is null); 1946 assert(result.children.length == 0); 1947 1948 // ^^"abc"+* 1949 result = MoreThanOne.decimateTree(MoreThanOne.Rule9("abcabcabc")); 1950 assert(result.successful); 1951 assert(result.begin == 0); 1952 assert(result.end == 9); 1953 assert(result.matches == ["abc", "abc", "abc"]); 1954 assert(result.children.length == 1); 1955 assert(result.name == `MoreThanOne.Rule9`); 1956 assert(result.children[0].name == `keep!(zeroOrMore!(oneOrMore!(literal!("abc"))))`); 1957 assert(result.children[0].children.length == 1); 1958 assert(result.children[0].children[0].name == `zeroOrMore!(oneOrMore!(literal!("abc")))`); 1959 assert(result.children[0].children[0].children.length == 1); 1960 assert(result.children[0].children[0].children[0].name == `oneOrMore!(literal!("abc"))`); 1961 assert(result.children[0].children[0].children[0].children.length == 3); 1962 assert(result.children[0].children[0].children[0].children[0].name == `literal!("abc")`); 1963 assert(result.children[0].children[0].children[0].children[1].name == `literal!("abc")`); 1964 assert(result.children[0].children[0].children[0].children[2].name == `literal!("abc")`); 1965 } 1966 1967 unittest // Issue #88 unit test 1968 { 1969 enum gram = ` 1970 P: 1971 Rule1 <- (w 'a' w)* 1972 Rule2 <- (wx 'a' wx)* 1973 w <- :(' ' / '\n' / '\t' / '\r')* 1974 wx <- (:' ' / '\n' / '\t' / '\r')* 1975 `; 1976 1977 mixin(grammar(gram)); 1978 1979 string input = " a a a a a a "; 1980 1981 ParseTree p1 = P.decimateTree(P.Rule1(input)); 1982 ParseTree p2 = P.decimateTree(P.Rule2(input)); 1983 assert(softCompare(p1,p2)); 1984 1985 input = " a\n \011\012 a\n\t a\x09\x0A a "; 1986 p1 = P.decimateTree(P.Rule1(input)); 1987 p2 = P.decimateTree(P.Rule2(input)); 1988 assert(p1.end == input.length); // Parse the entire string 1989 assert(p2.end == input.length); 1990 } 1991 1992 unittest // Leading alternation 1993 { 1994 mixin(grammar(` 1995 LeadingAlternation: 1996 Rule1 <- / 'a' 1997 Rule2 <- / 'a' / 'b' 1998 Rule3 <- (/ 'a' / 'b') 1999 `)); 2000 2001 ParseTree result = LeadingAlternation.decimateTree(LeadingAlternation.Rule1("a")); 2002 assert(result.successful); 2003 assert(result.begin == 0); 2004 assert(result.end == 1); 2005 assert(result.matches == ["a"]); 2006 2007 result = LeadingAlternation.decimateTree(LeadingAlternation.Rule2("b")); 2008 assert(result.successful); 2009 assert(result.begin == 0); 2010 assert(result.end == 1); 2011 assert(result.matches == ["b"]); 2012 2013 result = LeadingAlternation.decimateTree(LeadingAlternation.Rule3("b")); 2014 assert(result.successful); 2015 assert(result.begin == 0); 2016 assert(result.end == 1); 2017 assert(result.matches == ["b"]); 2018 } 2019 2020 unittest // Extended chars tests 2021 { 2022 mixin(grammar(" 2023 Chars: 2024 # Lone chars 2025 Rule1 <- '\t' '0' 'A' '~' 2026 Rule2 <- '\11' '\60' '\101' '\176' # \t 0 A ~ in octal 2027 Rule3 <- '\011' '\060' '\101' '\176' # \t 0 A ~ in octal (prefix 0) 2028 Rule4 <- '\x09' '\x30' '\x41' '\x7E' # \t 0 A ~ in hexadecimal 2029 Rule5 <- '\u0009' '\u0030' '\u0041' '\u007E' # \t 0 A ~ in unicode 2030 Rule6 <- '\U00000009' '\U00000030' '\U00000041' '\U0000007E' # \t 0 A ~ in unicode 2031 2032 # Strings literals 2033 Rule7 <- '\t0A~' 2034 Rule8 <- '\11\60\101\176' # \t 0 A ~ in octal 2035 Rule9 <- '\011\060\101\176' # \t 0 A ~ in octal (prefix 0) 2036 Rule10 <- '\x09\x30\x41\x7E' # \t 0 A ~ in hexadecimal 2037 Rule11 <- '\u0009\u0030\u0041\u007E' # \t 0 A ~ in unicode 2038 Rule12 <- '\U00000009\U00000030\U00000041' '\U0000007E' # \t 0 A ~ in unicode 2039 2040 # Outside Latin 2041 Rule13 <- '\u03B1\u03B9\u03C6\u03B1' # alpha in greek 2042 Rule14 <- 'αιφα' 2043 2044 # Hello's 2045 English <- 'Hello' 2046 Russian <- 'Здравствуйте' 2047 Arabic <- 'السلام عليك' 2048 Chinese <- '你好' 2049 Japanese <- '今日は' 2050 Spanish <- '¡Hola!' 2051 ")); 2052 2053 2054 assert(Chars.decimateTree(Chars.Rule1("\t0A~")).successful); 2055 assert(Chars.decimateTree(Chars.Rule2("\t0A~")).successful); 2056 assert(Chars.decimateTree(Chars.Rule3("\t0A~")).successful); 2057 assert(Chars.decimateTree(Chars.Rule4("\t0A~")).successful); 2058 assert(Chars.decimateTree(Chars.Rule5("\t0A~")).successful); 2059 assert(Chars.decimateTree(Chars.Rule6("\t0A~")).successful); 2060 2061 assert(Chars.decimateTree(Chars.Rule7("\t0A~")).successful); 2062 assert(Chars.decimateTree(Chars.Rule8("\t0A~")).successful); 2063 assert(Chars.decimateTree(Chars.Rule9("\t0A~")).successful); 2064 assert(Chars.decimateTree(Chars.Rule10("\t0A~")).successful); 2065 assert(Chars.decimateTree(Chars.Rule11("\t0A~")).successful); 2066 assert(Chars.decimateTree(Chars.Rule12("\t0A~")).successful); 2067 2068 assert(Chars.decimateTree(Chars.Rule13("\u03B1\u03B9\u03C6\u03B1")).successful); 2069 assert(Chars.decimateTree(Chars.Rule13("αιφα")).successful); 2070 2071 assert(Chars.decimateTree(Chars.Rule14("\u03B1\u03B9\u03C6\u03B1")).successful); 2072 assert(Chars.decimateTree(Chars.Rule14("αιφα")).successful); 2073 2074 assert(Chars.decimateTree(Chars.English("Hello")).successful); 2075 assert(Chars.decimateTree(Chars.Russian("Здравствуйте")).successful); 2076 assert(Chars.decimateTree(Chars.Arabic("السلام عليك")).successful); 2077 assert(Chars.decimateTree(Chars.Chinese("你好")).successful); 2078 assert(Chars.decimateTree(Chars.Japanese("今日は'")).successful); 2079 assert(Chars.decimateTree(Chars.Spanish("¡Hola!")).successful); 2080 } 2081 2082 unittest // Extended char range tests 2083 { 2084 import std.conv; 2085 2086 mixin(grammar(` 2087 CharRanges: 2088 Rule1 <- [a-z] 2089 Rule2 <- [\141-\172] # a-z in octal 2090 Rule3 <- [\x61-\x7A] # a-z in hexadecimal 2091 Rule4 <- [\u0061-\u007A] # a-z in UTF16 2092 Rule5 <- [\U00000061-\U0000007A] # a-z in UTF32 2093 2094 Rule6 <- [\-\[\]\\\'\"\n\r\t] 2095 `)); 2096 2097 string lower = "abcdefghijklmnopqrstuvwxyz"; 2098 string upper = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"; 2099 string digits = "0123456789"; 2100 string others = "?./,;:!*&()[]<>"; 2101 string escapes = "-[]\\\'\"\n\r\t"; 2102 2103 foreach(dchar c; lower) 2104 { 2105 assert(CharRanges.Rule1(to!string(c)).successful); 2106 assert(CharRanges.Rule2(to!string(c)).successful); 2107 assert(CharRanges.Rule3(to!string(c)).successful); 2108 assert(CharRanges.Rule4(to!string(c)).successful); 2109 assert(CharRanges.Rule5(to!string(c)).successful); 2110 } 2111 2112 foreach(dchar c; upper ~ digits ~ others) 2113 { 2114 assert(!CharRanges.Rule1(to!string(c)).successful); 2115 assert(!CharRanges.Rule2(to!string(c)).successful); 2116 assert(!CharRanges.Rule3(to!string(c)).successful); 2117 assert(!CharRanges.Rule4(to!string(c)).successful); 2118 assert(!CharRanges.Rule5(to!string(c)).successful); 2119 } 2120 2121 foreach(dchar c; escapes) 2122 { 2123 assert(CharRanges.Rule6(to!string(c)).successful); 2124 } 2125 } 2126 2127 unittest // qualified names for rules 2128 { 2129 mixin(grammar(` 2130 First: 2131 Rule1 <- "abc" 2132 Rule2 <- "def" 2133 `)); 2134 2135 mixin(grammar(` 2136 Second: 2137 Rule1 <- First.Rule1 2138 Rule2 <- First.Rule2 2139 Rule3 <- pegged.peg.list(pegged.peg.identifier, ',') 2140 `)); 2141 2142 // Equal on success 2143 ParseTree reference = First("abc"); 2144 ParseTree result = Second("abc"); 2145 assert(reference.successful); 2146 assert(result.successful); 2147 assert(result.matches == reference.matches); 2148 assert(result.begin == reference.begin); 2149 assert(result.end == reference.end); 2150 2151 // Equal on failure 2152 reference = First("def"); 2153 result = Second("def"); 2154 assert(!reference.successful); 2155 assert(!result.successful); 2156 assert(result.matches == reference.matches); 2157 assert(result.begin == reference.begin); 2158 assert(result.end == reference.end); 2159 2160 // Second rule test 2161 reference = First.Rule2("def"); 2162 result = Second.Rule2("def"); 2163 assert(reference.successful); 2164 assert(result.matches == reference.matches); 2165 assert(result.begin == reference.begin); 2166 assert(result.end == reference.end); 2167 2168 // External (predefined) rule call: 2169 result = Second.Rule3("foo,bar,baz"); 2170 assert(result.successful); 2171 assert(result.begin == 0); 2172 assert(result.end == "foo,bar,baz".length); 2173 assert(result.matches == ["foo", "bar", "baz"]); 2174 } 2175 2176 unittest // Parameterized rules 2177 { 2178 mixin(grammar(` 2179 Parameterized: 2180 # Different arities 2181 Rule1(A) <- A+ 2182 Rule1(A,B) <- (A B)+ 2183 Rule1(A,B,C) <- (A B C)+ 2184 2185 # Inner call 2186 Call1 <- Rule1('a') 2187 Call2 <- Rule1('a','b') 2188 Call3 <- Rule1('a','b','c') 2189 Call4(A) <- Rule1(A, A, A) 2190 Call5(A) <- Rule1('a', A, 'c') 2191 2192 # Default values 2193 Rule2(A = 'a', B = 'b') <- (A B)+ 2194 2195 # Re-using the parameters 2196 Rule3(A,B) <- A B B A # Money money money! 2197 2198 # The standard parameterized rule 2199 List(Elem, Sep) < Elem (:Sep Elem)* 2200 2201 # Another common PEG pattern 2202 AllUntil(End) <~ (!End .)* :End 2203 `)); 2204 2205 alias Parameterized.Rule1!(literal!"a") R1; 2206 alias oneOrMore!(literal!"a") Ref1; 2207 2208 ParseTree reference = Ref1("aaaa"); 2209 ParseTree result = R1("aaaa"); 2210 2211 assert(result.name == `Parameterized.Rule1!(literal!("a"))`); 2212 assert(reference.successful); 2213 assert(result.successful); 2214 assert(result.matches == reference.matches); 2215 assert(result.begin == reference.begin); 2216 assert(result.end == reference.end); 2217 2218 result = Parameterized.Call1("aaaa"); 2219 2220 assert(result.name == `Parameterized.Call1`); 2221 assert(result.successful); 2222 assert(result.matches == reference.matches); 2223 assert(result.begin == reference.begin); 2224 assert(result.end == reference.end); 2225 2226 alias Parameterized.Rule1!(literal!"abc") R1long; 2227 alias oneOrMore!(literal!"abc") Ref1long; 2228 2229 reference = Ref1long("abcabcabcabc"); 2230 result = R1long("abcabcabcabc"); 2231 2232 assert(result.name == `Parameterized.Rule1!(literal!("abc"))`); 2233 assert(reference.successful); 2234 assert(result.successful); 2235 assert(result.matches == reference.matches); 2236 assert(result.begin == reference.begin); 2237 assert(result.end == reference.end); 2238 2239 alias Parameterized.Rule1!(literal!"a", literal!"b") R2; 2240 alias oneOrMore!(and!(literal!"a", literal!"b")) Ref2; 2241 2242 reference = Ref2("abababab"); 2243 result = R2("abababab"); 2244 2245 assert(result.name == `Parameterized.Rule1!(literal!("a"), literal!("b"))`); 2246 assert(reference.successful); 2247 assert(result.successful); 2248 assert(result.matches == reference.matches); 2249 assert(result.begin == reference.begin); 2250 assert(result.end == reference.end); 2251 2252 result = Parameterized.Call2("abababab"); 2253 2254 assert(result.name == `Parameterized.Call2`); 2255 assert(result.successful); 2256 assert(result.matches == reference.matches); 2257 assert(result.begin == reference.begin); 2258 assert(result.end == reference.end); 2259 2260 alias Parameterized.Rule1!(literal!"a", literal!"b", literal!"c") R3; 2261 alias oneOrMore!(and!(literal!"a", literal!"b", literal!"c")) Ref3; 2262 2263 reference = Ref3("abcabcabcabc"); 2264 result = R3("abcabcabcabc"); 2265 2266 assert(result.name == `Parameterized.Rule1!(literal!("a"), literal!("b"), literal!("c"))`); 2267 assert(reference.successful); 2268 assert(result.successful); 2269 assert(result.matches == reference.matches); 2270 assert(result.begin == reference.begin); 2271 assert(result.end == reference.end); 2272 2273 result = Parameterized.Call3("abcabcabcabc"); 2274 2275 assert(result.name == `Parameterized.Call3`); 2276 assert(result.successful); 2277 assert(result.matches == reference.matches); 2278 assert(result.begin == reference.begin); 2279 assert(result.end == reference.end); 2280 2281 result = Parameterized.Call4!(literal!"A")("AAAAAA"); 2282 2283 assert(result.name == `Parameterized.Call4!(literal!("A"))`); 2284 assert(result.successful); 2285 assert(result.begin == 0); 2286 assert(result.end == "AAAAAA".length); 2287 assert(result.matches == ["A","A","A","A","A","A"]); 2288 2289 result = Parameterized.Call5!(literal!"A")("aAcaAc"); 2290 2291 assert(result.name == `Parameterized.Call5!(literal!("A"))`); 2292 assert(result.successful); 2293 assert(result.begin == 0); 2294 assert(result.end == "aAcaAc".length); 2295 assert(result.matches == ["a","A","c","a","A","c"]); 2296 2297 // Default parameters 2298 alias Parameterized.Rule2!() R2_1; 2299 alias Parameterized.Rule2!(literal!"a") R2_2; 2300 alias Parameterized.Rule2!(literal!"a", literal!"b") R2_3; 2301 2302 assert(R2_1("ababab").successful); 2303 assert(R2_2("ababab").successful); 2304 assert(R2_3("ababab").successful); 2305 2306 // Re-using a parameter (A B B A) 2307 result = Parameterized.Rule3!(literal!"A", literal!"B")("ABBA"); 2308 2309 assert(result.name == `Parameterized.Rule3!(literal!("A"), literal!("B"))`); 2310 assert(result.successful); 2311 assert(result.begin == 0); 2312 assert(result.end == "ABBA".length); 2313 assert(result.matches == ["A", "B", "B", "A"]); 2314 2315 alias Parameterized.List!(identifier, literal!",") IdList; // Identifiers separated by ',' 2316 alias Parameterized.List!(IdList, literal!";") IdList2; // IdList's separated by ';' 2317 2318 result = IdList("foo, bar, baz"); 2319 2320 assert(result.name == `Parameterized.List!(identifier, literal!(","))`); 2321 assert(result.successful); 2322 assert(result.begin == 0); 2323 assert(result.end == "foo, bar, baz".length); 2324 assert(result.matches == ["foo", "bar", "baz"]); 2325 2326 result = Parameterized.decimateTree(IdList2("foo,bar,baz; abc, def, ghi")); 2327 2328 assert(result.name == `Parameterized.List!(Parameterized.List!(identifier, literal!(",")), literal!(";"))`); 2329 assert(result.successful); 2330 assert(result.begin == 0); 2331 assert(result.end == "foo,bar,baz; abc, def, ghi".length); 2332 assert(result.matches == ["foo", "bar", "baz", "abc", "def", "ghi"]); 2333 2334 assert(result.children.length == 2); 2335 2336 assert(result.children[0].name == `Parameterized.List!(identifier, literal!(","))`); 2337 assert(result.children[0].matches == ["foo", "bar", "baz"]); 2338 2339 assert(result.children[1].name == `Parameterized.List!(identifier, literal!(","))`); 2340 assert(result.children[1].matches == ["abc", "def", "ghi"]); 2341 2342 alias Parameterized.AllUntil!(or!(endOfLine)) Line; 2343 alias zeroOrMore!(Line) Lines; 2344 2345 string input = 2346 "This is an input text. 2347 Here is another line. 2348 2349 And the last one. 2350 "; 2351 2352 result = Lines(input); 2353 assert(result.successful); 2354 assert(result.children.length == 4); 2355 assert(result.children[0].matches == ["This is an input text."]); 2356 assert(result.children[1].matches == ["Here is another line."]); 2357 assert(result.children[2].matches is null); 2358 assert(result.children[3].matches == [" And the last one."]); 2359 2360 // Parameterized grammar test 2361 mixin(grammar(` 2362 Arithmetic(Atom) : 2363 Expr < Factor (('+'/'-') Factor)* 2364 Factor < Primary (('*'/'/') Primary)* 2365 Primary < '(' Expr ')' / '-' Expr / Atom 2366 `)); 2367 2368 alias Arithmetic!(identifier) Arith1; 2369 alias Arithmetic!(or!(identifier, digits)) Arith2; 2370 2371 assert(Arith1("x + y*z/foo").successful); 2372 assert(Arith2("x + y*z/foo").successful); 2373 2374 assert(!Arith1("1 + 2*3/456").successful); 2375 assert(Arith2("1 + 2*3/456").successful); 2376 assert(Arith2("1 + 2*3/z").successful); 2377 } 2378 2379 version(unittest) // Semantic actions 2380 { 2381 P doubler(P)(P p) 2382 { 2383 if (p.successful) 2384 p.matches ~= p.matches; 2385 return p; 2386 } 2387 } 2388 2389 unittest // Semantic actions, testing { foo } and { foo, bar, baz } 2390 { 2391 mixin(grammar(` 2392 Semantic: 2393 Rule1 <- 'a' {doubler} 2394 Rule2 <- 'b' {doubler, doubler} 2395 Rule3 <- 'b' {doubler} {doubler} # Same as Rule2 2396 Rule4 <- 'b' {doubler, doubler, doubler} 2397 Rule5 <- 'a' {doubler} 'b' 'c'{doubler} 2398 Rule6 <{doubler} 'a' # Rule Level actions 2399 Rule7 <{doubler} 'a' 'b' {doubler} # Rule Level actions 2400 `)); 2401 2402 ParseTree result = Semantic.decimateTree(Semantic.Rule1("a")); 2403 assert(result.successful); 2404 assert(result.matches == ["a", "a"]); 2405 2406 result = Semantic.decimateTree(Semantic.Rule1("b")); 2407 assert(!result.successful); 2408 assert(result.matches == [`"a"`]); 2409 2410 result = Semantic.decimateTree(Semantic.Rule2("b")); 2411 assert(result.successful); 2412 assert(result.matches == ["b", "b", "b", "b"]); 2413 2414 result = Semantic.decimateTree(Semantic.Rule3("b")); 2415 assert(result.successful); 2416 assert(result.matches == ["b", "b", "b", "b"]); 2417 2418 result = Semantic.decimateTree(Semantic.Rule4("b")); 2419 assert(result.successful); 2420 assert(result.matches == ["b", "b", "b", "b", "b", "b", "b", "b"]); 2421 2422 result = Semantic.decimateTree(Semantic.Rule5("abc")); 2423 assert(result.successful); 2424 assert(result.matches == ["a", "a", "b", "c", "c"]); 2425 2426 result = Semantic.decimateTree(Semantic.Rule6("abc")); 2427 assert(result.successful); 2428 assert(result.matches == ["a", "a"]); 2429 2430 result = Semantic.decimateTree(Semantic.Rule7("abc")); 2431 assert(result.successful); 2432 assert(result.matches == ["a", "b", "b", "a", "b", "b"]); 2433 2434 } 2435 2436 version(unittest) 2437 { 2438 P foo(P)(P p) { return p;} // for testing actions 2439 2440 void badGrammar(string s)() 2441 { 2442 assert(!__traits(compiles, {mixin(grammar(s));}), "This should fail: " ~ s); 2443 } 2444 2445 void goodGrammar(string s)() 2446 { 2447 assert(__traits(compiles, {mixin(grammar(s));}), "This should work: " ~ s); 2448 } 2449 } 2450 2451 2452 /+ Failed (commit 4cd177a), DMD crashed. Too many grammar istantiations, I guess. 2453 unittest // failure cases: unnamed grammar, no-rule grammar, syntax errors, etc. 2454 { 2455 // No grammar 2456 badGrammar!""; 2457 2458 // Name without colon nor rules 2459 badGrammar!"Name"; 2460 2461 // No rule 2462 badGrammar!"Name:"; 2463 badGrammar!"Name1 Name2"; 2464 2465 // Incomplete and badly formulated rules 2466 badGrammar!"Name: 2467 Rule1"; 2468 badGrammar!"Name: 2469 Rule1 Rule2"; 2470 badGrammar!"Name 2471 Rule1 Rule2"; 2472 badGrammar!"Name: 2473 Rule1 <-"; 2474 badGrammar!"Name: 2475 Rule1 <~"; 2476 badGrammar!"Name: 2477 Rule1 < "; 2478 badGrammar!"Name: 2479 Rule1 <%"; 2480 badGrammar!"Name: 2481 Rule1 <;"; 2482 badGrammar!"Name 2483 Rule1 <- <-"; 2484 2485 // Non-closing parenthesis, brackets and quotes 2486 badGrammar!"Name: 2487 Rule1 <- ('a'"; 2488 badGrammar!"Name: 2489 Rule1 <- 'a')"; 2490 badGrammar!"Name: 2491 Rule1 <- ('a'))"; 2492 badGrammar!"Name: 2493 Rule1 <- (('a')"; 2494 badGrammar!"Name: 2495 Rule1 <- 'a"; 2496 badGrammar!"Name: 2497 Rule1 <- a'"; 2498 badGrammar!`Name: 2499 Rule1 <- "a`; 2500 badGrammar!`Name: 2501 Rule1 <- a"`; 2502 badGrammar!`Name: 2503 Rule1 <- 'a"`; 2504 badGrammar!`Name: 2505 Rule1 <- "a'`; 2506 badGrammar!"Name: 2507 Rule1 <- [a"; 2508 badGrammar!"Name: 2509 Rule1 <- a]"; 2510 badGrammar!"Name: 2511 Rule1 <- [a]]"; 2512 // But <- [[a] is legal: matches '[' or 'a' 2513 goodGrammar!"Name: 2514 Rule1 <- [[a]"; 2515 2516 // Bad prefix/postfix 2517 badGrammar!"Name: 2518 Rule1 <- 'a'~"; 2519 badGrammar!"Name: 2520 Rule1 <- 'a'%"; 2521 badGrammar!"Name: 2522 Rule1 <- 'a'!"; 2523 badGrammar!"Name: 2524 Rule1 <- 'a'&"; 2525 badGrammar!"Name: 2526 Rule1 <- 'a';"; 2527 badGrammar!"Name: 2528 Rule1 <- *'a'"; 2529 badGrammar!"Name: 2530 Rule1 <- +'a'"; 2531 badGrammar!"Name: 2532 Rule1 <- ?'a'"; 2533 badGrammar!"Name: 2534 Rule1 <- 'a' {}"; 2535 // Foo is defined in a version(unittest) block 2536 badGrammar!"Name: 2537 Rule1 <- 'a' { foo"; 2538 badGrammar!"Name: 2539 Rule1 <- 'a' foo}"; 2540 badGrammar!"Name: 2541 Rule1 <- 'a' {foo}}"; // closing } 2542 badGrammar!"Name: 2543 Rule1 <- 'a' {{foo}"; // opening { 2544 badGrammar!"Name: 2545 Rule1 <- 'a' {foo,}"; // bad comma 2546 badGrammar!"Name: 2547 Rule1 <- 'a' {,foo}"; 2548 badGrammar!"Name: 2549 Rule1 <- {foo}"; // no rule before {}'s 2550 // DMD Bug :-( 2551 /+glue.c line 1150 dmd::virtual unsigned int Type::totym():Assertion `0' failed. 2552 badGrammar!"Name: 2553 Rule1 <- 'a' {bar}"; // bar not defined 2554 +/ 2555 2556 // choice ('/') syntax errors 2557 badGrammar!"Name: 2558 Rule1 <- 'a' /"; 2559 // But: <- / 'a' is legal (it's equivalent to: <- 'a') 2560 goodGrammar!"Name: 2561 Rule1 <- / 'a'"; 2562 badGrammar!"Name: 2563 Rule1 <- /"; 2564 badGrammar!"Name: 2565 Rule1 <- 'a' / / 'b'"; 2566 } 2567 +/ 2568 2569 unittest // Memoization testing 2570 { 2571 enum gram1 = ` 2572 Test1: 2573 Rule1 <- Rule2* 'b' # To force a long evaluation of aaaa... 2574 / Rule2* 'c' # before finding a 'b' or a 'c' 2575 Rule2 <- 'a' 2576 `; 2577 2578 enum gram2 = ` 2579 Test2: 2580 Rule1 <- Rule2* 'b' # To force a long evaluation of aaaa... 2581 / Rule2* 'c' # before finding a 'b' or a 'c' 2582 Rule2 <- 'a' 2583 `; 2584 2585 mixin(grammar!(Memoization.yes)(gram1)); 2586 mixin(grammar!(Memoization.no)(gram2)); 2587 2588 assert(is(typeof(Test1.memo))); 2589 assert(!is(typeof(Test2.memo))); 2590 2591 ParseTree result1 = Test1("aaaaaaac"); // Memo + Runtime 2592 enum ParseTree result2 = Test1("aaaaaaac"); // Memo + Compile-time. Note: there is no actual memoization at CT. 2593 ParseTree result3 = Test2("aaaaaaac"); // No memo + Runtime 2594 enum ParseTree result4 = Test2("aaaaaaac"); // No memo + Compile-time 2595 2596 assert(result1 == result2); 2597 assert(result3 == result4); 2598 2599 //Directly comparing result1 and result3 is not possible, for the grammar names are different 2600 assert(pegged.peg.softCompare(result1, result2)); 2601 assert(pegged.peg.softCompare(result1, result3)); 2602 assert(pegged.peg.softCompare(result1, result4)); 2603 } 2604 2605 unittest // Memoization reset in composed grammars. Issue #162 2606 { 2607 enum MathGrammar = ` 2608 Math: 2609 List <- Var (:',' Var)* 2610 Var <~ [a-zA-Z] 2611 `; 2612 enum LetGrammar = ` 2613 Let: 2614 Stmt <- Math.List ' be' 2615 `; 2616 2617 mixin(grammar(MathGrammar)); 2618 mixin(grammar!(Memoization.no)(LetGrammar)); 2619 2620 assert(Let("a,b be").successful); 2621 assert(Let("K,H,G be").successful); // This fails if the memoization table is 2622 // not cleared in all composed grammars. 2623 } 2624 2625 unittest // Test lambda syntax in semantic actions 2626 { 2627 import std.array; 2628 import std..string : strip; 2629 2630 auto actions = [ 2631 2632 // Normal action 2633 `{ myAction }`, 2634 2635 // List of normal actions 2636 `{ myAction, myAction2 }`, 2637 2638 // Simple do-nothing lambda 2639 `{ (a) {return a;} }`, 2640 2641 // Simple do-nothing lambda with formatting 2642 `{ (a) { 2643 return a; 2644 }}`, 2645 2646 // Lambda with commas and spurious braces to try and confuse it 2647 `{ (a, b) { 2648 string s = "}"; 2649 if (a.successful,) { 2650 s ~= q"<}>"; 2651 } else { 2652 { s ~= q"<}>"; /* } */ } 2653 } 2654 return a;} }`, 2655 2656 // List of mixed actions and lambdas 2657 `{ myAction , (a) {return a;}, myAction2 , (a) { /* , } */ return a; } }`, 2658 2659 // Ambiguous lambda (not sure it would compile if used... but it should parse) 2660 `{ myAction, a => transform(a), myAction2 }`, 2661 2662 // Something more convoluted 2663 "{ myAction, (a) { 2664 /* block } comment with } braces */ 2665 string s = `} {` // wysiwyg string with braces and line comment with brace } 2666 if (s is null) { 2667 // } 2668 } else { // scopes 2669 { // nested scopes 2670 writeln(q{ \"}\" }); // token string with nested string with brace 2671 } 2672 } 2673 2674 string s = `1,2,3,4,5` // commas separate actions 2675 2676 /+ } Crazy nesting block comment 2677 /+ } +/ 2678 /+ } +/ 2679 /+ /+ } +/ } +/ 2680 } 2681 +/ 2682 2683 q\"< } <}> <> <<}<>}>> >\"; // delimited string 2684 q\"[ [}] [] [[[ } ]]] ]\"; // delimited string 2685 q\"( () }(}) (((}))}) )\"; // delimited string 2686 q\"{ {} {} {{{}}} }\"; // delimited string 2687 q{ class {} {} struct void \"}\" } /* another token string } */ 2688 2689 struct S 2690 { 2691 void foo() {} 2692 void bar() {} 2693 } 2694 2695 return a; 2696 }, myAction2 }", 2697 ]; 2698 2699 auto results = [ 2700 [`myAction`], 2701 [`myAction`,`myAction2`], 2702 [`(a) {return a;}`], 2703 [`(a) { 2704 return a; 2705 }`], 2706 [`(a, b) { 2707 string s = "}"; 2708 if (a.successful,) { 2709 s ~= q"<}>"; 2710 } else { 2711 { s ~= q"<}>"; /* } */ } 2712 } 2713 return a;}`], 2714 [`myAction`,`(a) {return a;}`,`myAction2`,`(a) { /* , } */ return a; }`], 2715 [`myAction`,`a => transform(a)`,`myAction2`], 2716 [`myAction`,"(a) { 2717 /* block } comment with } braces */ 2718 string s = `} {` // wysiwyg string with braces and line comment with brace } 2719 if (s is null) { 2720 // } 2721 } else { // scopes 2722 { // nested scopes 2723 writeln(q{ \"}\" }); // token string with nested string with brace 2724 } 2725 } 2726 2727 string s = `1,2,3,4,5` // commas separate actions 2728 2729 /+ } Crazy nesting block comment 2730 /+ } +/ 2731 /+ } +/ 2732 /+ /+ } +/ } +/ 2733 } 2734 +/ 2735 2736 q\"< } <}> <> <<}<>}>> >\"; // delimited string 2737 q\"[ [}] [] [[[ } ]]] ]\"; // delimited string 2738 q\"( () }(}) (((}))}) )\"; // delimited string 2739 q\"{ {} {} {{{}}} }\"; // delimited string 2740 q{ class {} {} struct void \"}\" } /* another token string } */ 2741 2742 struct S 2743 { 2744 void foo() {} 2745 void bar() {} 2746 } 2747 2748 return a; 2749 }",`myAction2`] 2750 ]; 2751 2752 foreach(idx, act; actions) 2753 { 2754 auto grammar = `P: Rule <- RuleA ` ~ act ~ ` RuleA <- 'A'`; 2755 auto p = Pegged(grammar); 2756 2757 assert(p.successful); 2758 2759 auto action = p.children[0].children[1] // Pegged.Definition 2760 .children[2] // Pegged.Expression 2761 .children[0] // Pegged.FirstExpression 2762 .children[0] // Pegged.Sequence 2763 .children[0] // Pegged.Prefix 2764 .children[0] // Pegged.Suffix 2765 .children[1]; // Pegged.Action 2766 2767 assert(action.matches.length == results[idx].length); 2768 foreach(i, s; action.matches) 2769 assert(strip(s) == results[idx][i], 2770 "\nGot |"~s~"|" ~ "\nNeeded: |"~results[idx][i]~"|"); 2771 } 2772 } 2773 2774 unittest 2775 { 2776 // Higher-level word boundary test. 2777 mixin(grammar(` 2778 TestGrammar: 2779 2780 Foo < '{' 'X' '}' 2781 Bar < 'A' 'B' 2782 2783 Spacing <: 2784 / blank+ 2785 / blank* wordBoundary 2786 / wordBoundary blank* 2787 / ![a-zA-Z] 2788 / !. 2789 2790 `)); 2791 2792 auto pt = TestGrammar.Foo("{ X }"); 2793 assert(pt.successful); 2794 2795 pt = TestGrammar.Foo("{X}"); 2796 assert(pt.successful); 2797 2798 pt = TestGrammar.Bar("A B"); 2799 assert(pt.successful); 2800 2801 pt = TestGrammar.Bar("AB"); 2802 assert(!pt.successful); 2803 } 2804 2805 unittest // Issue #129 unit test 2806 { 2807 enum gram = ` 2808 G: 2809 A <- B 2810 B <- C 2811 C <- 'c' D 2812 D <- 'd' 2813 `; 2814 2815 mixin(grammar(gram)); 2816 2817 string input = "cd"; 2818 2819 ParseTree p = G(input); 2820 assert(p.successful); 2821 assert(p.name == "G"); 2822 assert(p.children.length == 1); 2823 assert(p.children[0].name == "G.A"); 2824 assert(p.children[0].children.length == 1); 2825 assert(p.children[0].children[0].name == "G.B"); 2826 assert(p.children[0].children[0].children.length == 1); 2827 assert(p.children[0].children[0].children[0].name == "G.C"); 2828 assert(p.children[0].children[0].children[0].children.length == 1); 2829 assert(p.children[0].children[0].children[0].children[0].name == "G.D"); 2830 assert(p.children[0].children[0].children[0].children[0].children.length == 0); 2831 } 2832 2833 unittest // Direct left-recursion 2834 { 2835 enum LeftGrammar = ` 2836 Left: 2837 S <- E eoi 2838 E <- E '+n' / 'n' 2839 `; 2840 mixin(grammar(LeftGrammar)); 2841 ParseTree result = Left("n+n+n+n"); 2842 assert(result.successful); 2843 assert(result.matches == ["n", "+n", "+n", "+n"]); 2844 } 2845 2846 unittest // Indirect left-recursion 2847 { 2848 enum LeftGrammar = ` 2849 Left: 2850 S <- E eoi 2851 E <- F 'n' / 'n' 2852 F <- E '+' 2853 `; 2854 mixin(grammar(LeftGrammar)); 2855 ParseTree result = Left("n+n+n+n"); 2856 assert(result.successful); 2857 assert(result.matches == ["n", "+", "n", "+", "n", "+", "n"]); 2858 } 2859 2860 unittest // Proper blocking of memoization 2861 { 2862 // Three interlocking cycles of indirect left-recursion. 2863 enum LeftGrammar = ` 2864 Left: 2865 S <- E eoi 2866 E <- F 'n' / 'n' 2867 F <- E '+' I* / G '-' 2868 G <- H 'm' / E 2869 H <- G 'l' 2870 I <- '(' A+ ')' 2871 A <- 'a' 2872 `; 2873 mixin(grammar(LeftGrammar)); 2874 ParseTree result = Left("nlm-n+(aaa)n"); 2875 assert(result.successful); 2876 assert(result.matches == ["n", "l", "m", "-", "n", "+", "(", "a", "a", "a", ")", "n"]); 2877 } 2878 2879 // Example from http://www.inf.puc-rio.br/~roberto/docs/sblp2012.pdf 2880 unittest // Mutual left-recursion 2881 { 2882 /* A thing about stoppers: 2883 Because P is at the intersection of left-recursive cycles P -> P and L -> P -> L, it should 2884 suffice to make only P a stopper to stop unbounded left-recursion. And so it does. But, 2885 stoppers parse greedily: they always consume the maximum of input. So below, if only P is a stopper, 2886 at some point P parses the complete input. Then L fails because it cannot append ".x", then M fails. 2887 If both are made a stopper then M succeeds. That is because P will try L when P '(n)' no longer 2888 consumes input, which will appear as a left-recursion to L if it is a stopper and because of that 2889 it will have a chance to succeed on the full input which it recorded in its seed for the previous 2890 recursion. 2891 */ 2892 enum LeftGrammar = ` 2893 Left: 2894 M <- L eoi 2895 L <- P '.x' / 'x' 2896 P <- P '(n)' / L 2897 `; 2898 mixin(grammar(LeftGrammar)); 2899 ParseTree result = Left("x(n)(n).x(n).x"); 2900 assert(result.successful); 2901 assert(result.matches == ["x", "(n)", "(n)", ".x", "(n)", ".x"]); 2902 } 2903 2904 unittest // Left- and right-recursion (is right-associative!) 2905 { 2906 enum LeftRightGrammar = ` 2907 LeftRight: 2908 M <- E eoi 2909 E <- E '+' E / 'n' 2910 `; 2911 mixin(grammar(LeftRightGrammar)); 2912 ParseTree result = LeftRight("n+n+n+n"); 2913 assert(result.successful); 2914 assert(result.matches == ["n", "+", "n", "+", "n", "+", "n"]); 2915 } 2916 2917 unittest // Hidden left-recursion 2918 { 2919 enum HiddenLeft = ` 2920 Test: 2921 M <- A eoi 2922 A <- B? C 2923 B <- 'b' 2924 C <- A 'a' / 'c' 2925 `; 2926 mixin(grammar(HiddenLeft)); 2927 assert(Test("caa").successful); 2928 assert(Test("bbca").successful); 2929 } 2930 2931 unittest // Null-matching left-recursion 2932 { 2933 enum NullMatch = ` 2934 Test: 2935 M <- S (";" S)* eoi 2936 S <- S '+' S / 'n' / eps 2937 `; 2938 mixin(grammar(NullMatch)); 2939 assert(Test("n+n;n;").matches == ["n", "+", "n", ";", "n", ";", ""]); 2940 }