1 /** 2 Parser generation module for Pegged. 3 The Pegged parser itself is in pegged.parser, generated from pegged.examples.peggedgrammar. 4 5 The documentation is in the /docs directory. 6 */ 7 module pegged.grammar; 8 9 import std.algorithm: startsWith; 10 import std.conv: to; 11 import std.functional: toDelegate; 12 import std.stdio; 13 14 public import pegged.peg; 15 import pegged.parser; 16 17 @safe: 18 19 /** 20 Option enum to get internal memoization (parse results storing). 21 */ 22 enum Memoization { no, yes } 23 24 /** 25 This function takes a (future) module name, a (future) file name and a grammar as a string or a file. 26 It writes the corresponding parser inside a module with the given name. 27 */ 28 void asModule(Memoization withMemo = Memoization.yes)(string moduleName, string fileName, string grammarString, string optHeader = "") 29 { 30 import std.stdio; 31 auto f = File(fileName ~ ".d","w"); 32 33 f.write("/+ DO NOT EDIT BY HAND!\n", 34 "This module was automatically generated from the following grammar:\n\n"); 35 f.write(grammarString); 36 f.write("\n\n+/\n"); 37 38 f.write("module " ~ moduleName ~ ";\n\n"); 39 40 if (optHeader.length > 0) 41 f.write(optHeader ~ "\n\n"); 42 43 f.write("public import pegged.peg;\n"); 44 f.write("import std.algorithm: startsWith;\n"); 45 f.write("import std.functional: toDelegate;\n\n"); 46 f.write(grammar!(withMemo)(grammarString)); 47 } 48 49 /// ditto 50 void asModule(Memoization withMemo = Memoization.yes)(string moduleName, File file, string optHeader = "") 51 { 52 string grammarDefinition; 53 foreach(line; file.byLine) 54 { 55 grammarDefinition ~= line ~ '\n'; 56 } 57 asModule!(withMemo)(moduleName, grammarDefinition, optHeader); 58 } 59 60 // Helper to insert 'Spacing' before and after Primaries 61 ParseTree spaceArrow(ParseTree input) 62 { 63 ParseTree wrapInSpaces(ParseTree p) 64 { 65 ParseTree spacer = 66 ParseTree("Pegged.Prefix", true, null, null, 0,0, [ 67 ParseTree("Pegged.Suffix", true, null, null, 0, 0, [ 68 ParseTree("Pegged.Primary", true, null, null, 0, 0, [ 69 ParseTree("Pegged.RhsName", true, null, null, 0,0, [ 70 ParseTree("Pegged.Identifier", true, ["Spacing"]) 71 ]) 72 ]) 73 ]) 74 ]); 75 ParseTree result = ParseTree("Pegged.WrapAround", true, p.matches, p.input, p.begin, p.end, p.children); 76 result.children = spacer ~ result.children ~ spacer; 77 return result; 78 } 79 return modify!( p => p.name == "Pegged.Primary", 80 wrapInSpaces)(input); 81 } 82 83 84 /** 85 Generate a parser from a PEG definition. 86 The parser is a string containing D code, to be mixed in or written in a file. 87 88 ---- 89 enum string def = " 90 Gram: 91 A <- 'a' B* 92 B <- 'b' / 'c' 93 "; 94 95 mixin(grammar(def)); 96 97 ParseTree p = Gram("abcbccbcd"); 98 ---- 99 */ 100 string grammar(Memoization withMemo = Memoization.yes)(string definition) 101 { 102 ParseTree defAsParseTree = Pegged(definition); 103 104 if (!defAsParseTree.successful) 105 { 106 // To work around a strange bug with ParseTree printing at compile time 107 string result = "static assert(false, `" ~ defAsParseTree.toString("") ~ "`);"; 108 return result; 109 } 110 return grammar!(withMemo)(defAsParseTree); 111 } 112 /// ditto 113 string grammar(Memoization withMemo = Memoization.yes)(ParseTree defAsParseTree) 114 { 115 string[] composedGrammars; 116 117 // Grammar analysis in support of left-recursion. 118 import pegged.introspection; 119 import std.algorithm : canFind; 120 GrammarInfo grammarInfo = grammarInfo(defAsParseTree.children[0]); 121 string[] stoppers; // Keys are the rules that stop left-recursion and the 122 // values are arrays of strings containing the corresponding 123 // rules for which memoization needs to be blocked. 124 125 /* 126 I once considered that if two left-recursive cycles intersect, unbounded left-recursion 127 would be prevented in both cycles if only the intersection rule would be a stopper. Although 128 true, it causes other problems, as documented in the "Mutual left-recursion" unittest below. 129 Therefore, we simply make the first rule in every left-recursive cycle a stopper. 130 Also, one might think that it suffices to prevent ordinary memoization in just the rules 131 that are part of the cycle. However, some larger input files for pegged/examples/extended_pascal 132 would fail to parse. So memoization for all left-recursive rules is disabled during 133 left-recursion. 134 */ 135 string[] allLeftRecursiveRules; 136 foreach (cycle; grammarInfo.leftRecursiveCycles) 137 foreach (rule; cycle) 138 if (!canFind(allLeftRecursiveRules, rule)) 139 allLeftRecursiveRules ~= rule; 140 foreach (cycle; grammarInfo.leftRecursiveCycles) 141 if (!stoppers.canFind(cycle[0])) 142 stoppers ~= cycle[0]; 143 144 // Prints comment showing detected left-recursive cycles. 145 string printLeftRecursiveCycles() 146 { 147 string result; 148 foreach (cycle; grammarInfo.leftRecursiveCycles) 149 { 150 result ~= cycle[0]; 151 foreach (rule; cycle[1..$]) 152 result ~= " <- " ~ rule; 153 result ~= "\n"; 154 } 155 return result.length > 0 ? "/** Left-recursive cycles:\n" ~ result ~ "*/\n\n" : ""; 156 } 157 158 // Prints comment showing rules that stop left-recursive cycles and rules for which memoization is blocked during recursion. 159 string printLeftRecursionStoppers() 160 { 161 import std.array: join; 162 string result; 163 foreach (stopper; stoppers) 164 result ~= stopper ~ ": " ~ allLeftRecursiveRules.join(", ") ~ "\n"; 165 return result.length > 0 ? 166 "/** Rules that stop left-recursive cycles, followed by rules for which\n" 167 ~ " * memoization is blocked during recursion:\n" ~ result ~ "*/\n\n" : ""; 168 } 169 // Analysis completed. 170 171 string generateForgetMemo() 172 { 173 string result; 174 result ~= " 175 static void forgetMemo() 176 {"; 177 if (withMemo == Memoization.yes) 178 result ~= " 179 memo = null;"; 180 if (composedGrammars.length > 0) 181 result ~= " 182 import std.traits;"; 183 foreach (composed; composedGrammars) 184 result ~= " 185 static if (is(typeof(" ~ composed ~ ".forgetMemo))) 186 " ~ composed ~ ".forgetMemo();"; 187 result ~= " 188 }\n"; 189 return result; 190 } 191 192 string generateCode(ParseTree p, string propagatedName = "") 193 { 194 string result; 195 196 switch (p.name) 197 { 198 case "Pegged": 199 result = generateCode(p.children[0]); 200 break; 201 case "Pegged.Grammar": 202 string grammarName = generateCode(p.children[0]); 203 string shortGrammarName = p.children[0].matches[0]; 204 //string invokedGrammarName = generateCode(transformName(p.children[0])); 205 string firstRuleName = generateCode(p.children[1].children[0]); 206 207 result = 208 "@safe struct Generic" ~ shortGrammarName ~ "(TParseTree) 209 { 210 import std.functional : toDelegate; 211 import pegged.dynamic.grammar; 212 static import pegged.peg; 213 struct " ~ grammarName ~ "\n { 214 enum name = \"" ~ shortGrammarName ~ "\"; 215 static ParseTree delegate(ParseTree) @safe [string] before; 216 static ParseTree delegate(ParseTree) @safe [string] after; 217 static ParseTree delegate(ParseTree) @safe [string] rules;"; 218 219 if (withMemo == Memoization.yes) { 220 result ~= " 221 import std.typecons:Tuple, tuple; 222 static TParseTree[Tuple!(string, size_t)] memo;"; 223 if (grammarInfo.leftRecursiveCycles.length > 0) 224 result ~= " 225 import std.algorithm: canFind, countUntil, remove; 226 static size_t[] blockMemoAtPos;"; 227 } 228 229 result ~= " 230 static this() @trusted\n {\n"; 231 232 ParseTree[] definitions = p.children[1 .. $]; 233 bool userDefinedSpacing; 234 foreach(i,def; definitions) 235 { 236 if (def.children[0].children.length == 1) // Non-parameterized ruleName 237 result ~= " rules[\"" ~ def.matches[0] ~ "\"] = toDelegate(&" ~ def.matches[0] ~ ");\n"; 238 if (def.matches[0] == "Spacing") // user-defined spacing 239 { 240 userDefinedSpacing = true; 241 break; 242 } 243 } 244 if(!userDefinedSpacing) 245 result ~= " rules[\"Spacing\"] = toDelegate(&Spacing);\n"; 246 247 result ~= 248 " } 249 250 template hooked(alias r, string name) 251 { 252 static ParseTree hooked(ParseTree p) @safe 253 { 254 ParseTree result; 255 256 if (name in before) 257 { 258 result = before[name](p); 259 if (result.successful) 260 return result; 261 } 262 263 result = r(p); 264 if (result.successful || name !in after) 265 return result; 266 267 result = after[name](p); 268 return result; 269 } 270 271 static ParseTree hooked(string input) @safe 272 { 273 return hooked!(r, name)(ParseTree(\"\",false,[],input)); 274 } 275 } 276 277 static void addRuleBefore(string parentRule, string ruleSyntax) @safe 278 { 279 // enum name is the current grammar name 280 DynamicGrammar dg = pegged.dynamic.grammar.grammar(name ~ \": \" ~ ruleSyntax, rules); 281 foreach(ruleName,rule; dg.rules) 282 if (ruleName != \"Spacing\") // Keep the local Spacing rule, do not overwrite it 283 rules[ruleName] = rule; 284 before[parentRule] = rules[dg.startingRule]; 285 } 286 287 static void addRuleAfter(string parentRule, string ruleSyntax) @safe 288 { 289 // enum name is the current grammar named 290 DynamicGrammar dg = pegged.dynamic.grammar.grammar(name ~ \": \" ~ ruleSyntax, rules); 291 foreach(ruleName,rule; dg.rules) 292 { 293 if (ruleName != \"Spacing\") 294 rules[ruleName] = rule; 295 } 296 after[parentRule] = rules[dg.startingRule]; 297 } 298 299 static bool isRule(string s) pure nothrow @nogc 300 { 301 import std.algorithm : startsWith; 302 return s.startsWith(\"" ~ shortGrammarName ~ ".\"); 303 } 304 "; 305 306 307 /+ 308 ~ " switch(s)\n" 309 ~ " {\n"; 310 311 bool[string] ruleNames; // to avoid duplicates, when using parameterized rules 312 string parameterizedRulesSpecialCode; // because param rules need to be put in the 'default' part of the switch 313 314 string paramRuleHandler(string target) 315 { 316 return "if (s.length >= "~to!string(shortGrammarName.length + target.length + 3) 317 ~" && s[0.."~to!string(shortGrammarName.length + target.length + 3)~"] == \"" 318 ~shortGrammarName ~ "." ~ target~"!(\") return true;"; 319 } 320 321 foreach(i,def; definitions) 322 { 323 /* 324 if (def.matches[0] !in ruleNames) 325 { 326 ruleNames[def.matches[0]] = true; 327 328 if (def.children[0].children.length > 1) // Parameterized rule 329 parameterizedRulesSpecialCode ~= " " ~ paramRuleHandler(def.matches[0])~ "\n"; 330 else 331 result ~= " case \"" ~ shortGrammarName ~ "." ~ def.matches[0] ~ "\":\n"; 332 } 333 */ 334 if (def.matches[0] == "Spacing") // user-defined spacing 335 { 336 userDefinedSpacing = true; 337 break; 338 } 339 } 340 result ~= " return true;\n" 341 ~ " default:\n" 342 ~ parameterizedRulesSpecialCode 343 ~ " return false;\n }\n }\n"; 344 +/ 345 result ~= " mixin decimateTree;\n\n"; 346 347 // If the grammar provides a Spacing rule, then this will be used. 348 // else, the predefined 'spacing' rule is used. 349 result ~= userDefinedSpacing ? "" : " alias spacing Spacing;\n\n"; 350 351 // Creating the inner functions, each corresponding to a grammar rule 352 foreach(def; definitions) 353 result ~= generateCode(def, shortGrammarName); 354 355 // if the first rule is parameterized (a template), it's impossible to get an opCall 356 // because we don't know with which template arguments it should be called. 357 // So no opCall is generated in this case. 358 if (p.children[1].children[0].children.length == 1) 359 { 360 // General calling interface 361 result ~= " static TParseTree opCall(TParseTree p)\n" 362 ~ " {\n" 363 ~ " TParseTree result = decimateTree(" ~ firstRuleName ~ "(p));\n" 364 ~ " result.children = [result];\n" 365 ~ " result.name = \"" ~ shortGrammarName ~ "\";\n" 366 ~ " return result;\n" 367 ~ " }\n\n" 368 ~ " static TParseTree opCall(string input)\n" 369 ~ " {\n"; 370 371 if (withMemo == Memoization.no) 372 result ~= " forgetMemo();\n" 373 ~ " return " ~ shortGrammarName ~ "(TParseTree(``, false, [], input, 0, 0));\n" 374 ~ " }\n"; 375 else 376 result ~= " if(__ctfe)\n" 377 ~ " {\n" 378 ~ " return " ~ shortGrammarName ~ "(TParseTree(``, false, [], input, 0, 0));\n" 379 ~ " }\n" 380 ~ " else\n" 381 ~ " {\n" 382 ~ " forgetMemo();\n" 383 ~ " return " ~ shortGrammarName ~ "(TParseTree(``, false, [], input, 0, 0));\n" 384 ~ " }\n" 385 ~ " }\n"; 386 387 result ~= " static string opCall(GetName g)\n" 388 ~ " {\n" 389 ~ " return \"" ~ shortGrammarName ~ "\";\n" 390 ~ " }\n\n"; 391 } 392 result ~= generateForgetMemo(); 393 result ~= " }\n" // end of grammar struct definition 394 ~ "}\n\n" // end of template definition 395 ~ "alias Generic" ~ shortGrammarName ~ "!(ParseTree)." 396 ~ shortGrammarName ~ " " ~ shortGrammarName ~ ";\n\n"; 397 break; 398 case "Pegged.Definition": 399 // children[0]: name 400 // children[1]: arrow (arrow type as first child) 401 // children[2]: description 402 403 string code; 404 405 switch(p.children[1].children[0].name) 406 { 407 case "Pegged.LEFTARROW": 408 code ~= generateCode(p.children[2]); 409 break; 410 case "Pegged.FUSEARROW": 411 code ~= "pegged.peg.fuse!(" ~ generateCode(p.children[2]) ~ ")"; 412 break; 413 case "Pegged.DISCARDARROW": 414 code ~= "pegged.peg.discard!(" ~ generateCode(p.children[2]) ~ ")"; 415 break; 416 case "Pegged.KEEPARROW": 417 code ~= "pegged.peg.keep!("~ generateCode(p.children[2]) ~ ")"; 418 break; 419 case "Pegged.DROPARROW": 420 code ~= "pegged.peg.drop!("~ generateCode(p.children[2]) ~ ")"; 421 break; 422 case "Pegged.PROPAGATEARROW": 423 code ~= "pegged.peg.propagate!("~ generateCode(p.children[2]) ~ ")"; 424 break; 425 case "Pegged.SPACEARROW": 426 ParseTree modified = spaceArrow(p.children[2]); 427 code ~= generateCode(modified); 428 break; 429 case "Pegged.ACTIONARROW": 430 auto actionResult = generateCode(p.children[2]); 431 foreach(action; p.children[1].matches[1..$]) 432 actionResult = "pegged.peg.action!(" ~ actionResult ~ ", " ~ action ~ ")"; 433 code ~= actionResult; 434 break; 435 default: 436 break; 437 } 438 439 bool parameterizedRule = p.children[0].children.length > 1; 440 string completeName = generateCode(p.children[0]); 441 string shortName = p.matches[0]; 442 string innerName; 443 string hookedName = p.matches[0]; 444 445 if (parameterizedRule) 446 { 447 result = " template " ~ completeName ~ "\n" 448 ~ " {\n"; 449 innerName ~= "\"" ~ shortName ~ "!(\" ~ "; 450 hookedName ~= "_" ~ to!string(p.children[0].children[1].children.length); 451 foreach(i,param; p.children[0].children[1].children) 452 innerName ~= "pegged.peg.getName!("~ param.children[0].matches[0] 453 ~ (i<p.children[0].children[1].children.length-1 ? ")() ~ \", \" ~ " 454 : ")"); 455 innerName ~= " ~ \")\""; 456 } 457 else 458 { 459 innerName ~= "`" ~ completeName ~ "`"; 460 } 461 462 string ctfeCode = " pegged.peg.defined!(" ~ code ~ ", \"" ~ propagatedName ~ "." ~ innerName[1..$-1] ~ "\")"; 463 code = "hooked!(pegged.peg.defined!(" ~ code ~ ", \"" ~ propagatedName ~ "." ~ innerName[1..$-1] ~ "\"), \"" ~ hookedName ~ "\")"; 464 465 if (withMemo == Memoization.no) 466 result ~= " static TParseTree " ~ shortName ~ "(TParseTree p)\n" 467 ~ " {\n" 468 ~ " if(__ctfe)\n" 469 ~ " {\n" 470 ~ (stoppers.canFind(shortName) ? 471 " assert(false, \"" ~ shortName ~ " is left-recursive, which is not supported " 472 ~ "at compile-time. Consider using asModule().\");\n" 473 : 474 " return " ~ ctfeCode ~ "(p);\n" 475 ) 476 ~ " }\n" 477 ~ " else\n" 478 ~ " {\n" 479 ~ (stoppers.canFind(shortName) ? 480 // This rule needs to prevent infinite left-recursion. 481 " static TParseTree[size_t /*position*/] seed;\n" 482 ~ " if (auto s = p.end in seed)\n" 483 ~ " return *s;\n" 484 ~ " auto current = fail(p);\n" 485 ~ " seed[p.end] = current;\n" 486 ~ " while(true)\n" 487 ~ " {\n" 488 ~ " auto result = " ~ code ~ "(p);\n" 489 ~ (grammarInfo.ruleInfo[shortName].nullMatch == NullMatch.no ? 490 " if (result.end > current.end)\n" 491 : 492 " if (result.end > current.end ||\n" 493 ~ " (!current.successful && result.successful) /* null-match */)\n" 494 ) 495 ~ " {\n" 496 ~ " current = result;\n" 497 ~ " seed[p.end] = current;\n" 498 ~ " } else {\n" 499 ~ " seed.remove(p.end);\n" 500 ~ " return current;\n" 501 ~ " }\n" 502 ~ " }\n" 503 : 504 // Possibly left-recursive rule, but infinite recursion is already prevented by another rule in the same cycle. 505 " return " ~ code ~ "(p);\n" 506 ) 507 ~ " }\n" 508 ~ " }\n" 509 ~ " static TParseTree " ~ shortName ~ "(string s)\n" 510 ~ " {\n" 511 ~ " if(__ctfe)\n" 512 ~ " return " ~ ctfeCode ~ "(TParseTree(\"\", false,[], s));\n" 513 ~ " else\n" 514 ~ " {\n" 515 ~ " forgetMemo();\n" 516 ~ " return " ~ code ~ "(TParseTree(\"\", false,[], s));\n" 517 ~ " }\n" 518 ~ " }\n"; 519 else // Memoization.yes 520 result ~= " static TParseTree " ~ shortName ~ "(TParseTree p)\n" 521 ~ " {\n" 522 ~ " if(__ctfe)\n" 523 ~ " {\n" 524 ~ (stoppers.canFind(shortName) ? 525 " assert(false, \"" ~ shortName ~ " is left-recursive, which is not supported " 526 ~ "at compile-time. Consider using asModule().\");\n" 527 : 528 " return " ~ ctfeCode ~ "(p);\n" 529 ) 530 ~ " }\n" 531 ~ " else\n" 532 ~ " {\n" 533 ~ (stoppers.canFind(shortName) ? 534 // This rule needs to prevent infinite left-recursion. 535 " static TParseTree[size_t /*position*/] seed;\n" 536 ~ " if (auto s = p.end in seed)\n" 537 ~ " return *s;\n" 538 ~ " if (!blockMemoAtPos.canFind(p.end))\n" 539 ~ " if (auto m = tuple(" ~ innerName ~ ", p.end) in memo)\n" 540 ~ " return *m;\n" 541 ~ " auto current = fail(p);\n" 542 ~ " seed[p.end] = current;\n" 543 ~ " blockMemoAtPos ~= p.end;\n" 544 ~ " while (true)\n" 545 ~ " {\n" 546 ~ " auto result = " ~ code ~ "(p);\n" 547 ~ (grammarInfo.ruleInfo[shortName].nullMatch == NullMatch.no ? 548 " if (result.end > current.end)\n" 549 : 550 " if (result.end > current.end ||\n" 551 ~ " (!current.successful && result.successful) /* null-match */)\n" 552 ) 553 ~ " {\n" 554 ~ " current = result;\n" 555 ~ " seed[p.end] = current;\n" 556 ~ " } else {\n" 557 // Since seed is local, it cannot be reset between parses the way memo is reset. 558 // We can get away with this by removing elements from it, done below, so that 559 // seed is empty again when left-recursion has ended and all recursive calls have 560 // returned. The advantage of a local seed is that it remains small and fast. The 561 // disadvantage is that seed doesn't memoize the final result, so that must be taken 562 // care of by memo. Note that p.end remains constant for the course of recursion, 563 // and the length of seed only grows when nested recursion occurs. 564 ~ " seed.remove(p.end);\n" 565 // TODO investigate if p.end is always the last element of blockMemoAtPos. 566 ~ " assert(blockMemoAtPos.canFind(p.end));\n" 567 ~ " blockMemoAtPos = blockMemoAtPos.remove(countUntil(blockMemoAtPos, p.end));\n" 568 ~ " memo[tuple(" ~ innerName ~ ", p.end)] = current;\n" 569 ~ " return current;\n" 570 ~ " }\n" 571 ~ " }\n" 572 : 573 // Possibly left-recursive rule, but infinite recursion is already prevented by another rule in the same cycle. 574 (allLeftRecursiveRules.canFind(shortName) ? 575 " if (blockMemoAtPos.canFind(p.end))\n" 576 ~ " return " ~ code ~ "(p);\n" 577 : "" 578 ) 579 ~ " if (auto m = tuple(" ~ innerName ~ ", p.end) in memo)\n" 580 ~ " return *m;\n" 581 ~ " else\n" 582 ~ " {\n" 583 ~ " TParseTree result = " ~ code ~ "(p);\n" 584 ~ " memo[tuple(" ~ innerName ~ ", p.end)] = result;\n" 585 ~ " return result;\n" 586 ~ " }\n" 587 ) 588 ~ " }\n" 589 ~ " }\n\n" 590 ~ " static TParseTree " ~ shortName ~ "(string s)\n" 591 ~ " {\n" 592 ~ " if(__ctfe)\n" 593 ~ " {\n" 594 ~ " return " ~ ctfeCode ~ "(TParseTree(\"\", false,[], s));\n" 595 ~ " }\n" 596 ~ " else\n" 597 ~ " {\n" 598 ~ " forgetMemo();\n" 599 ~ " return " ~ code ~ "(TParseTree(\"\", false,[], s));\n" 600 ~ " }\n" 601 ~ " }\n"; 602 603 result ~= " static string " ~ shortName ~ "(GetName g)\n" 604 ~ " {\n" 605 ~ " return \"" ~ propagatedName ~ "." ~ innerName[1..$-1] ~ "\";\n" 606 ~ " }\n\n"; 607 608 if (parameterizedRule) 609 result ~= " }\n"; 610 611 break; 612 case "Pegged.GrammarName": 613 result = generateCode(p.children[0]); 614 if (p.children.length == 2) 615 result ~= generateCode(p.children[1]); 616 break; 617 case "Pegged.LhsName": 618 result = generateCode(p.children[0]); 619 if (p.children.length == 2) 620 result ~= generateCode(p.children[1]); 621 break; 622 case "Pegged.ParamList": 623 result = "("; 624 foreach(i,child; p.children) 625 result ~= generateCode(child) ~ ", "; 626 result = result[0..$-2] ~ ")"; 627 break; 628 case "Pegged.Param": 629 result = "alias " ~ generateCode(p.children[0]); 630 break; 631 case "Pegged.SingleParam": 632 result = p.matches[0]; 633 break; 634 case "Pegged.DefaultParam": 635 result = p.matches[0] ~ " = " ~ generateCode(p.children[1]); 636 break; 637 case "Pegged.Expression": 638 result ~= generateCode(p.children[0]); 639 break; 640 case "Pegged.FirstExpression", 641 "Pegged.LongestExpression": 642 if (p.children.length > 1) // [LONGEST_]OR expression 643 { 644 // Keyword list detection: "abstract"/"alias"/... 645 bool isLiteral(ParseTree p) 646 { 647 return ( p.name == "Pegged.Sequence" 648 && p.children.length == 1 649 && p.children[0].children.length == 1 650 && p.children[0].children[0].children.length == 1 651 && p.children[0].children[0].children[0].children.length == 1 652 && p.children[0].children[0].children[0].children[0].name == "Pegged.Literal"); 653 } 654 bool keywordList = true; 655 foreach(child;p.children) 656 if (!isLiteral(child)) 657 { 658 keywordList = false; 659 break; 660 } 661 662 if (keywordList) 663 { 664 result = "pegged.peg.keywords!("; 665 foreach(seq; p.children) 666 result ~= "\"" ~ (seq.matches.length == 3 ? seq.matches[1] : "") ~ "\", "; 667 result = result[0..$-2] ~ ")"; 668 } 669 else 670 { 671 result = p.name == "Pegged.FirstExpression" ? "pegged.peg.or!(" : "pegged.peg.longest_match!("; 672 foreach(seq; p.children) 673 result ~= generateCode(seq) ~ ", "; 674 result = result[0..$-2] ~ ")"; 675 } 676 } 677 else // One child -> just a sequence, no need for an or!( , ) 678 { 679 result = generateCode(p.children[0]); 680 } 681 break; 682 case "Pegged.Sequence": 683 if (p.children.length > 1) // real sequence 684 { 685 result = "pegged.peg.and!("; 686 foreach(seq; p.children) 687 { 688 string elementCode = generateCode(seq); 689 // flattening inner sequences 690 if (elementCode.length > 6 && elementCode[0..5] == "pegged.peg.and!(") 691 elementCode = elementCode[5..$-1]; // cutting 'and!(' and ')' 692 result ~= elementCode ~ ", "; 693 } 694 result = result[0..$-2] ~ ")"; 695 } 696 else // One child -> just a Suffix, no need for a and!( , ) 697 { 698 result = generateCode(p.children[0]); 699 } 700 break; 701 case "Pegged.Prefix": 702 result = generateCode(p.children[$-1]); 703 foreach(child; p.children[0..$-1]) 704 result = generateCode(child) ~ result ~ ")"; 705 break; 706 case "Pegged.Suffix": 707 result = generateCode(p.children[0]); 708 foreach(child; p.children[1..$]) 709 { 710 switch (child.name) 711 { 712 case "Pegged.OPTION": 713 result = "pegged.peg.option!(" ~ result ~ ")"; 714 break; 715 case "Pegged.ZEROORMORE": 716 result = "pegged.peg.zeroOrMore!(" ~ result ~ ")"; 717 break; 718 case "Pegged.ONEORMORE": 719 result = "pegged.peg.oneOrMore!(" ~ result ~ ")"; 720 break; 721 case "Pegged.Action": 722 foreach(action; child.matches) 723 result = "pegged.peg.action!(" ~ result ~ ", " ~ action ~ ")"; 724 break; 725 default: 726 break; 727 } 728 } 729 break; 730 case "Pegged.Primary": 731 result = generateCode(p.children[0]); 732 break; 733 case "Pegged.RhsName": 734 result = ""; 735 string composedGrammar = ""; 736 foreach(child; p.children) 737 { 738 result ~= generateCode(child); 739 if (child.name == "Pegged.NAMESEP") 740 composedGrammar = p.input[p.begin .. child.begin]; 741 } 742 if (composedGrammar.length > 0 && !composedGrammars.canFind(composedGrammar)) 743 composedGrammars ~= composedGrammar; 744 break; 745 case "Pegged.ArgList": 746 result = "!("; 747 foreach(child; p.children) 748 result ~= generateCode(child) ~ ", "; // Allow A <- List('A'*,',') 749 result = result[0..$-2] ~ ")"; 750 break; 751 case "Pegged.Identifier": 752 result = p.matches[0]; 753 break; 754 case "Pegged.NAMESEP": 755 result = "."; 756 break; 757 case "Pegged.Literal": 758 if(p.matches.length == 3) // standard case 759 result = "pegged.peg.literal!(\"" ~ p.matches[1] ~ "\")"; 760 else // only two children -> empty literal 761 result = "pegged.peg.literal!(``)"; 762 break; 763 case "Pegged.CILiteral": 764 if(p.matches.length == 3) // standard case 765 result = "pegged.peg.caseInsensitiveLiteral!(\"" ~ p.matches[1] ~ "\")"; 766 else // only two children -> empty literal 767 result = "pegged.peg.caseInsensitiveLiteral!(``)"; 768 break; 769 case "Pegged.CharClass": 770 if (p.children.length > 1) 771 { 772 result = "pegged.peg.or!("; 773 foreach(seq; p.children) 774 result ~= generateCode(seq) ~ ", "; 775 result = result[0..$-2] ~ ")"; 776 } 777 else // One child -> just a sequence, no need for an or!( , ) 778 { 779 result = generateCode(p.children[0]); 780 } 781 break; 782 case "Pegged.CharRange": 783 /// Make the generation at the Char level: directly what is needed, be it `` or "" or whatever 784 if (p.children.length > 1) // a-b range 785 { 786 result = "pegged.peg.charRange!('" ~ generateCode(p.children[0]) 787 ~ "', '" 788 ~ generateCode(p.children[1]) 789 ~ "')"; 790 } 791 else // lone char 792 { 793 result = "pegged.peg.literal!("; 794 string ch = p.matches[0]; 795 switch (ch) 796 { 797 case "\\[": 798 case "\\]": 799 case "\\-": 800 result ~= "\"" ~ ch[1..$] ~ "\")"; 801 break; 802 case "\\\'": 803 result ~= "\"'\")"; 804 break; 805 case "\\`": 806 result ~= q{"`")}; 807 break; 808 case "\\": 809 case "\\\\": 810 result ~= "`\\`)"; 811 break; 812 case "\"": 813 case "\\\"": 814 result ~= "`\"`)"; 815 break; 816 case "\n": 817 case "\r": 818 case "\t": 819 result ~= "\"" ~ to!string(to!dchar(ch)) ~ "\")"; 820 break; 821 default: 822 result ~= "\"" ~ ch ~ "\")"; 823 } 824 } 825 break; 826 case "Pegged.Char": 827 string ch = p.matches[0]; 828 switch (ch) 829 { 830 case "\\[": 831 case "\\]": 832 case "\\-": 833 834 case "\\\'": 835 case "\\\"": 836 case "\\`": 837 case "\\\\": 838 result = ch[1..$]; 839 break; 840 case "\n": 841 case "\r": 842 case "\t": 843 result = to!string(to!dchar(ch)); 844 break; 845 default: 846 result = ch; 847 } 848 break; 849 case "Pegged.POS": 850 result = "pegged.peg.posLookahead!("; 851 break; 852 case "Pegged.NEG": 853 result = "pegged.peg.negLookahead!("; 854 break; 855 case "Pegged.FUSE": 856 result = "pegged.peg.fuse!("; 857 break; 858 case "Pegged.DISCARD": 859 result = "pegged.peg.discard!("; 860 break; 861 //case "Pegged.CUT": 862 // result = "discardChildren!("; 863 // break; 864 case "Pegged.KEEP": 865 result = "pegged.peg.keep!("; 866 break; 867 case "Pegged.DROP": 868 result = "pegged.peg.drop!("; 869 break; 870 case "Pegged.PROPAGATE": 871 result = "pegged.peg.propagate!("; 872 break; 873 case "Pegged.OPTION": 874 result = "pegged.peg.option!("; 875 break; 876 case "Pegged.ZEROORMORE": 877 result = "pegged.peg.zeroOrMore!("; 878 break; 879 case "Pegged.ONEORMORE": 880 result = "pegged.peg.oneOrMore!("; 881 break; 882 case "Pegged.Action": 883 result = generateCode(p.children[0]); 884 foreach(action; p.matches[1..$]) 885 result = "pegged.peg.action!(" ~ result ~ ", " ~ action ~ ")"; 886 break; 887 case "Pegged.ANY": 888 result = "pegged.peg.any"; 889 break; 890 case "Pegged.WrapAround": 891 result = "pegged.peg.wrapAround!(" ~ generateCode(p.children[0]) ~ ", " 892 ~ generateCode(p.children[1]) ~ ", " 893 ~ generateCode(p.children[2]) ~ ")"; 894 break; 895 default: 896 result = "Bad tree: " ~ p.toString(); 897 break; 898 } 899 return result; 900 } 901 902 903 904 return printLeftRecursiveCycles() ~ printLeftRecursionStoppers() ~ generateCode(defAsParseTree); 905 } 906 907 /** 908 Mixin to get what a failed rule expected as input. 909 Not used by Pegged yet. 910 */ 911 mixin template expected() 912 { 913 string expected(ParseTree p) 914 { 915 916 switch(p.name) 917 { 918 case "Pegged.Expression": 919 string expectation; 920 foreach(i, child; p.children) 921 expectation ~= "(" ~ expected(child) ~ ")" ~ (i < p.children.length -1 ? " or " : ""); 922 return expectation; 923 case "Pegged.Sequence": 924 string expectation; 925 foreach(i, expr; p.children) 926 expectation ~= "(" ~ expected(expr) ~ ")" ~ (i < p.children.length -1 ? " followed by " : ""); 927 return expectation; 928 case "Pegged.Prefix": 929 return expected(p.children[$-1]); 930 case "Pegged.Suffix": 931 string expectation; 932 string end; 933 foreach(prefix; p.children[1..$]) 934 switch(prefix.name) 935 { 936 case "Pegged.ZEROORMORE": 937 expectation ~= "zero or more times ("; 938 end ~= ")"; 939 break; 940 case "Pegged.ONEORMORE": 941 expectation ~= "one or more times ("; 942 end ~= ")"; 943 break; 944 case "Pegged.OPTION": 945 expectation ~= "optionally ("; 946 end ~= ")"; 947 break; 948 case "Pegged.Action": 949 break; 950 default: 951 break; 952 } 953 return expectation ~ expected(p.children[0]) ~ end; 954 case "Pegged.Primary": 955 return expected(p.children[0]); 956 //case "Pegged.RhsName": 957 // return "RhsName, not implemented."; 958 case "Pegged.Literal": 959 return "the literal `" ~ p.matches[0] ~ "`"; 960 case "Pegged.CharClass": 961 string expectation; 962 foreach(i, child; p.children) 963 expectation ~= expected(child) ~ (i < p.children.length -1 ? " or " : ""); 964 return expectation; 965 case "Pegged.CharRange": 966 if (p.children.length == 1) 967 return expected(p.children[0]); 968 else 969 return "any character between '" ~ p.matches[0] ~ "' and '" ~ p.matches[2] ~ "'"; 970 case "Pegged.Char": 971 return "the character '" ~ p.matches[0] ~ "'"; 972 case "Pegged.ANY": 973 return "any character"; 974 default: 975 return "unknow rule (" ~ p.matches[0] ~ ")"; 976 } 977 } 978 } 979 980 unittest // 'grammar' unit test: low-level functionalities 981 { 982 mixin(grammar(` 983 Test1: 984 Rule1 <- 'a' 985 Rule2 <- 'b' 986 `)); 987 988 assert(is(Test1 == struct), "A struct name Test1 is created."); 989 assert(is(typeof(Test1("a"))), "Test1 is callable with a string arg"); 990 assert(__traits(hasMember, Test1, "Rule1"), "Test1 has a member named Rule1."); 991 assert(__traits(hasMember, Test1, "Rule2"), "Test1 has a member named Rule2."); 992 assert(is(typeof(Test1.Rule1("a"))), "Test1.Rule1 is callable with a string arg"); 993 assert(is(typeof(Test1.Rule2("a"))), "Test1.Rule2 is callable with a string arg"); 994 995 assert(__traits(hasMember, Test1, "decimateTree"), "Test1 has a member named decimateTree."); 996 assert(__traits(hasMember, Test1, "name"), "Test1 has a member named name."); 997 assert(__traits(hasMember, Test1, "isRule"), "Test1 has a member named isRule."); 998 } 999 1000 unittest // 'grammar' unit test: PEG syntax 1001 { 1002 // Here we do not test pegged.peg.*, just the grammar transformations 1003 // From a PEG to a Pegged expression template. 1004 1005 mixin(grammar(` 1006 Terminals: 1007 Literal1 <- "abc" 1008 Literal2 <- 'abc' 1009 EmptyLiteral1 <- "" 1010 EmptyLiteral2 <- '' 1011 1012 Any <- . 1013 Eps <- eps 1014 Letter <- [a-z] 1015 Digit <- [0-9] 1016 ABC <- [abc] 1017 Alpha1 <- [a-zA-Z_] 1018 Alpha2 <- [_a-zA-Z] 1019 Chars1 <- [\0-\127] 1020 Chars2 <- [\x00-\xFF] 1021 Chars3 <- [\u0000-\u00FF] 1022 Chars4 <- [\U00000000-\U000000FF] 1023 `)); 1024 1025 ParseTree result = Terminals("abc"); 1026 1027 assert(result.name == "Terminals", "Grammar name test."); 1028 assert(result.children[0].name == "Terminals.Literal1", "First rule name test."); 1029 assert(result.begin == 0); 1030 assert(result.end == 3); 1031 assert(result.matches == ["abc"]); 1032 1033 ParseTree reference = Terminals.decimateTree(Terminals.Literal1("abc")); 1034 1035 assert(result.children[0] == reference, "Invoking a grammar is like invoking its first rule."); 1036 1037 assert(Terminals.Literal1("abc").successful, "Standard terminal test. Double quote syntax."); 1038 assert(Terminals.Literal2("abc").successful, "Standard terminal test. Simple quote syntax."); 1039 assert(Terminals.EmptyLiteral1("").successful , "Standard terminal test. Double quote syntax."); 1040 assert(Terminals.EmptyLiteral2("").successful, "Standard terminal test. Simple quote syntax."); 1041 1042 foreach(char c; char.min .. char.max) 1043 assert(Terminals.Any(""~c).successful, "Any terminal ('.') test."); 1044 1045 assert(Terminals.Eps("").successful, "Eps test."); 1046 assert(Terminals.Eps("abc").successful, "Eps test."); 1047 1048 string lower = "abcdefghijklmnopqrstuvwxyz"; 1049 string upper = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"; 1050 string under = "_"; 1051 string digits = "0123456789"; 1052 string others = "?./,;:!*&()[]<>"; 1053 1054 foreach(dchar dc; lower) 1055 assert(Terminals.Letter(to!string(dc)).successful); 1056 foreach(dchar dc; upper) 1057 assert(!Terminals.Letter(to!string(dc)).successful); 1058 foreach(dchar dc; digits) 1059 assert(!Terminals.Letter(to!string(dc)).successful); 1060 foreach(dchar dc; others) 1061 assert(!Terminals.Letter(to!string(dc)).successful); 1062 1063 foreach(dchar dc; lower) 1064 assert(!Terminals.Digit(to!string(dc)).successful); 1065 foreach(dchar dc; upper) 1066 assert(!Terminals.Digit(to!string(dc)).successful); 1067 foreach(dchar dc; digits) 1068 assert(Terminals.Digit(to!string(dc)).successful); 1069 foreach(dchar dc; others) 1070 assert(!Terminals.Letter(to!string(dc)).successful); 1071 1072 foreach(dchar dc; lower ~ upper ~ under) 1073 assert(Terminals.Alpha1(to!string(dc)).successful); 1074 foreach(dchar dc; digits ~ others) 1075 assert(!Terminals.Alpha1(to!string(dc)).successful); 1076 1077 foreach(dchar dc; lower ~ upper ~ under) 1078 assert(Terminals.Alpha2(to!string(dc)).successful); 1079 foreach(dchar dc; digits ~ others) 1080 assert(!Terminals.Alpha2(to!string(dc)).successful); 1081 1082 foreach(size_t index, dchar dc; lower ~ upper ~ under) 1083 assert( (index < 3 && Terminals.ABC(to!string(dc)).successful) 1084 || (index >= 3 && !Terminals.ABC(to!string(dc)).successful)); 1085 1086 foreach(dchar dc; 0..256) 1087 { 1088 string s = to!string(dc); 1089 if (dc <= '\127') 1090 assert(Terminals.Chars1(s).successful); 1091 else 1092 assert(!Terminals.Chars1(s).successful); 1093 1094 assert(Terminals.Chars2(s).successful); 1095 assert(Terminals.Chars3(s).successful); 1096 assert(Terminals.Chars4(s).successful); 1097 } 1098 1099 mixin(grammar(` 1100 Structure: 1101 Rule1 <- Rule2 / Rule3 / Rule4 # Or test 1102 Rule2 <- Rule3 Rule4 # And test 1103 Rule3 <- "abc" 1104 Rule4 <- "def" 1105 1106 Rule5 <- (Rule2 / Rule4) Rule3 # parenthesis test 1107 Rule6 <- Rule2 / Rule4 Rule3 1108 Rule7 <- Rule2 / (Rule4 Rule3) 1109 `)); 1110 1111 // Invoking Rule2 (and hence, Rule3 Rule4) 1112 result = Structure("abcdef"); 1113 1114 assert(result.successful, "Calling Rule2."); 1115 assert(result.name == "Structure", "Grammar name test."); 1116 assert(result.children[0].name == "Structure.Rule1", "First rule name test"); 1117 assert(result.children[0].children.length == 1); 1118 assert(result.children[0].children[0].name == "Structure.Rule2"); 1119 assert(result.children[0].children[0].children[0].name == "Structure.Rule3"); 1120 assert(result.children[0].children[0].children[1].name == "Structure.Rule4"); 1121 assert(result.matches == ["abc", "def"]); 1122 assert(result.begin ==0); 1123 assert(result.end == 6); 1124 1125 // Invoking Rule3 1126 result = Structure("abc"); 1127 1128 assert(result.successful, "Calling Rule3."); 1129 assert(result.name == "Structure", "Grammar name test."); 1130 assert(result.children[0].name == "Structure.Rule1", "First rule name test"); 1131 assert(result.children[0].children.length == 1); 1132 assert(result.children[0].children[0].name == "Structure.Rule3"); 1133 assert(result.children[0].children[0].children.length == 0); 1134 assert(result.matches == ["abc"]); 1135 assert(result.begin ==0); 1136 assert(result.end == 3); 1137 1138 // Invoking Rule4 1139 result = Structure("def"); 1140 1141 assert(result.successful, "Calling Rule2."); 1142 assert(result.name == "Structure", "Grammar name test."); 1143 assert(result.children[0].name == "Structure.Rule1", "First rule name test"); 1144 assert(result.children[0].children.length == 1); 1145 assert(result.children[0].children[0].name == "Structure.Rule4"); 1146 assert(result.children[0].children[0].children.length == 0); 1147 assert(result.matches == ["def"]); 1148 assert(result.begin ==0); 1149 assert(result.end == 3); 1150 1151 // Failure 1152 result =Structure("ab_def"); 1153 assert(!result.successful); 1154 assert(result.name == "Structure", "Grammar name test."); 1155 assert(result.begin == 0); 1156 assert(result.end == 0); 1157 1158 // Parenthesis test 1159 // Rule5 <- (Rule2 / Rule4) Rule3 1160 result = Structure.decimateTree(Structure.Rule5("abcdefabc")); 1161 assert(result.successful); 1162 assert(result.children.length == 2, "Two children: (Rule2 / Rule4), followed by Rule3."); 1163 assert(result.children[0].name == "Structure.Rule2"); 1164 assert(result.children[1].name == "Structure.Rule3"); 1165 1166 result = Structure.decimateTree(Structure.Rule5("defabc")); 1167 assert(result.successful); 1168 assert(result.children.length == 2, "Two children: (Rule2 / Rule4), followed by Rule3."); 1169 assert(result.children[0].name == "Structure.Rule4"); 1170 assert(result.children[1].name == "Structure.Rule3"); 1171 1172 // Rule6 <- Rule2 / Rule4 Rule3 1173 result = Structure.decimateTree(Structure.Rule6("abcdef")); 1174 assert(result.successful); 1175 assert(result.children.length == 1, "One child: Rule2."); 1176 assert(result.children[0].name == "Structure.Rule2"); 1177 1178 result = Structure.decimateTree(Structure.Rule6("defabc")); 1179 assert(result.successful); 1180 assert(result.children.length == 2, "Two children: Rule4, followed by Rule3."); 1181 assert(result.children[0].name == "Structure.Rule4"); 1182 assert(result.children[1].name == "Structure.Rule3"); 1183 1184 // Rule7 <- Rule2 / (Rule4 Rule3) 1185 // That is, like Rule6 1186 result = Structure.decimateTree(Structure.Rule7("abcdef")); 1187 assert(result.successful); 1188 assert(result.children.length == 1, "One child: Rule2."); 1189 assert(result.children[0].name == "Structure.Rule2"); 1190 1191 result = Structure.decimateTree(Structure.Rule7("defabc")); 1192 assert(result.successful); 1193 assert(result.children.length == 2, "Two children: Rule4, followed by Rule3."); 1194 assert(result.children[0].name == "Structure.Rule4"); 1195 assert(result.children[1].name == "Structure.Rule3"); 1196 1197 // Prefixes and Suffixes 1198 mixin(grammar(` 1199 PrefixSuffix: 1200 Rule1 <- &"abc" 1201 Rule2 <- !"abc" 1202 Rule3 <- "abc"? 1203 Rule4 <- "abc"* 1204 Rule5 <- "abc"+ 1205 `)); 1206 1207 // Verifying &"abc" creates a positive look-ahead construct 1208 result = PrefixSuffix.Rule1("abc"); 1209 reference = posLookahead!(literal!"abc")("abc"); 1210 1211 assert(result.matches == reference.matches); 1212 assert(result.begin == reference.begin); 1213 assert(result.end == reference.end); 1214 assert(result.children[0].children == reference.children); 1215 1216 result = PrefixSuffix.Rule1("def"); 1217 reference = posLookahead!(literal!"abc")("def"); 1218 1219 assert(result.matches == reference.matches); 1220 assert(result.begin == reference.begin); 1221 assert(result.end == reference.end); 1222 assert(result.children[0].children == reference.children); 1223 1224 1225 // Verifying !"abc" creates a negative look-ahead construct 1226 result = PrefixSuffix.Rule2("abc"); 1227 reference = negLookahead!(literal!"abc")("abc"); 1228 1229 assert(result.matches == reference.matches); 1230 assert(result.begin == reference.begin); 1231 assert(result.end == reference.end); 1232 assert(result.children[0].children == reference.children); 1233 1234 result = PrefixSuffix.Rule2("def"); 1235 reference = negLookahead!(literal!"abc")("def"); 1236 1237 assert(result.matches == reference.matches); 1238 assert(result.begin == reference.begin); 1239 assert(result.end == reference.end); 1240 assert(result.children[0].children == reference.children); 1241 1242 // Verifying "abc"? creates an optional construct 1243 result = PrefixSuffix.Rule3("abc"); 1244 reference = option!(literal!"abc")("abc"); 1245 1246 assert(result.matches == reference.matches); 1247 assert(result.begin == reference.begin); 1248 assert(result.end == reference.end); 1249 assert(result.children[0].children == reference.children); 1250 1251 result = PrefixSuffix.Rule3("def"); 1252 reference = option!(literal!"abc")("def"); 1253 1254 assert(result.matches == reference.matches); 1255 assert(result.begin == reference.begin); 1256 assert(result.end == reference.end); 1257 assert(result.children[0].children == reference.children); 1258 1259 // Verifying "abc"* creates a zero or more construct 1260 result = PrefixSuffix.Rule4(""); 1261 reference = zeroOrMore!(literal!"abc")(""); 1262 1263 assert(result.matches == reference.matches); 1264 assert(result.begin == reference.begin); 1265 assert(result.end == reference.end); 1266 assert(result.children[0].children == reference.children); 1267 1268 result = PrefixSuffix.Rule4("abc"); 1269 reference = zeroOrMore!(literal!"abc")("abc"); 1270 1271 assert(result.matches == reference.matches); 1272 assert(result.begin == reference.begin); 1273 assert(result.end == reference.end); 1274 assert(result.children[0].children == reference.children); 1275 1276 result = PrefixSuffix.Rule4("abcabc"); 1277 reference = zeroOrMore!(literal!"abc")("abcabc"); 1278 1279 assert(result.matches == reference.matches); 1280 assert(result.begin == reference.begin); 1281 assert(result.end == reference.end); 1282 assert(result.children[0].children == reference.children); 1283 1284 // Verifying "abc"+ creates a one or more construct 1285 result = PrefixSuffix.Rule5(""); 1286 reference = oneOrMore!(literal!"abc")(""); 1287 1288 assert(result.matches == reference.matches); 1289 assert(result.begin == reference.begin); 1290 assert(result.end == reference.end); 1291 assert(result.children[0].children == reference.children); 1292 1293 result = PrefixSuffix.Rule5("abc"); 1294 reference = oneOrMore!(literal!"abc")("abc"); 1295 1296 assert(result.matches == reference.matches); 1297 assert(result.begin == reference.begin); 1298 assert(result.end == reference.end); 1299 assert(result.children[0].children == reference.children); 1300 1301 result = PrefixSuffix.Rule5("abcabc"); 1302 reference = oneOrMore!(literal!"abc")("abcabc"); 1303 1304 assert(result.matches == reference.matches); 1305 assert(result.begin == reference.begin); 1306 assert(result.end == reference.end); 1307 assert(result.children[0].children == reference.children); 1308 1309 // Verifying that the case insensitive literal i suffix does not clash with a rule named i. 1310 mixin(grammar(` 1311 CaseInsensitive: 1312 S <- CI i 1313 CI <- "abc"i 1314 i <- "i" 1315 `)); 1316 assert(CaseInsensitive("aBci").successful); 1317 1318 // Verifying that ordinary literals are case sensitive. 1319 mixin(grammar(` 1320 CaseSensitive: 1321 S <- CS i 1322 CS <- "abc" 1323 i <- "i" 1324 `)); 1325 assert(!CaseSensitive("aBci").successful); 1326 assert(CaseSensitive("abci").successful); 1327 } 1328 1329 unittest // Multilines rules 1330 { 1331 mixin(grammar(` 1332 Indentation: 1333 Rule1 < 1334 'a' 1335 'b' 1336 'c' 1337 Rule2 1338 <- 1339 'd' 1340 Rule3 1341 < 1342 'e' 1343 Rule4 <- 'f' Rule5 # Rule4 ends with 'f', then it's Rule5 1344 <- 'g' 1345 1346 1347 1348 1349 'h' 1350 `)); 1351 1352 1353 assert(Indentation("abc").successful); 1354 assert(Indentation.Rule2("d").successful); 1355 assert(Indentation.Rule3("e").successful); 1356 assert(Indentation.Rule4("f").successful); 1357 assert(Indentation.Rule5("gh").successful); 1358 } 1359 1360 unittest // Parsing at compile-time 1361 { 1362 mixin(grammar(` 1363 Test: 1364 Rule1 <- 'a' Rule2('b') 1365 Rule2(B) <- B 1366 `)); 1367 1368 // Equality on success 1369 ParseTree result = Test("ab"); 1370 1371 enum CTsuccess = Test("ab"); 1372 1373 assert(CTsuccess == result, "Compile-time parsing is equal to runtime parsing on success."); 1374 1375 // Equality on failure 1376 result = Test("ac"); 1377 enum CTfailure = Test("ac"); 1378 1379 assert(CTfailure == result, "Compile-time parsing is equal to runtime parsing on failure."); 1380 } 1381 1382 unittest // PEG extensions (arrows, prefixes, suffixes) 1383 { 1384 mixin(grammar(` 1385 Arrows: 1386 Rule1 <- ABC DEF # Standard arrow 1387 Rule2 < ABC DEF # Space arrow 1388 Rule3 < ABC DEF* # Space arrow 1389 Rule4 < ABC+ DEF # Space arrow 1390 1391 Rule5 <- ABC* 1392 Rule6 <~ ABC* # Fuse arrow 1393 1394 Rule7 <: ABC DEF # Discard arrow 1395 Rule8 <^ ABC DEF # Keep arrow 1396 Rule9 <; ABC DEF # Drop arrow 1397 Rule10 <% ABC Rule1 DEF # Propagate arrow 1398 1399 ABC <- "abc" 1400 DEF <- "def" 1401 `)); 1402 1403 // Comparing <- ABC DEF and < ABC DEF 1404 ParseTree result = Arrows.decimateTree(Arrows.Rule1("abcdef")); 1405 assert(result.successful); 1406 assert(result.begin == 0); 1407 assert(result.end ==6); 1408 assert(result.matches == ["abc", "def"]); 1409 assert(result.children.length == 2); 1410 assert(result.children[0].name == "Arrows.ABC"); 1411 assert(result.children[1].name == "Arrows.DEF"); 1412 1413 result = Arrows.decimateTree(Arrows.Rule1("abc def")); 1414 assert(!result.successful); 1415 1416 result = Arrows.decimateTree(Arrows.Rule2("abcdef")); 1417 assert(result.successful); 1418 assert(result.begin == 0); 1419 assert(result.end == 6); 1420 assert(result.matches == ["abc", "def"]); 1421 assert(result.children.length == 2); 1422 assert(result.children[0].name == "Arrows.ABC"); 1423 assert(result.children[1].name == "Arrows.DEF"); 1424 1425 result = Arrows.decimateTree(Arrows.Rule2("abc def ")); 1426 assert(result.successful, "space arrows consume spaces."); 1427 assert(result.begin == 0); 1428 assert(result.end == "abc def ".length, "The entire input is parsed."); 1429 assert(result.matches == ["abc", "def"]); 1430 assert(result.children.length == 2); 1431 assert(result.children[0].name == "Arrows.ABC"); 1432 assert(result.children[1].name == "Arrows.DEF"); 1433 1434 result = Arrows.decimateTree(Arrows.Rule3("abcdefdef")); 1435 assert(result.successful); 1436 assert(result.begin == 0); 1437 assert(result.end == "abcdefdef".length); 1438 assert(result.matches == ["abc", "def", "def"]); 1439 assert(result.children.length == 3); 1440 assert(result.children[0].name == "Arrows.ABC"); 1441 assert(result.children[1].name == "Arrows.DEF"); 1442 assert(result.children[2].name == "Arrows.DEF"); 1443 1444 result = Arrows.decimateTree(Arrows.Rule3("abc def defdef")); 1445 assert(result.successful, "space arrows consume spaces."); 1446 assert(result.begin == 0); 1447 assert(result.end == "abc def defdef".length, "The entire input is parsed."); 1448 assert(result.matches == ["abc", "def", "def", "def"]); 1449 assert(result.children.length == 4); 1450 assert(result.children[0].name == "Arrows.ABC"); 1451 assert(result.children[1].name == "Arrows.DEF"); 1452 assert(result.children[2].name == "Arrows.DEF"); 1453 assert(result.children[3].name == "Arrows.DEF"); 1454 1455 result = Arrows.decimateTree(Arrows.Rule4("abcabcdef")); 1456 assert(result.successful); 1457 assert(result.begin == 0); 1458 assert(result.end == "abcabcdef".length); 1459 assert(result.matches == ["abc", "abc", "def"]); 1460 assert(result.children.length == 3); 1461 assert(result.children[0].name == "Arrows.ABC"); 1462 assert(result.children[1].name == "Arrows.ABC"); 1463 assert(result.children[2].name == "Arrows.DEF"); 1464 1465 result = Arrows.decimateTree(Arrows.Rule4(" abc abcabc def ")); 1466 assert(result.successful, "space arrows consume spaces."); 1467 assert(result.begin == 0); 1468 assert(result.end == " abc abcabc def ".length, "The entire input is parsed."); 1469 assert(result.matches == ["abc", "abc", "abc", "def"]); 1470 assert(result.children.length == 4); 1471 assert(result.children[0].name == "Arrows.ABC"); 1472 assert(result.children[1].name == "Arrows.ABC"); 1473 assert(result.children[2].name == "Arrows.ABC"); 1474 assert(result.children[3].name == "Arrows.DEF"); 1475 1476 //Comparing <- ABC* and <~ ABC* 1477 result = Arrows.decimateTree(Arrows.Rule5("abcabcabc")); 1478 assert(result.successful); 1479 assert(result.begin == 0); 1480 assert(result.end == "abcabcabc".length, "The entire input is parsed."); 1481 assert(result.matches == ["abc", "abc", "abc"]); 1482 assert(result.children.length == 3, "With the * operator, all children are kept."); 1483 assert(result.children[0].name == "Arrows.ABC"); 1484 assert(result.children[1].name == "Arrows.ABC"); 1485 assert(result.children[2].name == "Arrows.ABC"); 1486 1487 result = Arrows.decimateTree(Arrows.Rule6("abcabcabc")); 1488 assert(result.successful); 1489 assert(result.begin == 0); 1490 assert(result.end == "abcabcabc".length, "The entire input is parsed."); 1491 assert(result.matches == ["abcabcabc"], "Matches are fused."); 1492 assert(result.children.length == 0, "The <~ arrow cuts children."); 1493 1494 // Comparing <- ABC DEF and <: ABC DEF 1495 result = Arrows.decimateTree(Arrows.Rule7("abcdef")); 1496 assert(result.successful); 1497 assert(result.begin == 0); 1498 assert(result.end == "abcdef".length, "The entire input is parsed."); 1499 assert(result.matches is null, "No match with the discard arrow."); 1500 assert(result.children.length == 0, "No children with the discard arrow."); 1501 1502 // Comparing <- ABC DEF and <^ ABC DEF 1503 //But <^ is not very useful anyways. It does not distribute ^ among the subrules. 1504 result = Arrows.decimateTree(Arrows.Rule8("abcdef")); 1505 assert(result.successful); 1506 assert(result.begin == 0); 1507 assert(result.end == "abcdef".length, "The entire input is parsed."); 1508 assert(result.matches == ["abc", "def"]); 1509 assert(result.children[0].children.length == 2); 1510 1511 // Comparing <- ABC DEF and <; ABC DEF 1512 result = Arrows.decimateTree(Arrows.Rule9("abcdef")); 1513 assert(result.successful); 1514 assert(result.begin == 0); 1515 assert(result.end == "abcdef".length, "The entire input is parsed."); 1516 assert(result.matches == ["abc", "def"], "The drop arrow keeps the matches."); 1517 assert(result.children.length == 0, "The drop arrow drops the children."); 1518 1519 // Comparing <- ABC DEF and <% ABC Rule1 DEF 1520 //But <% is not very useful anyways. It does not distribute % among the subrules. 1521 result = Arrows.decimateTree(Arrows.Rule10("abcabcdefdef")); 1522 assert(result.successful); 1523 assert(result.begin == 0); 1524 assert(result.end == "abcabcdefdef".length, "The entire input is parsed."); 1525 assert(result.matches == ["abc", "abc", "def", "def"]); 1526 assert(result.children.length == 3); 1527 assert(result.children[0].name == "Arrows.ABC"); 1528 assert(result.children[1].name == "Arrows.Rule1", "No rule replacement by its own children. See % for that."); 1529 assert(result.children[2].name == "Arrows.DEF"); 1530 } 1531 1532 unittest //More space arrow tests 1533 { 1534 mixin(grammar(` 1535 Spaces: 1536 Rule1 < A (B C)+ 1537 A <- 'a' 1538 B <- 'b' 1539 C <- 'c' 1540 `)); 1541 1542 ParseTree result = Spaces.decimateTree(Spaces.Rule1("abcbc")); 1543 1544 assert(result.successful); 1545 assert(result.begin == 0); 1546 assert(result.end == "abcbc".length); 1547 assert(result.children.length == 5); 1548 assert(result.children[0].name == "Spaces.A"); 1549 assert(result.children[1].name == "Spaces.B"); 1550 assert(result.children[2].name == "Spaces.C"); 1551 assert(result.children[3].name == "Spaces.B"); 1552 assert(result.children[4].name == "Spaces.C"); 1553 1554 result = Spaces.decimateTree(Spaces.Rule1(" a bc b c ")); 1555 1556 assert(result.successful); 1557 assert(result.begin == 0); 1558 assert(result.end == " a bc b c ".length); 1559 assert(result.children.length == 5); 1560 assert(result.children[0].name == "Spaces.A"); 1561 assert(result.children[1].name == "Spaces.B"); 1562 assert(result.children[2].name == "Spaces.C"); 1563 assert(result.children[3].name == "Spaces.B"); 1564 assert(result.children[4].name == "Spaces.C"); 1565 } 1566 1567 unittest // Prefix and suffix tests 1568 { 1569 mixin(grammar(` 1570 PrefixSuffix: 1571 # Reference 1572 Rule1 <- ABC DEF 1573 Rule2 <- "abc" "def" 1574 Rule3 <- ABC* 1575 Rule4 <- "abc"* 1576 1577 # Discard operator 1578 Rule5 <- :ABC DEF 1579 Rule6 <- ABC :DEF 1580 Rule7 <- :"abc" "def" 1581 Rule8 <- "abc" :"def" 1582 1583 # Drop operator 1584 Rule9 <- ;ABC DEF 1585 Rule10 <- ABC ;DEF 1586 Rule11 <- ;"abc" "def" 1587 Rule12 <- "abc" ;"def" 1588 1589 # Fuse operator 1590 1591 Rule13 <- ~( ABC* ) 1592 Rule14 <- ~("abc"*) 1593 1594 # Keep operator 1595 Rule15 <- ^"abc" ^"def" 1596 1597 # Propagate operator 1598 Rule16 <- ABC Rule1 DEF 1599 Rule17 <- ABC %Rule1 DEF 1600 1601 1602 ABC <- "abc" 1603 DEF <- "def" 1604 `)); 1605 1606 1607 // Comparing standard and discarded rules 1608 auto result = PrefixSuffix.decimateTree(PrefixSuffix.Rule1("abcdef")); 1609 1610 assert(result.successful); 1611 assert(result.begin == 0); 1612 assert(result.end == 6); 1613 assert(result.matches == ["abc", "def"]); 1614 assert(result.children.length == 2); 1615 assert(result.children[0].name == "PrefixSuffix.ABC"); 1616 assert(result.children[1].name == "PrefixSuffix.DEF"); 1617 1618 result = PrefixSuffix.decimateTree(PrefixSuffix.Rule5("abcdef")); 1619 1620 assert(result.successful); 1621 assert(result.begin == 0); 1622 assert(result.end == 6); 1623 assert(result.matches == ["def"]); 1624 assert(result.children.length == 1, "The first child is discarded."); 1625 assert(result.children[0].name == "PrefixSuffix.DEF"); 1626 1627 result = PrefixSuffix.decimateTree(PrefixSuffix.Rule6("abcdef")); 1628 1629 assert(result.successful); 1630 assert(result.begin == 0); 1631 assert(result.end == 6); 1632 assert(result.matches == ["abc"]); 1633 assert(result.children.length == 1, "The second child is discarded."); 1634 assert(result.children[0].name == "PrefixSuffix.ABC"); 1635 1636 1637 result = PrefixSuffix.decimateTree(PrefixSuffix.Rule2("abcdef")); 1638 1639 assert(result.successful); 1640 assert(result.begin == 0); 1641 assert(result.end == 6); 1642 assert(result.matches == ["abc", "def"]); 1643 assert(result.children.length == 0, "Literals do not create children."); 1644 1645 result = PrefixSuffix.decimateTree(PrefixSuffix.Rule7("abcdef")); 1646 1647 assert(result.successful); 1648 assert(result.begin == 0); 1649 assert(result.end == 6); 1650 assert(result.matches == ["def"]); 1651 assert(result.children.length == 0); 1652 1653 result = PrefixSuffix.decimateTree(PrefixSuffix.Rule8("abcdef")); 1654 1655 assert(result.successful); 1656 assert(result.begin == 0); 1657 assert(result.end == 6); 1658 assert(result.matches == ["abc"]); 1659 assert(result.children.length == 0); 1660 1661 // Comparing standard and dropped rules 1662 1663 result = PrefixSuffix.decimateTree(PrefixSuffix.Rule9("abcdef")); 1664 1665 assert(result.successful); 1666 assert(result.begin == 0); 1667 assert(result.end == 6); 1668 assert(result.matches == ["abc", "def"], "All matches are there."); 1669 assert(result.children.length == 1, "The first child is discarded."); 1670 assert(result.children[0].name == "PrefixSuffix.DEF"); 1671 1672 result = PrefixSuffix.decimateTree(PrefixSuffix.Rule10("abcdef")); 1673 1674 assert(result.successful); 1675 assert(result.begin == 0); 1676 assert(result.end == 6); 1677 assert(result.matches == ["abc", "def"], "All matches are there."); 1678 assert(result.children.length == 1, "The second child is discarded."); 1679 assert(result.children[0].name == "PrefixSuffix.ABC"); 1680 1681 result = PrefixSuffix.decimateTree(PrefixSuffix.Rule11("abcdef")); 1682 1683 assert(result.successful); 1684 assert(result.begin == 0); 1685 assert(result.end == 6); 1686 assert(result.matches == ["abc", "def"], "All matches are there."); 1687 assert(result.children.length == 0); 1688 1689 result = PrefixSuffix.decimateTree(PrefixSuffix.Rule12("abcdef")); 1690 1691 assert(result.successful); 1692 assert(result.begin == 0); 1693 assert(result.end == 6); 1694 assert(result.matches == ["abc", "def"], "All matches are there."); 1695 assert(result.children.length == 0); 1696 1697 1698 // Comparing standard and fused rules 1699 1700 result = PrefixSuffix.decimateTree(PrefixSuffix.Rule3("abcabcabc")); 1701 1702 assert(result.successful); 1703 assert(result.begin == 0); 1704 assert(result.end == "abcabcabc".length); 1705 assert(result.matches == ["abc", "abc", "abc"]); 1706 assert(result.children.length == 3, "Standard '*': 3 children."); 1707 assert(result.children[0].name == "PrefixSuffix.ABC"); 1708 assert(result.children[1].name == "PrefixSuffix.ABC"); 1709 assert(result.children[2].name == "PrefixSuffix.ABC"); 1710 1711 result = PrefixSuffix.decimateTree(PrefixSuffix.Rule4("abcabcabc")); 1712 1713 assert(result.successful); 1714 assert(result.begin == 0); 1715 assert(result.end == "abcabcabc".length); 1716 assert(result.matches == ["abc", "abc", "abc"]); 1717 assert(result.children.length == 0, "All literals are discarded by the tree decimation."); 1718 1719 result = PrefixSuffix.decimateTree(PrefixSuffix.Rule13("abcabcabc")); 1720 1721 assert(result.successful); 1722 assert(result.begin == 0); 1723 assert(result.end == "abcabcabc".length); 1724 assert(result.matches == ["abcabcabc"], "All matches are fused."); 1725 assert(result.children.length == 0, "Children are discarded by '~'."); 1726 1727 result = PrefixSuffix.decimateTree(PrefixSuffix.Rule14("abcabcabc")); 1728 1729 assert(result.successful); 1730 assert(result.begin == 0); 1731 assert(result.end == "abcabcabc".length); 1732 assert(result.matches == ["abcabcabc"], "All matches are there."); 1733 assert(result.children.length == 0, "Children are discarded by '~'."); 1734 1735 // Testing the keep (^) operator 1736 1737 result = PrefixSuffix.decimateTree(PrefixSuffix.Rule15("abcdef")); 1738 1739 assert(result.successful); 1740 assert(result.begin == 0); 1741 assert(result.end == "abcdef".length); 1742 assert(result.matches == ["abc", "def"], "All matches are there."); 1743 assert(result.children.length == 2, "Both children are kept by '^'."); 1744 assert(result.children[0].name == `literal!("abc")`, 1745 `literal!("abc") is kept even though it's not part of the grammar rules.`); 1746 assert(result.children[1].name == `literal!("def")`, 1747 `literal!("def") is kept even though it's not part of the grammar rules.`); 1748 1749 // Comparing standard and propagated (%) rules. 1750 result = PrefixSuffix.decimateTree(PrefixSuffix.Rule16("abcabcdefdef")); 1751 1752 assert(result.successful); 1753 assert(result.begin == 0); 1754 assert(result.end == "abcabcdefdef".length); 1755 assert(result.matches == ["abc", "abc", "def", "def"], "All matches are there."); 1756 assert(result.children.length == 3, "Standard rule: three children."); 1757 assert(result.children[0].name == "PrefixSuffix.ABC"); 1758 assert(result.children[1].name == "PrefixSuffix.Rule1"); 1759 assert(result.children[1].children.length == 2, "Rule1 creates two children."); 1760 assert(result.children[1].children[0].name, "PrefixSuffix.ABC"); 1761 assert(result.children[1].children[1].name, "PrefixSuffix.DEF"); 1762 assert(result.children[2].name == "PrefixSuffix.DEF"); 1763 1764 result = PrefixSuffix.decimateTree(PrefixSuffix.Rule17("abcabcdefdef")); 1765 1766 // From (ABC, Rule1(ABC,DEF), DEF) to (ABC,ABC,DEF,DEF) 1767 assert(result.successful); 1768 assert(result.begin == 0); 1769 assert(result.end == "abcabcdefdef".length); 1770 assert(result.matches == ["abc", "abc", "def", "def"], "All matches are there."); 1771 assert(result.children.length == 4, "%-affected rule: four children."); 1772 assert(result.children[0].name == "PrefixSuffix.ABC"); 1773 assert(result.children[1].name == "PrefixSuffix.ABC"); 1774 assert(result.children[2].name == "PrefixSuffix.DEF"); 1775 assert(result.children[2].name == "PrefixSuffix.DEF"); 1776 1777 // Testing % and < together 1778 mixin(grammar(` 1779 PropTest: 1780 Rule1 < B C+ 1781 Rule2 <- B (%C)+ 1782 Rule3 < B (%C)+ 1783 Rule4 < B %(D E)+ 1784 1785 B <- 'b' 1786 C <- D E 1787 D <- 'd' 1788 E <- 'e' 1789 `)); 1790 1791 result = PropTest.decimateTree(PropTest.Rule1("bdedede")); 1792 assert(result.successful); 1793 assert(result.begin == 0); 1794 assert(result.end == "bdedede".length); 1795 assert(result.matches == ["b", "d", "e", "d", "e", "d", "e"]); 1796 assert(result.children.length == 4, "b and de, de, de"); 1797 assert(result.children[0].name == "PropTest.B"); 1798 assert(result.children[1].name == "PropTest.C"); 1799 assert(result.children[2].name == "PropTest.C"); 1800 assert(result.children[3].name == "PropTest.C"); 1801 1802 result = PropTest.decimateTree(PropTest.Rule2("bdedede")); 1803 assert(result.successful); 1804 assert(result.begin == 0); 1805 assert(result.end == "bdedede".length); 1806 assert(result.matches == ["b", "d", "e", "d", "e", "d", "e"]); 1807 assert(result.children.length == 7, "b and (d and e), thrice."); 1808 assert(result.children[0].name == "PropTest.B"); 1809 assert(result.children[1].name == "PropTest.D"); 1810 assert(result.children[2].name == "PropTest.E"); 1811 assert(result.children[3].name == "PropTest.D"); 1812 assert(result.children[4].name == "PropTest.E"); 1813 assert(result.children[5].name == "PropTest.D"); 1814 assert(result.children[6].name == "PropTest.E"); 1815 1816 result = PropTest.decimateTree(PropTest.Rule3("bdedede")); 1817 assert(result.successful); 1818 assert(result.begin == 0); 1819 assert(result.end == "bdedede".length); 1820 assert(result.matches == ["b", "d", "e", "d", "e", "d", "e"]); 1821 assert(result.children.length == 7, "b and (d and e), thrice."); 1822 assert(result.children[0].name == "PropTest.B"); 1823 assert(result.children[1].name == "PropTest.D"); 1824 assert(result.children[2].name == "PropTest.E"); 1825 assert(result.children[3].name == "PropTest.D"); 1826 assert(result.children[4].name == "PropTest.E"); 1827 assert(result.children[5].name == "PropTest.D"); 1828 assert(result.children[6].name == "PropTest.E"); 1829 1830 result = PropTest.decimateTree(PropTest.Rule3(" b de de de ")); 1831 assert(result.successful); 1832 assert(result.begin == 0); 1833 assert(result.end == " b de de de ".length); 1834 assert(result.matches == ["b", "d", "e", "d", "e", "d", "e"]); 1835 assert(result.children.length == 7, "b and (d and e), thrice."); 1836 assert(result.children[0].name == "PropTest.B"); 1837 assert(result.children[1].name == "PropTest.D"); 1838 assert(result.children[2].name == "PropTest.E"); 1839 assert(result.children[3].name == "PropTest.D"); 1840 assert(result.children[4].name == "PropTest.E"); 1841 assert(result.children[5].name == "PropTest.D"); 1842 assert(result.children[6].name == "PropTest.E"); 1843 1844 result = PropTest.decimateTree(PropTest.Rule4("bdedede")); 1845 assert(result.successful); 1846 assert(result.begin == 0); 1847 assert(result.end == "bdedede".length); 1848 assert(result.matches == ["b", "d", "e", "d", "e", "d", "e"]); 1849 assert(result.children.length == 7, "b and (d and e), thrice."); 1850 assert(result.children[0].name == "PropTest.B"); 1851 assert(result.children[1].name == "PropTest.D"); 1852 assert(result.children[2].name == "PropTest.E"); 1853 assert(result.children[3].name == "PropTest.D"); 1854 assert(result.children[4].name == "PropTest.E"); 1855 assert(result.children[5].name == "PropTest.D"); 1856 assert(result.children[6].name == "PropTest.E"); 1857 1858 result = PropTest.decimateTree(PropTest.Rule4(" b de de de ")); 1859 assert(result.successful); 1860 assert(result.begin == 0); 1861 assert(result.end == " b de de de ".length); 1862 assert(result.matches == ["b", "d", "e", "d", "e", "d", "e"]); 1863 assert(result.children.length == 7, "b and (d and e), thrice."); 1864 assert(result.children[0].name == "PropTest.B"); 1865 assert(result.children[1].name == "PropTest.D"); 1866 assert(result.children[2].name == "PropTest.E"); 1867 assert(result.children[3].name == "PropTest.D"); 1868 assert(result.children[4].name == "PropTest.E"); 1869 assert(result.children[5].name == "PropTest.D"); 1870 assert(result.children[6].name == "PropTest.E"); 1871 1872 // More than one prefix, more than one suffixes 1873 mixin(grammar(` 1874 MoreThanOne: 1875 Rule1 <- ~:("abc"*) # Two prefixes (nothing left for ~, after :) 1876 Rule2 <- :~("abc"*) # Two prefixes (: will discard everything ~ did) 1877 Rule3 <- ;:~"abc" # Many prefixes 1878 Rule4 <- ~~~("abc"*) # Many fuses (no global effect) 1879 1880 Rule5 <- "abc"+* # Many suffixes 1881 Rule6 <- "abc"+? # Many suffixes 1882 1883 Rule7 <- !!"abc" # Double negation, equivalent to '&' 1884 Rule8 <- &"abc" 1885 1886 Rule9 <- ^^"abc"+* # Many suffixes and prefixes 1887 `)); 1888 1889 assert(is(MoreThanOne), "This compiles all right."); 1890 1891 result = MoreThanOne.decimateTree(MoreThanOne.Rule1("abcabcabc")); 1892 assert(result.successful); 1893 assert(result.begin == 0); 1894 assert(result.end == "abcabcabc".length); 1895 assert(result.matches is null); 1896 assert(result.children.length == 0); 1897 1898 result = MoreThanOne.decimateTree(MoreThanOne.Rule2("abcabcabc")); 1899 assert(result.successful); 1900 assert(result.begin == 0); 1901 assert(result.end == "abcabcabc".length); 1902 assert(result.matches is null); 1903 assert(result.children.length == 0); 1904 1905 result = MoreThanOne.decimateTree(MoreThanOne.Rule3("abcabcabc")); 1906 assert(result.successful); 1907 assert(result.begin == 0); 1908 assert(result.end == "abc".length); 1909 assert(result.matches is null); 1910 assert(result.children.length == 0); 1911 1912 result = MoreThanOne.decimateTree(MoreThanOne.Rule4("abcabcabc")); 1913 assert(result.successful); 1914 assert(result.begin == 0); 1915 assert(result.end == "abcabcabc".length); 1916 assert(result.matches == ["abcabcabc"]); 1917 assert(result.children.length == 0); 1918 1919 // +* and +? 1920 result = MoreThanOne.decimateTree(MoreThanOne.Rule5("abcabcabc")); 1921 assert(result.successful); 1922 assert(result.begin == 0); 1923 assert(result.end == "abcabcabc".length); 1924 assert(result.matches == ["abc", "abc", "abc"]); 1925 assert(result.children.length == 0); 1926 1927 result = MoreThanOne.decimateTree(MoreThanOne.Rule6("abcabcabc")); 1928 assert(result.successful); 1929 assert(result.begin == 0); 1930 assert(result.end == "abcabcabc".length); 1931 assert(result.matches == ["abc", "abc", "abc"]); 1932 assert(result.children.length == 0); 1933 1934 // !! is equivalent to & 1935 result = MoreThanOne.decimateTree(MoreThanOne.Rule7("abc")); 1936 assert(result.successful); 1937 assert(result.begin == 0); 1938 assert(result.end == 0); 1939 assert(result.matches is null); 1940 assert(result.children.length == 0); 1941 1942 result = MoreThanOne.decimateTree(MoreThanOne.Rule8("abc")); 1943 assert(result.successful); 1944 assert(result.begin == 0); 1945 assert(result.end == 0); 1946 assert(result.matches is null); 1947 assert(result.children.length == 0); 1948 1949 // ^^"abc"+* 1950 result = MoreThanOne.decimateTree(MoreThanOne.Rule9("abcabcabc")); 1951 assert(result.successful); 1952 assert(result.begin == 0); 1953 assert(result.end == 9); 1954 assert(result.matches == ["abc", "abc", "abc"]); 1955 assert(result.children.length == 1); 1956 assert(result.name == `MoreThanOne.Rule9`); 1957 assert(result.children[0].name == `keep!(zeroOrMore!(oneOrMore!(literal!("abc"))))`); 1958 assert(result.children[0].children.length == 1); 1959 assert(result.children[0].children[0].name == `zeroOrMore!(oneOrMore!(literal!("abc")))`); 1960 assert(result.children[0].children[0].children.length == 1); 1961 assert(result.children[0].children[0].children[0].name == `oneOrMore!(literal!("abc"))`); 1962 assert(result.children[0].children[0].children[0].children.length == 3); 1963 assert(result.children[0].children[0].children[0].children[0].name == `literal!("abc")`); 1964 assert(result.children[0].children[0].children[0].children[1].name == `literal!("abc")`); 1965 assert(result.children[0].children[0].children[0].children[2].name == `literal!("abc")`); 1966 } 1967 1968 unittest // Issue #88 unit test 1969 { 1970 enum gram = ` 1971 P: 1972 Rule1 <- (w 'a' w)* 1973 Rule2 <- (wx 'a' wx)* 1974 w <- :(' ' / '\n' / '\t' / '\r')* 1975 wx <- (:' ' / '\n' / '\t' / '\r')* 1976 `; 1977 1978 mixin(grammar(gram)); 1979 1980 string input = " a a a a a a "; 1981 1982 ParseTree p1 = P.decimateTree(P.Rule1(input)); 1983 ParseTree p2 = P.decimateTree(P.Rule2(input)); 1984 assert(softCompare(p1,p2)); 1985 1986 input = " a\n \011\012 a\n\t a\x09\x0A a "; 1987 p1 = P.decimateTree(P.Rule1(input)); 1988 p2 = P.decimateTree(P.Rule2(input)); 1989 assert(p1.end == input.length); // Parse the entire string 1990 assert(p2.end == input.length); 1991 } 1992 1993 unittest // Leading alternation 1994 { 1995 mixin(grammar(` 1996 LeadingAlternation: 1997 Rule1 <- / 'a' 1998 Rule2 <- / 'a' / 'b' 1999 Rule3 <- (/ 'a' / 'b') 2000 `)); 2001 2002 ParseTree result = LeadingAlternation.decimateTree(LeadingAlternation.Rule1("a")); 2003 assert(result.successful); 2004 assert(result.begin == 0); 2005 assert(result.end == 1); 2006 assert(result.matches == ["a"]); 2007 2008 result = LeadingAlternation.decimateTree(LeadingAlternation.Rule2("b")); 2009 assert(result.successful); 2010 assert(result.begin == 0); 2011 assert(result.end == 1); 2012 assert(result.matches == ["b"]); 2013 2014 result = LeadingAlternation.decimateTree(LeadingAlternation.Rule3("b")); 2015 assert(result.successful); 2016 assert(result.begin == 0); 2017 assert(result.end == 1); 2018 assert(result.matches == ["b"]); 2019 } 2020 2021 unittest // Extended chars tests 2022 { 2023 mixin(grammar(" 2024 Chars: 2025 # Lone chars 2026 Rule1 <- '\t' '0' 'A' '~' 2027 Rule2 <- '\11' '\60' '\101' '\176' # \t 0 A ~ in octal 2028 Rule3 <- '\011' '\060' '\101' '\176' # \t 0 A ~ in octal (prefix 0) 2029 Rule4 <- '\x09' '\x30' '\x41' '\x7E' # \t 0 A ~ in hexadecimal 2030 Rule5 <- '\u0009' '\u0030' '\u0041' '\u007E' # \t 0 A ~ in unicode 2031 Rule6 <- '\U00000009' '\U00000030' '\U00000041' '\U0000007E' # \t 0 A ~ in unicode 2032 2033 # Strings literals 2034 Rule7 <- '\t0A~' 2035 Rule8 <- '\11\60\101\176' # \t 0 A ~ in octal 2036 Rule9 <- '\011\060\101\176' # \t 0 A ~ in octal (prefix 0) 2037 Rule10 <- '\x09\x30\x41\x7E' # \t 0 A ~ in hexadecimal 2038 Rule11 <- '\u0009\u0030\u0041\u007E' # \t 0 A ~ in unicode 2039 Rule12 <- '\U00000009\U00000030\U00000041' '\U0000007E' # \t 0 A ~ in unicode 2040 2041 # Outside Latin 2042 Rule13 <- '\u03B1\u03B9\u03C6\u03B1' # alpha in greek 2043 Rule14 <- 'αιφα' 2044 2045 # Hello's 2046 English <- 'Hello' 2047 Russian <- 'Здравствуйте' 2048 Arabic <- 'السلام عليك' 2049 Chinese <- '你好' 2050 Japanese <- '今日は' 2051 Spanish <- '¡Hola!' 2052 ")); 2053 2054 2055 assert(Chars.decimateTree(Chars.Rule1("\t0A~")).successful); 2056 assert(Chars.decimateTree(Chars.Rule2("\t0A~")).successful); 2057 assert(Chars.decimateTree(Chars.Rule3("\t0A~")).successful); 2058 assert(Chars.decimateTree(Chars.Rule4("\t0A~")).successful); 2059 assert(Chars.decimateTree(Chars.Rule5("\t0A~")).successful); 2060 assert(Chars.decimateTree(Chars.Rule6("\t0A~")).successful); 2061 2062 assert(Chars.decimateTree(Chars.Rule7("\t0A~")).successful); 2063 assert(Chars.decimateTree(Chars.Rule8("\t0A~")).successful); 2064 assert(Chars.decimateTree(Chars.Rule9("\t0A~")).successful); 2065 assert(Chars.decimateTree(Chars.Rule10("\t0A~")).successful); 2066 assert(Chars.decimateTree(Chars.Rule11("\t0A~")).successful); 2067 assert(Chars.decimateTree(Chars.Rule12("\t0A~")).successful); 2068 2069 assert(Chars.decimateTree(Chars.Rule13("\u03B1\u03B9\u03C6\u03B1")).successful); 2070 assert(Chars.decimateTree(Chars.Rule13("αιφα")).successful); 2071 2072 assert(Chars.decimateTree(Chars.Rule14("\u03B1\u03B9\u03C6\u03B1")).successful); 2073 assert(Chars.decimateTree(Chars.Rule14("αιφα")).successful); 2074 2075 assert(Chars.decimateTree(Chars.English("Hello")).successful); 2076 assert(Chars.decimateTree(Chars.Russian("Здравствуйте")).successful); 2077 assert(Chars.decimateTree(Chars.Arabic("السلام عليك")).successful); 2078 assert(Chars.decimateTree(Chars.Chinese("你好")).successful); 2079 assert(Chars.decimateTree(Chars.Japanese("今日は'")).successful); 2080 assert(Chars.decimateTree(Chars.Spanish("¡Hola!")).successful); 2081 } 2082 2083 unittest // Extended char range tests 2084 { 2085 import std.conv; 2086 2087 mixin(grammar(` 2088 CharRanges: 2089 Rule1 <- [a-z] 2090 Rule2 <- [\141-\172] # a-z in octal 2091 Rule3 <- [\x61-\x7A] # a-z in hexadecimal 2092 Rule4 <- [\u0061-\u007A] # a-z in UTF16 2093 Rule5 <- [\U00000061-\U0000007A] # a-z in UTF32 2094 2095 Rule6 <- [\-\[\]\\\'\"\n\r\t] 2096 `)); 2097 2098 string lower = "abcdefghijklmnopqrstuvwxyz"; 2099 string upper = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"; 2100 string digits = "0123456789"; 2101 string others = "?./,;:!*&()[]<>"; 2102 string escapes = "-[]\\\'\"\n\r\t"; 2103 2104 foreach(dchar c; lower) 2105 { 2106 assert(CharRanges.Rule1(to!string(c)).successful); 2107 assert(CharRanges.Rule2(to!string(c)).successful); 2108 assert(CharRanges.Rule3(to!string(c)).successful); 2109 assert(CharRanges.Rule4(to!string(c)).successful); 2110 assert(CharRanges.Rule5(to!string(c)).successful); 2111 } 2112 2113 foreach(dchar c; upper ~ digits ~ others) 2114 { 2115 assert(!CharRanges.Rule1(to!string(c)).successful); 2116 assert(!CharRanges.Rule2(to!string(c)).successful); 2117 assert(!CharRanges.Rule3(to!string(c)).successful); 2118 assert(!CharRanges.Rule4(to!string(c)).successful); 2119 assert(!CharRanges.Rule5(to!string(c)).successful); 2120 } 2121 2122 foreach(dchar c; escapes) 2123 { 2124 assert(CharRanges.Rule6(to!string(c)).successful); 2125 } 2126 } 2127 2128 unittest // qualified names for rules 2129 { 2130 mixin(grammar(` 2131 First: 2132 Rule1 <- "abc" 2133 Rule2 <- "def" 2134 `)); 2135 2136 mixin(grammar(` 2137 Second: 2138 Rule1 <- First.Rule1 2139 Rule2 <- First.Rule2 2140 Rule3 <- pegged.peg.list(pegged.peg.identifier, ',') 2141 `)); 2142 2143 // Equal on success 2144 ParseTree reference = First("abc"); 2145 ParseTree result = Second("abc"); 2146 assert(reference.successful); 2147 assert(result.successful); 2148 assert(result.matches == reference.matches); 2149 assert(result.begin == reference.begin); 2150 assert(result.end == reference.end); 2151 2152 // Equal on failure 2153 reference = First("def"); 2154 result = Second("def"); 2155 assert(!reference.successful); 2156 assert(!result.successful); 2157 assert(result.matches == reference.matches); 2158 assert(result.begin == reference.begin); 2159 assert(result.end == reference.end); 2160 2161 // Second rule test 2162 reference = First.Rule2("def"); 2163 result = Second.Rule2("def"); 2164 assert(reference.successful); 2165 assert(result.matches == reference.matches); 2166 assert(result.begin == reference.begin); 2167 assert(result.end == reference.end); 2168 2169 // External (predefined) rule call: 2170 result = Second.Rule3("foo,bar,baz"); 2171 assert(result.successful); 2172 assert(result.begin == 0); 2173 assert(result.end == "foo,bar,baz".length); 2174 assert(result.matches == ["foo", "bar", "baz"]); 2175 } 2176 2177 unittest // Parameterized rules 2178 { 2179 mixin(grammar(` 2180 Parameterized: 2181 # Different arities 2182 Rule1(A) <- A+ 2183 Rule1(A,B) <- (A B)+ 2184 Rule1(A,B,C) <- (A B C)+ 2185 2186 # Inner call 2187 Call1 <- Rule1('a') 2188 Call2 <- Rule1('a','b') 2189 Call3 <- Rule1('a','b','c') 2190 Call4(A) <- Rule1(A, A, A) 2191 Call5(A) <- Rule1('a', A, 'c') 2192 2193 # Default values 2194 Rule2(A = 'a', B = 'b') <- (A B)+ 2195 2196 # Re-using the parameters 2197 Rule3(A,B) <- A B B A # Money money money! 2198 2199 # The standard parameterized rule 2200 List(Elem, Sep) < Elem (:Sep Elem)* 2201 2202 # Another common PEG pattern 2203 AllUntil(End) <~ (!End .)* :End 2204 `)); 2205 2206 alias Parameterized.Rule1!(literal!"a") R1; 2207 alias oneOrMore!(literal!"a") Ref1; 2208 2209 ParseTree reference = Ref1("aaaa"); 2210 ParseTree result = R1("aaaa"); 2211 2212 assert(result.name == `Parameterized.Rule1!(literal!("a"))`); 2213 assert(reference.successful); 2214 assert(result.successful); 2215 assert(result.matches == reference.matches); 2216 assert(result.begin == reference.begin); 2217 assert(result.end == reference.end); 2218 2219 result = Parameterized.Call1("aaaa"); 2220 2221 assert(result.name == `Parameterized.Call1`); 2222 assert(result.successful); 2223 assert(result.matches == reference.matches); 2224 assert(result.begin == reference.begin); 2225 assert(result.end == reference.end); 2226 2227 alias Parameterized.Rule1!(literal!"abc") R1long; 2228 alias oneOrMore!(literal!"abc") Ref1long; 2229 2230 reference = Ref1long("abcabcabcabc"); 2231 result = R1long("abcabcabcabc"); 2232 2233 assert(result.name == `Parameterized.Rule1!(literal!("abc"))`); 2234 assert(reference.successful); 2235 assert(result.successful); 2236 assert(result.matches == reference.matches); 2237 assert(result.begin == reference.begin); 2238 assert(result.end == reference.end); 2239 2240 alias Parameterized.Rule1!(literal!"a", literal!"b") R2; 2241 alias oneOrMore!(and!(literal!"a", literal!"b")) Ref2; 2242 2243 reference = Ref2("abababab"); 2244 result = R2("abababab"); 2245 2246 assert(result.name == `Parameterized.Rule1!(literal!("a"), literal!("b"))`); 2247 assert(reference.successful); 2248 assert(result.successful); 2249 assert(result.matches == reference.matches); 2250 assert(result.begin == reference.begin); 2251 assert(result.end == reference.end); 2252 2253 result = Parameterized.Call2("abababab"); 2254 2255 assert(result.name == `Parameterized.Call2`); 2256 assert(result.successful); 2257 assert(result.matches == reference.matches); 2258 assert(result.begin == reference.begin); 2259 assert(result.end == reference.end); 2260 2261 alias Parameterized.Rule1!(literal!"a", literal!"b", literal!"c") R3; 2262 alias oneOrMore!(and!(literal!"a", literal!"b", literal!"c")) Ref3; 2263 2264 reference = Ref3("abcabcabcabc"); 2265 result = R3("abcabcabcabc"); 2266 2267 assert(result.name == `Parameterized.Rule1!(literal!("a"), literal!("b"), literal!("c"))`); 2268 assert(reference.successful); 2269 assert(result.successful); 2270 assert(result.matches == reference.matches); 2271 assert(result.begin == reference.begin); 2272 assert(result.end == reference.end); 2273 2274 result = Parameterized.Call3("abcabcabcabc"); 2275 2276 assert(result.name == `Parameterized.Call3`); 2277 assert(result.successful); 2278 assert(result.matches == reference.matches); 2279 assert(result.begin == reference.begin); 2280 assert(result.end == reference.end); 2281 2282 result = Parameterized.Call4!(literal!"A")("AAAAAA"); 2283 2284 assert(result.name == `Parameterized.Call4!(literal!("A"))`); 2285 assert(result.successful); 2286 assert(result.begin == 0); 2287 assert(result.end == "AAAAAA".length); 2288 assert(result.matches == ["A","A","A","A","A","A"]); 2289 2290 result = Parameterized.Call5!(literal!"A")("aAcaAc"); 2291 2292 assert(result.name == `Parameterized.Call5!(literal!("A"))`); 2293 assert(result.successful); 2294 assert(result.begin == 0); 2295 assert(result.end == "aAcaAc".length); 2296 assert(result.matches == ["a","A","c","a","A","c"]); 2297 2298 // Default parameters 2299 alias Parameterized.Rule2!() R2_1; 2300 alias Parameterized.Rule2!(literal!"a") R2_2; 2301 alias Parameterized.Rule2!(literal!"a", literal!"b") R2_3; 2302 2303 assert(R2_1("ababab").successful); 2304 assert(R2_2("ababab").successful); 2305 assert(R2_3("ababab").successful); 2306 2307 // Re-using a parameter (A B B A) 2308 result = Parameterized.Rule3!(literal!"A", literal!"B")("ABBA"); 2309 2310 assert(result.name == `Parameterized.Rule3!(literal!("A"), literal!("B"))`); 2311 assert(result.successful); 2312 assert(result.begin == 0); 2313 assert(result.end == "ABBA".length); 2314 assert(result.matches == ["A", "B", "B", "A"]); 2315 2316 alias Parameterized.List!(identifier, literal!",") IdList; // Identifiers separated by ',' 2317 alias Parameterized.List!(IdList, literal!";") IdList2; // IdList's separated by ';' 2318 2319 result = IdList("foo, bar, baz"); 2320 2321 assert(result.name == `Parameterized.List!(identifier, literal!(","))`); 2322 assert(result.successful); 2323 assert(result.begin == 0); 2324 assert(result.end == "foo, bar, baz".length); 2325 assert(result.matches == ["foo", "bar", "baz"]); 2326 2327 result = Parameterized.decimateTree(IdList2("foo,bar,baz; abc, def, ghi")); 2328 2329 assert(result.name == `Parameterized.List!(Parameterized.List!(identifier, literal!(",")), literal!(";"))`); 2330 assert(result.successful); 2331 assert(result.begin == 0); 2332 assert(result.end == "foo,bar,baz; abc, def, ghi".length); 2333 assert(result.matches == ["foo", "bar", "baz", "abc", "def", "ghi"]); 2334 2335 assert(result.children.length == 2); 2336 2337 assert(result.children[0].name == `Parameterized.List!(identifier, literal!(","))`); 2338 assert(result.children[0].matches == ["foo", "bar", "baz"]); 2339 2340 assert(result.children[1].name == `Parameterized.List!(identifier, literal!(","))`); 2341 assert(result.children[1].matches == ["abc", "def", "ghi"]); 2342 2343 alias Parameterized.AllUntil!(or!(endOfLine)) Line; 2344 alias zeroOrMore!(Line) Lines; 2345 2346 string input = 2347 "This is an input text. 2348 Here is another line. 2349 2350 And the last one. 2351 "; 2352 2353 result = Lines(input); 2354 assert(result.successful); 2355 assert(result.children.length == 4); 2356 assert(result.children[0].matches == ["This is an input text."]); 2357 assert(result.children[1].matches == ["Here is another line."]); 2358 assert(result.children[2].matches is null); 2359 assert(result.children[3].matches == [" And the last one."]); 2360 2361 // Parameterized grammar test 2362 mixin(grammar(` 2363 Arithmetic(Atom) : 2364 Expr < Factor (('+'/'-') Factor)* 2365 Factor < Primary (('*'/'/') Primary)* 2366 Primary < '(' Expr ')' / '-' Expr / Atom 2367 `)); 2368 2369 alias Arithmetic!(identifier) Arith1; 2370 alias Arithmetic!(or!(identifier, digits)) Arith2; 2371 2372 assert(Arith1("x + y*z/foo").successful); 2373 assert(Arith2("x + y*z/foo").successful); 2374 2375 assert(!Arith1("1 + 2*3/456").successful); 2376 assert(Arith2("1 + 2*3/456").successful); 2377 assert(Arith2("1 + 2*3/z").successful); 2378 } 2379 2380 version(unittest) // Semantic actions 2381 { 2382 P doubler(P)(P p) 2383 { 2384 if (p.successful) 2385 p.matches ~= p.matches; 2386 return p; 2387 } 2388 } 2389 2390 unittest // Semantic actions, testing { foo } and { foo, bar, baz } 2391 { 2392 mixin(grammar(` 2393 Semantic: 2394 Rule1 <- 'a' {doubler} 2395 Rule2 <- 'b' {doubler, doubler} 2396 Rule3 <- 'b' {doubler} {doubler} # Same as Rule2 2397 Rule4 <- 'b' {doubler, doubler, doubler} 2398 Rule5 <- 'a' {doubler} 'b' 'c'{doubler} 2399 Rule6 <{doubler} 'a' # Rule Level actions 2400 Rule7 <{doubler} 'a' 'b' {doubler} # Rule Level actions 2401 `)); 2402 2403 ParseTree result = Semantic.decimateTree(Semantic.Rule1("a")); 2404 assert(result.successful); 2405 assert(result.matches == ["a", "a"]); 2406 2407 result = Semantic.decimateTree(Semantic.Rule1("b")); 2408 assert(!result.successful); 2409 assert(result.matches == [`"a"`]); 2410 2411 result = Semantic.decimateTree(Semantic.Rule2("b")); 2412 assert(result.successful); 2413 assert(result.matches == ["b", "b", "b", "b"]); 2414 2415 result = Semantic.decimateTree(Semantic.Rule3("b")); 2416 assert(result.successful); 2417 assert(result.matches == ["b", "b", "b", "b"]); 2418 2419 result = Semantic.decimateTree(Semantic.Rule4("b")); 2420 assert(result.successful); 2421 assert(result.matches == ["b", "b", "b", "b", "b", "b", "b", "b"]); 2422 2423 result = Semantic.decimateTree(Semantic.Rule5("abc")); 2424 assert(result.successful); 2425 assert(result.matches == ["a", "a", "b", "c", "c"]); 2426 2427 result = Semantic.decimateTree(Semantic.Rule6("abc")); 2428 assert(result.successful); 2429 assert(result.matches == ["a", "a"]); 2430 2431 result = Semantic.decimateTree(Semantic.Rule7("abc")); 2432 assert(result.successful); 2433 assert(result.matches == ["a", "b", "b", "a", "b", "b"]); 2434 2435 } 2436 2437 version(unittest) 2438 { 2439 P foo(P)(P p) { return p;} // for testing actions 2440 2441 void badGrammar(string s)() @safe 2442 { 2443 assert(!__traits(compiles, {mixin(grammar(s));}), "This should fail: " ~ s); 2444 } 2445 2446 void goodGrammar(string s)() @safe 2447 { 2448 assert(__traits(compiles, {mixin(grammar(s));}), "This should work: " ~ s); 2449 } 2450 } 2451 2452 2453 /+ Failed (commit 4cd177a), DMD crashed. Too many grammar istantiations, I guess. 2454 unittest // failure cases: unnamed grammar, no-rule grammar, syntax errors, etc. 2455 { 2456 // No grammar 2457 badGrammar!""; 2458 2459 // Name without colon nor rules 2460 badGrammar!"Name"; 2461 2462 // No rule 2463 badGrammar!"Name:"; 2464 badGrammar!"Name1 Name2"; 2465 2466 // Incomplete and badly formulated rules 2467 badGrammar!"Name: 2468 Rule1"; 2469 badGrammar!"Name: 2470 Rule1 Rule2"; 2471 badGrammar!"Name 2472 Rule1 Rule2"; 2473 badGrammar!"Name: 2474 Rule1 <-"; 2475 badGrammar!"Name: 2476 Rule1 <~"; 2477 badGrammar!"Name: 2478 Rule1 < "; 2479 badGrammar!"Name: 2480 Rule1 <%"; 2481 badGrammar!"Name: 2482 Rule1 <;"; 2483 badGrammar!"Name 2484 Rule1 <- <-"; 2485 2486 // Non-closing parenthesis, brackets and quotes 2487 badGrammar!"Name: 2488 Rule1 <- ('a'"; 2489 badGrammar!"Name: 2490 Rule1 <- 'a')"; 2491 badGrammar!"Name: 2492 Rule1 <- ('a'))"; 2493 badGrammar!"Name: 2494 Rule1 <- (('a')"; 2495 badGrammar!"Name: 2496 Rule1 <- 'a"; 2497 badGrammar!"Name: 2498 Rule1 <- a'"; 2499 badGrammar!`Name: 2500 Rule1 <- "a`; 2501 badGrammar!`Name: 2502 Rule1 <- a"`; 2503 badGrammar!`Name: 2504 Rule1 <- 'a"`; 2505 badGrammar!`Name: 2506 Rule1 <- "a'`; 2507 badGrammar!"Name: 2508 Rule1 <- [a"; 2509 badGrammar!"Name: 2510 Rule1 <- a]"; 2511 badGrammar!"Name: 2512 Rule1 <- [a]]"; 2513 // But <- [[a] is legal: matches '[' or 'a' 2514 goodGrammar!"Name: 2515 Rule1 <- [[a]"; 2516 2517 // Bad prefix/postfix 2518 badGrammar!"Name: 2519 Rule1 <- 'a'~"; 2520 badGrammar!"Name: 2521 Rule1 <- 'a'%"; 2522 badGrammar!"Name: 2523 Rule1 <- 'a'!"; 2524 badGrammar!"Name: 2525 Rule1 <- 'a'&"; 2526 badGrammar!"Name: 2527 Rule1 <- 'a';"; 2528 badGrammar!"Name: 2529 Rule1 <- *'a'"; 2530 badGrammar!"Name: 2531 Rule1 <- +'a'"; 2532 badGrammar!"Name: 2533 Rule1 <- ?'a'"; 2534 badGrammar!"Name: 2535 Rule1 <- 'a' {}"; 2536 // Foo is defined in a version(unittest) block 2537 badGrammar!"Name: 2538 Rule1 <- 'a' { foo"; 2539 badGrammar!"Name: 2540 Rule1 <- 'a' foo}"; 2541 badGrammar!"Name: 2542 Rule1 <- 'a' {foo}}"; // closing } 2543 badGrammar!"Name: 2544 Rule1 <- 'a' {{foo}"; // opening { 2545 badGrammar!"Name: 2546 Rule1 <- 'a' {foo,}"; // bad comma 2547 badGrammar!"Name: 2548 Rule1 <- 'a' {,foo}"; 2549 badGrammar!"Name: 2550 Rule1 <- {foo}"; // no rule before {}'s 2551 // DMD Bug :-( 2552 /+glue.c line 1150 dmd::virtual unsigned int Type::totym():Assertion `0' failed. 2553 badGrammar!"Name: 2554 Rule1 <- 'a' {bar}"; // bar not defined 2555 +/ 2556 2557 // choice ('/') syntax errors 2558 badGrammar!"Name: 2559 Rule1 <- 'a' /"; 2560 // But: <- / 'a' is legal (it's equivalent to: <- 'a') 2561 goodGrammar!"Name: 2562 Rule1 <- / 'a'"; 2563 badGrammar!"Name: 2564 Rule1 <- /"; 2565 badGrammar!"Name: 2566 Rule1 <- 'a' / / 'b'"; 2567 } 2568 +/ 2569 2570 unittest // Memoization testing 2571 { 2572 enum gram1 = ` 2573 Test1: 2574 Rule1 <- Rule2* 'b' # To force a long evaluation of aaaa... 2575 / Rule2* 'c' # before finding a 'b' or a 'c' 2576 Rule2 <- 'a' 2577 `; 2578 2579 enum gram2 = ` 2580 Test2: 2581 Rule1 <- Rule2* 'b' # To force a long evaluation of aaaa... 2582 / Rule2* 'c' # before finding a 'b' or a 'c' 2583 Rule2 <- 'a' 2584 `; 2585 2586 mixin(grammar!(Memoization.yes)(gram1)); 2587 mixin(grammar!(Memoization.no)(gram2)); 2588 2589 assert(is(typeof(Test1.memo))); 2590 assert(!is(typeof(Test2.memo))); 2591 2592 ParseTree result1 = Test1("aaaaaaac"); // Memo + Runtime 2593 enum ParseTree result2 = Test1("aaaaaaac"); // Memo + Compile-time. Note: there is no actual memoization at CT. 2594 ParseTree result3 = Test2("aaaaaaac"); // No memo + Runtime 2595 enum ParseTree result4 = Test2("aaaaaaac"); // No memo + Compile-time 2596 2597 assert(result1 == result2); 2598 assert(result3 == result4); 2599 2600 //Directly comparing result1 and result3 is not possible, for the grammar names are different 2601 assert(pegged.peg.softCompare(result1, result2)); 2602 assert(pegged.peg.softCompare(result1, result3)); 2603 assert(pegged.peg.softCompare(result1, result4)); 2604 } 2605 2606 unittest // Memoization reset in composed grammars. Issue #162 2607 { 2608 enum MathGrammar = ` 2609 Math: 2610 List <- Var (:',' Var)* 2611 Var <~ [a-zA-Z] 2612 `; 2613 enum LetGrammar = ` 2614 Let: 2615 Stmt <- Math.List ' be' 2616 `; 2617 2618 mixin(grammar(MathGrammar)); 2619 mixin(grammar!(Memoization.no)(LetGrammar)); 2620 2621 assert(Let("a,b be").successful); 2622 assert(Let("K,H,G be").successful); // This fails if the memoization table is 2623 // not cleared in all composed grammars. 2624 } 2625 2626 unittest // Test lambda syntax in semantic actions 2627 { 2628 import std.array; 2629 import std.string : strip; 2630 2631 auto actions = [ 2632 2633 // Normal action 2634 `{ myAction }`, 2635 2636 // List of normal actions 2637 `{ myAction, myAction2 }`, 2638 2639 // Simple do-nothing lambda 2640 `{ (a) {return a;} }`, 2641 2642 // Simple do-nothing lambda with formatting 2643 `{ (a) { 2644 return a; 2645 }}`, 2646 2647 // Lambda with commas and spurious braces to try and confuse it 2648 `{ (a, b) { 2649 string s = "}"; 2650 if (a.successful,) { 2651 s ~= q"<}>"; 2652 } else { 2653 { s ~= q"<}>"; /* } */ } 2654 } 2655 return a;} }`, 2656 2657 // List of mixed actions and lambdas 2658 `{ myAction , (a) {return a;}, myAction2 , (a) { /* , } */ return a; } }`, 2659 2660 // Ambiguous lambda (not sure it would compile if used... but it should parse) 2661 `{ myAction, a => transform(a), myAction2 }`, 2662 2663 // Something more convoluted 2664 "{ myAction, (a) { 2665 /* block } comment with } braces */ 2666 string s = `} {` // wysiwyg string with braces and line comment with brace } 2667 if (s is null) { 2668 // } 2669 } else { // scopes 2670 { // nested scopes 2671 writeln(q{ \"}\" }); // token string with nested string with brace 2672 } 2673 } 2674 2675 string s = `1,2,3,4,5` // commas separate actions 2676 2677 /+ } Crazy nesting block comment 2678 /+ } +/ 2679 /+ } +/ 2680 /+ /+ } +/ } +/ 2681 } 2682 +/ 2683 2684 q\"< } <}> <> <<}<>}>> >\"; // delimited string 2685 q\"[ [}] [] [[[ } ]]] ]\"; // delimited string 2686 q\"( () }(}) (((}))}) )\"; // delimited string 2687 q\"{ {} {} {{{}}} }\"; // delimited string 2688 q{ class {} {} struct void \"}\" } /* another token string } */ 2689 2690 struct S 2691 { 2692 void foo() {} 2693 void bar() {} 2694 } 2695 2696 return a; 2697 }, myAction2 }", 2698 ]; 2699 2700 auto results = [ 2701 [`myAction`], 2702 [`myAction`,`myAction2`], 2703 [`(a) {return a;}`], 2704 [`(a) { 2705 return a; 2706 }`], 2707 [`(a, b) { 2708 string s = "}"; 2709 if (a.successful,) { 2710 s ~= q"<}>"; 2711 } else { 2712 { s ~= q"<}>"; /* } */ } 2713 } 2714 return a;}`], 2715 [`myAction`,`(a) {return a;}`,`myAction2`,`(a) { /* , } */ return a; }`], 2716 [`myAction`,`a => transform(a)`,`myAction2`], 2717 [`myAction`,"(a) { 2718 /* block } comment with } braces */ 2719 string s = `} {` // wysiwyg string with braces and line comment with brace } 2720 if (s is null) { 2721 // } 2722 } else { // scopes 2723 { // nested scopes 2724 writeln(q{ \"}\" }); // token string with nested string with brace 2725 } 2726 } 2727 2728 string s = `1,2,3,4,5` // commas separate actions 2729 2730 /+ } Crazy nesting block comment 2731 /+ } +/ 2732 /+ } +/ 2733 /+ /+ } +/ } +/ 2734 } 2735 +/ 2736 2737 q\"< } <}> <> <<}<>}>> >\"; // delimited string 2738 q\"[ [}] [] [[[ } ]]] ]\"; // delimited string 2739 q\"( () }(}) (((}))}) )\"; // delimited string 2740 q\"{ {} {} {{{}}} }\"; // delimited string 2741 q{ class {} {} struct void \"}\" } /* another token string } */ 2742 2743 struct S 2744 { 2745 void foo() {} 2746 void bar() {} 2747 } 2748 2749 return a; 2750 }",`myAction2`] 2751 ]; 2752 2753 foreach(idx, act; actions) 2754 { 2755 auto grammar = `P: Rule <- RuleA ` ~ act ~ ` RuleA <- 'A'`; 2756 auto p = Pegged(grammar); 2757 2758 assert(p.successful); 2759 2760 auto action = p.children[0].children[1] // Pegged.Definition 2761 .children[2] // Pegged.Expression 2762 .children[0] // Pegged.FirstExpression 2763 .children[0] // Pegged.Sequence 2764 .children[0] // Pegged.Prefix 2765 .children[0] // Pegged.Suffix 2766 .children[1]; // Pegged.Action 2767 2768 assert(action.matches.length == results[idx].length); 2769 foreach(i, s; action.matches) 2770 assert(strip(s) == results[idx][i], 2771 "\nGot |"~s~"|" ~ "\nNeeded: |"~results[idx][i]~"|"); 2772 } 2773 } 2774 2775 unittest 2776 { 2777 // Higher-level word boundary test. 2778 mixin(grammar(` 2779 TestGrammar: 2780 2781 Foo < '{' 'X' '}' 2782 Bar < 'A' 'B' 2783 2784 Spacing <: 2785 / blank+ 2786 / blank* wordBoundary 2787 / wordBoundary blank* 2788 / ![a-zA-Z] 2789 / !. 2790 2791 `)); 2792 2793 auto pt = TestGrammar.Foo("{ X }"); 2794 assert(pt.successful); 2795 2796 pt = TestGrammar.Foo("{X}"); 2797 assert(pt.successful); 2798 2799 pt = TestGrammar.Bar("A B"); 2800 assert(pt.successful); 2801 2802 pt = TestGrammar.Bar("AB"); 2803 assert(!pt.successful); 2804 } 2805 2806 unittest // Issue #129 unit test 2807 { 2808 enum gram = ` 2809 G: 2810 A <- B 2811 B <- C 2812 C <- 'c' D 2813 D <- 'd' 2814 `; 2815 2816 mixin(grammar(gram)); 2817 2818 string input = "cd"; 2819 2820 ParseTree p = G(input); 2821 assert(p.successful); 2822 assert(p.name == "G"); 2823 assert(p.children.length == 1); 2824 assert(p.children[0].name == "G.A"); 2825 assert(p.children[0].children.length == 1); 2826 assert(p.children[0].children[0].name == "G.B"); 2827 assert(p.children[0].children[0].children.length == 1); 2828 assert(p.children[0].children[0].children[0].name == "G.C"); 2829 assert(p.children[0].children[0].children[0].children.length == 1); 2830 assert(p.children[0].children[0].children[0].children[0].name == "G.D"); 2831 assert(p.children[0].children[0].children[0].children[0].children.length == 0); 2832 } 2833 2834 unittest // Direct left-recursion 2835 { 2836 enum LeftGrammar = ` 2837 Left: 2838 S <- E eoi 2839 E <- E '+n' / 'n' 2840 `; 2841 mixin(grammar(LeftGrammar)); 2842 ParseTree result = Left("n+n+n+n"); 2843 assert(result.successful); 2844 assert(result.matches == ["n", "+n", "+n", "+n"]); 2845 } 2846 2847 unittest // Indirect left-recursion 2848 { 2849 enum LeftGrammar = ` 2850 Left: 2851 S <- E eoi 2852 E <- F 'n' / 'n' 2853 F <- E '+' 2854 `; 2855 mixin(grammar(LeftGrammar)); 2856 ParseTree result = Left("n+n+n+n"); 2857 assert(result.successful); 2858 assert(result.matches == ["n", "+", "n", "+", "n", "+", "n"]); 2859 } 2860 2861 unittest // Proper blocking of memoization 2862 { 2863 // Three interlocking cycles of indirect left-recursion. 2864 enum LeftGrammar = ` 2865 Left: 2866 S <- E eoi 2867 E <- F 'n' / 'n' 2868 F <- E '+' I* / G '-' 2869 G <- H 'm' / E 2870 H <- G 'l' 2871 I <- '(' A+ ')' 2872 A <- 'a' 2873 `; 2874 mixin(grammar(LeftGrammar)); 2875 ParseTree result = Left("nlm-n+(aaa)n"); 2876 assert(result.successful); 2877 assert(result.matches == ["n", "l", "m", "-", "n", "+", "(", "a", "a", "a", ")", "n"]); 2878 } 2879 2880 // Example from http://www.inf.puc-rio.br/~roberto/docs/sblp2012.pdf 2881 unittest // Mutual left-recursion 2882 { 2883 /* A thing about stoppers: 2884 Because P is at the intersection of left-recursive cycles P -> P and L -> P -> L, it should 2885 suffice to make only P a stopper to stop unbounded left-recursion. And so it does. But, 2886 stoppers parse greedily: they always consume the maximum of input. So below, if only P is a stopper, 2887 at some point P parses the complete input. Then L fails because it cannot append ".x", then M fails. 2888 If both are made a stopper then M succeeds. That is because P will try L when P '(n)' no longer 2889 consumes input, which will appear as a left-recursion to L if it is a stopper and because of that 2890 it will have a chance to succeed on the full input which it recorded in its seed for the previous 2891 recursion. 2892 */ 2893 enum LeftGrammar = ` 2894 Left: 2895 M <- L eoi 2896 L <- P '.x' / 'x' 2897 P <- P '(n)' / L 2898 `; 2899 mixin(grammar(LeftGrammar)); 2900 ParseTree result = Left("x(n)(n).x(n).x"); 2901 assert(result.successful); 2902 assert(result.matches == ["x", "(n)", "(n)", ".x", "(n)", ".x"]); 2903 } 2904 2905 unittest // Left- and right-recursion (is right-associative!) 2906 { 2907 enum LeftRightGrammar = ` 2908 LeftRight: 2909 M <- E eoi 2910 E <- E '+' E / 'n' 2911 `; 2912 mixin(grammar(LeftRightGrammar)); 2913 ParseTree result = LeftRight("n+n+n+n"); 2914 assert(result.successful); 2915 assert(result.matches == ["n", "+", "n", "+", "n", "+", "n"]); 2916 } 2917 2918 unittest // Hidden left-recursion 2919 { 2920 enum HiddenLeft = ` 2921 Test: 2922 M <- A eoi 2923 A <- B? C 2924 B <- 'b' 2925 C <- A 'a' / 'c' 2926 `; 2927 mixin(grammar(HiddenLeft)); 2928 assert(Test("caa").successful); 2929 assert(Test("bbca").successful); 2930 } 2931 2932 unittest // Null-matching left-recursion 2933 { 2934 enum NullMatch = ` 2935 Test: 2936 M <- S (";" S)* eoi 2937 S <- S '+' S / 'n' / eps 2938 `; 2939 mixin(grammar(NullMatch)); 2940 assert(Test("n+n;n;").matches == ["n", "+", "n", ";", "n", ";", ""]); 2941 }