/**
Parser generation module for Pegged.
The Pegged parser itself is in pegged.parser, generated from pegged.examples.peggedgrammar.

The documentation is in the /docs directory.
*/
module pegged.grammar;

import std.algorithm: startsWith;
import std.conv: to;
import std.functional: toDelegate;
import std.stdio;

public import pegged.peg;
//public import pegged.introspection;
import pegged.parser;



/**
Option enum to get internal memoization (parse results storing).
*/
enum Memoization { no, yes }

/**
Writes the parser generated from a grammar into a D module on disk.

The output file is `fileName ~ ".d"`. It contains, in order: a `/++ ... +/`
comment reproducing the grammar, the `module` declaration, the optional
header, the imports needed by the generated code and the generated parser
itself (see `grammar`).

Params:
    withMemo      = whether the generated parser memoizes parse results.
    moduleName    = name used in the generated `module` declaration.
    fileName      = output file name, without the ".d" extension.
    grammarString = the PEG definition.
    optHeader     = optional text inserted right after the module declaration.
*/
void asModule(Memoization withMemo = Memoization.yes)(string moduleName, string fileName, string grammarString, string optHeader = "")
{
    import std.stdio;
    auto f = File(fileName ~ ".d","w");

    f.write("/++\nThis module was automatically generated from the following grammar:\n\n");
    f.write(grammarString);
    f.write("\n\n+/\n");

    f.write("module " ~ moduleName ~ ";\n\n");

    if (optHeader.length > 0)
        f.write(optHeader ~ "\n\n");

    f.write("public import pegged.peg;\n");
    f.write("import std.algorithm: startsWith;\n");
    f.write("import std.functional: toDelegate;\n\n");
    f.write(grammar!(withMemo)(grammarString));
}

/**
Same as above, but the grammar is read line by line from an already opened file.

This overload has no file-name parameter, so the output file is named after
the module: `moduleName ~ ".d"`.

Params:
    withMemo   = whether the generated parser memoizes parse results.
    moduleName = name used in the `module` declaration and for the output file.
    file       = file containing the PEG definition.
    optHeader  = optional text inserted right after the module declaration.
*/
void asModule(Memoization withMemo = Memoization.yes)(string moduleName, File file, string optHeader = "")
{
    string grammarDefinition;
    foreach(line; file.byLine)
    {
        // byLine strips the line terminator: put it back.
        grammarDefinition ~= line ~ '\n';
    }
    // All four arguments are passed explicitly. With only three strings the call
    // would bind the grammar text to `fileName` and the header to `grammarString`.
    asModule!(withMemo)(moduleName, moduleName, grammarDefinition, optHeader);
}

// Helper to insert 'Spacing' before and after Primaries
ParseTree spaceArrow(ParseTree input)
{
    ParseTree wrapInSpaces(ParseTree p)
    {
        ParseTree spacer =
        ParseTree("Pegged.Prefix", true, null, null, 0,0, [
            ParseTree("Pegged.Suffix", true, null,
null, 0, 0, [ 68 ParseTree("Pegged.Primary", true, null, null, 0, 0, [ 69 ParseTree("Pegged.RhsName", true, null, null, 0,0, [ 70 ParseTree("Pegged.Identifier", true, ["Spacing"]) 71 ]) 72 ]) 73 ]) 74 ]); 75 ParseTree result = ParseTree("Pegged.WrapAround", true, p.matches, p.input, p.begin, p.end, p.children); 76 result.children = spacer ~ result.children ~ spacer; 77 return result; 78 } 79 return modify!( p => p.name == "Pegged.Primary", 80 wrapInSpaces)(input); 81 } 82 83 84 /** 85 Generate a parser from a PEG definition. 86 The parser is a string containing D code, to be mixed in or written in a file. 87 88 ---- 89 enum string def = " 90 Gram: 91 A <- 'a' B* 92 B <- 'b' / 'c' 93 "; 94 95 mixin(grammar(def)); 96 97 ParseTree p = Gram("abcbccbcd"); 98 ---- 99 */ 100 string grammar(Memoization withMemo = Memoization.yes)(string definition) 101 { 102 ParseTree defAsParseTree = Pegged(definition); 103 104 if (!defAsParseTree.successful) 105 { 106 // To work around a strange bug with ParseTree printing at compile time 107 string result = "static assert(false, `" ~ defAsParseTree.toString("") ~ "`);"; 108 return result; 109 } 110 111 string generateCode(ParseTree p, string propagatedName = "") 112 { 113 string result; 114 115 switch (p.name) 116 { 117 case "Pegged": 118 result = generateCode(p.children[0]); 119 break; 120 case "Pegged.Grammar": 121 string grammarName = generateCode(p.children[0]); 122 string shortGrammarName = p.children[0].matches[0]; 123 //string invokedGrammarName = generateCode(transformName(p.children[0])); 124 string firstRuleName = generateCode(p.children[1].children[0]); 125 126 result = 127 "struct Generic" ~ shortGrammarName ~ "(TParseTree) 128 { 129 import pegged.dynamic.grammar; 130 struct " ~ grammarName ~ "\n { 131 enum name = \"" ~ shortGrammarName ~ "\"; 132 static ParseTree delegate(ParseTree)[string] before; 133 static ParseTree delegate(ParseTree)[string] after; 134 static ParseTree delegate(ParseTree)[string] rules; 135 136 static 
this()\n    {\n";

                ParseTree[] definitions = p.children[1 .. $];
                bool userDefinedSpacing;
                // Register every non-parameterized rule in the generated static
                // constructor and, in the same pass, detect a user-defined Spacing.
                // The loop must not stop at Spacing: rules declared after it
                // still have to be registered in `rules`.
                foreach(i,def; definitions)
                {
                    if (def.children[0].children.length == 1) // Non-parameterized ruleName
                        result ~= "        rules[\"" ~ def.matches[0] ~ "\"] = toDelegate(&" ~ def.matches[0] ~ ");\n";
                    if (def.matches[0] == "Spacing") // user-defined spacing
                        userDefinedSpacing = true;
                }
                // No user-defined Spacing: register the Spacing symbol the
                // generated struct aliases to the predefined `spacing` rule.
                if(!userDefinedSpacing)
                    result ~= "        rules[\"Spacing\"] = toDelegate(&Spacing);\n";

                // End of the static constructor, followed by the runtime-hook
                // machinery emitted verbatim into the generated struct.
                result ~=
"    }

    template hooked(alias r, string name)
    {
        static ParseTree hooked(ParseTree p)
        {
            ParseTree result;

            if (name in before)
            {
                result = before[name](p);
                if (result.successful)
                    return result;
            }

            result = r(p);
            if (result.successful || name !in after)
                return result;

            result = after[name](p);
            return result;
        }

        static ParseTree hooked(string input)
        {
            return hooked!(r, name)(ParseTree(\"\",false,[],input));
        }
    }

    static void addRuleBefore(string parentRule, string ruleSyntax)
    {
        // enum name is the current grammar name
        DynamicGrammar dg = pegged.dynamic.grammar.grammar(name ~ \": \" ~ ruleSyntax, rules);
        foreach(ruleName,rule; dg.rules)
            if (ruleName != \"Spacing\") // Keep the local Spacing rule, do not overwrite it
                rules[ruleName] = rule;
        before[parentRule] = rules[dg.startingRule];
    }

    static void addRuleAfter(string parentRule, string ruleSyntax)
    {
        // enum name is the current grammar named
        DynamicGrammar dg = pegged.dynamic.grammar.grammar(name ~ \": \" ~ ruleSyntax, rules);
        foreach(name,rule; dg.rules)
        {
            if (name != \"Spacing\")
                rules[name] = rule;
        }
        after[parentRule] = rules[dg.startingRule];
    }

    static bool isRule(string s)
    {
        return s.startsWith(\"" ~ shortGrammarName ~ ".\");
    }
    ";

                if (withMemo == Memoization.yes)
                    result
~= 213 " import std.typecons:Tuple, tuple; 214 static TParseTree[Tuple!(string, size_t)] memo;\n"; 215 216 /+ 217 ~ " switch(s)\n" 218 ~ " {\n"; 219 220 bool[string] ruleNames; // to avoid duplicates, when using parameterized rules 221 string parameterizedRulesSpecialCode; // because param rules need to be put in the 'default' part of the switch 222 223 string paramRuleHandler(string target) 224 { 225 return "if (s.length >= "~to!string(shortGrammarName.length + target.length + 3) 226 ~" && s[0.."~to!string(shortGrammarName.length + target.length + 3)~"] == \"" 227 ~shortGrammarName ~ "." ~ target~"!(\") return true;"; 228 } 229 230 foreach(i,def; definitions) 231 { 232 /+ 233 if (def.matches[0] !in ruleNames) 234 { 235 ruleNames[def.matches[0]] = true; 236 237 if (def.children[0].children.length > 1) // Parameterized rule 238 parameterizedRulesSpecialCode ~= " " ~ paramRuleHandler(def.matches[0])~ "\n"; 239 else 240 result ~= " case \"" ~ shortGrammarName ~ "." ~ def.matches[0] ~ "\":\n"; 241 } 242 +/ 243 if (def.matches[0] == "Spacing") // user-defined spacing 244 { 245 userDefinedSpacing = true; 246 break; 247 } 248 } 249 result ~= " return true;\n" 250 ~ " default:\n" 251 ~ parameterizedRulesSpecialCode 252 ~ " return false;\n }\n }\n"; 253 +/ 254 result ~= " mixin decimateTree;\n"; 255 256 // If the grammar provides a Spacing rule, then this will be used. 257 // else, the predefined 'spacing' rule is used. 258 result ~= userDefinedSpacing ? "" : " alias spacing Spacing;\n\n"; 259 260 // Creating the inner functions, each corresponding to a grammar rule 261 foreach(def; definitions) 262 result ~= generateCode(def, shortGrammarName); 263 264 // if the first rule is parameterized (a template), it's impossible to get an opCall 265 // because we don't know with which template arguments it should be called. 266 // So no opCall is generated in this case. 
267 if (p.children[1].children[0].children.length == 1) 268 { 269 // General calling interface 270 result ~= " static TParseTree opCall(TParseTree p)\n" 271 ~ " {\n" 272 ~ " TParseTree result = decimateTree(" ~ firstRuleName ~ "(p));\n" 273 ~ " result.children = [result];\n" 274 ~ " result.name = \"" ~ shortGrammarName ~ "\";\n" 275 ~ " return result;\n" 276 ~ " }\n\n" 277 ~ " static TParseTree opCall(string input)\n" 278 ~ " {\n"; 279 280 if (withMemo == Memoization.no) 281 result ~= " return " ~ shortGrammarName ~ "(TParseTree(``, false, [], input, 0, 0));\n" 282 ~ "}\n"; 283 else 284 result ~= " if(__ctfe)\n" 285 ~ " {\n" 286 ~ " return " ~ shortGrammarName ~ "(TParseTree(``, false, [], input, 0, 0));\n" 287 ~ " }\n" 288 ~ " else\n" 289 ~ " {\n" 290 ~ " memo = null;\n" 291 ~ " return " ~ shortGrammarName ~ "(TParseTree(``, false, [], input, 0, 0));\n" 292 ~ " }\n" 293 ~ " }\n"; 294 295 result ~= " static string opCall(GetName g)\n" 296 ~ " {\n" 297 ~ " return \"" ~ shortGrammarName ~ "\";\n" 298 ~ " }\n\n"; 299 } 300 result ~= " }\n" // end of grammar struct definition 301 ~ "}\n\n" // end of template definition 302 ~ "alias Generic" ~ shortGrammarName ~ "!(ParseTree)." 
303 ~ shortGrammarName ~ " " ~ shortGrammarName ~ ";\n\n"; 304 break; 305 case "Pegged.Definition": 306 // children[0]: name 307 // children[1]: arrow (arrow type as first child) 308 // children[2]: description 309 310 string code; 311 312 switch(p.children[1].children[0].name) 313 { 314 case "Pegged.LEFTARROW": 315 code ~= generateCode(p.children[2]); 316 break; 317 case "Pegged.FUSEARROW": 318 code ~= "pegged.peg.fuse!(" ~ generateCode(p.children[2]) ~ ")"; 319 break; 320 case "Pegged.DISCARDARROW": 321 code ~= "pegged.peg.discard!(" ~ generateCode(p.children[2]) ~ ")"; 322 break; 323 case "Pegged.KEEPARROW": 324 code ~= "pegged.peg.keep!("~ generateCode(p.children[2]) ~ ")"; 325 break; 326 case "Pegged.DROPARROW": 327 code ~= "pegged.peg.drop!("~ generateCode(p.children[2]) ~ ")"; 328 break; 329 case "Pegged.PROPAGATEARROW": 330 code ~= "pegged.peg.propagate!("~ generateCode(p.children[2]) ~ ")"; 331 break; 332 case "Pegged.SPACEARROW": 333 ParseTree modified = spaceArrow(p.children[2]); 334 code ~= generateCode(modified); 335 break; 336 case "Pegged.ACTIONARROW": 337 auto actionResult = generateCode(p.children[2]); 338 foreach(action; p.children[1].matches[1..$]) 339 actionResult = "pegged.peg.action!(" ~ actionResult ~ ", " ~ action ~ ")"; 340 code ~= actionResult; 341 break; 342 default: 343 break; 344 } 345 346 bool parameterizedRule = p.children[0].children.length > 1; 347 string completeName = generateCode(p.children[0]); 348 string shortName = p.matches[0]; 349 string innerName; 350 string hookedName = p.matches[0]; 351 352 if (parameterizedRule) 353 { 354 result = " template " ~ completeName ~ "\n" 355 ~ " {\n"; 356 innerName ~= "\"" ~ shortName ~ "!(\" ~ "; 357 hookedName ~= "_" ~ to!string(p.children[0].children[1].children.length); 358 foreach(i,param; p.children[0].children[1].children) 359 innerName ~= "pegged.peg.getName!("~ param.children[0].matches[0] 360 ~ (i<p.children[0].children[1].children.length-1 ? 
")() ~ \", \" ~ " 361 : ")"); 362 innerName ~= " ~ \")\""; 363 } 364 else 365 { 366 innerName ~= "`" ~ completeName ~ "`"; 367 } 368 369 string ctfeCode = " pegged.peg.defined!(" ~ code ~ ", \"" ~ propagatedName ~ "." ~ innerName[1..$-1] ~ "\")"; 370 code = "hooked!(pegged.peg.defined!(" ~ code ~ ", \"" ~ propagatedName ~ "." ~ innerName[1..$-1] ~ "\"), \"" ~ hookedName ~ "\")"; 371 372 if (withMemo == Memoization.no) 373 result ~= " static TParseTree " ~ shortName ~ "(TParseTree p)\n" 374 ~ " {\n" 375 ~ " if(__ctfe)\n" 376 ~ " return " ~ ctfeCode ~ "(p);\n" 377 ~ " else\n" 378 ~ " return " ~ code ~ "(p);\n" 379 ~ " }\n" 380 ~ " static TParseTree " ~ shortName ~ "(string s)\n" 381 ~ " {\n" 382 ~ " if(__ctfe)\n" 383 ~ " return " ~ ctfeCode ~ "(TParseTree(\"\", false,[], s));\n" 384 ~ " else\n" 385 ~ " return " ~ code ~ "(TParseTree(\"\", false,[], s));\n" 386 ~ " }\n"; 387 else 388 result ~= " static TParseTree " ~ shortName ~ "(TParseTree p)\n" 389 ~ " {\n" 390 ~ " if(__ctfe)\n" 391 ~ " {\n" 392 ~ " return " ~ ctfeCode ~ "(p);\n" 393 ~ " }\n" 394 ~ " else\n" 395 ~ " {\n" 396 ~ " if(auto m = tuple("~innerName~",p.end) in memo)\n" 397 ~ " return *m;\n" 398 ~ " else\n" 399 ~ " {\n" 400 ~ " TParseTree result = " ~ code ~ "(p);\n" 401 ~ " memo[tuple("~innerName~",p.end)] = result;\n" 402 ~ " return result;\n" 403 ~ " }\n" 404 ~ " }\n" 405 ~ " }\n\n" 406 ~ " static TParseTree " ~ shortName ~ "(string s)\n" 407 ~ " {\n" 408 ~ " if(__ctfe)\n" 409 ~ " {\n" 410 ~ " return " ~ ctfeCode ~ "(TParseTree(\"\", false,[], s));\n" 411 ~ " }\n" 412 ~ " else\n" 413 ~ " {\n" 414 ~ " memo = null;\n" 415 ~ " return " ~ code ~ "(TParseTree(\"\", false,[], s));\n" 416 ~ " }\n" 417 ~ " }\n"; 418 419 result ~= " static string " ~ shortName ~ "(GetName g)\n" 420 ~ " {\n" 421 ~ " return \"" ~ propagatedName ~ "." 
~ innerName[1..$-1] ~ "\";\n" 422 ~ " }\n\n"; 423 424 if (parameterizedRule) 425 result ~= " }\n"; 426 427 break; 428 case "Pegged.GrammarName": 429 result = generateCode(p.children[0]); 430 if (p.children.length == 2) 431 result ~= generateCode(p.children[1]); 432 break; 433 case "Pegged.LhsName": 434 result = generateCode(p.children[0]); 435 if (p.children.length == 2) 436 result ~= generateCode(p.children[1]); 437 break; 438 case "Pegged.ParamList": 439 result = "("; 440 foreach(i,child; p.children) 441 result ~= generateCode(child) ~ ", "; 442 result = result[0..$-2] ~ ")"; 443 break; 444 case "Pegged.Param": 445 result = "alias " ~ generateCode(p.children[0]); 446 break; 447 case "Pegged.SingleParam": 448 result = p.matches[0]; 449 break; 450 case "Pegged.DefaultParam": 451 result = p.matches[0] ~ " = " ~ generateCode(p.children[1]); 452 break; 453 case "Pegged.Expression": 454 if (p.children.length > 1) // OR expression 455 { 456 // Keyword list detection: "abstract"/"alias"/... 457 bool isLiteral(ParseTree p) 458 { 459 return ( p.name == "Pegged.Sequence" 460 && p.children.length == 1 461 && p.children[0].children.length == 1 462 && p.children[0].children[0].children.length == 1 463 && p.children[0].children[0].children[0].children.length == 1 464 && p.children[0].children[0].children[0].children[0].name == "Pegged.Literal"); 465 } 466 bool keywordList = true; 467 foreach(child;p.children) 468 if (!isLiteral(child)) 469 { 470 keywordList = false; 471 break; 472 } 473 474 if (keywordList) 475 { 476 result = "pegged.peg.keywords!("; 477 foreach(seq; p.children) 478 result ~= "\"" ~ (seq.matches.length == 3 ? 
seq.matches[1] : "") ~ "\", ";
                        result = result[0..$-2] ~ ")";
                    }
                    else
                    {
                        result = "pegged.peg.or!(";
                        foreach(seq; p.children)
                            result ~= generateCode(seq) ~ ", ";
                        result = result[0..$-2] ~ ")";
                    }
                }
                else // One child -> just a sequence, no need for a or!( , )
                {
                    result = generateCode(p.children[0]);
                }
                break;
            case "Pegged.Sequence":
                if (p.children.length > 1) // real sequence
                {
                    // Text every generated sequence starts with. An element that
                    // starts with it is a bare parenthesized sequence and can be
                    // inlined into the enclosing and!( ... ).
                    enum andPrefix = "pegged.peg.and!(";

                    result = andPrefix;
                    foreach(seq; p.children)
                    {
                        string elementCode = generateCode(seq);
                        // flattening inner sequences
                        // Compare against the complete prefix: a 5-character slice
                        // can never equal the 16-character prefix.
                        if (   elementCode.length > andPrefix.length
                            && elementCode[0 .. andPrefix.length] == andPrefix)
                            elementCode = elementCode[andPrefix.length .. $-1]; // cutting 'pegged.peg.and!(' and ')'
                        result ~= elementCode ~ ", ";
                    }
                    result = result[0..$-2] ~ ")";
                }
                else // One child -> just a Suffix, no need for a and!( , )
                {
                    result = generateCode(p.children[0]);
                }
                break;
            case "Pegged.Prefix":
                // The last child is the operand, the preceding children are the
                // prefix operators in source order. The operator nearest to the
                // operand must wrap it first, so they are applied right to left:
                // `!&X` becomes negLookahead!(posLookahead!(X)).
                result = generateCode(p.children[$-1]);
                foreach_reverse(child; p.children[0..$-1])
                    result = generateCode(child) ~ result ~ ")";
                break;
            case "Pegged.Suffix":
                // The first child is the operand. Suffixes are applied left to
                // right, each one wrapping what has been built so far.
                result = generateCode(p.children[0]);
                foreach(child; p.children[1..$])
                {
                    switch (child.name)
                    {
                        case "Pegged.OPTION":
                            result = "pegged.peg.option!(" ~ result ~ ")";
                            break;
                        case "Pegged.ZEROORMORE":
                            result = "pegged.peg.zeroOrMore!(" ~ result ~ ")";
                            break;
                        case "Pegged.ONEORMORE":
                            result = "pegged.peg.oneOrMore!(" ~ result ~ ")";
                            break;
                        case "Pegged.Action":
                            foreach(action; child.matches)
                                result = "pegged.peg.action!(" ~ result ~ ", " ~ action ~ ")";
                            break;
                        default:
                            break;
                    }
                }
                break;
            case "Pegged.Primary":
                result = generateCode(p.children[0]);
                break;
            case "Pegged.RhsName":
                result = "";
                foreach(i,child; p.children)
                    result ~= generateCode(child);
                break;
            case "Pegged.ArgList":
                result = "!(";
                foreach(child; p.children)
553 result ~= generateCode(child) ~ ", "; // Allow A <- List('A'*,',') 554 result = result[0..$-2] ~ ")"; 555 break; 556 case "Pegged.Identifier": 557 result = p.matches[0]; 558 break; 559 case "Pegged.NAMESEP": 560 result = "."; 561 break; 562 case "Pegged.Literal": 563 if(p.matches.length == 3) // standard case 564 result = "pegged.peg.literal!(\"" ~ p.matches[1] ~ "\")"; 565 else // only two children -> empty literal 566 result = "pegged.peg.literal!(``)"; 567 break; 568 case "Pegged.CharClass": 569 if (p.children.length > 1) 570 { 571 result = "pegged.peg.or!("; 572 foreach(seq; p.children) 573 result ~= generateCode(seq) ~ ", "; 574 result = result[0..$-2] ~ ")"; 575 } 576 else // One child -> just a sequence, no need for a or!( , ) 577 { 578 result = generateCode(p.children[0]); 579 } 580 break; 581 case "Pegged.CharRange": 582 /// Make the generation at the Char level: directly what is needed, be it `` or "" or whatever 583 if (p.children.length > 1) // a-b range 584 { 585 result = "pegged.peg.charRange!('" ~ generateCode(p.children[0]) 586 ~ "', '" 587 ~ generateCode(p.children[1]) 588 ~ "')"; 589 } 590 else // lone char 591 { 592 result = "pegged.peg.literal!("; 593 string ch = p.matches[0]; 594 switch (ch) 595 { 596 case "\\[": 597 case "\\]": 598 case "\\-": 599 result ~= "\"" ~ ch[1..$] ~ "\")"; 600 break; 601 case "\\\'": 602 result ~= "\"'\")"; 603 break; 604 case "\\`": 605 result ~= q{"`")}; 606 break; 607 case "\\": 608 case "\\\\": 609 result ~= "`\\`)"; 610 break; 611 case "\"": 612 case "\\\"": 613 result ~= "`\"`)"; 614 break; 615 case "\n": 616 case "\r": 617 case "\t": 618 result ~= "\"" ~ to!string(to!dchar(ch)) ~ "\")"; 619 break; 620 default: 621 result ~= "\"" ~ ch ~ "\")"; 622 } 623 } 624 break; 625 case "Pegged.Char": 626 string ch = p.matches[0]; 627 switch (ch) 628 { 629 case "\\[": 630 case "\\]": 631 case "\\-": 632 633 case "\\\'": 634 case "\\\"": 635 case "\\`": 636 case "\\\\": 637 result = ch[1..$]; 638 break; 639 case "\n": 
640 case "\r": 641 case "\t": 642 result = to!string(to!dchar(ch)); 643 break; 644 default: 645 result = ch; 646 } 647 break; 648 case "Pegged.POS": 649 result = "pegged.peg.posLookahead!("; 650 break; 651 case "Pegged.NEG": 652 result = "pegged.peg.negLookahead!("; 653 break; 654 case "Pegged.FUSE": 655 result = "pegged.peg.fuse!("; 656 break; 657 case "Pegged.DISCARD": 658 result = "pegged.peg.discard!("; 659 break; 660 //case "Pegged.CUT": 661 // result = "discardChildren!("; 662 // break; 663 case "Pegged.KEEP": 664 result = "pegged.peg.keep!("; 665 break; 666 case "Pegged.DROP": 667 result = "pegged.peg.drop!("; 668 break; 669 case "Pegged.PROPAGATE": 670 result = "pegged.peg.propagate!("; 671 break; 672 case "Pegged.OPTION": 673 result = "pegged.peg.option!("; 674 break; 675 case "Pegged.ZEROORMORE": 676 result = "pegged.peg.zeroOrMore!("; 677 break; 678 case "Pegged.ONEORMORE": 679 result = "pegged.peg.oneOrMore!("; 680 break; 681 case "Pegged.Action": 682 result = generateCode(p.children[0]); 683 foreach(action; p.matches[1..$]) 684 result = "pegged.peg.action!(" ~ result ~ ", " ~ action ~ ")"; 685 break; 686 case "Pegged.ANY": 687 result = "pegged.peg.any"; 688 break; 689 case "Pegged.WrapAround": 690 result = "pegged.peg.wrapAround!(" ~ generateCode(p.children[0]) ~ ", " 691 ~ generateCode(p.children[1]) ~ ", " 692 ~ generateCode(p.children[2]) ~ ")"; 693 break; 694 default: 695 result = "Bad tree: " ~ p.toString(); 696 break; 697 } 698 return result; 699 } 700 701 702 703 return generateCode(defAsParseTree); 704 } 705 706 /** 707 Mixin to get what a failed rule expected as input. 708 Not used by Pegged yet. 709 */ 710 mixin template expected() 711 { 712 string expected(ParseTree p) 713 { 714 715 switch(p.name) 716 { 717 case "Pegged.Expression": 718 string expectation; 719 foreach(i, child; p.children) 720 expectation ~= "(" ~ expected(child) ~ ")" ~ (i < p.children.length -1 ? 
" or " : ""); 721 return expectation; 722 case "Pegged.Sequence": 723 string expectation; 724 foreach(i, expr; p.children) 725 expectation ~= "(" ~ expected(expr) ~ ")" ~ (i < p.children.length -1 ? " followed by " : ""); 726 return expectation; 727 case "Pegged.Prefix": 728 return expected(p.children[$-1]); 729 case "Pegged.Suffix": 730 string expectation; 731 string end; 732 foreach(prefix; p.children[1..$]) 733 switch(prefix.name) 734 { 735 case "Pegged.ZEROORMORE": 736 expectation ~= "zero or more times ("; 737 end ~= ")"; 738 break; 739 case "Pegged.ONEORMORE": 740 expectation ~= "one or more times ("; 741 end ~= ")"; 742 break; 743 case "Pegged.OPTION": 744 expectation ~= "optionally ("; 745 end ~= ")"; 746 break; 747 case "Pegged.Action": 748 break; 749 default: 750 break; 751 } 752 return expectation ~ expected(p.children[0]) ~ end; 753 case "Pegged.Primary": 754 return expected(p.children[0]); 755 //case "Pegged.RhsName": 756 // return "RhsName, not implemented."; 757 case "Pegged.Literal": 758 return "the literal `" ~ p.matches[0] ~ "`"; 759 case "Pegged.CharClass": 760 string expectation; 761 foreach(i, child; p.children) 762 expectation ~= expected(child) ~ (i < p.children.length -1 ? 
" or " : ""); 763 return expectation; 764 case "Pegged.CharRange": 765 if (p.children.length == 1) 766 return expected(p.children[0]); 767 else 768 return "any character between '" ~ p.matches[0] ~ "' and '" ~ p.matches[2] ~ "'"; 769 case "Pegged.Char": 770 return "the character '" ~ p.matches[0] ~ "'"; 771 case "Pegged.ANY": 772 return "any character"; 773 default: 774 return "unknow rule (" ~ p.matches[0] ~ ")"; 775 } 776 } 777 } 778 779 unittest // 'grammar' unit test: low-level functionalities 780 { 781 mixin(grammar(` 782 Test1: 783 Rule1 <- 'a' 784 Rule2 <- 'b' 785 `)); 786 787 assert(is(Test1 == struct), "A struct name Test1 is created."); 788 assert(is(typeof(Test1("a"))), "Test1 is callable with a string arg"); 789 assert(__traits(hasMember, Test1, "Rule1"), "Test1 has a member named Rule1."); 790 assert(__traits(hasMember, Test1, "Rule2"), "Test1 has a member named Rule2."); 791 assert(is(typeof(Test1.Rule1("a"))), "Test1.Rule1 is callable with a string arg"); 792 assert(is(typeof(Test1.Rule2("a"))), "Test1.Rule2 is callable with a string arg"); 793 794 assert(__traits(hasMember, Test1, "decimateTree"), "Test1 has a member named decimateTree."); 795 assert(__traits(hasMember, Test1, "name"), "Test1 has a member named name."); 796 assert(__traits(hasMember, Test1, "isRule"), "Test1 has a member named isRule."); 797 } 798 799 unittest // 'grammar' unit test: PEG syntax 800 { 801 // Here we do not test pegged.peg.*, just the grammar transformations 802 // From a PEG to a Pegged expression template. 803 804 mixin(grammar(` 805 Terminals: 806 Literal1 <- "abc" 807 Literal2 <- 'abc' 808 EmptyLiteral1 <- "" 809 EmptyLiteral2 <- '' 810 811 Any <- . 
812 Eps <- eps 813 Letter <- [a-z] 814 Digit <- [0-9] 815 ABC <- [abc] 816 Alpha1 <- [a-zA-Z_] 817 Alpha2 <- [_a-zA-Z] 818 Chars1 <- [\0-\127] 819 Chars2 <- [\x00-\xFF] 820 Chars3 <- [\u0000-\u00FF] 821 Chars4 <- [\U00000000-\U000000FF] 822 `)); 823 824 ParseTree result = Terminals("abc"); 825 826 assert(result.name == "Terminals", "Grammar name test."); 827 assert(result.children[0].name == "Terminals.Literal1", "First rule name test."); 828 assert(result.begin == 0); 829 assert(result.end == 3); 830 assert(result.matches == ["abc"]); 831 832 ParseTree reference = Terminals.decimateTree(Terminals.Literal1("abc")); 833 834 assert(result.children[0] == reference, "Invoking a grammar is like invoking its first rule."); 835 836 assert(Terminals.Literal1("abc").successful, "Standard terminal test. Double quote syntax."); 837 assert(Terminals.Literal2("abc").successful, "Standard terminal test. Simple quote syntax."); 838 assert(Terminals.EmptyLiteral1("").successful , "Standard terminal test. Double quote syntax."); 839 assert(Terminals.EmptyLiteral2("").successful, "Standard terminal test. Simple quote syntax."); 840 841 foreach(char c; char.min .. 
char.max)
        assert(Terminals.Any(""~c).successful, "Any terminal ('.') test.");

    assert(Terminals.Eps("").successful, "Eps test.");
    assert(Terminals.Eps("abc").successful, "Eps test.");

    string lower = "abcdefghijklmnopqrstuvwxyz";
    string upper = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
    string under = "_";
    string digits = "0123456789";
    string others = "?./,;:!*&()[]<>";

    // Letter <- [a-z]: only lowercase letters match.
    foreach(dchar dc; lower)
        assert(Terminals.Letter(to!string(dc)).successful);
    foreach(dchar dc; upper)
        assert(!Terminals.Letter(to!string(dc)).successful);
    foreach(dchar dc; digits)
        assert(!Terminals.Letter(to!string(dc)).successful);
    foreach(dchar dc; others)
        assert(!Terminals.Letter(to!string(dc)).successful);

    // Digit <- [0-9]: only digits match.
    foreach(dchar dc; lower)
        assert(!Terminals.Digit(to!string(dc)).successful);
    foreach(dchar dc; upper)
        assert(!Terminals.Digit(to!string(dc)).successful);
    foreach(dchar dc; digits)
        assert(Terminals.Digit(to!string(dc)).successful);
    // This loop belongs to the Digit group, so it must exercise Digit, not Letter.
    foreach(dchar dc; others)
        assert(!Terminals.Digit(to!string(dc)).successful);

    // Alpha1 <- [a-zA-Z_]
    foreach(dchar dc; lower ~ upper ~ under)
        assert(Terminals.Alpha1(to!string(dc)).successful);
    foreach(dchar dc; digits ~ others)
        assert(!Terminals.Alpha1(to!string(dc)).successful);

    // Alpha2 <- [_a-zA-Z]: same set as Alpha1, listed in a different order.
    foreach(dchar dc; lower ~ upper ~ under)
        assert(Terminals.Alpha2(to!string(dc)).successful);
    foreach(dchar dc; digits ~ others)
        assert(!Terminals.Alpha2(to!string(dc)).successful);

    // ABC <- [abc]: only the first three characters of `lower` match.
    foreach(size_t index, dchar dc; lower ~ upper ~ under)
        assert(   (index <  3 &&  Terminals.ABC(to!string(dc)).successful)
               || (index >= 3 && !Terminals.ABC(to!string(dc)).successful));

    // Escaped ranges (Chars1 .. Chars4), checked over code points 0 .. 255.
    foreach(dchar dc; 0..256)
    {
        string s = to!string(dc);
        if (dc <= '\127')
            assert(Terminals.Chars1(s).successful);
        else
            assert(!Terminals.Chars1(s).successful);

        assert(Terminals.Chars2(s).successful);
        assert(Terminals.Chars3(s).successful);
assert(Terminals.Chars4(s).successful); 896 } 897 898 mixin(grammar(` 899 Structure: 900 Rule1 <- Rule2 / Rule3 / Rule4 # Or test 901 Rule2 <- Rule3 Rule4 # And test 902 Rule3 <- "abc" 903 Rule4 <- "def" 904 905 Rule5 <- (Rule2 / Rule4) Rule3 # parenthesis test 906 Rule6 <- Rule2 / Rule4 Rule3 907 Rule7 <- Rule2 / (Rule4 Rule3) 908 `)); 909 910 // Invoking Rule2 (and hence, Rule3 Rule4) 911 result = Structure("abcdef"); 912 913 assert(result.successful, "Calling Rule2."); 914 assert(result.name == "Structure", "Grammar name test."); 915 assert(result.children[0].name == "Structure.Rule1", "First rule name test"); 916 assert(result.children[0].children.length == 1); 917 assert(result.children[0].children[0].name == "Structure.Rule2"); 918 assert(result.children[0].children[0].children[0].name == "Structure.Rule3"); 919 assert(result.children[0].children[0].children[1].name == "Structure.Rule4"); 920 assert(result.matches == ["abc", "def"]); 921 assert(result.begin ==0); 922 assert(result.end == 6); 923 924 // Invoking Rule3 925 result = Structure("abc"); 926 927 assert(result.successful, "Calling Rule3."); 928 assert(result.name == "Structure", "Grammar name test."); 929 assert(result.children[0].name == "Structure.Rule1", "First rule name test"); 930 assert(result.children[0].children.length == 1); 931 assert(result.children[0].children[0].name == "Structure.Rule3"); 932 assert(result.children[0].children[0].children.length == 0); 933 assert(result.matches == ["abc"]); 934 assert(result.begin ==0); 935 assert(result.end == 3); 936 937 // Invoking Rule4 938 result = Structure("def"); 939 940 assert(result.successful, "Calling Rule2."); 941 assert(result.name == "Structure", "Grammar name test."); 942 assert(result.children[0].name == "Structure.Rule1", "First rule name test"); 943 assert(result.children[0].children.length == 1); 944 assert(result.children[0].children[0].name == "Structure.Rule4"); 945 assert(result.children[0].children[0].children.length == 0); 946 
assert(result.matches == ["def"]); 947 assert(result.begin ==0); 948 assert(result.end == 3); 949 950 // Failure 951 result =Structure("ab_def"); 952 assert(!result.successful); 953 assert(result.name == "Structure", "Grammar name test."); 954 assert(result.begin == 0); 955 assert(result.end == 0); 956 957 // Parenthesis test 958 // Rule5 <- (Rule2 / Rule4) Rule3 959 result = Structure.decimateTree(Structure.Rule5("abcdefabc")); 960 assert(result.successful); 961 assert(result.children.length == 2, "Two children: (Rule2 / Rule4), followed by Rule3."); 962 assert(result.children[0].name == "Structure.Rule2"); 963 assert(result.children[1].name == "Structure.Rule3"); 964 965 result = Structure.decimateTree(Structure.Rule5("defabc")); 966 assert(result.successful); 967 assert(result.children.length == 2, "Two children: (Rule2 / Rule4), followed by Rule3."); 968 assert(result.children[0].name == "Structure.Rule4"); 969 assert(result.children[1].name == "Structure.Rule3"); 970 971 // Rule6 <- Rule2 / Rule4 Rule3 972 result = Structure.decimateTree(Structure.Rule6("abcdef")); 973 assert(result.successful); 974 assert(result.children.length == 1, "One child: Rule2."); 975 assert(result.children[0].name == "Structure.Rule2"); 976 977 result = Structure.decimateTree(Structure.Rule6("defabc")); 978 assert(result.successful); 979 assert(result.children.length == 2, "Two children: Rule4, followed by Rule3."); 980 assert(result.children[0].name == "Structure.Rule4"); 981 assert(result.children[1].name == "Structure.Rule3"); 982 983 // Rule7 <- Rule2 / (Rule4 Rule3) 984 // That is, like Rule6 985 result = Structure.decimateTree(Structure.Rule7("abcdef")); 986 assert(result.successful); 987 assert(result.children.length == 1, "One child: Rule2."); 988 assert(result.children[0].name == "Structure.Rule2"); 989 990 result = Structure.decimateTree(Structure.Rule7("defabc")); 991 assert(result.successful); 992 assert(result.children.length == 2, "Two children: Rule4, followed by 
Rule3."); 993 assert(result.children[0].name == "Structure.Rule4"); 994 assert(result.children[1].name == "Structure.Rule3"); 995 996 // Prefixes and Suffixes 997 mixin(grammar(` 998 PrefixSuffix: 999 Rule1 <- &"abc" 1000 Rule2 <- !"abc" 1001 Rule3 <- "abc"? 1002 Rule4 <- "abc"* 1003 Rule5 <- "abc"+ 1004 `)); 1005 1006 // Verifying &"abc" creates a positive look-ahead construct 1007 result = PrefixSuffix.Rule1("abc"); 1008 reference = posLookahead!(literal!"abc")("abc"); 1009 1010 assert(result.matches == reference.matches); 1011 assert(result.begin == reference.begin); 1012 assert(result.end == reference.end); 1013 assert(result.children[0].children == reference.children); 1014 1015 result = PrefixSuffix.Rule1("def"); 1016 reference = posLookahead!(literal!"abc")("def"); 1017 1018 assert(result.matches == reference.matches); 1019 assert(result.begin == reference.begin); 1020 assert(result.end == reference.end); 1021 assert(result.children[0].children == reference.children); 1022 1023 1024 // Verifying !"abc" creates a negative look-ahead construct 1025 result = PrefixSuffix.Rule2("abc"); 1026 reference = negLookahead!(literal!"abc")("abc"); 1027 1028 assert(result.matches == reference.matches); 1029 assert(result.begin == reference.begin); 1030 assert(result.end == reference.end); 1031 assert(result.children[0].children == reference.children); 1032 1033 result = PrefixSuffix.Rule2("def"); 1034 reference = negLookahead!(literal!"abc")("def"); 1035 1036 assert(result.matches == reference.matches); 1037 assert(result.begin == reference.begin); 1038 assert(result.end == reference.end); 1039 assert(result.children[0].children == reference.children); 1040 1041 // Verifying "abc"? 
creates an optional construct 1042 result = PrefixSuffix.Rule3("abc"); 1043 reference = option!(literal!"abc")("abc"); 1044 1045 assert(result.matches == reference.matches); 1046 assert(result.begin == reference.begin); 1047 assert(result.end == reference.end); 1048 assert(result.children[0].children == reference.children); 1049 1050 result = PrefixSuffix.Rule3("def"); 1051 reference = option!(literal!"abc")("def"); 1052 1053 assert(result.matches == reference.matches); 1054 assert(result.begin == reference.begin); 1055 assert(result.end == reference.end); 1056 assert(result.children[0].children == reference.children); 1057 1058 // Verifying "abc"* creates a zero or more construct 1059 result = PrefixSuffix.Rule4(""); 1060 reference = zeroOrMore!(literal!"abc")(""); 1061 1062 assert(result.matches == reference.matches); 1063 assert(result.begin == reference.begin); 1064 assert(result.end == reference.end); 1065 assert(result.children[0].children == reference.children); 1066 1067 result = PrefixSuffix.Rule4("abc"); 1068 reference = zeroOrMore!(literal!"abc")("abc"); 1069 1070 assert(result.matches == reference.matches); 1071 assert(result.begin == reference.begin); 1072 assert(result.end == reference.end); 1073 assert(result.children[0].children == reference.children); 1074 1075 result = PrefixSuffix.Rule4("abcabc"); 1076 reference = zeroOrMore!(literal!"abc")("abcabc"); 1077 1078 assert(result.matches == reference.matches); 1079 assert(result.begin == reference.begin); 1080 assert(result.end == reference.end); 1081 assert(result.children[0].children == reference.children); 1082 1083 // Verifying "abc"+ creates a one or more construct 1084 result = PrefixSuffix.Rule5(""); 1085 reference = oneOrMore!(literal!"abc")(""); 1086 1087 assert(result.matches == reference.matches); 1088 assert(result.begin == reference.begin); 1089 assert(result.end == reference.end); 1090 assert(result.children[0].children == reference.children); 1091 1092 result = 
PrefixSuffix.Rule5("abc");
    reference = oneOrMore!(literal!"abc")("abc");

    assert(result.matches == reference.matches);
    assert(result.begin == reference.begin);
    assert(result.end == reference.end);
    assert(result.children[0].children == reference.children);

    result = PrefixSuffix.Rule5("abcabc");
    reference = oneOrMore!(literal!"abc")("abcabc");

    assert(result.matches == reference.matches);
    assert(result.begin == reference.begin);
    assert(result.end == reference.end);
    assert(result.children[0].children == reference.children);
}

// Rule definitions may be split over several lines: the arrow and the
// expression can each sit on their own line, a new rule (Rule5) can begin on
// the same line as the end of the previous one, and blank lines may separate
// the elements of a sequence ('g' ... 'h').
unittest // Multilines rules
{
    mixin(grammar(`
    Indentation:
    Rule1 <
    'a'
    'b'
    'c'
    Rule2
    <-
    'd'
    Rule3
    <
    'e'
    Rule4 <- 'f' Rule5 # Rule4 ends with 'f', then it's Rule5
    <- 'g'




    'h'
    `));


    // Calling the grammar itself is expected to start at its first rule
    // (Rule1 here) -- NOTE(review): confirm against the generated parser.
    assert(Indentation("abc").successful);
    assert(Indentation.Rule2("d").successful);
    assert(Indentation.Rule3("e").successful);
    assert(Indentation.Rule4("f").successful);
    assert(Indentation.Rule5("gh").successful);
}

// The same input is parsed twice: once at run time and once as the
// initializer of an `enum`, which D evaluates at compile time. Both parse
// trees must compare equal, for a matching and for a non-matching input.
unittest // Parsing at compile-time
{
    mixin(grammar(`
    Test:
    Rule1 <- 'a' Rule2('b')
    Rule2(B) <- B
    `));

    // Equality on success
    ParseTree result = Test("ab");

    // `enum` forces compile-time evaluation of the parse.
    enum CTsuccess = Test("ab");

    assert(CTsuccess == result, "Compile-time parsing is equal to runtime parsing on success.");

    // Equality on failure
    result = Test("ac");
    enum CTfailure = Test("ac");

    assert(CTfailure == result, "Compile-time parsing is equal to runtime parsing on failure.");
}

unittest // PEG extensions (arrows, prefixes, suffixes)
{
    mixin(grammar(`
    Arrows:
    Rule1 <- ABC DEF # Standard arrow
    Rule2 < ABC DEF # Space arrow
    Rule3 < ABC DEF* # Space arrow
    Rule4 < ABC+ DEF # Space arrow

    Rule5 <- ABC*
    Rule6 <~
ABC* # Fuse arrow 1173 1174 Rule7 <: ABC DEF # Discard arrow 1175 Rule8 <^ ABC DEF # Keep arrow 1176 Rule9 <; ABC DEF # Drop arrow 1177 Rule10 <% ABC Rule1 DEF # Propagate arrow 1178 1179 ABC <- "abc" 1180 DEF <- "def" 1181 `)); 1182 1183 // Comparing <- ABC DEF and < ABC DEF 1184 ParseTree result = Arrows.decimateTree(Arrows.Rule1("abcdef")); 1185 assert(result.successful); 1186 assert(result.begin == 0); 1187 assert(result.end ==6); 1188 assert(result.matches == ["abc", "def"]); 1189 assert(result.children.length == 2); 1190 assert(result.children[0].name == "Arrows.ABC"); 1191 assert(result.children[1].name == "Arrows.DEF"); 1192 1193 result = Arrows.decimateTree(Arrows.Rule1("abc def")); 1194 assert(!result.successful); 1195 1196 result = Arrows.decimateTree(Arrows.Rule2("abcdef")); 1197 assert(result.successful); 1198 assert(result.begin == 0); 1199 assert(result.end == 6); 1200 assert(result.matches == ["abc", "def"]); 1201 assert(result.children.length == 2); 1202 assert(result.children[0].name == "Arrows.ABC"); 1203 assert(result.children[1].name == "Arrows.DEF"); 1204 1205 result = Arrows.decimateTree(Arrows.Rule2("abc def ")); 1206 assert(result.successful, "space arrows consume spaces."); 1207 assert(result.begin == 0); 1208 assert(result.end == "abc def ".length, "The entire input is parsed."); 1209 assert(result.matches == ["abc", "def"]); 1210 assert(result.children.length == 2); 1211 assert(result.children[0].name == "Arrows.ABC"); 1212 assert(result.children[1].name == "Arrows.DEF"); 1213 1214 result = Arrows.decimateTree(Arrows.Rule3("abcdefdef")); 1215 assert(result.successful); 1216 assert(result.begin == 0); 1217 assert(result.end == "abcdefdef".length); 1218 assert(result.matches == ["abc", "def", "def"]); 1219 assert(result.children.length == 3); 1220 assert(result.children[0].name == "Arrows.ABC"); 1221 assert(result.children[1].name == "Arrows.DEF"); 1222 assert(result.children[2].name == "Arrows.DEF"); 1223 1224 result = 
Arrows.decimateTree(Arrows.Rule3("abc def defdef")); 1225 assert(result.successful, "space arrows consume spaces."); 1226 assert(result.begin == 0); 1227 assert(result.end == "abc def defdef".length, "The entire input is parsed."); 1228 assert(result.matches == ["abc", "def", "def", "def"]); 1229 assert(result.children.length == 4); 1230 assert(result.children[0].name == "Arrows.ABC"); 1231 assert(result.children[1].name == "Arrows.DEF"); 1232 assert(result.children[2].name == "Arrows.DEF"); 1233 assert(result.children[3].name == "Arrows.DEF"); 1234 1235 result = Arrows.decimateTree(Arrows.Rule4("abcabcdef")); 1236 assert(result.successful); 1237 assert(result.begin == 0); 1238 assert(result.end == "abcabcdef".length); 1239 assert(result.matches == ["abc", "abc", "def"]); 1240 assert(result.children.length == 3); 1241 assert(result.children[0].name == "Arrows.ABC"); 1242 assert(result.children[1].name == "Arrows.ABC"); 1243 assert(result.children[2].name == "Arrows.DEF"); 1244 1245 result = Arrows.decimateTree(Arrows.Rule4(" abc abcabc def ")); 1246 assert(result.successful, "space arrows consume spaces."); 1247 assert(result.begin == 0); 1248 assert(result.end == " abc abcabc def ".length, "The entire input is parsed."); 1249 assert(result.matches == ["abc", "abc", "abc", "def"]); 1250 assert(result.children.length == 4); 1251 assert(result.children[0].name == "Arrows.ABC"); 1252 assert(result.children[1].name == "Arrows.ABC"); 1253 assert(result.children[2].name == "Arrows.ABC"); 1254 assert(result.children[3].name == "Arrows.DEF"); 1255 1256 //Comparing <- ABC* and <~ ABC* 1257 result = Arrows.decimateTree(Arrows.Rule5("abcabcabc")); 1258 assert(result.successful); 1259 assert(result.begin == 0); 1260 assert(result.end == "abcabcabc".length, "The entire input is parsed."); 1261 assert(result.matches == ["abc", "abc", "abc"]); 1262 assert(result.children.length == 3, "With the * operator, all children are kept."); 1263 assert(result.children[0].name == 
"Arrows.ABC"); 1264 assert(result.children[1].name == "Arrows.ABC"); 1265 assert(result.children[2].name == "Arrows.ABC"); 1266 1267 result = Arrows.decimateTree(Arrows.Rule6("abcabcabc")); 1268 assert(result.successful); 1269 assert(result.begin == 0); 1270 assert(result.end == "abcabcabc".length, "The entire input is parsed."); 1271 assert(result.matches == ["abcabcabc"], "Matches are fused."); 1272 assert(result.children.length == 0, "The <~ arrow cuts children."); 1273 1274 // Comparing <- ABC DEF and <: ABC DEF 1275 result = Arrows.decimateTree(Arrows.Rule7("abcdef")); 1276 assert(result.successful); 1277 assert(result.begin == 0); 1278 assert(result.end == "abcdef".length, "The entire input is parsed."); 1279 assert(result.matches is null, "No match with the discard arrow."); 1280 assert(result.children.length == 0, "No children with the discard arrow."); 1281 1282 // Comparing <- ABC DEF and <^ ABC DEF 1283 //But <^ is not very useful anyways. It does not distribute ^ among the subrules. 1284 result = Arrows.decimateTree(Arrows.Rule8("abcdef")); 1285 assert(result.successful); 1286 assert(result.begin == 0); 1287 assert(result.end == "abcdef".length, "The entire input is parsed."); 1288 assert(result.matches == ["abc", "def"]); 1289 assert(result.children[0].children.length == 2); 1290 1291 // Comparing <- ABC DEF and <; ABC DEF 1292 result = Arrows.decimateTree(Arrows.Rule9("abcdef")); 1293 assert(result.successful); 1294 assert(result.begin == 0); 1295 assert(result.end == "abcdef".length, "The entire input is parsed."); 1296 assert(result.matches == ["abc", "def"], "The drop arrow keeps the matches."); 1297 assert(result.children.length == 0, "The drop arrow drops the children."); 1298 1299 // Comparing <- ABC DEF and <% ABC Rule1 DEF 1300 //But <% is not very useful anyways. It does not distribute % among the subrules. 
result = Arrows.decimateTree(Arrows.Rule10("abcabcdefdef"));
    assert(result.successful);
    assert(result.begin == 0);
    assert(result.end == "abcabcdefdef".length, "The entire input is parsed.");
    assert(result.matches == ["abc", "abc", "def", "def"]);
    assert(result.children.length == 3);
    assert(result.children[0].name == "Arrows.ABC");
    assert(result.children[1].name == "Arrows.Rule1", "No rule replacement by its own children. See % for that.");
    assert(result.children[2].name == "Arrows.DEF");
}

// The space arrow applied to a sequence containing a repeated group:
// `A (B C)+` must yield the same five children (A, B, C, B, C) whether or
// not the input contains blanks between the elements.
unittest //More space arrow tests
{
    mixin(grammar(`
    Spaces:
    Rule1 < A (B C)+
    A <- 'a'
    B <- 'b'
    C <- 'c'
    `));

    // Node names expected after decimation, in order.
    string[] expectedNames = ["Spaces.A", "Spaces.B", "Spaces.C", "Spaces.B", "Spaces.C"];

    // First the compact input, then the same tokens separated by spaces.
    foreach (text; ["abcbc", " a bc b c "])
    {
        ParseTree tree = Spaces.decimateTree(Spaces.Rule1(text));

        assert(tree.successful);
        assert(tree.begin == 0);
        assert(tree.end == text.length);
        assert(tree.children.length == 5);
        foreach (idx, expected; expectedNames)
            assert(tree.children[idx].name == expected);
    }
}

unittest // Prefix and suffix tests
{
    mixin(grammar(`
    PrefixSuffix:
    # Reference
    Rule1 <- ABC DEF
    Rule2 <- "abc" "def"
    Rule3 <- ABC*
    Rule4 <- "abc"*

    # Discard operator
    Rule5 <- :ABC DEF
    Rule6 <- ABC :DEF
    Rule7 <- :"abc" "def"
    Rule8 <-
"abc" :"def" 1362 1363 # Drop operator 1364 Rule9 <- ;ABC DEF 1365 Rule10 <- ABC ;DEF 1366 Rule11 <- ;"abc" "def" 1367 Rule12 <- "abc" ;"def" 1368 1369 # Fuse operator 1370 1371 Rule13 <- ~( ABC* ) 1372 Rule14 <- ~("abc"*) 1373 1374 # Keep operator 1375 Rule15 <- ^"abc" ^"def" 1376 1377 # Propagate operator 1378 Rule16 <- ABC Rule1 DEF 1379 Rule17 <- ABC %Rule1 DEF 1380 1381 1382 ABC <- "abc" 1383 DEF <- "def" 1384 `)); 1385 1386 1387 // Comparing standard and discarded rules 1388 auto result = PrefixSuffix.decimateTree(PrefixSuffix.Rule1("abcdef")); 1389 1390 assert(result.successful); 1391 assert(result.begin == 0); 1392 assert(result.end == 6); 1393 assert(result.matches == ["abc", "def"]); 1394 assert(result.children.length == 2); 1395 assert(result.children[0].name == "PrefixSuffix.ABC"); 1396 assert(result.children[1].name == "PrefixSuffix.DEF"); 1397 1398 result = PrefixSuffix.decimateTree(PrefixSuffix.Rule5("abcdef")); 1399 1400 assert(result.successful); 1401 assert(result.begin == 0); 1402 assert(result.end == 6); 1403 assert(result.matches == ["def"]); 1404 assert(result.children.length == 1, "The first child is discarded."); 1405 assert(result.children[0].name == "PrefixSuffix.DEF"); 1406 1407 result = PrefixSuffix.decimateTree(PrefixSuffix.Rule6("abcdef")); 1408 1409 assert(result.successful); 1410 assert(result.begin == 0); 1411 assert(result.end == 6); 1412 assert(result.matches == ["abc"]); 1413 assert(result.children.length == 1, "The second child is discarded."); 1414 assert(result.children[0].name == "PrefixSuffix.ABC"); 1415 1416 1417 result = PrefixSuffix.decimateTree(PrefixSuffix.Rule2("abcdef")); 1418 1419 assert(result.successful); 1420 assert(result.begin == 0); 1421 assert(result.end == 6); 1422 assert(result.matches == ["abc", "def"]); 1423 assert(result.children.length == 0, "Literals do not create children."); 1424 1425 result = PrefixSuffix.decimateTree(PrefixSuffix.Rule7("abcdef")); 1426 1427 assert(result.successful); 1428 
assert(result.begin == 0); 1429 assert(result.end == 6); 1430 assert(result.matches == ["def"]); 1431 assert(result.children.length == 0); 1432 1433 result = PrefixSuffix.decimateTree(PrefixSuffix.Rule8("abcdef")); 1434 1435 assert(result.successful); 1436 assert(result.begin == 0); 1437 assert(result.end == 6); 1438 assert(result.matches == ["abc"]); 1439 assert(result.children.length == 0); 1440 1441 // Comparing standard and dropped rules 1442 1443 result = PrefixSuffix.decimateTree(PrefixSuffix.Rule9("abcdef")); 1444 1445 assert(result.successful); 1446 assert(result.begin == 0); 1447 assert(result.end == 6); 1448 assert(result.matches == ["abc", "def"], "All matches are there."); 1449 assert(result.children.length == 1, "The first child is discarded."); 1450 assert(result.children[0].name == "PrefixSuffix.DEF"); 1451 1452 result = PrefixSuffix.decimateTree(PrefixSuffix.Rule10("abcdef")); 1453 1454 assert(result.successful); 1455 assert(result.begin == 0); 1456 assert(result.end == 6); 1457 assert(result.matches == ["abc", "def"], "All matches are there."); 1458 assert(result.children.length == 1, "The second child is discarded."); 1459 assert(result.children[0].name == "PrefixSuffix.ABC"); 1460 1461 result = PrefixSuffix.decimateTree(PrefixSuffix.Rule11("abcdef")); 1462 1463 assert(result.successful); 1464 assert(result.begin == 0); 1465 assert(result.end == 6); 1466 assert(result.matches == ["abc", "def"], "All matches are there."); 1467 assert(result.children.length == 0); 1468 1469 result = PrefixSuffix.decimateTree(PrefixSuffix.Rule12("abcdef")); 1470 1471 assert(result.successful); 1472 assert(result.begin == 0); 1473 assert(result.end == 6); 1474 assert(result.matches == ["abc", "def"], "All matches are there."); 1475 assert(result.children.length == 0); 1476 1477 1478 // Comparing standard and fused rules 1479 1480 result = PrefixSuffix.decimateTree(PrefixSuffix.Rule3("abcabcabc")); 1481 1482 assert(result.successful); 1483 assert(result.begin == 0); 
1484 assert(result.end == "abcabcabc".length); 1485 assert(result.matches == ["abc", "abc", "abc"]); 1486 assert(result.children.length == 3, "Standard '*': 3 children."); 1487 assert(result.children[0].name == "PrefixSuffix.ABC"); 1488 assert(result.children[1].name == "PrefixSuffix.ABC"); 1489 assert(result.children[2].name == "PrefixSuffix.ABC"); 1490 1491 result = PrefixSuffix.decimateTree(PrefixSuffix.Rule4("abcabcabc")); 1492 1493 assert(result.successful); 1494 assert(result.begin == 0); 1495 assert(result.end == "abcabcabc".length); 1496 assert(result.matches == ["abc", "abc", "abc"]); 1497 assert(result.children.length == 0, "All literals are discarded by the tree decimation."); 1498 1499 result = PrefixSuffix.decimateTree(PrefixSuffix.Rule13("abcabcabc")); 1500 1501 assert(result.successful); 1502 assert(result.begin == 0); 1503 assert(result.end == "abcabcabc".length); 1504 assert(result.matches == ["abcabcabc"], "All matches are fused."); 1505 assert(result.children.length == 0, "Children are discarded by '~'."); 1506 1507 result = PrefixSuffix.decimateTree(PrefixSuffix.Rule14("abcabcabc")); 1508 1509 assert(result.successful); 1510 assert(result.begin == 0); 1511 assert(result.end == "abcabcabc".length); 1512 assert(result.matches == ["abcabcabc"], "All matches are there."); 1513 assert(result.children.length == 0, "Children are discarded by '~'."); 1514 1515 // Testing the keep (^) operator 1516 1517 result = PrefixSuffix.decimateTree(PrefixSuffix.Rule15("abcdef")); 1518 1519 assert(result.successful); 1520 assert(result.begin == 0); 1521 assert(result.end == "abcdef".length); 1522 assert(result.matches == ["abc", "def"], "All matches are there."); 1523 assert(result.children.length == 2, "Both children are kept by '^'."); 1524 assert(result.children[0].name == `literal!("abc")`, 1525 `literal!("abc") is kept even though it's not part of the grammar rules.`); 1526 assert(result.children[1].name == `literal!("def")`, 1527 `literal!("def") is kept even 
though it's not part of the grammar rules.`);

    // Comparing standard and propagated (%) rules.
    // Rule16 <- ABC Rule1 DEF: Rule1 stays a node of its own, holding its two children.
    result = PrefixSuffix.decimateTree(PrefixSuffix.Rule16("abcabcdefdef"));

    assert(result.successful);
    assert(result.begin == 0);
    assert(result.end == "abcabcdefdef".length);
    assert(result.matches == ["abc", "abc", "def", "def"], "All matches are there.");
    assert(result.children.length == 3, "Standard rule: three children.");
    assert(result.children[0].name == "PrefixSuffix.ABC");
    assert(result.children[1].name == "PrefixSuffix.Rule1");
    assert(result.children[1].children.length == 2, "Rule1 creates two children.");
    // The names must be compared with `==`: in `assert(a, b)` the second
    // argument is only the failure message, so nothing would be checked.
    assert(result.children[1].children[0].name == "PrefixSuffix.ABC");
    assert(result.children[1].children[1].name == "PrefixSuffix.DEF");
    assert(result.children[2].name == "PrefixSuffix.DEF");

    // Rule17 <- ABC %Rule1 DEF: Rule1 is replaced by its own children.
    result = PrefixSuffix.decimateTree(PrefixSuffix.Rule17("abcabcdefdef"));

    // From (ABC, Rule1(ABC,DEF), DEF) to (ABC,ABC,DEF,DEF)
    assert(result.successful);
    assert(result.begin == 0);
    assert(result.end == "abcabcdefdef".length);
    assert(result.matches == ["abc", "abc", "def", "def"], "All matches are there.");
    assert(result.children.length == 4, "%-affected rule: four children.");
    // Every one of the four children is checked, including the last one.
    assert(result.children[0].name == "PrefixSuffix.ABC");
    assert(result.children[1].name == "PrefixSuffix.ABC");
    assert(result.children[2].name == "PrefixSuffix.DEF");
    assert(result.children[3].name == "PrefixSuffix.DEF");

    // Testing % and < together
    mixin(grammar(`
    PropTest:
    Rule1 < B C+
    Rule2 <- B (%C)+
    Rule3 < B (%C)+
    Rule4 < B %(D E)+

    B <- 'b'
    C <- D E
    D <- 'd'
    E <- 'e'
    `));

    // Without %, each C remains a single child: B followed by three C nodes.
    result = PropTest.decimateTree(PropTest.Rule1("bdedede"));
    assert(result.successful);
    assert(result.begin == 0);
    assert(result.end == "bdedede".length);
    assert(result.matches == ["b", "d", "e", "d",
"e", "d", "e"]); 1576 assert(result.children.length == 4, "b and de, de, de"); 1577 assert(result.children[0].name == "PropTest.B"); 1578 assert(result.children[1].name == "PropTest.C"); 1579 assert(result.children[2].name == "PropTest.C"); 1580 assert(result.children[3].name == "PropTest.C"); 1581 1582 result = PropTest.decimateTree(PropTest.Rule2("bdedede")); 1583 assert(result.successful); 1584 assert(result.begin == 0); 1585 assert(result.end == "bdedede".length); 1586 assert(result.matches == ["b", "d", "e", "d", "e", "d", "e"]); 1587 assert(result.children.length == 7, "b and (d and e), thrice."); 1588 assert(result.children[0].name == "PropTest.B"); 1589 assert(result.children[1].name == "PropTest.D"); 1590 assert(result.children[2].name == "PropTest.E"); 1591 assert(result.children[3].name == "PropTest.D"); 1592 assert(result.children[4].name == "PropTest.E"); 1593 assert(result.children[5].name == "PropTest.D"); 1594 assert(result.children[6].name == "PropTest.E"); 1595 1596 result = PropTest.decimateTree(PropTest.Rule3("bdedede")); 1597 assert(result.successful); 1598 assert(result.begin == 0); 1599 assert(result.end == "bdedede".length); 1600 assert(result.matches == ["b", "d", "e", "d", "e", "d", "e"]); 1601 assert(result.children.length == 7, "b and (d and e), thrice."); 1602 assert(result.children[0].name == "PropTest.B"); 1603 assert(result.children[1].name == "PropTest.D"); 1604 assert(result.children[2].name == "PropTest.E"); 1605 assert(result.children[3].name == "PropTest.D"); 1606 assert(result.children[4].name == "PropTest.E"); 1607 assert(result.children[5].name == "PropTest.D"); 1608 assert(result.children[6].name == "PropTest.E"); 1609 1610 result = PropTest.decimateTree(PropTest.Rule3(" b de de de ")); 1611 assert(result.successful); 1612 assert(result.begin == 0); 1613 assert(result.end == " b de de de ".length); 1614 assert(result.matches == ["b", "d", "e", "d", "e", "d", "e"]); 1615 assert(result.children.length == 7, "b and (d and e), 
thrice."); 1616 assert(result.children[0].name == "PropTest.B"); 1617 assert(result.children[1].name == "PropTest.D"); 1618 assert(result.children[2].name == "PropTest.E"); 1619 assert(result.children[3].name == "PropTest.D"); 1620 assert(result.children[4].name == "PropTest.E"); 1621 assert(result.children[5].name == "PropTest.D"); 1622 assert(result.children[6].name == "PropTest.E"); 1623 1624 result = PropTest.decimateTree(PropTest.Rule4("bdedede")); 1625 assert(result.successful); 1626 assert(result.begin == 0); 1627 assert(result.end == "bdedede".length); 1628 assert(result.matches == ["b", "d", "e", "d", "e", "d", "e"]); 1629 assert(result.children.length == 7, "b and (d and e), thrice."); 1630 assert(result.children[0].name == "PropTest.B"); 1631 assert(result.children[1].name == "PropTest.D"); 1632 assert(result.children[2].name == "PropTest.E"); 1633 assert(result.children[3].name == "PropTest.D"); 1634 assert(result.children[4].name == "PropTest.E"); 1635 assert(result.children[5].name == "PropTest.D"); 1636 assert(result.children[6].name == "PropTest.E"); 1637 1638 result = PropTest.decimateTree(PropTest.Rule4(" b de de de ")); 1639 assert(result.successful); 1640 assert(result.begin == 0); 1641 assert(result.end == " b de de de ".length); 1642 assert(result.matches == ["b", "d", "e", "d", "e", "d", "e"]); 1643 assert(result.children.length == 7, "b and (d and e), thrice."); 1644 assert(result.children[0].name == "PropTest.B"); 1645 assert(result.children[1].name == "PropTest.D"); 1646 assert(result.children[2].name == "PropTest.E"); 1647 assert(result.children[3].name == "PropTest.D"); 1648 assert(result.children[4].name == "PropTest.E"); 1649 assert(result.children[5].name == "PropTest.D"); 1650 assert(result.children[6].name == "PropTest.E"); 1651 1652 // More than one prefix, more than one suffixes 1653 mixin(grammar(` 1654 MoreThanOne: 1655 Rule1 <- ~:("abc"*) # Two prefixes (nothing left for ~, after :) 1656 Rule2 <- :~("abc"*) # Two prefixes (: 
will discard everything ~ did) 1657 Rule3 <- ;:~"abc" # Many prefixes 1658 Rule4 <- ~~~("abc"*) # Many fuses (no global effect) 1659 1660 Rule5 <- "abc"+* # Many suffixes 1661 Rule6 <- "abc"+? # Many suffixes 1662 1663 Rule7 <- !!"abc" # Double negation, equivalent to '&' 1664 Rule8 <- &"abc" 1665 1666 Rule9 <- ^^"abc"+* # Many suffixes and prefixes 1667 `)); 1668 1669 assert(is(MoreThanOne), "This compiles all right."); 1670 1671 result = MoreThanOne.decimateTree(MoreThanOne.Rule1("abcabcabc")); 1672 assert(result.successful); 1673 assert(result.begin == 0); 1674 assert(result.end == "abcabcabc".length); 1675 assert(result.matches is null); 1676 assert(result.children.length == 0); 1677 1678 result = MoreThanOne.decimateTree(MoreThanOne.Rule2("abcabcabc")); 1679 assert(result.successful); 1680 assert(result.begin == 0); 1681 assert(result.end == "abcabcabc".length); 1682 assert(result.matches is null); 1683 assert(result.children.length == 0); 1684 1685 result = MoreThanOne.decimateTree(MoreThanOne.Rule3("abcabcabc")); 1686 assert(result.successful); 1687 assert(result.begin == 0); 1688 assert(result.end == "abc".length); 1689 assert(result.matches is null); 1690 assert(result.children.length == 0); 1691 1692 result = MoreThanOne.decimateTree(MoreThanOne.Rule4("abcabcabc")); 1693 assert(result.successful); 1694 assert(result.begin == 0); 1695 assert(result.end == "abcabcabc".length); 1696 assert(result.matches == ["abcabcabc"]); 1697 assert(result.children.length == 0); 1698 1699 // +* and +? 
1700 result = MoreThanOne.decimateTree(MoreThanOne.Rule5("abcabcabc")); 1701 assert(result.successful); 1702 assert(result.begin == 0); 1703 assert(result.end == "abcabcabc".length); 1704 assert(result.matches == ["abc", "abc", "abc"]); 1705 assert(result.children.length == 0); 1706 1707 result = MoreThanOne.decimateTree(MoreThanOne.Rule6("abcabcabc")); 1708 assert(result.successful); 1709 assert(result.begin == 0); 1710 assert(result.end == "abcabcabc".length); 1711 assert(result.matches == ["abc", "abc", "abc"]); 1712 assert(result.children.length == 0); 1713 1714 // !! is equivalent to & 1715 result = MoreThanOne.decimateTree(MoreThanOne.Rule7("abc")); 1716 assert(result.successful); 1717 assert(result.begin == 0); 1718 assert(result.end == 0); 1719 assert(result.matches is null); 1720 assert(result.children.length == 0); 1721 1722 result = MoreThanOne.decimateTree(MoreThanOne.Rule8("abc")); 1723 assert(result.successful); 1724 assert(result.begin == 0); 1725 assert(result.end == 0); 1726 assert(result.matches is null); 1727 assert(result.children.length == 0); 1728 1729 // ^^"abc"+* 1730 result = MoreThanOne.decimateTree(MoreThanOne.Rule9("abcabcabc")); 1731 assert(result.successful); 1732 assert(result.begin == 0); 1733 assert(result.end == 9); 1734 assert(result.matches == ["abc", "abc", "abc"]); 1735 assert(result.children.length == 1); 1736 assert(result.name == `MoreThanOne.Rule9`); 1737 assert(result.children[0].name == `keep!(zeroOrMore!(oneOrMore!(literal!("abc"))))`); 1738 assert(result.children[0].children.length == 1); 1739 assert(result.children[0].children[0].name == `zeroOrMore!(oneOrMore!(literal!("abc")))`); 1740 assert(result.children[0].children[0].children.length == 1); 1741 assert(result.children[0].children[0].children[0].name == `oneOrMore!(literal!("abc"))`); 1742 assert(result.children[0].children[0].children[0].children.length == 3); 1743 assert(result.children[0].children[0].children[0].children[0].name == `literal!("abc")`); 1744 
assert(result.children[0].children[0].children[0].children[1].name == `literal!("abc")`);
    assert(result.children[0].children[0].children[0].children[2].name == `literal!("abc")`);
}

// Issue #88: the discard prefix applied to a whole choice (rule w) and
// applied to the first alternative only (rule wx) are compared on the same
// whitespace-separated inputs.
unittest // Issue #88 unit test
{
    enum issue88Grammar = `
    P:
    Rule1 <- (w 'a' w)*
    Rule2 <- (wx 'a' wx)*
    w <- :(' ' / '\n' / '\t' / '\r')*
    wx <- (:' ' / '\n' / '\t' / '\r')*
    `;

    mixin(grammar(issue88Grammar));

    // Spaces only: both rules must produce trees that softCompare as equal.
    string text = " a a a a a a ";

    ParseTree viaW = P.decimateTree(P.Rule1(text));
    ParseTree viaWx = P.decimateTree(P.Rule2(text));
    assert(softCompare(viaW,viaWx));

    // Mixed blanks, written as literal, octal and hexadecimal escapes.
    text = " a\n \011\012 a\n\t a\x09\x0A a ";
    viaW = P.decimateTree(P.Rule1(text));
    viaWx = P.decimateTree(P.Rule2(text));
    assert(viaW.end == text.length); // Parse the entire string
    assert(viaWx.end == text.length);
}

// A choice may begin with a '/' separator, at rule level or inside parentheses.
unittest // Leading alternation
{
    mixin(grammar(`
    LeadingAlternation:
    Rule1 <- / 'a'
    Rule2 <- / 'a' / 'b'
    Rule3 <- (/ 'a' / 'b')
    `));

    // Common expectations: success, the single character is consumed from
    // position 0 and is the only match.
    static void expectSingleChar(ParseTree tree, string expected)
    {
        assert(tree.successful);
        assert(tree.begin == 0);
        assert(tree.end == 1);
        assert(tree.matches == [expected]);
    }

    expectSingleChar(LeadingAlternation.decimateTree(LeadingAlternation.Rule1("a")), "a");
    expectSingleChar(LeadingAlternation.decimateTree(LeadingAlternation.Rule2("b")), "b");
    expectSingleChar(LeadingAlternation.decimateTree(LeadingAlternation.Rule3("b")), "b");
}

unittest // Extended chars tests
{
    mixin(grammar("
    Chars:
    # Lone chars
    Rule1 <- '\t' '0' 'A' '~'
    Rule2 <- '\11' '\60' '\101' '\176' # \t 0 A ~ in octal
    Rule3 <- '\011' '\060' '\101' '\176' # \t 0 A ~ in octal (prefix 0)
    Rule4 <- '\x09'
'\x30' '\x41' '\x7E' # \t 0 A ~ in hexadecimal 1810 Rule5 <- '\u0009' '\u0030' '\u0041' '\u007E' # \t 0 A ~ in unicode 1811 Rule6 <- '\U00000009' '\U00000030' '\U00000041' '\U0000007E' # \t 0 A ~ in unicode 1812 1813 # Strings literals 1814 Rule7 <- '\t0A~' 1815 Rule8 <- '\11\60\101\176' # \t 0 A ~ in octal 1816 Rule9 <- '\011\060\101\176' # \t 0 A ~ in octal (prefix 0) 1817 Rule10 <- '\x09\x30\x41\x7E' # \t 0 A ~ in hexadecimal 1818 Rule11 <- '\u0009\u0030\u0041\u007E' # \t 0 A ~ in unicode 1819 Rule12 <- '\U00000009\U00000030\U00000041' '\U0000007E' # \t 0 A ~ in unicode 1820 1821 # Outside Latin 1822 Rule13 <- '\u03B1\u03B9\u03C6\u03B1' # alpha in greek 1823 Rule14 <- 'αιφα' 1824 1825 # Hello's 1826 English <- 'Hello' 1827 Russian <- 'Здравствуйте' 1828 Arabic <- 'السلام عليك' 1829 Chinese <- '你好' 1830 Japanese <- '今日は' 1831 Spanish <- '¡Hola!' 1832 ")); 1833 1834 1835 assert(Chars.decimateTree(Chars.Rule1("\t0A~")).successful); 1836 assert(Chars.decimateTree(Chars.Rule2("\t0A~")).successful); 1837 assert(Chars.decimateTree(Chars.Rule3("\t0A~")).successful); 1838 assert(Chars.decimateTree(Chars.Rule4("\t0A~")).successful); 1839 assert(Chars.decimateTree(Chars.Rule5("\t0A~")).successful); 1840 assert(Chars.decimateTree(Chars.Rule6("\t0A~")).successful); 1841 1842 assert(Chars.decimateTree(Chars.Rule7("\t0A~")).successful); 1843 assert(Chars.decimateTree(Chars.Rule8("\t0A~")).successful); 1844 assert(Chars.decimateTree(Chars.Rule9("\t0A~")).successful); 1845 assert(Chars.decimateTree(Chars.Rule10("\t0A~")).successful); 1846 assert(Chars.decimateTree(Chars.Rule11("\t0A~")).successful); 1847 assert(Chars.decimateTree(Chars.Rule12("\t0A~")).successful); 1848 1849 assert(Chars.decimateTree(Chars.Rule13("\u03B1\u03B9\u03C6\u03B1")).successful); 1850 assert(Chars.decimateTree(Chars.Rule13("αιφα")).successful); 1851 1852 assert(Chars.decimateTree(Chars.Rule14("\u03B1\u03B9\u03C6\u03B1")).successful); 1853 assert(Chars.decimateTree(Chars.Rule14("αιφα")).successful); 1854 
assert(Chars.decimateTree(Chars.English("Hello")).successful);
    assert(Chars.decimateTree(Chars.Russian("Здравствуйте")).successful);
    assert(Chars.decimateTree(Chars.Arabic("السلام عليك")).successful);
    assert(Chars.decimateTree(Chars.Chinese("你好")).successful);
    // The input is exactly the literal of the Japanese rule; a trailing
    // character would be left unparsed and go unnoticed by `.successful`.
    assert(Chars.decimateTree(Chars.Japanese("今日は")).successful);
    assert(Chars.decimateTree(Chars.Spanish("¡Hola!")).successful);
}

// Character classes written with plain, octal, hexadecimal and Unicode
// escapes must all describe the same range a-z; Rule6 checks a class made
// only of escaped special characters.
unittest // Extended char range tests
{
    import std.conv;

    mixin(grammar(`
    CharRanges:
    Rule1 <- [a-z]
    Rule2 <- [\141-\172] # a-z in octal
    Rule3 <- [\x61-\x7A] # a-z in hexadecimal
    Rule4 <- [\u0061-\u007A] # a-z in UTF16
    Rule5 <- [\U00000061-\U0000007A] # a-z in UTF32

    Rule6 <- [\-\[\]\\\'\"\n\r\t]
    `));

    string lower = "abcdefghijklmnopqrstuvwxyz";
    string upper = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
    string digits = "0123456789";
    string others = "?./,;:!*&()[]<>";
    string escapes = "-[]\\\'\"\n\r\t";

    // Every lowercase letter is accepted by each spelling of the range.
    foreach(dchar c; lower)
    {
        assert(CharRanges.Rule1(to!string(c)).successful);
        assert(CharRanges.Rule2(to!string(c)).successful);
        assert(CharRanges.Rule3(to!string(c)).successful);
        assert(CharRanges.Rule4(to!string(c)).successful);
        assert(CharRanges.Rule5(to!string(c)).successful);
    }

    // Anything outside a-z is rejected by each spelling of the range.
    foreach(dchar c; upper ~ digits ~ others)
    {
        assert(!CharRanges.Rule1(to!string(c)).successful);
        assert(!CharRanges.Rule2(to!string(c)).successful);
        assert(!CharRanges.Rule3(to!string(c)).successful);
        assert(!CharRanges.Rule4(to!string(c)).successful);
        assert(!CharRanges.Rule5(to!string(c)).successful);
    }

    // Each escaped character listed in Rule6 is matched individually.
    foreach(dchar c; escapes)
    {
        assert(CharRanges.Rule6(to!string(c)).successful);
    }
}

unittest // qualified names for rules
{
    mixin(grammar(`
    First:
    Rule1 <- "abc"
    Rule2 <- "def"
    `));

    mixin(grammar(`
    Second:
Rule1 <- First.Rule1 1919 Rule2 <- First.Rule2 1920 Rule3 <- pegged.peg.list(pegged.peg.identifier, ',') 1921 `)); 1922 1923 // Equal on success 1924 ParseTree reference = First("abc"); 1925 ParseTree result = Second("abc"); 1926 assert(reference.successful); 1927 assert(result.successful); 1928 assert(result.matches == reference.matches); 1929 assert(result.begin == reference.begin); 1930 assert(result.end == reference.end); 1931 1932 // Equal on failure 1933 reference = First("def"); 1934 result = Second("def"); 1935 assert(!reference.successful); 1936 assert(!result.successful); 1937 assert(result.matches == reference.matches); 1938 assert(result.begin == reference.begin); 1939 assert(result.end == reference.end); 1940 1941 // Second rule test 1942 reference = First.Rule2("def"); 1943 result = Second.Rule2("def"); 1944 assert(reference.successful); 1945 assert(result.matches == reference.matches); 1946 assert(result.begin == reference.begin); 1947 assert(result.end == reference.end); 1948 1949 // External (predefined) rule call: 1950 result = Second.Rule3("foo,bar,baz"); 1951 assert(result.successful); 1952 assert(result.begin == 0); 1953 assert(result.end == "foo,bar,baz".length); 1954 assert(result.matches == ["foo", "bar", "baz"]); 1955 } 1956 1957 unittest // Parameterized rules 1958 { 1959 mixin(grammar(` 1960 Parameterized: 1961 # Different arities 1962 Rule1(A) <- A+ 1963 Rule1(A,B) <- (A B)+ 1964 Rule1(A,B,C) <- (A B C)+ 1965 1966 # Inner call 1967 Call1 <- Rule1('a') 1968 Call2 <- Rule1('a','b') 1969 Call3 <- Rule1('a','b','c') 1970 Call4(A) <- Rule1(A, A, A) 1971 Call5(A) <- Rule1('a', A, 'c') 1972 1973 # Default values 1974 Rule2(A = 'a', B = 'b') <- (A B)+ 1975 1976 # Re-using the parameters 1977 Rule3(A,B) <- A B B A # Money money money! 
1978 1979 # The standard parameterized rule 1980 List(Elem, Sep) < Elem (:Sep Elem)* 1981 1982 # Another common PEG pattern 1983 AllUntil(End) <~ (!End .)* :End 1984 `)); 1985 1986 alias Parameterized.Rule1!(literal!"a") R1; 1987 alias oneOrMore!(literal!"a") Ref1; 1988 1989 ParseTree reference = Ref1("aaaa"); 1990 ParseTree result = R1("aaaa"); 1991 1992 assert(result.name == `Parameterized.Rule1!(literal!("a"))`); 1993 assert(reference.successful); 1994 assert(result.successful); 1995 assert(result.matches == reference.matches); 1996 assert(result.begin == reference.begin); 1997 assert(result.end == reference.end); 1998 1999 result = Parameterized.Call1("aaaa"); 2000 2001 assert(result.name == `Parameterized.Call1`); 2002 assert(result.successful); 2003 assert(result.matches == reference.matches); 2004 assert(result.begin == reference.begin); 2005 assert(result.end == reference.end); 2006 2007 alias Parameterized.Rule1!(literal!"abc") R1long; 2008 alias oneOrMore!(literal!"abc") Ref1long; 2009 2010 reference = Ref1long("abcabcabcabc"); 2011 result = R1long("abcabcabcabc"); 2012 2013 assert(result.name == `Parameterized.Rule1!(literal!("abc"))`); 2014 assert(reference.successful); 2015 assert(result.successful); 2016 assert(result.matches == reference.matches); 2017 assert(result.begin == reference.begin); 2018 assert(result.end == reference.end); 2019 2020 alias Parameterized.Rule1!(literal!"a", literal!"b") R2; 2021 alias oneOrMore!(and!(literal!"a", literal!"b")) Ref2; 2022 2023 reference = Ref2("abababab"); 2024 result = R2("abababab"); 2025 2026 assert(result.name == `Parameterized.Rule1!(literal!("a"), literal!("b"))`); 2027 assert(reference.successful); 2028 assert(result.successful); 2029 assert(result.matches == reference.matches); 2030 assert(result.begin == reference.begin); 2031 assert(result.end == reference.end); 2032 2033 result = Parameterized.Call2("abababab"); 2034 2035 assert(result.name == `Parameterized.Call2`); 2036 assert(result.successful); 
2037 assert(result.matches == reference.matches); 2038 assert(result.begin == reference.begin); 2039 assert(result.end == reference.end); 2040 2041 alias Parameterized.Rule1!(literal!"a", literal!"b", literal!"c") R3; 2042 alias oneOrMore!(and!(literal!"a", literal!"b", literal!"c")) Ref3; 2043 2044 reference = Ref3("abcabcabcabc"); 2045 result = R3("abcabcabcabc"); 2046 2047 assert(result.name == `Parameterized.Rule1!(literal!("a"), literal!("b"), literal!("c"))`); 2048 assert(reference.successful); 2049 assert(result.successful); 2050 assert(result.matches == reference.matches); 2051 assert(result.begin == reference.begin); 2052 assert(result.end == reference.end); 2053 2054 result = Parameterized.Call3("abcabcabcabc"); 2055 2056 assert(result.name == `Parameterized.Call3`); 2057 assert(result.successful); 2058 assert(result.matches == reference.matches); 2059 assert(result.begin == reference.begin); 2060 assert(result.end == reference.end); 2061 2062 result = Parameterized.Call4!(literal!"A")("AAAAAA"); 2063 2064 assert(result.name == `Parameterized.Call4!(literal!("A"))`); 2065 assert(result.successful); 2066 assert(result.begin == 0); 2067 assert(result.end == "AAAAAA".length); 2068 assert(result.matches == ["A","A","A","A","A","A"]); 2069 2070 result = Parameterized.Call5!(literal!"A")("aAcaAc"); 2071 2072 assert(result.name == `Parameterized.Call5!(literal!("A"))`); 2073 assert(result.successful); 2074 assert(result.begin == 0); 2075 assert(result.end == "aAcaAc".length); 2076 assert(result.matches == ["a","A","c","a","A","c"]); 2077 2078 // Default parameters 2079 alias Parameterized.Rule2!() R2_1; 2080 alias Parameterized.Rule2!(literal!"a") R2_2; 2081 alias Parameterized.Rule2!(literal!"a", literal!"b") R2_3; 2082 2083 assert(R2_1("ababab").successful); 2084 assert(R2_2("ababab").successful); 2085 assert(R2_3("ababab").successful); 2086 2087 // Re-using a parameter (A B B A) 2088 result = Parameterized.Rule3!(literal!"A", literal!"B")("ABBA"); 2089 2090 
assert(result.name == `Parameterized.Rule3!(literal!("A"), literal!("B"))`); 2091 assert(result.successful); 2092 assert(result.begin == 0); 2093 assert(result.end == "ABBA".length); 2094 assert(result.matches == ["A", "B", "B", "A"]); 2095 2096 alias Parameterized.List!(identifier, literal!",") IdList; // Identifiers separated by ',' 2097 alias Parameterized.List!(IdList, literal!";") IdList2; // IdList's separated by ';' 2098 2099 result = IdList("foo, bar, baz"); 2100 2101 assert(result.name == `Parameterized.List!(identifier, literal!(","))`); 2102 assert(result.successful); 2103 assert(result.begin == 0); 2104 assert(result.end == "foo, bar, baz".length); 2105 assert(result.matches == ["foo", "bar", "baz"]); 2106 2107 result = Parameterized.decimateTree(IdList2("foo,bar,baz; abc, def, ghi")); 2108 2109 assert(result.name == `Parameterized.List!(Parameterized.List!(identifier, literal!(",")), literal!(";"))`); 2110 assert(result.successful); 2111 assert(result.begin == 0); 2112 assert(result.end == "foo,bar,baz; abc, def, ghi".length); 2113 assert(result.matches == ["foo", "bar", "baz", "abc", "def", "ghi"]); 2114 2115 assert(result.children.length == 2); 2116 2117 assert(result.children[0].name == `Parameterized.List!(identifier, literal!(","))`); 2118 assert(result.children[0].matches == ["foo", "bar", "baz"]); 2119 2120 assert(result.children[1].name == `Parameterized.List!(identifier, literal!(","))`); 2121 assert(result.children[1].matches == ["abc", "def", "ghi"]); 2122 2123 alias Parameterized.AllUntil!(or!(endOfLine)) Line; 2124 alias zeroOrMore!(Line) Lines; 2125 2126 string input = 2127 "This is an input text. 2128 Here is another line. 2129 2130 And the last one. 
2131 "; 2132 2133 result = Lines(input); 2134 assert(result.successful); 2135 assert(result.children.length == 4); 2136 assert(result.children[0].matches == ["This is an input text."]); 2137 assert(result.children[1].matches == ["Here is another line."]); 2138 assert(result.children[2].matches is null); 2139 assert(result.children[3].matches == [" And the last one."]); 2140 2141 // Parameterized grammar test 2142 mixin(grammar(` 2143 Arithmetic(Atom) : 2144 Expr < Factor (('+'/'-') Factor)* 2145 Factor < Primary (('*'/'/') Primary)* 2146 Primary < '(' Expr ')' / '-' Expr / Atom 2147 `)); 2148 2149 alias Arithmetic!(identifier) Arith1; 2150 alias Arithmetic!(or!(identifier, digits)) Arith2; 2151 2152 assert(Arith1("x + y*z/foo").successful); 2153 assert(Arith2("x + y*z/foo").successful); 2154 2155 assert(!Arith1("1 + 2*3/456").successful); 2156 assert(Arith2("1 + 2*3/456").successful); 2157 assert(Arith2("1 + 2*3/z").successful); 2158 } 2159 2160 version(unittest) // Semantic actions 2161 { 2162 P doubler(P)(P p) 2163 { 2164 if (p.successful) 2165 p.matches ~= p.matches; 2166 return p; 2167 } 2168 } 2169 2170 unittest // Semantic actions, testing { foo } and { foo, bar, baz } 2171 { 2172 mixin(grammar(` 2173 Semantic: 2174 Rule1 <- 'a' {doubler} 2175 Rule2 <- 'b' {doubler, doubler} 2176 Rule3 <- 'b' {doubler} {doubler} # Same as Rule2 2177 Rule4 <- 'b' {doubler, doubler, doubler} 2178 Rule5 <- 'a' {doubler} 'b' 'c'{doubler} 2179 Rule6 <{doubler} 'a' # Rule Level actions 2180 Rule7 <{doubler} 'a' 'b' {doubler} # Rule Level actions 2181 `)); 2182 2183 ParseTree result = Semantic.decimateTree(Semantic.Rule1("a")); 2184 assert(result.successful); 2185 assert(result.matches == ["a", "a"]); 2186 2187 result = Semantic.decimateTree(Semantic.Rule1("b")); 2188 assert(!result.successful); 2189 assert(result.matches == [`"a"`]); 2190 2191 result = Semantic.decimateTree(Semantic.Rule2("b")); 2192 assert(result.successful); 2193 assert(result.matches == ["b", "b", "b", "b"]); 
2194 2195 result = Semantic.decimateTree(Semantic.Rule3("b")); 2196 assert(result.successful); 2197 assert(result.matches == ["b", "b", "b", "b"]); 2198 2199 result = Semantic.decimateTree(Semantic.Rule4("b")); 2200 assert(result.successful); 2201 assert(result.matches == ["b", "b", "b", "b", "b", "b", "b", "b"]); 2202 2203 result = Semantic.decimateTree(Semantic.Rule5("abc")); 2204 assert(result.successful); 2205 assert(result.matches == ["a", "a", "b", "c", "c"]); 2206 2207 result = Semantic.decimateTree(Semantic.Rule6("abc")); 2208 assert(result.successful); 2209 assert(result.matches == ["a", "a"]); 2210 2211 result = Semantic.decimateTree(Semantic.Rule7("abc")); 2212 assert(result.successful); 2213 assert(result.matches == ["a", "b", "b", "a", "b", "b"]); 2214 2215 } 2216 2217 version(unittest) 2218 { 2219 P foo(P)(P p) { return p;} // for testing actions 2220 2221 void badGrammar(string s)() 2222 { 2223 assert(!__traits(compiles, {mixin(grammar(s));}), "This should fail: " ~ s); 2224 } 2225 2226 void goodGrammar(string s)() 2227 { 2228 assert(__traits(compiles, {mixin(grammar(s));}), "This should work: " ~ s); 2229 } 2230 } 2231 2232 2233 /+ Failed (commit 4cd177a), DMD crashed. Too many grammar istantiations, I guess. 2234 unittest // failure cases: unnamed grammar, no-rule grammar, syntax errors, etc. 
2235 { 2236 // No grammar 2237 badGrammar!""; 2238 2239 // Name without colon nor rules 2240 badGrammar!"Name"; 2241 2242 // No rule 2243 badGrammar!"Name:"; 2244 badGrammar!"Name1 Name2"; 2245 2246 // Incomplete and badly formulated rules 2247 badGrammar!"Name: 2248 Rule1"; 2249 badGrammar!"Name: 2250 Rule1 Rule2"; 2251 badGrammar!"Name 2252 Rule1 Rule2"; 2253 badGrammar!"Name: 2254 Rule1 <-"; 2255 badGrammar!"Name: 2256 Rule1 <~"; 2257 badGrammar!"Name: 2258 Rule1 < "; 2259 badGrammar!"Name: 2260 Rule1 <%"; 2261 badGrammar!"Name: 2262 Rule1 <;"; 2263 badGrammar!"Name 2264 Rule1 <- <-"; 2265 2266 // Non-closing parenthesis, brackets and quotes 2267 badGrammar!"Name: 2268 Rule1 <- ('a'"; 2269 badGrammar!"Name: 2270 Rule1 <- 'a')"; 2271 badGrammar!"Name: 2272 Rule1 <- ('a'))"; 2273 badGrammar!"Name: 2274 Rule1 <- (('a')"; 2275 badGrammar!"Name: 2276 Rule1 <- 'a"; 2277 badGrammar!"Name: 2278 Rule1 <- a'"; 2279 badGrammar!`Name: 2280 Rule1 <- "a`; 2281 badGrammar!`Name: 2282 Rule1 <- a"`; 2283 badGrammar!`Name: 2284 Rule1 <- 'a"`; 2285 badGrammar!`Name: 2286 Rule1 <- "a'`; 2287 badGrammar!"Name: 2288 Rule1 <- [a"; 2289 badGrammar!"Name: 2290 Rule1 <- a]"; 2291 badGrammar!"Name: 2292 Rule1 <- [a]]"; 2293 // But <- [[a] is legal: matches '[' or 'a' 2294 goodGrammar!"Name: 2295 Rule1 <- [[a]"; 2296 2297 // Bad prefix/postfix 2298 badGrammar!"Name: 2299 Rule1 <- 'a'~"; 2300 badGrammar!"Name: 2301 Rule1 <- 'a'%"; 2302 badGrammar!"Name: 2303 Rule1 <- 'a'!"; 2304 badGrammar!"Name: 2305 Rule1 <- 'a'&"; 2306 badGrammar!"Name: 2307 Rule1 <- 'a';"; 2308 badGrammar!"Name: 2309 Rule1 <- *'a'"; 2310 badGrammar!"Name: 2311 Rule1 <- +'a'"; 2312 badGrammar!"Name: 2313 Rule1 <- ?'a'"; 2314 badGrammar!"Name: 2315 Rule1 <- 'a' {}"; 2316 // Foo is defined in a version(unittest) block 2317 badGrammar!"Name: 2318 Rule1 <- 'a' { foo"; 2319 badGrammar!"Name: 2320 Rule1 <- 'a' foo}"; 2321 badGrammar!"Name: 2322 Rule1 <- 'a' {foo}}"; // closing } 2323 badGrammar!"Name: 2324 Rule1 <- 'a' 
{{foo}"; // opening { 2325 badGrammar!"Name: 2326 Rule1 <- 'a' {foo,}"; // bad comma 2327 badGrammar!"Name: 2328 Rule1 <- 'a' {,foo}"; 2329 badGrammar!"Name: 2330 Rule1 <- {foo}"; // no rule before {}'s 2331 // DMD Bug :-( 2332 /+glue.c line 1150 dmd::virtual unsigned int Type::totym():Assertion `0' failed. 2333 badGrammar!"Name: 2334 Rule1 <- 'a' {bar}"; // bar not defined 2335 +/ 2336 2337 // choice ('/') syntax errors 2338 badGrammar!"Name: 2339 Rule1 <- 'a' /"; 2340 // But: <- / 'a' is legal (it's equivalent to: <- 'a') 2341 goodGrammar!"Name: 2342 Rule1 <- / 'a'"; 2343 badGrammar!"Name: 2344 Rule1 <- /"; 2345 badGrammar!"Name: 2346 Rule1 <- 'a' / / 'b'"; 2347 } 2348 +/ 2349 2350 unittest // Memoization testing 2351 { 2352 enum gram1 = ` 2353 Test1: 2354 Rule1 <- Rule2* 'b' # To force a long evaluation of aaaa... 2355 / Rule2* 'c' # before finding a 'b' or a 'c' 2356 Rule2 <- 'a' 2357 `; 2358 2359 enum gram2 = ` 2360 Test2: 2361 Rule1 <- Rule2* 'b' # To force a long evaluation of aaaa... 
2362 / Rule2* 'c' # before finding a 'b' or a 'c' 2363 Rule2 <- 'a' 2364 `; 2365 2366 mixin(grammar!(Memoization.yes)(gram1)); 2367 mixin(grammar!(Memoization.no)(gram2)); 2368 2369 assert(is(typeof(Test1.memo))); 2370 assert(!is(typeof(Test2.memo))); 2371 2372 ParseTree result1 = Test1("aaaaaaac"); // Memo + Runtime 2373 enum ParseTree result2 = Test1("aaaaaaac"); // Memo + Compile-time 2374 ParseTree result3 = Test2("aaaaaaac"); // No memo + Runtime 2375 enum ParseTree result4 = Test2("aaaaaaac"); // No memo + Compile-time 2376 2377 assert(result1 == result2); 2378 assert(result3 == result4); 2379 2380 //Directly comparing result1 and result3 is not possible, for the grammar names are different 2381 assert(pegged.peg.softCompare(result1, result2)); 2382 assert(pegged.peg.softCompare(result1, result3)); 2383 assert(pegged.peg.softCompare(result1, result4)); 2384 } 2385 2386 unittest // Test lambda syntax in semantic actions 2387 { 2388 import std.array; 2389 2390 auto actions = [ 2391 2392 // Normal action 2393 `{ myAction }`, 2394 2395 // List of normal actions 2396 `{ myAction, myAction2 }`, 2397 2398 // Simple do-nothing lambda 2399 `{ (a) {return a;} }`, 2400 2401 // Simple do-nothing lambda with formatting 2402 `{ (a) { 2403 return a; 2404 }}`, 2405 2406 // Lambda with commas and spurious braces to try and confuse it 2407 `{ (a, b) { 2408 string s = "}"; 2409 if (a.successful,) { 2410 s ~= q"<}>"; 2411 } else { 2412 { s ~= q"<}>"; /* } */ } 2413 } 2414 return a;} }`, 2415 2416 // List of mixed actions and lambdas 2417 `{ myAction , (a) {return a;}, myAction2 , (a) { /* , } */ return a; } }`, 2418 2419 // Ambiguous lambda (not sure it would compile if used... 
but it should parse) 2420 `{ myAction, a => transform(a), myAction2 }`, 2421 2422 // Something more convoluted 2423 "{ myAction, (a) { 2424 /* block } comment with } braces */ 2425 string s = `} {` // wysiwyg string with braces and line comment with brace } 2426 if (s is null) { 2427 // } 2428 } else { // scopes 2429 { // nested scopes 2430 writeln(q{ \"}\" }); // token string with nested string with brace 2431 } 2432 } 2433 2434 string s = `1,2,3,4,5` // commas separate actions 2435 2436 /+ } Crazy nesting block comment 2437 /+ } +/ 2438 /+ } +/ 2439 /+ /+ } +/ } +/ 2440 } 2441 +/ 2442 2443 q\"< } <}> <> <<}<>}>> >\"; // delimited string 2444 q\"[ [}] [] [[[ } ]]] ]\"; // delimited string 2445 q\"( () }(}) (((}))}) )\"; // delimited string 2446 q\"{ {} {} {{{}}} }\"; // delimited string 2447 q{ class {} {} struct void \"}\" } /* another token string } */ 2448 2449 struct S 2450 { 2451 void foo() {} 2452 void bar() {} 2453 } 2454 2455 return a; 2456 }, myAction2 }", 2457 ]; 2458 2459 auto results = [ 2460 [`myAction`], 2461 [`myAction`,`myAction2`], 2462 [`(a) {return a;}`], 2463 [`(a) { 2464 return a; 2465 }`], 2466 [`(a, b) { 2467 string s = "}"; 2468 if (a.successful,) { 2469 s ~= q"<}>"; 2470 } else { 2471 { s ~= q"<}>"; /* } */ } 2472 } 2473 return a;}`], 2474 [`myAction`,`(a) {return a;}`,`myAction2`,`(a) { /* , } */ return a; }`], 2475 [`myAction`,`a => transform(a)`,`myAction2`], 2476 [`myAction`,"(a) { 2477 /* block } comment with } braces */ 2478 string s = `} {` // wysiwyg string with braces and line comment with brace } 2479 if (s is null) { 2480 // } 2481 } else { // scopes 2482 { // nested scopes 2483 writeln(q{ \"}\" }); // token string with nested string with brace 2484 } 2485 } 2486 2487 string s = `1,2,3,4,5` // commas separate actions 2488 2489 /+ } Crazy nesting block comment 2490 /+ } +/ 2491 /+ } +/ 2492 /+ /+ } +/ } +/ 2493 } 2494 +/ 2495 2496 q\"< } <}> <> <<}<>}>> >\"; // delimited string 2497 q\"[ [}] [] [[[ } ]]] ]\"; // delimited string 
2498 q\"( () }(}) (((}))}) )\"; // delimited string 2499 q\"{ {} {} {{{}}} }\"; // delimited string 2500 q{ class {} {} struct void \"}\" } /* another token string } */ 2501 2502 struct S 2503 { 2504 void foo() {} 2505 void bar() {} 2506 } 2507 2508 return a; 2509 }",`myAction2`] 2510 ]; 2511 2512 foreach(idx, act; actions) 2513 { 2514 auto grammar = `P: Rule <- RuleA ` ~ act ~ ` RuleA <- 'A'`; 2515 auto p = Pegged(grammar); 2516 2517 assert(p.successful); 2518 2519 auto action = p.children[0].children[1] 2520 .children[2] 2521 .children[0] 2522 .children[0] 2523 .children[0] 2524 .children[1]; 2525 2526 assert(action.matches.length == results[idx].length); 2527 foreach(i, s; action.matches) 2528 assert(strip(s) == results[idx][i], 2529 "\nGot |"~s~"|" ~ "\nNeeded: |"~results[idx][i]~"|"); 2530 } 2531 } 2532 2533 unittest 2534 { 2535 // Higher-level word boundary test. 2536 mixin(grammar(` 2537 TestGrammar: 2538 2539 Foo < '{' 'X' '}' 2540 Bar < 'A' 'B' 2541 2542 Spacing <: 2543 / blank+ 2544 / blank* wordBoundary 2545 / wordBoundary blank* 2546 / ![a-zA-Z] 2547 / !. 
2548 2549 `)); 2550 2551 auto pt = TestGrammar.Foo("{ X }"); 2552 assert(pt.successful); 2553 2554 pt = TestGrammar.Foo("{X}"); 2555 assert(pt.successful); 2556 2557 pt = TestGrammar.Bar("A B"); 2558 assert(pt.successful); 2559 2560 pt = TestGrammar.Bar("AB"); 2561 assert(!pt.successful); 2562 } 2563 2564 unittest // Issue #129 unit test 2565 { 2566 enum gram = ` 2567 G: 2568 A <- B 2569 B <- C 2570 C <- 'c' D 2571 D <- 'd' 2572 `; 2573 2574 mixin(grammar(gram)); 2575 2576 string input = "cd"; 2577 2578 ParseTree p = G(input); 2579 assert(p.successful); 2580 assert(p.name == "G"); 2581 assert(p.children.length == 1); 2582 assert(p.children[0].name == "G.A"); 2583 assert(p.children[0].children.length == 1); 2584 assert(p.children[0].children[0].name == "G.B"); 2585 assert(p.children[0].children[0].children.length == 1); 2586 assert(p.children[0].children[0].children[0].name == "G.C"); 2587 assert(p.children[0].children[0].children[0].children.length == 1); 2588 assert(p.children[0].children[0].children[0].children[0].name == "G.D"); 2589 assert(p.children[0].children[0].children[0].children[0].children.length == 0); 2590 }