1 /** 2 This module contains the engine behind Pegged, the expression templates building blocks to create a top-down 3 recursive-descent parser. 4 5 The terminals and non-terminals described here are meant to be used inside a Pegged grammar. 6 As such, they are a bit less user-friendly than what's output by pegged.grammar. 7 For example they take a ParseTree as input, not a string. 8 9 See the /docs directory for the full documentation as markdown files. 10 */ 11 module pegged.peg; 12 13 /* 14 NOTE: 15 Do not use the GrammarTester for unittesting in this module. This module needs 16 to be able to pass its unittests before the GrammarTester is even trustable. 17 Writing tests the long way is preferred here, as it will avoid the circular 18 dependency. 19 */ 20 21 import std.algorithm: equal, map, startsWith, max, countUntil, maxElement, filter; 22 import std.uni : isAlpha, icmp; 23 import std.array; 24 import std.conv; 25 import std..string: strip; 26 import std.typetuple; 27 28 // Returns quoted and escaped version of the input, but if the input is null, then return `"end of input"`. 29 package string stringified(string inp) @safe 30 { 31 import std.format : format; 32 33 if (inp is null) 34 return `"end of input"`; 35 36 string[1] shell = [inp]; 37 return "%(%s%)".format(shell); 38 } 39 40 @safe unittest // Run- & Compile-time. 41 { 42 static bool doTest() 43 { 44 const caseSuit1 = [ 45 "" : `""`, 46 47 "\0" : `"\0"`, 48 "\n" : `"\n"`, 49 "\t" : `"\t"`, 50 51 `"` : `"\""`, 52 "`" : q"("`")", 53 "'" : `"'"`, 54 55 "42" : `"42"`, 56 "\"some text\"\n" : `"\"some text\"\n"` 57 ]; 58 59 const caseSuit2 = [cast(string)null : `"end of input"`]; 60 61 const auto testSuit = [caseSuit1, caseSuit2]; 62 63 foreach (const ref suit; testSuit) 64 { 65 foreach (const ref value, const ref expected; suit) 66 { 67 import std.format : format; 68 69 const result = value.stringified; 70 assert(result == expected, "from %s expected %s but got %s".format(value, expected, result)); 71 } 72 } 73 74 return true; 75 } 76 77 // Result is always true, but here, we just force CTFE-mode. 78 static assert(doTest); // Compile-time. 79 doTest; // Run-time. 80 } 81 82 version (tracer) 83 { 84 import std.experimental.logger; 85 import std.algorithm.comparison : min; 86 87 // Function pointers. 88 private static bool function(string ruleName, const ref ParseTree p) traceConditionFunction; 89 private static bool delegate(string ruleName, const ref ParseTree p) traceConditionDelegate; 90 private static int traceLevel; 91 private static bool traceBlocked; 92 private static bool logTraceLevel = false; 93 94 private void incTraceLevel() 95 { 96 if (!__ctfe) 97 traceLevel++; 98 } 99 100 private void decTraceLevel() 101 { 102 if (!__ctfe) 103 traceLevel--; 104 } 105 106 private bool shouldTrace(string ruleName, const ref ParseTree p) 107 { 108 if (__ctfe || traceBlocked) 109 return false; 110 if (traceConditionDelegate != null) 111 return traceConditionDelegate(ruleName, p); 112 if (traceConditionFunction != null) 113 return traceConditionFunction(ruleName, p); 114 return false; 115 } 116 117 static this() 118 { 119 traceLevel = 0; 120 traceNothing(); 121 traceBlocked = false; 122 } 123 124 /++ Supply a function to dynamically switch tracing on and off based on the rule name. 125 + 126 + Example: 127 + --- 128 + /* Exclude build-in parsers, only trace parsers generated from MyGrammar. */ 129 + setTraceConditionFunction(ruleName => ruleName.startsWith("MyGrammar")); 130 + --- 131 +/ 132 void setTraceConditionFunction(bool delegate(string ruleName, const ref ParseTree p) condition) 133 { 134 traceConditionDelegate = condition; 135 traceConditionFunction = null; 136 } 137 138 /// ditto 139 void setTraceConditionFunction(bool function(string ruleName, const ref ParseTree p) condition) 140 { 141 traceConditionFunction = condition; 142 traceConditionDelegate = null; 143 } 144 145 /** Trace all rules. 146 * 147 * This can produce a lot of output. 148 */ 149 void traceAll() 150 { 151 setTraceConditionFunction(function(string ruleName, const ref ParseTree p) {return true;}); 152 } 153 154 /** Do not trace any rules. */ 155 void traceNothing() 156 { 157 traceConditionFunction = null; 158 traceConditionDelegate = null; 159 } 160 161 private string traceMsg(ParseTree p, string expression, string name) 162 { 163 import std.format; 164 Position pos = position(p); 165 enum inputLength = 15; 166 string result; 167 for (auto i = 1; i <= traceLevel; i++) 168 result ~= format("%d|", i); 169 result ~= format(" (l:%d, c:%d, i:%d)\t", pos.line + 1, pos.col + 1, pos.index) ~ 170 expression.stringified ~ " considering rule " ~ name.stringified ~ " on " ~ 171 p.input[p.end .. min(p.input.length, p.end + inputLength)].stringified ~ 172 (p.end + inputLength > p.input.length ? "" : "..."); 173 return result; 174 } 175 176 private string traceResultMsg(ParseTree p, string name) 177 { 178 import std.format; 179 import std.range: chain; 180 import std.algorithm.iteration: joiner; 181 Position pos = position(p); 182 enum inputLength = 15; 183 string result; 184 for (auto i = 1; i <= traceLevel; i++) 185 result ~= format("%d|", i); 186 if (p.successful) 187 { 188 string consumed; 189 foreach (match; p.matches) 190 consumed ~= match; 191 result ~= format(" (l:%d, c:%d, i:%d)\t", pos.line + 1, pos.col + 1, pos.index) ~ name.stringified ~ " SUCCEEDED on " ~ 192 consumed.stringified; 193 } 194 else 195 result ~= format(" (l:%d, c:%d, i:%d)\t", pos.line + 1, pos.col + 1, pos.index) ~ name.stringified ~ " FAILED on " ~ 196 p.input[p.end .. min(p.input.length, p.end + inputLength)].stringified ~ 197 (p.end + inputLength > p.input.length ? "" : "..."); 198 return result; 199 } 200 201 /** 202 Overrides FileLogger to remove the time stamp. 203 204 Example: 205 --- 206 sharedLog = new TraceLogger("TraceLog.txt"); 207 --- 208 */ 209 class TraceLogger : FileLogger 210 { 211 this(in string fn) @safe 212 { 213 super(fn); 214 } 215 import std.concurrency : Tid; 216 import std.datetime : SysTime; 217 override protected void beginLogMsg(string file, int line, string funcName, 218 string prettyFuncName, string moduleName, LogLevel logLevel, 219 Tid threadId, SysTime timestamp, Logger logger) 220 @safe 221 { 222 } 223 } 224 } 225 226 /** 227 CT Switch for testing 'keywords' implementations 228 */ 229 enum 230 { 231 IFCHAIN, 232 TRIE 233 } 234 enum KEYWORDS = IFCHAIN; 235 236 /** 237 The basic parse tree, as used throughout the project. 238 You can define your own parse tree node, but respect the basic layout. 239 */ 240 struct ParseTree 241 { 242 string name; /// The node name 243 bool successful; /// Indicates whether a parsing was successful or not 244 string[] matches; /// The matched input's parts. Some expressions match at more than one place, hence matches is an array. 245 246 string input; /// The input string that generated the parse tree. Stored here for the parse tree to be passed to other expressions, as input. 247 size_t begin, end; /// Indices for the matched part from the very beginning of the first match to the last char of the last match. 248 249 ParseTree[] children; /// The sub-trees created by sub-rules parsing. 250 251 size_t failEnd; // The furthest this tree could match the input (including !successful rules). 252 ParseTree[] failedChild; /// The !successful child that could still be partially parsed. 253 254 /** 255 Basic toString for easy pretty-printing. 256 */ 257 string toString(string tabs = "") const 258 { 259 string result = name; 260 261 string childrenString; 262 bool allChildrenSuccessful = true; 263 264 foreach(i,child; children) 265 { 266 childrenString ~= tabs ~ " +-" ~ child.toString(tabs ~ ((i < children.length -1 ) ? " | " : " ")); 267 if (!child.successful) { 268 allChildrenSuccessful = false; 269 } 270 } 271 result ~= this.toStringThisNode(allChildrenSuccessful); 272 return result ~ childrenString; 273 } 274 275 /** 276 * Basic toString of only this node, without the children 277 */ 278 private string toStringThisNode(bool allChildrenSuccessful) const 279 { 280 if (successful) { 281 return to!string([begin, end]) ~ to!string(matches) ~ "\n"; 282 } else { // some failure info is needed 283 if (allChildrenSuccessful) { // no one calculated the position yet 284 return " " ~ this.failMsg ~ "\n"; 285 } else { 286 return " (failure)\n"; 287 } 288 } 289 } 290 291 /** 292 * Generates a generic error when a node fails 293 * 294 * @param successMsg String returned when there isn't an error 295 * @param formatFailMsg Formating delegate function that generates the error message. 296 */ 297 string failMsg(string delegate(Position, string, string, const ParseTree) formatFailMsg = defaultFormatFailMsg, 298 string successMsg = "Sucess") const @property 299 { 300 foreach(i, child; children) { 301 if (!child.successful) { 302 return child.failMsg(formatFailMsg, successMsg); 303 } 304 } 305 306 if (!successful) { 307 Position pos = position(this); 308 string left, right; 309 310 if (pos.index < 10) { 311 left = input[0 .. pos.index]; 312 } else { 313 left = input[pos.index - 10 .. pos.index]; 314 } 315 if (pos.index + 10 < input.length) { 316 right = input[pos.index .. pos.index + 10]; 317 } else { 318 right = input[pos.index .. $]; 319 } 320 return formatFailMsg(pos, left, right, this); 321 } 322 323 return successMsg; 324 } 325 326 ParseTree dup() const @property 327 { 328 ParseTree result; 329 result.name = name; 330 result.successful = successful; 331 result.matches = matches.dup; 332 result.input = input; 333 result.begin = begin; 334 result.end = end; 335 result.failEnd = failEnd; 336 result.failedChild = map!(p => p.dup)(failedChild).array(); 337 result.children = map!(p => p.dup)(children).array(); 338 return result; 339 } 340 341 immutable(ParseTree) idup() const @property 342 { 343 return cast(immutable)dup(); 344 } 345 346 // Override opIndex operators 347 ref ParseTree opIndex(size_t index) { 348 return children[index]; 349 } 350 351 ref ParseTree[] opIndex() return { 352 return children; 353 } 354 355 size_t opDollar(size_t pos)() const 356 { 357 return children.length; 358 } 359 360 ParseTree[] opSlice(size_t i, size_t j) { 361 return children[i..j]; 362 } 363 } 364 365 /** 366 * Default fail message formating function 367 */ 368 auto defaultFormatFailMsg = delegate (Position pos, string left, string right, const ParseTree pt) 369 { 370 return "Failure at line " ~ to!string(pos.line) ~ ", col " ~ to!string(pos.col) ~ ", " 371 ~ (left.length > 0 ? "after " ~ left.stringified ~ " " : "") 372 ~ "expected " ~ (pt.matches.length > 0 ? pt.matches[$ - 1].stringified : "NO MATCH") 373 ~ `, but got ` ~ right.stringified; 374 }; 375 376 377 unittest // ParseTree testing 378 { 379 ParseTree p; 380 assert(p == p, "Self-identity on null tree."); 381 382 p = ParseTree("Name", true, ["abc", "", "def"], "input", 0, 1, null); 383 assert(p == p, "Self identity on non-null tree."); 384 385 ParseTree child = ParseTree("Child", true, ["abc", "", "def"], "input", 0, 1, null); 386 p.children = [child, child]; 387 388 assert(p.children[0] == p[0], "override opIndex allows to write less verbose code to navigate the tree"); 389 assert(p.children == p[] ); 390 assert(p.children[0..1] == p[0..1] ); 391 assert(p.children[0..$] == p[0..$] ); 392 393 ParseTree q = p; 394 assert(p == q, "Copying creates equal trees."); 395 396 assert(p.children == q.children); 397 p.children = [child, child, child]; 398 assert(q.children != p.children, "Disconnecting children."); 399 400 p = ParseTree("Name", true, ["abc", "", "def"], "input", 0, 1, null); 401 p.children = [child, child]; 402 403 q = p.dup; 404 assert(p == q, "Dupping creates equal trees."); 405 406 assert(p.children == q.children, "Equal children for dupped trees."); 407 p.children = null; 408 assert(q.children != p.children); 409 410 immutable iq = p.idup; 411 q = iq.dup; 412 assert(p == q, "Dupping to/from immutable creates equal trees."); 413 414 q.children = [p,p]; 415 assert(p != q, "Tree with different children are not equal."); 416 417 p.children = [p,p]; 418 assert(p == q, "Adding equivalent children is OK."); 419 420 p.matches = null; 421 assert(p != q, "Nulling matches makes trees unequal."); 422 423 p.matches = q.matches; 424 assert(p == q, "Copying matches makes equal trees."); 425 426 p.children[0].successful = false; 427 assert(p.children[0].failMsg == `Failure at line 0, col 1, after "i" expected "def", but got "nput"`); 428 assert(p.children[1].failMsg == "Sucess"); 429 assert(p.failMsg == `Failure at line 0, col 1, after "i" expected "def", but got "nput"`); 430 } 431 432 /// To compare two trees for content (not bothering with node names) 433 /// That's useful to compare the results from two different grammars. 434 bool softCompare(const ParseTree p1, const ParseTree p2) 435 { 436 return p1.successful == p2.successful 437 && p1.matches == p2.matches 438 && p1.begin == p2.begin 439 && p1.end == p2.end 440 && equal!(softCompare)(p1.children, p2.children); // the same for children 441 } 442 443 unittest // softCompare 444 { 445 ParseTree p = ParseTree("Name", true, ["abc", "", "def"], "input", 0, 1, null); 446 ParseTree child = ParseTree("Child", true, ["abc", "", "def"], "input", 0, 1, null); 447 p.children = [child, child]; 448 449 ParseTree q = p; 450 assert(p == q, "Copy => Equal trees."); 451 assert(softCompare(p,q), "Copy => Trees equal for softCompare."); 452 453 q.name = "Another One"; 454 assert(p != q, "Name change => non-equal trees."); 455 assert(softCompare(p,q), "Name change => Trees equal for softCompare."); 456 } 457 458 /// To record a position in a text 459 struct Position 460 { 461 size_t line;/// line number (starts at 0) 462 size_t col;/// column number (starts at 0) 463 size_t index;/// index (starts at 0) 464 } 465 466 /** 467 Given an input string, returns the position corresponding to the end of the string. 468 469 For example: 470 --- 471 assert(position("abc") == Position(0,3,3)); 472 assert(position("abc 473 ") == Position(1,0,4)); 474 assert(position("abc 475 476 ") == Position(2,4,8)); 477 --- 478 */ 479 Position position(string s) 480 { 481 size_t col, line, index, prev_i; 482 char prev_c; 483 foreach(i,c; s) 484 { 485 if ((c == '\n' && !(i == prev_i + 1 && prev_c == '\r')) || // new line except when directly following a carriage return. 486 c == '\r') 487 { 488 col = 0; 489 ++line; 490 ++index; 491 prev_i = i; 492 prev_c = c; 493 } 494 else 495 { 496 if (c != '\n') 497 ++col; 498 ++index; 499 } 500 } 501 502 return Position(line,col,index); 503 } 504 505 /** 506 Same as previous overload, but from the begin of P.input to p.end 507 */ 508 Position position(const ParseTree p) 509 { 510 return position(p.input[0..p.end]); 511 } 512 513 unittest 514 { 515 assert(position("") == Position(0,0,0), "Null string, position 0."); 516 assert(position("abc") == Position(0,3,3), "length 3 string, no line feed."); 517 assert(position("abc 518 ") == Position(1,0,4), "One end of line."); 519 assert(position("abc 520 521 ----") == Position(2,4,9), "Three lines (second one empty)."); 522 assert(position("abc 523 ---- 524 ----") == Position(2,4,13), "Three lines."); 525 assert(position(" 526 527 528 ") == Position(3,0,3), "Four lines, all empty."); 529 assert(position("one\r\ntwo\r\nthree") == Position(2, 5, 15), "Three lines, DOS line endings"); 530 } 531 532 string getName(alias expr)() @property 533 { 534 static if (is(typeof( { expr(GetName()); }))) 535 return expr(GetName()); 536 else 537 return __traits(identifier, expr); 538 } 539 540 struct GetName {} 541 542 /** 543 Basic rule, that always fail without consuming. 544 */ 545 ParseTree fail(ParseTree p) 546 { 547 return ParseTree("fail", false, [], p.input, p.end, p.end, null); 548 } 549 550 /// ditto 551 ParseTree fail(string input) 552 { 553 return fail(ParseTree("", false, [], input)); 554 } 555 556 string fail(GetName g) 557 { 558 return "fail"; 559 } 560 561 unittest // 'fail' unit test 562 { 563 ParseTree input = ParseTree("input", true, [], "This is the input string.", 0,0, null); 564 ParseTree result = fail(input); 565 assert(result.name == "fail"); 566 assert(!result.successful, "'fail' fails."); 567 assert(result.matches is null, "'fail' makes no match."); 568 assert(result.input == input.input, "'fail' does not change the input."); 569 assert(result.end == input.end, "'fail' puts the index after the previous parse."); 570 assert(result.children is null, "'fail' has no children."); 571 572 result = fail("This is the input string."); 573 assert(!result.successful, "'fail' fails."); 574 } 575 576 /** 577 Matches the end of input. Fails if there is any character left. 578 */ 579 ParseTree eoi(ParseTree p) 580 { 581 if (p.end == p.input.length) 582 return ParseTree("eoi", true, [], p.input, p.end, p.end); 583 else 584 return ParseTree("eoi", false, ["end of input"], p.input, p.end, p.end); 585 } 586 587 /// ditto 588 ParseTree eoi(string input) 589 { 590 return eoi(ParseTree("", false, [], input)); 591 } 592 593 string eoi(GetName g) 594 { 595 return "eoi"; 596 } 597 598 alias eoi endOfInput; /// helper alias. 599 600 unittest // 'eoi' unit test 601 { 602 ParseTree input = ParseTree("input", true, [], "This is the input string.", 0,0, null); 603 ParseTree result = eoi(input); 604 assert(result.name == "eoi"); 605 assert(!result.successful, "'eoi' fails on non-null string."); 606 assert(result.matches == ["end of input"], "'eoi' error message."); 607 assert(result.input == input.input, "'eoi' does not change the input."); 608 assert(result.end == input.end, "'eoi' puts the index after the previous parse."); 609 assert(result.children is null, "'eoi' has no children."); 610 611 input = ParseTree("input", true, [], "", 0,0, null); 612 result = eoi(input); 613 assert(result.successful, "'eoi' succeeds on strings of length 0."); 614 result = eoi(""); 615 assert(result.successful, "'eoi' succeeds on strings of length 0."); 616 result = eoi(null); 617 assert(result.successful, "'eoi' succeeds on null strings"); 618 } 619 620 /** 621 Match any character. As long as there is at least a character left in the input, it succeeds. 622 Conversely, it fails only if called at the end of the input. 623 */ 624 ParseTree any(ParseTree p) 625 { 626 if (p.end < p.input.length) 627 return ParseTree("any", true, [p.input[p.end..p.end+1]], p.input, p.end, p.end+1); 628 else 629 return ParseTree("any", false, ["any char"], p.input, p.end, p.end); 630 } 631 632 /// ditto 633 ParseTree any(string input) 634 { 635 return any(ParseTree("", false, [], input)); 636 } 637 638 string any(GetName g) 639 { 640 return "any"; 641 } 642 643 unittest // 'any' unit test 644 { 645 ParseTree input = ParseTree("input", true, [], "This is the input string.", 0,0, null); 646 ParseTree result = any(input); 647 648 assert(result.name == "any"); 649 assert(result.successful, "'any' succeeds on non-null strings."); 650 assert(result.matches == ["T"], "'any' matches the first char in an input."); 651 assert(result.input == input.input, "'any' does not change the input."); 652 assert(result.end == input.end+1, "'any' advances the index by one position."); 653 assert(result.children is null, "'any' has no children."); 654 655 result = any("a"); 656 assert(result.successful, "'any' matches on strings of length one."); 657 assert(result.matches == ["a"], "'any' matches the first char in an input."); 658 assert(result.input == "a", "'any' does not change the input."); 659 assert(result.end == 1, "'any' advances the index by one position."); 660 assert(result.children is null, "'any' has no children."); 661 662 input = ParseTree("input", true, [], "", 0,0, null); 663 664 result = any(input); 665 assert(!result.successful, "'any' fails on strings of length 0."); 666 assert(result.matches == ["any char"], "'any' error message on strings of length 0."); 667 assert(result.end == 0, "'any' does not advance the index."); 668 669 result = any(""); 670 assert(!result.successful, "'any' fails on strings of length 0."); 671 assert(result.matches == ["any char"], "'any' error message on strings of length 0."); 672 assert(result.end == 0, "'any' does not advance the index."); 673 674 result = any(null); 675 assert(!result.successful, "'any' fails on null strings."); 676 assert(result.matches == ["any char"], "'any' error message on strings of length 0."); 677 assert(result.end == 0, "'any' does not advance the index."); 678 } 679 680 /** 681 Predefined parser: matches word boundaries, as \b for regexes. 682 */ 683 ParseTree wordBoundary(ParseTree p) 684 { 685 // TODO: I added more indexing guards and now this could probably use 686 // some simplification. Too tired to write it better. --Chad 687 bool matched = (p.end == 0 && isAlpha(p.input.front())) 688 || (p.end == p.input.length && isAlpha(p.input.back())) 689 || (p.end > 0 && isAlpha(p.input[p.end-1]) && p.end < p.input.length && !isAlpha(p.input[p.end])) 690 || (p.end > 0 && !isAlpha(p.input[p.end-1]) && p.end < p.input.length && isAlpha(p.input[p.end])); 691 if (matched) 692 return ParseTree("wordBoundary", matched, [], p.input, p.end, p.end, null); 693 else 694 return ParseTree("wordBoundary", matched, ["word boundary"], p.input, p.end, p.end, null); 695 } 696 697 /// ditto 698 ParseTree wordBoundary(string input) 699 { 700 return ParseTree("wordBoundary", isAlpha(input.front()), [], input, 0,0, null); 701 } 702 703 string wordBoundary(GetName g) 704 { 705 return "wordBoundary"; 706 } 707 708 unittest // word boundary 709 { 710 ParseTree input = ParseTree("", false, [], "This is a word."); 711 auto wb = [// "This" 712 cast(size_t)(0):true, 1:false, 2:false, 3:false, 4: true, 713 // "is" 714 5: true, 6:false, 7: true, 715 // "a" 716 8: true, 9:true, 717 // "word" 718 10:true, 11:false, 12:false, 13:false, 14:true, 719 // "." 720 15:false 721 ]; 722 723 foreach(size_t index; 0 .. input.input.length) 724 { 725 input.end = index; 726 ParseTree result = wordBoundary(input); 727 728 assert(result.name == "wordBoundary"); 729 assert(result.successful == wb[index]); // true, false, ... 730 // for errors, there is an error message 731 assert(result.successful && result.matches is null || !result.successful); 732 assert(result.begin == input.end); 733 assert(result.end == input.end); 734 assert(result.children is null); 735 } 736 } 737 738 /** 739 Represents a literal in a PEG, like "abc" or 'abc' (or even ''). 740 It succeeds if a prefix of the input is equal to its template parameter and fails otherwise. 741 */ 742 template literal(string s) 743 { 744 enum name = "literal!(\""~s~"\")"; 745 746 ParseTree literal(ParseTree p) 747 { 748 enum lit = "\"" ~ s ~ "\""; 749 750 if (p.end+s.length <= p.input.length && p.input[p.end..p.end+s.length] == s) 751 return ParseTree(name, true, [s], p.input, p.end, p.end+s.length); 752 else { 753 import std.algorithm : commonPrefix; 754 import std.utf : byCodeUnit; 755 auto prefix = p.input[p.end..$].byCodeUnit.commonPrefix(s.byCodeUnit); 756 return ParseTree(name, false, [lit], p.input, p.end, p.end, null, p.end + prefix.length); 757 } 758 } 759 760 ParseTree literal(string input) 761 { 762 return .literal!(s)(ParseTree("", false, [], input)); 763 } 764 765 766 string literal(GetName g) 767 { 768 return name; 769 } 770 771 } 772 773 unittest // 'literal' unit test 774 { 775 ParseTree input = ParseTree("input", true, [], "abcdef", 0,0, null); 776 777 alias literal!"a" a; 778 alias literal!"abc" abc; 779 alias literal!"" empty; 780 781 ParseTree result = a(input); 782 783 assert(result.name == `literal!("a")`, "Literal name test."); 784 assert(result.successful, "'a' succeeds on inputs beginning with 'a'."); 785 assert(result.matches == ["a"], "'a' matches the 'a' at the beginning."); 786 assert(result.input == input.input, "'a' does not change the input."); 787 assert(result.end == input.end+1, "'a' advances the index by one position."); 788 assert(result.children is null, "'a' has no children."); 789 790 result = a("abcdef"); 791 792 assert(result.successful, "'a' succeeds on inputs beginning with 'a'."); 793 assert(result.matches == ["a"], "'a' matches the 'a' at the beginning."); 794 assert(result.input == input.input, "'a' does not change the input."); 795 assert(result.end == input.end+1, "'a' advances the index by one position."); 796 assert(result.children is null, "'a' has no children."); 797 798 result = abc(input); 799 800 assert(result.name == `literal!("abc")`, "Literal name test."); 801 assert(result.successful, "'abc' succeeds on inputs beginning with 'abc'."); 802 assert(result.matches == ["abc"], "'abc' matches 'abc' at the beginning."); 803 assert(result.input == input.input, "'abc' does not change the input."); 804 assert(result.end == input.end+3, "'abc' advances the index by 3 positions."); 805 assert(result.children is null, "'abc' has no children."); 806 807 result = abc("abcdef"); 808 809 assert(result.successful, "'abc' succeeds on inputs beginning with 'abc'."); 810 assert(result.matches == ["abc"], "'abc' matches 'abc' at the beginning."); 811 assert(result.input == input.input, "'abc' does not change the input."); 812 assert(result.end == input.end+3, "'abc' advances the index by 3 positions."); 813 assert(result.children is null, "'abc' has no children."); 814 815 result = empty(input); 816 817 assert(result.name == `literal!("")`, "Literal name test."); 818 assert(result.successful, "'' succeeds on non-null inputs."); 819 assert(result.matches == [""], "'' matches '' at the beginning."); 820 assert(result.input == input.input, "'' does not change the input."); 821 assert(result.end == input.end+0, "'' does not advance the index."); 822 assert(result.children is null, "'' has no children."); 823 824 result = empty("abcdef"); 825 826 assert(result.successful, "'' succeeds on non-null inputs."); 827 assert(result.matches == [""], "'' matches '' at the beginning."); 828 assert(result.input == input.input, "'' does not change the input."); 829 assert(result.end == input.end+0, "'' does not advance the index."); 830 assert(result.children is null, "'' has no children."); 831 832 input.input = "bcdef"; 833 834 result = a(input); 835 836 assert(!result.successful, "'a' fails on inputs not beginning with 'a'."); 837 assert(result.matches == ["\"a\""], "'a' makes no match on 'bcdef'."); 838 assert(result.input == input.input, "'a' does not change the input."); 839 assert(result.end == input.end, "'a' does not advances the index on 'bcdef'."); 840 assert(result.children is null, "'a' has no children."); 841 842 result = abc(input); 843 844 assert(!result.successful, "'abc' fails on inputs not beginning with 'abc'."); 845 assert(result.matches == ["\"abc\""], "'abc' does no match on 'bcdef'."); 846 assert(result.input == input.input, "'abc' does not change the input."); 847 assert(result.end == input.end, "'abc' does not advance the index on 'bcdef'."); 848 assert(result.children is null, "'abc' has no children."); 849 850 result = empty(input); 851 852 assert(result.successful, "'' succeeds on non-null inputs."); 853 assert(result.matches == [""], "'' matches '' at the beginning."); 854 assert(result.input == input.input, "'' does not change the input."); 855 assert(result.end == input.end+0, "'' does not advance the index."); 856 assert(result.children is null, "'' has no children."); 857 858 input.input = ""; 859 860 result = a(input); 861 862 assert(!result.successful, "'a' fails on empty strings."); 863 assert(result.matches == ["\"a\""], "'a' does not match ''."); 864 assert(result.input == input.input, "'a' does not change the input."); 865 assert(result.end == input.end, "'a' does not advance the index on 'bcdef'."); 866 assert(result.children is null, "'a' has no children."); 867 868 result = abc(input); 869 870 assert(!result.successful, "'abc' fails on empty strings."); 871 assert(result.matches == ["\"abc\""], "'abc' does not match ''."); 872 assert(result.input == input.input, "'abc' does not change the input."); 873 assert(result.end == input.end, "'abc' does not advance the index on 'bcdef'."); 874 assert(result.children is null, "'abc' has no children."); 875 876 result = empty(input); 877 878 assert(result.successful, "'' succeeds on empty strings."); 879 assert(result.matches == [""], "'' matches '' at the beginning, even on empty strings."); 880 assert(result.input == input.input, "'' does not change the input."); 881 assert(result.end == input.end+0, "'' does not advance the index."); 882 assert(result.children is null, "'' has no children."); 883 } 884 885 /** 886 Represents a case insensitive literal in a PEG, like "abc"i or 'abc'i (or even ''i). 887 It succeeds if a case insensitive comparison of a prefix of the input and its template 888 parameter yields no difference and fails otherwise. 889 */ 890 template caseInsensitiveLiteral(string s) 891 { 892 enum name = "caseInsensitiveLiteral!(\""~s~"\")"; 893 894 ParseTree caseInsensitiveLiteral(ParseTree p) 895 { 896 enum lit = "\"" ~ s ~ "\""; 897 if (p.end+s.length <= p.input.length && icmp(p.input[p.end..p.end+s.length], s) == 0) 898 return ParseTree(name, true, [s], p.input, p.end, p.end+s.length); 899 else 900 return ParseTree(name, false, [lit], p.input, p.end, p.end); 901 } 902 903 ParseTree caseInsensitiveLiteral(string input) 904 { 905 return .caseInsensitiveLiteral!(s)(ParseTree("", false, [], input)); 906 } 907 908 909 string caseInsensitiveLiteral(GetName g) 910 { 911 return name; 912 } 913 914 } 915 916 unittest // 'caseInsensitiveLiteral' unit test 917 { 918 ParseTree input = ParseTree("input", true, [], "AbCdEf", 0,0, null); 919 920 alias caseInsensitiveLiteral!"a" a; 921 alias caseInsensitiveLiteral!"aBC" abc; 922 alias caseInsensitiveLiteral!"" empty; 923 924 ParseTree result = a(input); 925 926 assert(result.name == `caseInsensitiveLiteral!("a")`, "caseInsensitiveLiteral name test."); 927 assert(result.successful, "'a' succeeds on inputs beginning with 'A'."); 928 assert(result.matches == ["a"], "'a' matches the 'A' at the beginning."); 929 assert(result.input == input.input, "'a' does not change the input."); 930 assert(result.end == input.end+1, "'a' advances the index by one position."); 931 assert(result.children is null, "'a' has no children."); 932 933 result = a("AbCdEf"); 934 935 assert(result.successful, "'a' succeeds on inputs beginning with 'A'."); 936 assert(result.matches == ["a"], "'a' matches the 'A' at the beginning."); 937 assert(result.input == input.input, "'a' does not change the input."); 938 assert(result.end == input.end+1, "'a' advances the index by one position."); 939 assert(result.children is null, "'a' has no children."); 940 941 result = abc(input); 942 943 assert(result.name == `caseInsensitiveLiteral!("aBC")`, "caseInsensitiveLiteral name test."); 944 assert(result.successful, "'aBC' succeeds on inputs beginning with 'AbC'."); 945 assert(result.matches == ["aBC"], "'AbC' matches 'aBC' at the beginning."); 946 assert(result.input == input.input, "'aBC' does not change the input."); 947 assert(result.end == input.end+3, "'aBC' advances the index by 3 positions."); 948 assert(result.children is null, "'aBC' has no children."); 949 950 result = abc("AbCdEf"); 951 952 assert(result.successful, "'aBC' succeeds on inputs beginning with 'AbC'."); 953 assert(result.matches == ["aBC"], "'AbC' matches 'aBC' at the beginning."); 954 assert(result.input == input.input, "'aBC' does not change the input."); 955 assert(result.end == input.end+3, "'aBC' advances the index by 3 positions."); 956 assert(result.children is null, "'aBC' has no children."); 957 958 result = empty(input); 959 960 assert(result.name == `caseInsensitiveLiteral!("")`, "caseInsensitiveLiteral name test."); 961 assert(result.successful, "'' succeeds on non-null inputs."); 962 assert(result.matches == [""], "'' matches '' at the beginning."); 963 assert(result.input == input.input, "'' does not change the input."); 964 assert(result.end == input.end+0, "'' does not advance the index."); 965 assert(result.children is null, "'' has no children."); 966 967 result = empty("AbCdEf"); 968 969 assert(result.successful, "'' succeeds on non-null inputs."); 970 assert(result.matches == [""], "'' matches '' at the beginning."); 971 assert(result.input == input.input, "'' does not change the input."); 972 assert(result.end == input.end+0, "'' does not advance the index."); 973 assert(result.children is null, "'' has no children."); 974 975 input.input = "bCdEf"; 976 977 result = a(input); 978 979 assert(!result.successful, "'a' fails on inputs not beginning with 'a' or 'A'."); 980 assert(result.matches == ["\"a\""], "'a' makes no match on 'bCdEf'."); 981 assert(result.input == input.input, "'a' does not change the input."); 982 assert(result.end == input.end, "'a' does not advances the index on 'bCdEf'."); 983 assert(result.children is null, "'a' has no children."); 984 985 result = abc(input); 986 987 assert(!result.successful, "'aBC' fails on inputs beginning with 'bCdEf'."); 988 assert(result.matches == ["\"aBC\""], "'aBC' does no match on 'bCdEf'."); 989 assert(result.input == input.input, "'aBC' does not change the input."); 990 assert(result.end == input.end, "'aBC' does not advance the index on 'bCdEf'."); 991 assert(result.children is null, "'aBC' has no children."); 992 993 result = empty(input); 994 995 assert(result.successful, "'' succeeds on non-null inputs."); 996 assert(result.matches == [""], "'' matches '' at the beginning."); 997 assert(result.input == input.input, "'' does not change the input."); 998 assert(result.end == input.end+0, "'' does not advance the index."); 999 assert(result.children is null, "'' has no children."); 1000 1001 input.input = ""; 1002 1003 result = a(input); 1004 1005 assert(!result.successful, "'a' fails on empty strings."); 1006 assert(result.matches == ["\"a\""], "'a' does not match ''."); 1007 assert(result.input == input.input, "'a' does not change the input."); 1008 assert(result.end == input.end, "'a' does not advance the index on 'bCdEf'."); 1009 assert(result.children is null, "'a' has no children."); 1010 1011 result = abc(input); 1012 1013 assert(!result.successful, "'aBC' fails on empty strings."); 1014 assert(result.matches == ["\"aBC\""], "'aBC' does not match ''."); 1015 assert(result.input == input.input, "'aBC' does not change the input."); 1016 assert(result.end == input.end, "'aBC' does not advance the index on 'bCdEf'."); 1017 assert(result.children is null, "'aBC' has no children."); 1018 1019 result = empty(input); 1020 1021 assert(result.successful, "'' succeeds on empty strings."); 1022 assert(result.matches == [""], "'' matches '' at the beginning, even on empty strings."); 1023 assert(result.input == input.input, "'' does not change the input."); 1024 assert(result.end == input.end+0, "'' does not advance the index."); 1025 assert(result.children is null, "'' has no children."); 1026 1027 input.input = "ÄöÜæØå"; 1028 result = caseInsensitiveLiteral!"äÖüÆøÅ"(input); 1029 1030 assert(result.successful, "Unicode characters are matched case insensitively."); 1031 } 1032 1033 /** 1034 Represents a range of chars, from begin to end, included. So charRange!('a','z') matches 1035 all English lowercase letters. If fails if the input is empty or does not begin with a character 1036 between begin and end. 1037 1038 If begin == end, it will match one char (begin... or end). 1039 1040 begin > end is non-legal. 1041 */ 1042 template charRange(dchar begin, dchar end) if (begin <= end) 1043 { 1044 enum name = "charRange!('"~to!string(begin)~"','" ~ to!string(end) ~ "')"; 1045 1046 ParseTree charRange(ParseTree p) 1047 { 1048 enum longname = "a char between '"~to!string(begin)~"' and '"~to!string(end)~"'"; 1049 if (p.end < p.input.length && p.input[p.end] >= begin && p.input[p.end] <= end) 1050 return ParseTree(name, true, [p.input[p.end..p.end+1]], p.input, p.end, p.end+1); 1051 else 1052 return ParseTree(name, false, [longname], p.input, p.end, p.end); 1053 } 1054 1055 ParseTree charRange(string input) 1056 { 1057 return .charRange!(begin,end)(ParseTree("",false,[],input)); 1058 } 1059 1060 string charRange(GetName g) 1061 { 1062 return name; 1063 } 1064 } 1065 1066 unittest // 'charRange' unit test 1067 { 1068 ParseTree input = ParseTree("input", true, [], "abcdef", 0,0, null); 1069 1070 alias charRange!('a','a') aa; 1071 alias charRange!('a','b') ab; 1072 alias charRange!('a','z') az; 1073 alias charRange!(dchar.min,dchar.max) allChars; 1074 alias charRange!('\U00000000','\U000000FF') ASCII; 1075 1076 static assert(!__traits(compiles, {alias charRange!('z','a') za;})); 1077 1078 ParseTree result = aa(input); 1079 1080 assert(result.name == "charRange!('a','a')", "charRange name test."); 1081 assert(result.successful, "'a-a' succeeds on inputs beginning with 'a'."); 1082 assert(result.matches == ["a"], "'a-a' matches the 'a' at the beginning."); 1083 assert(result.input == input.input, "'a-a' does not change the input."); 1084 assert(result.end == input.end+1, "'a-a' advances the index by one position."); 1085 assert(result.children is null, "'a-a' has no children."); 1086 1087 result = ab("abcdef"); 1088 1089 assert(result.name == "charRange!('a','b')", "charRange name test."); 1090 assert(result.successful, "'a-b' succeeds on inputs beginning with 'a'."); 1091 assert(result.matches == ["a"], "'a-b' matches the 'a' at the beginning."); 1092 assert(result.input == input.input, "'a-b' does not change the input."); 1093 assert(result.end == input.end+1, "'a-b' advances the index by one position."); 1094 assert(result.children is null, "'a-b' has no children."); 1095 1096 result = az(input); 1097 1098 assert(result.name == "charRange!('a','z')", "charRange name test."); 1099 assert(result.successful, "'a-z' succeeds on inputs beginning with 'abc'."); 1100 assert(result.matches == ["a"], "'a-z' matches 'a' at the beginning."); 1101 assert(result.input == input.input, "'a-z' does not change the input."); 1102 assert(result.end == input.end+1, "'a-z' advances the index by one position."); 1103 assert(result.children is null, "'a-z' has no children."); 1104 1105 input.input = "bcdef"; 1106 1107 result = aa(input); 1108 1109 assert(!result.successful, "'a-a' fails on inputs not beginning with 'a'."); 1110 assert(result.matches == ["a char between 'a' and 'a'"], "'a-a' makes no match on 'bcdef'."); 1111 assert(result.input == input.input, "'a-a' does not change the input."); 1112 assert(result.end == input.end, "'a-a' does not advances the index on 'bcdef'."); 1113 assert(result.children is null, "'a-a' has no children."); 1114 1115 result = ab(input); 1116 1117 assert(result.successful, "'a-b' succeeds on inputs beginning with 'b'."); 1118 assert(result.matches == ["b"], "'a-b' matches on 'bcdef'."); 1119 assert(result.input == input.input, "'a-b' does not change the input."); 1120 assert(result.end == input.end+1, "'a-b' advances the index by one position'."); 1121 assert(result.children is null, "'a-b' has no children."); 1122 1123 result = az(input); 1124 1125 assert(result.successful, "'a-z' succeeds on 'bcdef'."); 1126 assert(result.matches == ["b"], "'a-z' matches 'b' at the beginning of 'bcdef'."); 1127 assert(result.input == input.input, "'a-z' does not change the input."); 1128 assert(result.end == input.end+1, "'a-z' advances the index by one position."); 1129 assert(result.children is null, "'a-z' has no children."); 1130 1131 input.input = ""; 1132 1133 result = aa(input); 1134 1135 assert(!result.successful, "'a-a' fails on empty strings."); 1136 assert(result.matches == ["a char between 'a' and 'a'"], "'a-a' does not match ''."); 1137 assert(result.input == input.input, "'a-a' does not change the input."); 1138 assert(result.end == input.end, "'a-a' does not advance the index on ''."); 1139 assert(result.children is null, "'a-a' has no children."); 1140 1141 result = ab(input); 1142 1143 assert(!result.successful, "'a-b' fails on empty strings."); 1144 assert(result.matches == ["a char between 'a' and 'b'"], "'a-b' does not match ''."); 1145 assert(result.input == input.input, "'a-b' does not change the input."); 1146 assert(result.end == input.end, "'a-b' does not advance the index on ''."); 1147 assert(result.children is null, "'a-b' has no children."); 1148 1149 result = az(input); 1150 1151 assert(!result.successful, "'a-z' fails on empty strings."); 1152 assert(result.matches == ["a char between 'a' and 'z'"], "'a-z' does not match ''."); 1153 assert(result.input == input.input, "'a-z' does not change the input."); 1154 assert(result.end == input.end, "'a-z' does not advance the index on ''."); 1155 assert(result.children is null, "'a-z' has no children."); 1156 1157 input.input = "123"; 1158 1159 result = aa(input); 1160 1161 assert(!result.successful, "'a-a' fails on '123'."); 1162 assert(result.matches == ["a char between 'a' and 'a'"], "'a-a' does not match '123'."); 1163 assert(result.input == input.input, "'a-a' does not change the input."); 1164 assert(result.end == input.end, "'a-a' does not advance the index on '123'."); 1165 assert(result.children is null, "'a-a' has no children."); 1166 1167 result = ab(input); 1168 1169 assert(!result.successful, "'a-b' fails on '123'."); 1170 assert(result.matches == ["a char between 'a' and 'b'"], "'a-b' does not match '123'."); 1171 assert(result.input == input.input, "'a-b' does not change the input."); 1172 assert(result.end == input.end, "'a-b' does not advance the index on '123'."); 1173 assert(result.children is null, "'a-b' has no children."); 1174 1175 result = az(input); 1176 1177 assert(!result.successful, "'a-z' fails on '123'."); 1178 assert(result.matches == ["a char between 'a' and 'z'"], "'a-z' does not match '123'."); 1179 assert(result.input == input.input, "'a-z' does not change the input."); 1180 assert(result.end == input.end, "'a-z' does not advance the index on '123'."); 1181 assert(result.children is null, "'a-z' has no children."); 1182 1183 1184 foreach(dchar ch; 0..256*(128+64+16+8)) 1185 assert(allChars(to!string(ch)).successful); 1186 1187 assert(!allChars("").successful); 1188 } 1189 1190 /** 1191 eps matches the empty string (usually denoted by the Greek letter 'epsilon') and always succeeds. 1192 It's equivalent to literal!"" (for example, it creates a match of [""]: one match, the empty string). 1193 */ 1194 ParseTree eps(ParseTree p) 1195 { 1196 return ParseTree("eps", true, [""], p.input, p.end, p.end); 1197 } 1198 1199 ParseTree eps(string input) 1200 { 1201 return eps(ParseTree("",false,[], input)); 1202 } 1203 1204 string eps(GetName g) 1205 { 1206 return "eps"; 1207 } 1208 1209 unittest // 'eps' unit test 1210 { 1211 ParseTree input = ParseTree("input", true, [], "abcdef", 0,0, null); 1212 1213 ParseTree result = eps(input); 1214 1215 assert(result.name == "eps"); 1216 assert(result.successful, "'eps' succeeds on non-null inputs."); 1217 assert(result.matches == [""], "'eps' matches '' at the beginning."); 1218 assert(result.input == input.input, "'eps' does not change the input."); 1219 assert(result.end == input.end+0, "'eps' does not advance the index."); 1220 assert(result.children is null, "'eps' has no children."); 1221 1222 input.input = ""; 1223 1224 result = eps(input); 1225 assert(result.name == "eps"); 1226 assert(result.successful, "'eps' succeeds on empty strings."); 1227 assert(result.matches == [""], "'eps' matches '' at the beginning, even on empty strings."); 1228 assert(result.input == input.input, "'eps' does not change the input."); 1229 assert(result.end == input.end+0, "'eps' does not advance the index."); 1230 assert(result.children is null, "'eps' has no children."); 1231 } 1232 1233 1234 /** 1235 Basic operator: it matches if all its subrules (stored in the rules template parameter tuple) match 1236 the input successively. Its subrules parse trees are stored as its children and its matches field 1237 will contain all its subrules matches, in order. 1238 1239 ---- 1240 alias and!(literal!"abc", charRange!('a','z')) rule; // abc followed by any letter between a and z. 1241 ParseTree input = ParseTree("",false,[],"abcd"); // low-level plumbing, 1242 // the rules described here act on ParseTree's not strings. 1243 // It's equivalent to "abcd" as input 1244 auto result = rule(input); 1245 1246 assert(result.successful); // OK, 'abc' followed by 'd' 1247 assert(result.matches == ["abc", "d"]); // stores the matches 1248 assert(result.children.length == 2); // two children, the result of "abc" on "abcd" and the result of [a-z] on "d" 1249 1250 input.input = "abc"; // changing the input string; 1251 assert(!rule(input)).successful); // NOK, abc alone 1252 input.input = "ab"; 1253 assert(!rule(input)).successful); // NOK, does not begin by abc 1254 ---- 1255 1256 If it fails, the last children will contain the failed node. That way, when printing, as sort of diagnostic is given: 1257 1258 ---- 1259 alias and!(literal!"abc", charRange!('a','z')) rule; // 'abc[a-z]', aka 'abc' followed by any letter between 'a' and 'z'. 1260 ParseTree input = ParseTree("",false,[],"abc1"); // equivalent to "abc1" 1261 1262 auto failure = rule(input); 1263 writeln(failure); 1264 /+ 1265 writes: 1266 and (failure) 1267 +-literal(abc) [0, 3]["abc"] 1268 +-charRange(a,z) failure at line 0, col 3, after "abc" a char between 'a' and 'z', but got "1" 1269 +/ 1270 ---- 1271 1272 So we know the global 'and' failed, that the first sub-rule ('abc') succeeded on input[0..3] with "abc" 1273 and that the second subrule ('[a-z]') failed at position 3 (so, on '1'). 1274 */ 1275 template and(rules...) if (rules.length > 0) 1276 { 1277 string ctfeGetNameAnd() 1278 { 1279 string name = "and!("; 1280 foreach(i,rule; rules) 1281 name ~= __traits(identifier, rule) // because using getName!(rule) causes an infinite loop during compilation 1282 // for recursive rules 1283 ~ (i < rules.length -1 ? ", " : ""); 1284 name ~= ")"; 1285 return name; 1286 } 1287 1288 enum name = ctfeGetNameAnd(); 1289 1290 ParseTree and(ParseTree p) 1291 { 1292 bool keepNode(ParseTree node) 1293 { 1294 return node.name.startsWith("keep!(") 1295 || ( !node.name.startsWith("discard!(") 1296 //&& !node.name.startsWith("drop!(") 1297 && (node.matches !is null 1298 || node.failEnd >= node.end) 1299 //&& node.begin != node.end 1300 ); 1301 } 1302 1303 version (tracer) 1304 { 1305 incTraceLevel(); 1306 } 1307 1308 ParseTree result = ParseTree(name, false, [], p.input, p.end, p.end, []); 1309 1310 foreach(i,r; rules) 1311 { 1312 version (tracer) 1313 { 1314 if (shouldTrace(getName!(r)(), p)) 1315 trace(traceMsg(result, name, getName!(r)())); 1316 } 1317 ParseTree temp = r(result); 1318 result.end = temp.end; 1319 result.failEnd = max(result.failEnd, temp.failEnd); 1320 if (temp.successful) 1321 { 1322 if (keepNode(temp)) 1323 { 1324 result.matches ~= temp.matches; 1325 if (temp.name.startsWith("drop!(")) 1326 {} 1327 else if (temp.name.startsWith("propagate!(")) 1328 result.children ~= temp.children; 1329 else 1330 result.children ~= temp; 1331 } 1332 } 1333 else 1334 { 1335 auto firstLongestFailedMatch = result.children.firstLongestFailedMatch(temp.end); 1336 if (firstLongestFailedMatch == -1) { 1337 result.children ~= temp;// add the failed node, to indicate which failed 1338 if (temp.matches.length > 0) 1339 result.matches ~= temp.matches[$-1]; 1340 } else { 1341 // don't add the failed node because a previous one already failed further back 1342 result.children = result.children[0 .. firstLongestFailedMatch+1]; // discard any intermediate correct nodes 1343 // This current 'and' rule has failed parsing and there is a successful child 1344 // that had a longer failing match. We now want to revisit that child and modify it 1345 // so that it is no longer successful and we want to move its failedChild into its children. 1346 failedChildFixup(result.children[firstLongestFailedMatch], result.children[firstLongestFailedMatch].failEnd); 1347 } 1348 result.end = result.children.maxEnd(); 1349 result.failEnd = result.children.maxFailEnd(); 1350 version (tracer) 1351 { 1352 if (shouldTrace(getName!(r)(), p)) 1353 trace(traceResultMsg(result, getName!(r)())); 1354 decTraceLevel(); 1355 } 1356 return result; // and end the parsing attempt right there 1357 } 1358 } 1359 result.successful = true; 1360 version (tracer) 1361 { 1362 foreach(i, r; rules) 1363 if (shouldTrace(getName!(r)(), p)) 1364 { 1365 trace(traceResultMsg(result, name)); 1366 break; 1367 } 1368 decTraceLevel(); 1369 } 1370 return result; 1371 } 1372 1373 ParseTree and(string input) 1374 { 1375 return .and!(rules)(ParseTree("",false,[],input)); 1376 } 1377 1378 string and(GetName g) 1379 { 1380 return name; 1381 } 1382 1383 } 1384 1385 auto firstLongestFailedMatch(ParseTree[] children, size_t threshold) { 1386 return children.countUntil!(c => c.failEnd > threshold); 1387 } 1388 1389 auto maxFailEnd(ParseTree[] children) 1390 { 1391 return children.map!(c => c.failEnd).maxElement; 1392 } 1393 1394 auto maxEnd(ParseTree[] children) 1395 { 1396 return children.map!(c => c.end).maxElement; 1397 } 1398 1399 // A child ParseTree has kept track of an alternate ParseTree (in failedChild) that matches longer. 1400 // whenever the 'and' rule fails we want to rewrite that child so that the failedChild is 1401 // moved into its children, the successful is set to false, the end is set the its failEnd, 1402 // the failEnd is reset, and all that info is propagated upwards the tree so intermediate 1403 // nodes reflect the proper state. 1404 bool failedChildFixup(ref ParseTree p, size_t failEnd) 1405 { 1406 if (p.failedChild.length > 0) 1407 { 1408 p.children ~= p.failedChild[0]; 1409 p.failedChild = []; 1410 p.successful = false; 1411 p.end = p.failEnd; 1412 p.failEnd = p.children.map!(c => c.failEnd).maxElement(); 1413 return true; 1414 } 1415 else 1416 { 1417 bool result = false; 1418 foreach (ref c; p.children) 1419 { 1420 if (c.failEnd != failEnd) 1421 continue; 1422 if (failedChildFixup(c, failEnd)) 1423 { 1424 p.end = c.end; 1425 p.successful = false; 1426 p.failEnd = p.children.map!(c => c.failEnd).maxElement(); 1427 result = true; 1428 } 1429 } 1430 return result; 1431 } 1432 } 1433 1434 unittest // 'and' unit test 1435 { 1436 alias literal!"abc" abc; 1437 alias literal!"de" de; 1438 alias literal!"f" f; 1439 1440 alias and!(abc) abcAnd; 1441 alias and!(abc,de) abcde; 1442 alias and!(abc,de,f) abcdef; 1443 alias and!(eps, abc, eps, de, eps, f, eps) withEps; 1444 1445 //assert(getName!(abcAnd)() == `and!(literal!("abc"))`); 1446 //assert(getName!(abcde)() == `and!(literal!("abc"), literal!("de"))`); 1447 //assert(getName!(abcdef)() == `and!(literal!("abc"), literal!("de"), literal!("f"))`); 1448 1449 ParseTree input = ParseTree("",false,[], "abcdefghi"); 1450 1451 ParseTree result = abcAnd(input); 1452 1453 assert(result.successful, "and!('abc') parses 'abcdefghi'"); 1454 assert(result.matches == ["abc"], "and!('abc') matches 'abc' at the beginning of 'abcdefghi'"); 1455 assert(result.end == input.end+3, "and!('abc') advances the index by 'abc' size (3)."); 1456 assert(result.children == [abc(input)], "and!('abc') has one child: the one created by 'abc'."); 1457 1458 result = abcde(input); 1459 1460 assert(result.successful, "and!('abc','de') parses 'abcdefghi'"); 1461 assert(result.matches == ["abc","de"], 1462 "and!('abc','de') matches 'abc' and 'de' at the beginning of 'abcdefghi'"); 1463 assert(result.end == input.end+5, "and!('abc','de') advances the index by 3+2 positions."); 1464 assert(result.children == [abc(input), de(abc(input))], 1465 "and!('abc','de') has two children, created by 'abc' and 'de'."); 1466 1467 result = abcdef(input); 1468 1469 assert(result.successful, "and!('abc','de','f') parses 'abcdefghi'"); 1470 assert(result.matches == ["abc","de","f"], 1471 "and!('abc','de','f') matches 'abcdef' at the beginning of 'abcdefghi'"); 1472 assert(result.end == input.end+6, "and!('abc','de','f') advances the index by 3+2+1 positions."); 1473 assert(result.children == [abc(input), de(abc(input)), f(de(abc(input)))] 1474 , "and!('abc','de') has two children, created by 'abc' and 'de'."); 1475 1476 result = withEps(input); 1477 1478 assert(result.successful, "and!('','abc','','de','','f','') parses 'abcdefghi'"); 1479 assert(result.matches == ["","abc","","de","","f",""], 1480 "and!('','abc','','de','','f','') matches 'abcdef' at the beginning of 'abcdefghi'"); 1481 assert(result.end == input.end+6, 1482 "and!('','abc','','de','','f','') advances the index by 0+3+0+2+0+1+0 positions."); 1483 1484 input.input = "bcdefghi"; 1485 1486 result = abcdef(input); 1487 1488 assert(!result.successful, "'abc' 'de' 'f' fails on 'bcdefghi'"); 1489 //assert(result.matches is null, "'abc' 'de' 'f' has no match on 'bcdefghi'"); 1490 assert(result.end == input.end); 1491 assert(result.children == [abc(input)], "'abc' 'de' 'f' has one child (a failure) on 'bcdefghi'"); 1492 1493 input.input = "abc_efghi"; 1494 1495 result = abcdef(input); 1496 1497 assert(!result.successful, "'abc' 'de' 'f' fails on 'abc_efghi'"); 1498 //assert(result.matches == ["abc"], "Only the first match, from 'abc'."); 1499 assert(result.end == input.end+3, "Advances by 3 positions, due to 'abc'"); 1500 assert(result.children == [abc(input), de(abc(input))] 1501 , "'abc' 'de' 'f' has two child on 'abc_efghi', the one from 'abc' (success) and the one from 'de' (failure)."); 1502 } 1503 1504 version (unittest) { 1505 static ParseTree getError(ref ParseTree p) { 1506 if (p.children.length > 0) 1507 return getError(p.children[$-1]); 1508 return p; 1509 } 1510 } 1511 1512 unittest // 'and' unit test with zeroOrMore and longest failing match 1513 { 1514 alias literal!"abc" A; 1515 alias literal!"def" B; 1516 alias literal!"ghi" C; 1517 1518 alias and!(zeroOrMore!(and!(A,B)), C) Thing; 1519 1520 ParseTree input = ParseTree("",false,[], "abc"); 1521 ParseTree result = Thing(input); 1522 1523 assert(!result.successful); 1524 assert(getError(result).matches[$-1] == "\"def\"", "and!(zeroOrMore!(and!(literal!\"abc\", literal!\"def\")), literal!\"ghi\") should expected def when input is \"abc\""); 1525 assert(result.matches == []); 1526 } 1527 1528 unittest // 'and' unit test with option and longest failing match 1529 { 1530 alias literal!"abc" A; 1531 alias literal!"def" B; 1532 alias literal!"ghi" C; 1533 1534 alias and!(option!(and!(A,B)), C) Thing; 1535 1536 ParseTree input = ParseTree("",false,[], "abc"); 1537 ParseTree result = Thing(input); 1538 1539 assert(!result.successful); 1540 assert(getError(result).matches[$-1] == "\"def\"", "and!(option!(and!(literal!\"abc\", literal!\"def\")), literal!\"ghi\") should expected def when input is \"abc\""); 1541 assert(result.matches == []); 1542 } 1543 1544 unittest // 'and' unit test with oneOrMore and longest failing match 1545 { 1546 alias literal!"abc" A; 1547 alias literal!"def" B; 1548 alias literal!"ghi" C; 1549 1550 alias and!(oneOrMore!(and!(A,B)), C) Thing; 1551 1552 ParseTree input = ParseTree("",false,[], "abcdefabc"); 1553 ParseTree result = Thing(input); 1554 1555 assert(!result.successful); 1556 assert(getError(result).matches[$-1] == "\"def\"", "and!(oneOrMore!(and!(literal!\"abc\", literal!\"def\")), literal!\"ghi\") should expected def when input is \"abcdefabc\""); 1557 assert(result.matches == ["abc", "def"]); 1558 } 1559 1560 template wrapAround(alias before, alias target, alias after) 1561 { 1562 ParseTree wrapAround(ParseTree p) 1563 { 1564 ParseTree temp = before(p); 1565 if (!temp.successful) 1566 return temp; 1567 1568 ParseTree result = target(temp); 1569 if (!result.successful) 1570 return result; 1571 result.begin = temp.begin; 1572 1573 temp = after(result); 1574 if (!temp.successful) 1575 return temp; 1576 1577 result.end = temp.end; 1578 return result; 1579 } 1580 1581 ParseTree wrapAround(string input) 1582 { 1583 return .wrapAround!(before, target, after)(ParseTree("",false,[],input)); 1584 } 1585 1586 string wrapAround(GetName g) 1587 { 1588 return "wrapAround!(" ~ getName!(before)() ~ 1589 ", " ~ getName!(target)() ~ 1590 ", " ~ getName!(after)() ~ ")"; 1591 } 1592 } 1593 1594 /** 1595 Basic operator: it matches if one of its subrules (stored in the rules template parameter tuple) match 1596 the input. The subrules are tested in order, from rules[0] to rules[$-1]. 1597 1598 The matching subrule parse trees is stored as its only child and its matches field 1599 will contain all the subrule matches, in order. 1600 1601 ---- 1602 alias or!(literal!"abc", charRange!('a','z')) rule; // abc or, failing that, any letter between a and z. 1603 ParseTree input = ParseTree("",false,[],"defg"); // low-level plumbing, the rules described here act on ParseTree's not strings. 1604 // It's equivalent to "defg" as input 1605 auto result = rule(input); 1606 1607 assert(result.successful); // OK 1608 assert(result.matches == ["d"]); // stores the (in this case) only match 1609 assert(result.children.length == 1); // one child, the result of "abc" or [a-z], depending on which rule succeeded. 1610 1611 input.input = "abc"; // changing the input string; 1612 assert(rule(input)).successful); // Still OK 1613 input.input = "1abc"; 1614 assert(!rule(input)).successful); // NOK, does not begin by abc nor by [a-z] 1615 ---- 1616 1617 If it fails, the last children will contain the failed node that matched furthest (longes match). 1618 That way, when printing, as sort of diagnostic is given: 1619 1620 ---- 1621 alias or!(literal!"abc", and!(literal!"ab", charRange!('0','9'))) rule; // 'abc' or 'ab[0-9]' 1622 ParseTree input = ParseTree("",false,[],"abd"); // equivalent to "abd" 1623 1624 auto failure = rule(input); 1625 writeln(failure); 1626 /+ 1627 or (failure) 1628 +-and (failure) 1629 +-literal(ab) [0, 2]["ab"] 1630 +-charRange(0,9) failure at line 0, col 2, after "ab" expected a char between '0' and '9', but got "d" 1631 +/ 1632 ---- 1633 1634 So we know 'or' failed, that the 'and' sub-rule had the longest match, matching 'ab' and failing for [0-9] on index 2. 1635 */ 1636 template or(rules...) if (rules.length > 0) 1637 { 1638 string ctfeGetNameOr() 1639 { 1640 string name = "or!("; 1641 foreach(i,rule; rules) 1642 name ~= getName!(rule) 1643 ~ (i < rules.length -1 ? ", " : ""); 1644 name ~= ")"; 1645 return name; 1646 } 1647 1648 enum name = ctfeGetNameOr(); 1649 1650 ParseTree or(ParseTree p) 1651 { 1652 // error-management 1653 ParseTree longestFail = ParseTree(name, false, [], p.input, p.end, 0); 1654 string[] errorStrings; 1655 size_t errorStringChars; 1656 string orErrorString; 1657 1658 ParseTree[rules.length] results; 1659 string[rules.length] names; 1660 size_t[rules.length] failedLength; 1661 size_t maxFailedLength; 1662 1663 version (tracer) 1664 { 1665 incTraceLevel(); 1666 } 1667 1668 // Real 'or' loop 1669 foreach(i,r; rules) 1670 { 1671 version (tracer) 1672 { 1673 if (shouldTrace(getName!(r)(), p)) 1674 trace(traceMsg(p, name, getName!(r)())); 1675 } 1676 ParseTree temp = r(p); 1677 if (temp.successful) 1678 { 1679 temp.children = [temp]; 1680 temp.name = name; 1681 // if there is a child that failed but parsed more 1682 if (longestFail.failEnd > temp.end) { 1683 temp.failEnd = longestFail.failEnd; 1684 temp.failedChild = [longestFail]; 1685 } 1686 version (tracer) 1687 { 1688 if (shouldTrace(getName!(r)(), p)) 1689 trace(traceResultMsg(temp, getName!(r)())); 1690 decTraceLevel(); 1691 } 1692 return temp; 1693 } 1694 else 1695 { 1696 version (tracer) 1697 { 1698 if (shouldTrace(getName!(r)(), p)) 1699 trace(traceResultMsg(temp, getName!(r)())); 1700 } 1701 enum errName = " (" ~ getName!(r)() ~")"; 1702 failedLength[i] = temp.end; 1703 if (temp.end >= longestFail.end) 1704 { 1705 if (temp.end == longestFail.end) 1706 errorStringChars += (temp.matches.length > 0 ? temp.matches[$-1].length : 0) + errName.length + 4; 1707 else 1708 errorStringChars = (temp.matches.length > 0 ? temp.matches[$-1].length : 0) + errName.length + 4; 1709 maxFailedLength = temp.end; 1710 longestFail = temp; 1711 names[i] = errName; 1712 results[i] = temp; 1713 1714 } 1715 // Else, this error parsed less input than another one: we discard it. 1716 } 1717 } 1718 version (tracer) 1719 { 1720 decTraceLevel(); 1721 } 1722 1723 1724 // All subrules failed, we will take the longest match as the result 1725 // If more than one node failed at the same (farthest) position, we concatenate their error messages 1726 1727 1728 char[] errString;// = new char[](errorStringChars); 1729 errString.length = errorStringChars; 1730 uint start = 0; 1731 foreach(i; 0..rules.length) 1732 { 1733 if (failedLength[i] == maxFailedLength && results[i].matches.length > 0) 1734 { 1735 auto temp = results[i]; 1736 auto len = temp.matches[$-1].length; 1737 auto nlen = names[i].length; 1738 errString[start .. start+len] = temp.matches[$-1][]; 1739 errString[start+len .. start+len+names[i].length] = names[i][]; 1740 errString[start+len+nlen .. start+len+nlen+4] = " or "; 1741 start += len + names[i].length + 4; 1742 } 1743 } 1744 orErrorString = cast(string)(errString[0..$-4]); 1745 1746 longestFail.matches = longestFail.matches.length == 0 ? [orErrorString] : 1747 longestFail.matches[0..$-1] // discarding longestFail error message 1748 ~ [orErrorString]; // and replacing it by the new, concatenated one. 1749 auto children = results[].getUpto(maxFailedLength); 1750 return ParseTree(name, false, longestFail.matches, p.input, p.end, longestFail.end, children, children.maxFailEnd); 1751 } 1752 1753 ParseTree or(string input) 1754 { 1755 return .or!(rules)(ParseTree("",false,[],input)); 1756 } 1757 1758 string or(GetName g) 1759 { 1760 return name; 1761 } 1762 } 1763 1764 auto getUpto(ParseTree[] children, size_t minFailedLength) { 1765 return children.filter!(r => max(r.end, r.failEnd) >= minFailedLength).array(); 1766 } 1767 1768 unittest // 'or' unit test 1769 { 1770 alias charRange!('a','b') ab; 1771 alias charRange!('c','d') cd; 1772 1773 alias or!(ab) abOr; 1774 alias or!(ab,cd) abOrcd; 1775 1776 assert(getName!(ab)() == "charRange!('a','b')"); 1777 assert(getName!(cd)() == "charRange!('c','d')"); 1778 1779 ParseTree input = ParseTree("",false,[], "abcdefghi"); 1780 1781 ParseTree result = abOr(input); 1782 1783 assert(result.name == "or!(charRange!('a','b'))", "or name test."); 1784 assert(result.successful, "or!([a-b]) parses 'abcdefghi'"); 1785 assert(result.matches == ["a"], "or!([a-b]) matches 'a' at the beginning of 'abcdefghi'"); 1786 assert(result.end == input.end+1, "or!([a-b]) advances the index by 'a' size (1)."); 1787 assert(result.children == [ab(input)], "or!([a-b]) has one child: the one created by '[a-b]'."); 1788 1789 result = abOrcd(input); 1790 1791 assert(result.name == "or!(charRange!('a','b'), charRange!('c','d'))", "or name test."); 1792 assert(result.successful, "or!([a-b],[c-d]) parses 'abcdefghi'"); 1793 assert(result.matches == ["a"], "or!([a-b],[c-d]) matches 'a' at the beginning of 'abcdefghi'"); 1794 assert(result.end == input.end+1, "or!([a-b],[c-d]) advances the index by 1 position."); 1795 1796 assert(result.children == [ab(input)], "or!([a-b],[c-d]) has one child, created by [a-b]."); 1797 1798 input.input = "cdefghi"; 1799 1800 result = abOrcd(input); 1801 1802 assert(result.name == "or!(charRange!('a','b'), charRange!('c','d'))", "or name test."); 1803 assert(result.successful, "or!([a-b],[c-d]) parses 'cdefghi'"); 1804 assert(result.matches == ["c"], "or!([a-b],[c-d]) matches 'c' at the beginning of 'cdefghi'"); 1805 assert(result.end == input.end+1, "or!([a-b],[c-d]) advances the index by 1 position."); 1806 1807 assert(result.children == [cd(input)], "or!([a-b],[c-d]) has one child, created by [c-d]."); 1808 1809 input.input = "_abcdefghi"; 1810 1811 result = abOrcd(input); 1812 1813 assert(!result.successful, "or!([a-b],[c-d]) fails on '_abcdefghi'"); 1814 assert(result.end == input.end+0, "or!([a-b],[c-d]) does not advance the index."); 1815 assert(result.matches == 1816 [ "a char between 'a' and 'b' (charRange!('a','b')) or a char between 'c' and 'd' (charRange!('c','d'))"] 1817 , "or!([a-b],[c-d]) error message. |" ~ result.matches[0]~ "|"); 1818 1819 input.input = ""; 1820 1821 result = abOrcd(input); 1822 1823 assert(!result.successful, "or!([a-b],[c-d]) fails on and empty input"); 1824 assert(result.end == input.end+0, "or!([a-b],[c-d]) does not advance the index."); 1825 assert(result.matches == 1826 [ "a char between 'a' and 'b' (charRange!('a','b')) or a char between 'c' and 'd' (charRange!('c','d'))"] 1827 , "or!([a-b],[c-d]) error message."); 1828 } 1829 1830 /** 1831 Basic operator: it matches if one of its subrules (stored in the rules template parameter tuple) match 1832 the input. All subrules are tested and the one producing the longest match is taken. 1833 1834 The longest matching subrule's parse tree is stored as its only child and its matches field 1835 will contain all the subrule matches, in order. 1836 */ 1837 template longest_match(rules...) if (rules.length > 0) 1838 { 1839 string ctfeGetNameOr() 1840 { 1841 string name = "longest_match!("; 1842 foreach(i,rule; rules) 1843 name ~= getName!(rule) 1844 ~ (i < rules.length -1 ? ", " : ""); 1845 name ~= ")"; 1846 return name; 1847 } 1848 1849 enum name = ctfeGetNameOr(); 1850 1851 ParseTree longest_match(ParseTree p) 1852 { 1853 // error-management 1854 ParseTree longest, longestFail = ParseTree(name, false, [], p.input, p.end, 0); 1855 string[] errorStrings; 1856 size_t errorStringChars; 1857 string orErrorString; 1858 1859 ParseTree[rules.length] results; 1860 string[rules.length] names; 1861 size_t[rules.length] failedLength; 1862 size_t maxFailedLength; 1863 1864 version (tracer) 1865 { 1866 incTraceLevel(); 1867 } 1868 1869 // Real 'longest_match' loop 1870 foreach(i,r; rules) 1871 { 1872 version (tracer) 1873 { 1874 if (shouldTrace(getName!(r)(), p)) 1875 trace(traceMsg(p, name, getName!(r)())); 1876 } 1877 ParseTree temp = r(p); 1878 version (tracer) 1879 { 1880 if (shouldTrace(getName!(r)(), p)) 1881 trace(traceResultMsg(temp, getName!(r)())); 1882 } 1883 1884 if (temp.successful) 1885 { 1886 if (temp.end > longest.end) 1887 longest = temp; 1888 // Else, this rule parsed less input than another one: we discard it. 1889 } 1890 else 1891 { 1892 enum errName = " (" ~ getName!(r)() ~")"; 1893 failedLength[i] = temp.end; 1894 if (temp.end >= longestFail.end) 1895 { 1896 maxFailedLength = temp.end; 1897 longestFail = temp; 1898 names[i] = errName; 1899 results[i] = temp; 1900 1901 if (temp.end == longestFail.end) 1902 errorStringChars += (temp.matches.length > 0 ? temp.matches[$-1].length : 0) + errName.length + 4; 1903 else 1904 errorStringChars = (temp.matches.length > 0 ? temp.matches[$-1].length : 0) + errName.length + 4; 1905 } 1906 // Else, this error parsed less input than another one: we discard it. 1907 } 1908 } 1909 version (tracer) 1910 { 1911 decTraceLevel(); 1912 } 1913 if (longest.successful) 1914 { 1915 longest.children = [longest]; 1916 longest.name = name; 1917 return longest; 1918 } 1919 1920 // All subrules failed, we will take the longest match as the result 1921 // If more than one node failed at the same (farthest) position, we concatenate their error messages 1922 1923 char[] errString;// = new char[](errorStringChars); 1924 errString.length = errorStringChars; 1925 uint start = 0; 1926 foreach(i; 0..rules.length) 1927 { 1928 if (failedLength[i] == maxFailedLength && results[i].matches.length > 0) 1929 { 1930 auto temp = results[i]; 1931 auto len = temp.matches[$-1].length; 1932 auto nlen = names[i].length; 1933 errString[start .. start+len] = temp.matches[$-1][]; 1934 errString[start+len .. start+len+names[i].length] = names[i][]; 1935 errString[start+len+nlen .. start+len+nlen+4] = " or "; 1936 start += len + names[i].length + 4; 1937 } 1938 } 1939 orErrorString = cast(string)(errString[0..$-4]); 1940 1941 longestFail.matches = longestFail.matches.length == 0 ? [orErrorString] : 1942 longestFail.matches[0..$-1] // discarding longestFail error message 1943 ~ [orErrorString]; // and replacing it by the new, concatenated one. 1944 longestFail.name = name; 1945 longestFail.begin = p.end; 1946 return longestFail; 1947 } 1948 1949 ParseTree longest_match(string input) 1950 { 1951 return .or!(rules)(ParseTree("",false,[],input)); 1952 } 1953 1954 string longest_match(GetName g) 1955 { 1956 return name; 1957 } 1958 } 1959 1960 unittest // 'longest_match' unit test 1961 { 1962 alias charRange!('a','b') ab; 1963 alias charRange!('c','d') cd; 1964 1965 alias longest_match!(ab) abOr; 1966 alias longest_match!(ab,cd) abOrcd; 1967 1968 assert(getName!(ab)() == "charRange!('a','b')"); 1969 assert(getName!(cd)() == "charRange!('c','d')"); 1970 1971 // These are the same tests as for 'or': 1972 1973 ParseTree input = ParseTree("",false,[], "abcdefghi"); 1974 1975 ParseTree result = abOr(input); 1976 1977 assert(result.name == "longest_match!(charRange!('a','b'))", "or name test."); 1978 assert(result.successful, "longest_match!([a-b]) parses 'abcdefghi'"); 1979 assert(result.matches == ["a"], "longest_match!([a-b]) matches 'a' at the beginning of 'abcdefghi'"); 1980 assert(result.end == input.end+1, "longest_match!([a-b]) advances the index by 'a' size (1)."); 1981 assert(result.children == [ab(input)], "longest_match!([a-b]) has one child: the one created by '[a-b]'."); 1982 1983 result = abOrcd(input); 1984 1985 assert(result.name == "longest_match!(charRange!('a','b'), charRange!('c','d'))", "or name test."); 1986 assert(result.successful, "longest_match!([a-b],[c-d]) parses 'abcdefghi'"); 1987 assert(result.matches == ["a"], "longest_match!([a-b],[c-d]) matches 'a' at the beginning of 'abcdefghi'"); 1988 assert(result.end == input.end+1, "longest_match!([a-b],[c-d]) advances the index by 1 position."); 1989 1990 assert(result.children == [ab(input)], "longest_match!([a-b],[c-d]) has one child, created by [a-b]."); 1991 1992 input.input = "cdefghi"; 1993 1994 result = abOrcd(input); 1995 1996 assert(result.name == "longest_match!(charRange!('a','b'), charRange!('c','d'))", "or name test."); 1997 assert(result.successful, "longest_match!([a-b],[c-d]) parses 'cdefghi'"); 1998 assert(result.matches == ["c"], "longest_match!([a-b],[c-d]) matches 'c' at the beginning of 'cdefghi'"); 1999 assert(result.end == input.end+1, "longest_match!([a-b],[c-d]) advances the index by 1 position."); 2000 2001 assert(result.children == [cd(input)], "longest_match!([a-b],[c-d]) has one child, created by [c-d]."); 2002 2003 input.input = "_abcdefghi"; 2004 2005 result = abOrcd(input); 2006 2007 assert(!result.successful, "longest_match!([a-b],[c-d]) fails on '_abcdefghi'"); 2008 assert(result.end == input.end+0, "longest_match!([a-b],[c-d]) does not advance the index."); 2009 assert(result.matches == 2010 [ "a char between 'a' and 'b' (charRange!('a','b')) or a char between 'c' and 'd' (charRange!('c','d'))"] 2011 , "longest_match!([a-b],[c-d]) error message. |" ~ result.matches[0]~ "|"); 2012 2013 input.input = ""; 2014 2015 result = abOrcd(input); 2016 2017 assert(!result.successful, "longest_match!([a-b],[c-d]) fails on and empty input"); 2018 assert(result.end == input.end+0, "longest_match!([a-b],[c-d]) does not advance the index."); 2019 assert(result.matches == 2020 [ "a char between 'a' and 'b' (charRange!('a','b')) or a char between 'c' and 'd' (charRange!('c','d'))"] 2021 , "longest_match!([a-b],[c-d]) error message."); 2022 2023 // Now test longest match behaviour: 2024 2025 input.input = "aaaccccb"; 2026 result = or!(oneOrMore!(literal!("a")), and!(oneOrMore!(literal!("a")), literal!("cc")))(input); 2027 assert(result.matches == ["a", "a", "a"], "or! takes the first matching rule."); 2028 result = longest_match!(oneOrMore!(literal!("a")), and!(oneOrMore!(literal!("a")), literal!("cc")))(input); 2029 assert(result.matches == ["a", "a", "a", "cc"], "longest_match! takes the longest matching rule."); 2030 } 2031 2032 2033 /** 2034 Compile-time switch trie from Brian Schott 2035 */ 2036 class Trie(V) : TrieNode!(V) 2037 { 2038 /** 2039 * Adds the given value to the trie with the given key 2040 */ 2041 void add(string key, V value) pure 2042 { 2043 TrieNode!(V) current = this; 2044 foreach(dchar keyPart; key) 2045 { 2046 if ((keyPart in current.children) is null) 2047 { 2048 auto node = new TrieNode!(V); 2049 current.children[keyPart] = node; 2050 current = node; 2051 } 2052 else 2053 current = current.children[keyPart]; 2054 } 2055 current.value = value; 2056 } 2057 } 2058 2059 class TrieNode(V) 2060 { 2061 V value; 2062 TrieNode!(V)[dchar] children; 2063 } 2064 2065 string printCaseStatements(V)(TrieNode!(V) node, string indentString) 2066 { 2067 string s = ""; 2068 string idnt = indentString; 2069 2070 void incIndent() { idnt ~= " "; } 2071 void decIndent() { idnt = idnt[2..$]; } 2072 2073 void put(string k) { s ~= idnt ~ k; } 2074 void append(string k) { s ~= k;} 2075 2076 foreach(dchar k, TrieNode!(V) v; node.children) 2077 { 2078 put("case '"); 2079 switch(k) 2080 { 2081 case '\n': append("\\n"); break; 2082 case '\t': append("\\t"); break; 2083 case '\r': append("\\r"); break; 2084 case 92: append("\\"); break; 2085 default: append(to!string(k)); 2086 } 2087 append("':\n"); 2088 incIndent(); 2089 2090 put("temp.end++;\n"); 2091 if (v.children.length > 0) 2092 { 2093 put("if (temp.end >= temp.input.length)\n"); 2094 put("{\n"); 2095 incIndent(); 2096 2097 if (node.children[k].value.length != 0) 2098 put("return ParseTree(name, true, [`" ~ node.children[k].value ~ "`], temp.input, p.end, temp.end)"); 2099 else 2100 put("return ParseTree(name, false, [failString], p.input, p.end, p.end)"); 2101 2102 append(";\n"); 2103 decIndent(); 2104 2105 put("}\n"); 2106 put("switch (temp.input[temp.end])\n"); 2107 put("{\n"); 2108 2109 incIndent(); 2110 append(printCaseStatements(v, idnt)); 2111 2112 put("default:\n"); 2113 incIndent(); 2114 if (v.value.length != 0) 2115 put("return ParseTree(name, true, [`" ~ v.value ~ "`], temp.input, p.end, temp.end)"); 2116 else 2117 put("return ParseTree(name, false, [failString], p.input, p.end, p.end)"); 2118 append(";\n"); 2119 decIndent(); 2120 decIndent(); 2121 put("}\n"); 2122 } 2123 else 2124 { 2125 if (v.value.length != 0) 2126 put("return ParseTree(name, true, [`" ~ v.value ~ "`], temp.input, p.end, temp.end)"); 2127 else 2128 put("return ParseTree(name, false, [failString], p.input, p.end, p.end)"); 2129 append(";\n"); 2130 } 2131 } 2132 return s; 2133 } 2134 2135 string generateCaseTrie(string[] args ...) 2136 { 2137 auto t = new Trie!(string); 2138 foreach(arg; args) 2139 { 2140 t.add(arg, arg); 2141 } 2142 return printCaseStatements(t, ""); 2143 } 2144 2145 /** 2146 or special case for literal list ("abstract"/"alias"/...) 2147 */ 2148 template keywords(kws...) if (kws.length > 0) 2149 { 2150 string ctfeGetNameKeywords() 2151 { 2152 string name= "keywords!("; 2153 foreach(i,kw;kws) 2154 name ~= "\"" ~ kw ~ "\""~ (i < kws.length -1 ? ", " : ""); 2155 name ~= ")"; 2156 return name; 2157 } 2158 2159 string ctfeConcatKeywords() 2160 { 2161 string s = "["; 2162 foreach(i, k; kws) 2163 { 2164 s ~= "\"" ~ k ~ "\""; 2165 if (i < kws.length - 1) 2166 s ~= ", "; 2167 } 2168 return s ~= "]"; 2169 } 2170 2171 enum name = ctfeGetNameKeywords(); 2172 enum failString = "one among " ~ ctfeConcatKeywords(); 2173 2174 ParseTree keywords(ParseTree p) 2175 { 2176 string keywordCode(string[] keywords) 2177 { 2178 string result; 2179 foreach(kw; keywords) 2180 result ~= "if (p.end+"~to!string(kw.length) ~ " <= p.input.length " 2181 ~" && p.input[p.end..p.end+"~to!string(kw.length)~"]==`" 2182 ~kw~"`) return ParseTree(`" 2183 ~name~"`,true,[`"~kw~"`],p.input,p.end,p.end+" 2184 ~to!string(kw.length)~");\n"; 2185 2186 result ~= "return ParseTree(`"~name~"`,false,[`" ~ failString ~ "`],p.input,p.end,p.end);"; 2187 2188 return result; 2189 } 2190 2191 static if (KEYWORDS == IFCHAIN) 2192 { 2193 mixin(keywordCode([kws])); 2194 } 2195 else static if (KEYWORDS == TRIE) 2196 { 2197 auto temp = p; 2198 if (p.end < p.input.length) // is this conditional required? 2199 { 2200 switch(p.input[p.end]) 2201 { 2202 mixin(generateCaseTrie([kws])); 2203 mixin("default: return ParseTree(`"~name~"`,false,[`" ~ failString ~ "`],p.input,p.end,p.end);"); 2204 } 2205 } 2206 else 2207 { 2208 mixin("return ParseTree(`"~name~"`,false,[`" ~ failString ~ "`],p.input,p.end,p.end);"); 2209 } 2210 } 2211 } 2212 2213 ParseTree keywords(string input) 2214 { 2215 return .keywords!(kws)(ParseTree("",false,[],input)); 2216 } 2217 2218 string keywords(GetName g) 2219 { 2220 return name; 2221 } 2222 } 2223 2224 unittest 2225 { 2226 alias keywords!("abc","de","f") kw; 2227 2228 assert(getName!(kw)() == `keywords!("abc", "de", "f")`); 2229 2230 ParseTree input = ParseTree("",false,[],"abcd"); 2231 2232 ParseTree result = kw(input); 2233 2234 assert(result.name == `keywords!("abc", "de", "f")`, "keywords name test."); 2235 assert(result.successful, "keywords success on `abcd`"); 2236 assert(result.matches == ["abc"], "keywords matches `abc` on `abcd`"); 2237 assert(result.end == input.end+3, "keywords advances the index by 3 positions."); 2238 assert(result.children is null, "No children for `keywords`."); 2239 2240 input.input = "def"; 2241 2242 result = kw(input); 2243 2244 assert(result.successful, "keywords success on `def`"); 2245 assert(result.matches == ["de"], "keywords matches `de` on `def`"); 2246 assert(result.end == input.end+2, "keywords advances the index by 2 positions."); 2247 assert(result.children is null, "No children for `keywords`."); 2248 2249 2250 input.input = "ab_def"; 2251 2252 result = kw(input); 2253 2254 assert(!result.successful, "keywords fails on `ab_def`."); 2255 assert(result.matches == [`one among ["abc", "de", "f"]`], "keywords error message." ~ result.matches[0]); 2256 assert(result.end == input.end, "keywords does not advance the index."); 2257 assert(result.children is null, "No children for `keywords`."); 2258 2259 input.input = ""; 2260 2261 result = kw(input); 2262 2263 assert(!result.successful, "keywords fails on an empty input."); 2264 assert(result.matches == [`one among ["abc", "de", "f"]`], "keywords error message."); 2265 assert(result.end == input.end, "keywords does not advance the index."); 2266 assert(result.children is null, "No children for `keywords`."); 2267 } 2268 2269 2270 /** 2271 Tries to match subrule 'r' zero or more times. It always succeeds, since if 'r' fails 2272 from the very beginning, it matched 'r' zero times... 2273 2274 Its matches are those of its subrules (they might be different for each match) and its 2275 children are all the parse trees returned by the successive application of 'r'. 2276 2277 ---- 2278 alias zeroOrMore!(or!(literal!"abc", literal!"d")) rule; // in PEG-speak: '("abc" / "d")*' 2279 ParseTree input = ParseTree("",false,[], "abcdabce"); 2280 2281 ParseTree result = rule(input); 2282 2283 assert(result.successful); 2284 assert(result.matches == ["abc", "d", "abc"]); 2285 assert(result.end == 7); // matched "abcdabce"[0..7] => "abcdabc". "e" at the end is not part of the parse tree. 2286 assert(result.children.length == 3); 2287 writeln(result); 2288 /+ 2289 writes: 2290 zeroOrMore [0, 7]["abc", "d", "abc"] 2291 +-or [0, 3]["abc"] 2292 | +-literal(abc) [0, 3]["abc"] 2293 +-or [3, 4]["d"] 2294 | +-literal(d) [3, 4]["d"] 2295 +-or [4, 7]["abc"] 2296 +-literal(abc) [4, 7]["abc"] 2297 +/ 2298 ---- 2299 2300 So we know the first child used the 'literal!"abc"' sub-rule and matched input[0..3]. 2301 The second matched input[3..4] and the third input[4..7]. 2302 2303 ---- 2304 input = ParseTree("",false,[], "efgh"); 2305 result = rule(input); 2306 assert(result.successful); // succeed, even though all patterns failed. 2307 assert(result.children.length == 0); 2308 ---- 2309 */ 2310 template zeroOrMore(alias r) 2311 { 2312 enum name = "zeroOrMore!(" ~ getName!(r) ~ ")"; 2313 2314 ParseTree zeroOrMore(ParseTree p) 2315 { 2316 auto result = ParseTree(name, true, [], p.input, p.end, p.end); 2317 version (tracer) 2318 { 2319 incTraceLevel(); 2320 if (shouldTrace(getName!(r)(), p)) 2321 trace(traceMsg(result, name, getName!(r)())); 2322 } 2323 auto temp = r(result); 2324 while(temp.successful 2325 && (temp.begin < temp.end // To avoid infinite loops on epsilon-matching rules 2326 || temp.name.startsWith("discard!("))) 2327 { 2328 result.matches ~= temp.matches; 2329 result.children ~= temp; 2330 result.end = temp.end; 2331 result.failEnd = max(result.failEnd, temp.failEnd); 2332 version (tracer) 2333 { 2334 if (shouldTrace(getName!(r)(), p)) 2335 trace(traceMsg(result, name, getName!(r)())); 2336 } 2337 temp = r(result); 2338 } 2339 auto maxFail = max(temp.failEnd, temp.end); 2340 if (maxFail > result.failEnd && maxFail > result.end) { 2341 result.failedChild = [temp]; 2342 result.failEnd = maxFail; 2343 } 2344 result.successful = true; 2345 version (tracer) 2346 { 2347 if (shouldTrace(getName!(r)(), p)) 2348 trace(traceResultMsg(result, getName!(r)())); 2349 decTraceLevel(); 2350 } 2351 return result; 2352 } 2353 2354 ParseTree zeroOrMore(string input) 2355 { 2356 return .zeroOrMore!(r)(ParseTree("",false,[],input)); 2357 } 2358 2359 string zeroOrMore(GetName g) 2360 { 2361 return name; 2362 } 2363 } 2364 2365 unittest // 'zeroOrMore' unit test 2366 { 2367 alias literal!"a" a; 2368 alias literal!"abc" abc; 2369 alias charRange!('a','z') az; 2370 2371 alias zeroOrMore!(a) as; 2372 alias zeroOrMore!(abc) abcs; 2373 alias zeroOrMore!(az) azs; 2374 2375 assert(getName!(as)() == `zeroOrMore!(literal!("a"))`); 2376 assert(getName!(abcs)() == `zeroOrMore!(literal!("abc"))`); 2377 assert(getName!(azs)() == `zeroOrMore!(charRange!('a','z'))`); 2378 2379 assert(as("").successful); 2380 assert(as("a").successful); 2381 assert(as("aa").successful); 2382 assert(as("aaa").successful); 2383 assert(as("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa").successful); 2384 assert(as("b").successful); 2385 2386 ParseTree result = as("aaa"); 2387 2388 assert(result.name == `zeroOrMore!(literal!("a"))`); 2389 assert(result.successful); 2390 assert(result.matches == ["a","a","a"]); 2391 assert(result.begin == 0); 2392 assert(result.end == 3); 2393 assert(result.children.length == 3); 2394 assert(result.children == [ a("aaa"), a(a("aaa")), a(a(a("aaa")))]); 2395 2396 assert(abcs("").successful); 2397 assert(abcs("abc").successful); 2398 assert(abcs("abcabc").successful); 2399 assert(abcs("abcabcabc").successful); 2400 assert(abcs("abcabcabcabcabcabcabcabcabcabcabcabcabcabcabc").successful); 2401 assert(abcs("ab").successful); 2402 2403 result = abcs("abcabcabc"); 2404 2405 assert(result.name == `zeroOrMore!(literal!("abc"))`); 2406 assert(result.successful); 2407 assert(result.matches == ["abc","abc","abc"]); 2408 assert(result.begin == 0); 2409 assert(result.end == 3*3); 2410 assert(result.children.length == 3); 2411 assert(result.children == [ abc("abcabcabc"), abc(abc("abcabcabc")), abc(abc(abc("abcabcabc")))]); 2412 2413 assert(azs("").successful); 2414 assert(azs("a").successful); 2415 assert(azs("abc").successful); 2416 assert(azs("abcdefghijklmnoqrstuvwxyz").successful); 2417 assert(azs("abcdefghijklmnoqrstuvwxyz 1234567890").successful); 2418 2419 result = azs("abc"); 2420 2421 assert(result.name == `zeroOrMore!(charRange!('a','z'))`); 2422 assert(result.successful); 2423 assert(result.matches == ["a","b","c"]); 2424 assert(result.begin == 0); 2425 assert(result.end == 3); 2426 assert(result.children.length == 3); 2427 assert(result.children == [ az("abc"), az(az("abc")), az(az(az("abc")))]); 2428 } 2429 2430 /** 2431 Tries to match subrule 'r' one or more times. If 'r' fails 2432 from the very beginning, it fails and else succeeds. 2433 2434 Its matches are those of its subrules (they might be different for each match) and its 2435 children are all the parse trees returned by the successive application of 'r'. 2436 2437 ---- 2438 alias oneOrMore!(or!(literal!"abc", literal!"d")) rule; // in PEG-speak: '("abc" / "d")*' 2439 ParseTree input = ParseTree("",false,[], "abcdabce"); 2440 2441 ParseTree result = rule(input); 2442 2443 assert(result.successful); 2444 assert(result.matches == ["abc", "d", "abc"]); 2445 assert(result.end == 7); // matched "abcdabce"[0..7] => "abcdabc". "e" at the end is not part of the parse tree. 2446 assert(result.children.length == 3); 2447 writeln(result); 2448 /+ 2449 writes: 2450 oneOrMore [0, 7]["abc", "d", "abc"] 2451 +-or [0, 3]["abc"] 2452 | +-literal(abc) [0, 3]["abc"] 2453 +-or [3, 4]["d"] 2454 | +-literal(d) [3, 4]["d"] 2455 +-or [4, 7]["abc"] 2456 +-literal(abc) [4, 7]["abc"] 2457 +/ 2458 ---- 2459 2460 So we know the first child used the 'literal!"abc"' sub-rule and matched input[0..3]. 2461 The second matched input[3..4] and the third input[4..7]. 2462 2463 ---- 2464 input = ParseTree("",false,[], "efgh"); 2465 result = rule(input); 2466 assert(!result.successful); // fails, since it failed on the first try. 2467 ---- 2468 */ 2469 template oneOrMore(alias r) 2470 { 2471 enum name = "oneOrMore!(" ~ getName!(r) ~ ")"; 2472 2473 ParseTree oneOrMore(ParseTree p) 2474 { 2475 auto result = ParseTree(name, false, [], p.input, p.end, p.end); 2476 version (tracer) 2477 { 2478 incTraceLevel(); 2479 if (shouldTrace(getName!(r)(), p)) 2480 trace(traceMsg(result, name, getName!(r)())); 2481 } 2482 auto temp = r(result); 2483 2484 if (!temp.successful) 2485 { 2486 result.matches = temp.matches; 2487 result.children = [temp]; 2488 result.end = temp.end; 2489 } 2490 else 2491 { 2492 while( temp.successful 2493 && (temp.begin < temp.end // To avoid infinite loops on epsilon-matching rules 2494 || temp.name.startsWith("discard!("))) 2495 { 2496 result.matches ~= temp.matches; 2497 result.children ~= temp; 2498 result.end = temp.end; 2499 result.failEnd = max(result.failEnd, temp.failEnd); 2500 version (tracer) 2501 { 2502 if (shouldTrace(getName!(r)(), p)) 2503 trace(traceMsg(result, name, getName!(r)())); 2504 } 2505 temp = r(result); 2506 } 2507 auto maxFail = max(temp.failEnd, temp.end); 2508 if (maxFail > result.failEnd && maxFail > result.end) { 2509 result.failedChild = [temp]; 2510 result.failEnd = maxFail; 2511 } 2512 result.successful = true; 2513 } 2514 version (tracer) 2515 { 2516 if (shouldTrace(getName!(r)(), p)) 2517 trace(traceResultMsg(result, getName!(r)())); 2518 decTraceLevel(); 2519 } 2520 return result; 2521 } 2522 2523 ParseTree oneOrMore(string input) 2524 { 2525 return .oneOrMore!(r)(ParseTree("",false,[],input)); 2526 } 2527 2528 string oneOrMore(GetName g) 2529 { 2530 return name; 2531 } 2532 } 2533 2534 unittest // 'oneOrMore' unit test 2535 { 2536 alias literal!"a" a; 2537 alias literal!"abc" abc; 2538 alias charRange!('a','z') az; 2539 2540 alias oneOrMore!(a) as; 2541 alias oneOrMore!(abc) abcs; 2542 alias oneOrMore!(az) azs; 2543 2544 assert(getName!(as)() == `oneOrMore!(literal!("a"))`); 2545 assert(getName!(abcs)() == `oneOrMore!(literal!("abc"))`); 2546 assert(getName!(azs)() == `oneOrMore!(charRange!('a','z'))`); 2547 2548 assert(!as("").successful); 2549 assert(as("a").successful); 2550 assert(as("aa").successful); 2551 assert(as("aaa").successful); 2552 assert(as("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa").successful); 2553 assert(!as("b").successful); 2554 2555 ParseTree result = as("aaa"); 2556 2557 assert(result.name == `oneOrMore!(literal!("a"))`); 2558 assert(result.successful); 2559 assert(result.matches == ["a","a","a"]); 2560 assert(result.begin == 0); 2561 assert(result.end == 3); 2562 assert(result.children.length == 3); 2563 assert(result.children == [ a("aaa"), a(a("aaa")), a(a(a("aaa")))]); 2564 2565 assert(!abcs("").successful); 2566 assert(abcs("abc").successful); 2567 assert(abcs("abcabc").successful); 2568 assert(abcs("abcabcabc").successful); 2569 assert(abcs("abcabcabcabcabcabcabcabcabcabcabcabcabcabcabc").successful); 2570 assert(!abcs("ab").successful); 2571 2572 result = abcs("abcabcabc"); 2573 2574 assert(result.name == `oneOrMore!(literal!("abc"))`); 2575 assert(result.successful); 2576 assert(result.matches == ["abc","abc","abc"]); 2577 assert(result.begin == 0); 2578 assert(result.end == 3*3); 2579 assert(result.children.length == 3); 2580 assert(result.children == [ abc("abcabcabc"), abc(abc("abcabcabc")), abc(abc(abc("abcabcabc")))]); 2581 2582 assert(!azs("").successful); 2583 assert(azs("a").successful); 2584 assert(azs("abc").successful); 2585 assert(azs("abcdefghijklmnoqrstuvwxyz").successful); 2586 assert(azs("abcdefghijklmnoqrstuvwxyz 1234567890").successful); 2587 assert(!azs(".").successful); 2588 2589 result = azs("abc"); 2590 2591 assert(result.name == `oneOrMore!(charRange!('a','z'))`); 2592 assert(result.successful); 2593 assert(result.matches == ["a","b","c"]); 2594 assert(result.begin == 0); 2595 assert(result.end == 3); 2596 assert(result.children.length == 3); 2597 assert(result.children == [ az("abc"), az(az("abc")), az(az(az("abc")))]); 2598 } 2599 2600 /** 2601 Given a subrule 'r', represents the expression 'r?'. It tries to match 'r' and if this matches 2602 successfully, it returns this match. If 'r' failed, 'r?' is still a success, but without any child nor match. 2603 2604 ---- 2605 alias option!(literal!"abc") rule; // Aka '"abc"?' 2606 ParseTree input = ParseTree("",false,[],"abcd"); 2607 2608 ParseTree result = rule(input); 2609 assert(result.successful); 2610 assert(result.matches == ["abc"]); 2611 assert(result.children.length == 1); 2612 assert(result.children[0] == literal!"abc"(input)); 2613 ---- 2614 */ 2615 template option(alias r) 2616 { 2617 enum name = "option!(" ~ getName!(r) ~ ")"; 2618 2619 ParseTree option(ParseTree p) 2620 { 2621 version (tracer) 2622 { 2623 if (shouldTrace(getName!(r)(), p)) 2624 trace(traceMsg(p, name, getName!(r)())); 2625 } 2626 ParseTree result = r(p); 2627 if (result.successful) 2628 return ParseTree(name, true, result.matches, result.input, result.begin, result.end, [result], result.failEnd); 2629 else 2630 return ParseTree(name, true, [], p.input, p.end, p.end, null, max(result.end,result.failEnd), [result]); 2631 } 2632 2633 ParseTree option(string input) 2634 { 2635 return .option!(r)(ParseTree("",false,[],input)); 2636 } 2637 2638 string option(GetName g) 2639 { 2640 return name; 2641 } 2642 } 2643 2644 unittest // 'option' unit test 2645 { 2646 alias literal!"a" a; 2647 alias literal!"abc" abc; 2648 2649 alias option!(a) a_; 2650 alias option!(abc) abc_; 2651 2652 assert(getName!(a_)() == `option!(literal!("a"))`); 2653 assert(getName!(abc_)() == `option!(literal!("abc"))`); 2654 2655 assert(a_("").successful); 2656 assert(a_("a").successful); 2657 assert(a_("aa").successful); 2658 assert(a_("b").successful); 2659 2660 ParseTree result = a_("a"); 2661 2662 assert(result.name == `option!(literal!("a"))`); 2663 assert(result.successful); 2664 assert(result.matches == ["a"]); 2665 assert(result.begin == 0); 2666 assert(result.end == 1); 2667 assert(result.children.length == 1); 2668 assert(result.children == [ a("a")]); 2669 2670 result = a_(""); 2671 2672 assert(result.name == `option!(literal!("a"))`); 2673 assert(result.successful); 2674 assert(result.matches is null); 2675 assert(result.begin == 0); 2676 assert(result.end == 0); 2677 assert(result.children.length == 0); 2678 2679 2680 assert(abc_("").successful); 2681 assert(abc_("abc").successful); 2682 assert(abc_("abcabc").successful); 2683 assert(abc_("ab").successful); 2684 2685 result = abc_("abcdef"); 2686 2687 assert(result.name == `option!(literal!("abc"))`); 2688 assert(result.successful); 2689 assert(result.matches == ["abc"]); 2690 assert(result.begin == 0); 2691 assert(result.end == 3); 2692 assert(result.children.length == 1); 2693 assert(result.children == [ abc("abcdef")]); 2694 2695 result = abc_("def"); 2696 2697 assert(result.name == `option!(literal!("abc"))`); 2698 assert(result.successful); 2699 assert(result.matches is null); 2700 assert(result.begin == 0); 2701 assert(result.end == 0); 2702 assert(result.children.length == 0); 2703 } 2704 2705 /** 2706 Tries 'r' on the input. If it succeeds, the rule also succeeds, without consuming any input. 2707 If 'r' fails, then posLookahead!r also fails. Low-level implementation of '&r'. 2708 */ 2709 template posLookahead(alias r) 2710 { 2711 enum name = "posLookahead!(" ~ getName!(r) ~ ")"; 2712 2713 ParseTree posLookahead(ParseTree p) 2714 { 2715 version (tracer) 2716 { 2717 if (shouldTrace(getName!(r)(), p)) 2718 trace(traceMsg(p, name, getName!(r)())); 2719 } 2720 ParseTree temp = r(p); 2721 if (temp.successful) 2722 return ParseTree(name, temp.successful, [], p.input, p.end, p.end); 2723 else 2724 return ParseTree(name, temp.successful, [temp.matches[$-1]], p.input, p.end, p.end); 2725 } 2726 2727 ParseTree posLookahead(string input) 2728 { 2729 return .posLookahead!(r)(ParseTree("",false,[],input)); 2730 } 2731 2732 string posLookahead(GetName g) 2733 { 2734 return name; 2735 } 2736 } 2737 2738 unittest // 'posLookahead' unit test 2739 { 2740 alias literal!"a" a; 2741 alias literal!"abc" abc; 2742 2743 alias posLookahead!(a) a_; 2744 alias posLookahead!(abc) abc_; 2745 2746 assert(getName!(a_)() == `posLookahead!(literal!("a"))`); 2747 assert(getName!(abc_)() == `posLookahead!(literal!("abc"))`); 2748 2749 assert(!a_("").successful); 2750 assert(a_("a").successful); 2751 assert(a_("aa").successful); 2752 assert(!a_("b").successful); 2753 2754 ParseTree result = a_("a"); 2755 2756 assert(result.name == `posLookahead!(literal!("a"))`); 2757 assert(result.successful); 2758 assert(result.matches is null); 2759 assert(result.begin == 0); 2760 assert(result.end == 0); 2761 assert(result.children.length == 0); 2762 2763 result = a_(""); 2764 2765 assert(result.name == `posLookahead!(literal!("a"))`); 2766 assert(!result.successful); 2767 assert(result.matches == [`"a"`], "posLookahead error message."); 2768 assert(result.begin == 0); 2769 assert(result.end == 0); 2770 assert(result.children.length == 0); 2771 2772 assert(!abc_("").successful); 2773 assert(abc_("abc").successful); 2774 assert(abc_("abcabc").successful); 2775 assert(!abc_("ab").successful); 2776 2777 result = abc_("abcdef"); 2778 2779 assert(result.name == `posLookahead!(literal!("abc"))`); 2780 assert(result.successful); 2781 assert(result.matches is null); 2782 assert(result.begin == 0); 2783 assert(result.end == 0); 2784 assert(result.children.length == 0); 2785 2786 result = abc_("def"); 2787 2788 assert(result.name == `posLookahead!(literal!("abc"))`); 2789 assert(!result.successful); 2790 assert(result.matches == [`"abc"`], "posLookahead error message."); 2791 assert(result.begin == 0); 2792 assert(result.end == 0); 2793 assert(result.children.length == 0); 2794 } 2795 2796 /** 2797 Tries 'r' on the input. If it fails, the rule succeeds, without consuming any input. 2798 If 'r' succeeds, then negLookahead!r fails. Low-level implementation of '!r'. 2799 */ 2800 template negLookahead(alias r) 2801 { 2802 enum name = "negLookahead!(" ~ getName!(r) ~ ")"; 2803 2804 ParseTree negLookahead(ParseTree p) 2805 { 2806 version (tracer) 2807 { 2808 if (shouldTrace(getName!(r)(), p)) 2809 trace(traceMsg(p, name, getName!(r)())); 2810 } 2811 ParseTree temp = r(p); 2812 if (temp.successful) 2813 return ParseTree(name, false, ["anything but \"" ~ p.input[temp.begin..temp.end] ~ "\""], p.input, p.end, p.end); 2814 else 2815 return ParseTree(name, true, [], p.input, p.end, p.end); 2816 } 2817 2818 ParseTree negLookahead(string input) 2819 { 2820 return .negLookahead!(r)(ParseTree("",false,[],input)); 2821 } 2822 2823 string negLookahead(GetName g) 2824 { 2825 return name; 2826 } 2827 } 2828 2829 unittest // 'negLookahead' unit test 2830 { 2831 alias literal!"a" a; 2832 alias literal!"abc" abc; 2833 2834 alias negLookahead!(a) a_; 2835 alias negLookahead!(abc) abc_; 2836 2837 assert(getName!(a_)() == `negLookahead!(literal!("a"))`); 2838 assert(getName!(abc_)() == `negLookahead!(literal!("abc"))`); 2839 2840 assert(a_("").successful); 2841 assert(!a_("a").successful); 2842 assert(!a_("aa").successful); 2843 assert(a_("b").successful); 2844 2845 ParseTree result = a_("a"); 2846 2847 assert(result.name == `negLookahead!(literal!("a"))`); 2848 assert(!result.successful); 2849 assert(result.matches == [`anything but "a"`]); 2850 assert(result.begin == 0); 2851 assert(result.end == 0); 2852 assert(result.children.length == 0); 2853 2854 result = a_(""); 2855 2856 assert(result.name == `negLookahead!(literal!("a"))`); 2857 assert(result.successful); 2858 assert(result.matches is null); 2859 assert(result.begin == 0); 2860 assert(result.end == 0); 2861 assert(result.children.length == 0); 2862 2863 assert(abc_("").successful); 2864 assert(!abc_("abc").successful); 2865 assert(!abc_("abcabc").successful); 2866 assert(abc_("ab").successful); 2867 2868 result = abc_("abcdef"); 2869 2870 assert(result.name == `negLookahead!(literal!("abc"))`); 2871 assert(!result.successful); 2872 assert(result.matches == [`anything but "abc"`]); 2873 assert(result.begin == 0); 2874 assert(result.end == 0); 2875 assert(result.children.length == 0); 2876 2877 result = abc_("def"); 2878 2879 assert(result.name == `negLookahead!(literal!("abc"))`); 2880 assert(result.successful); 2881 assert(result.matches is null); 2882 assert(result.begin == 0); 2883 assert(result.end == 0); 2884 assert(result.children.length == 0); 2885 } 2886 2887 /** 2888 Internal helper template, to get a parse tree node with a name. For example, given: 2889 ---- 2890 alias or!(literal!("abc"), charRange!('0','9')) myRule; 2891 ---- 2892 2893 myRule gives nodes named "or", since its the parent rule. If you want nodes to be named "myRule", 2894 use named. named just overwrites the original root node name, the children are left untouched. 2895 2896 See_Also: defined. 2897 2898 ---- 2899 alias or!(literal!("abc"), charRange!('0','9')) rule; 2900 alias named!(rule, "myRule") myRule; 2901 2902 auto input = "abc3"; 2903 auto p1 = rule(input); 2904 auto p2 = myRule(input); 2905 2906 // They are both successful 2907 assert(p1.successful && p2.successful); 2908 assert(p1.matches == p2.matches); 2909 // But the names are different 2910 assert(p1.name == `or!(literal!("abc"), charRange!('0','9'))`); 2911 assert(p2.name == `myRule`); 2912 // Same children: 2913 assert(p2.children == p1.children); 2914 2915 ---- 2916 */ 2917 template named(alias r, string name) 2918 { 2919 ParseTree named(ParseTree p) 2920 { 2921 ParseTree result = r(p); 2922 result.name = name; 2923 return result; 2924 } 2925 2926 ParseTree named(string input) 2927 { 2928 return .named!(r,name)(ParseTree("",false,[],input)); 2929 } 2930 2931 string named(GetName g) 2932 { 2933 return name; 2934 } 2935 } 2936 2937 unittest // 'named' unit test 2938 { 2939 alias or!(literal!("abc"), charRange!('0','9')) rule; 2940 alias named!(rule, "myRule") myRule; 2941 2942 assert(getName!(rule)() == `or!(literal!("abc"), charRange!('0','9'))`); 2943 assert(getName!(myRule)() == "myRule"); 2944 2945 // Equality on success (except for the name) 2946 ParseTree result = rule("abc0"); 2947 ParseTree myResult = myRule("abc0"); 2948 2949 assert(myResult.successful == result.successful); 2950 assert(myResult.name == "myRule"); 2951 assert(myResult.matches == result.matches); 2952 assert(myResult.begin == result.begin); 2953 assert(myResult.end == result.end); 2954 assert(myResult.children == result.children); 2955 2956 // Equality on failure (except for the name) 2957 result = rule("_abc"); 2958 myResult = myRule("_abc"); 2959 2960 assert(myResult.successful == result.successful); 2961 assert(myResult.name == "myRule"); 2962 assert(myResult.matches == result.matches); 2963 assert(myResult.begin == result.begin); 2964 assert(myResult.end == result.end); 2965 assert(myResult.children == result.children); 2966 } 2967 2968 /** 2969 Internal helper template, to get a parse tree node with a name, while keeping the original node (see also named). 2970 For example, given: 2971 ---- 2972 alias or!(literal!("abc"), charRange!('0','9')) myRule; 2973 ---- 2974 2975 myRule gives nodes named "or", since its the parent rule. If you want nodes to be named "myRule", 2976 use defined. Contrary to named (see before), the original node is pushed as the child. 2977 2978 See_Also: named. 2979 2980 ---- 2981 alias or!(literal!("abc"), charRange!('0','9')) rule; 2982 alias defined!(rule, "myRule") myRule; 2983 2984 auto input = "abc3"; 2985 auto p1 = rule(input); 2986 auto p2 = myRule(input); 2987 2988 // They are both successful 2989 assert(p1.successful && p2.successful); 2990 assert(p1.matches == p2.matches); 2991 // But the names are different 2992 assert(p1.name == `or!(literal!("abc"), charRange!('0','9'))`); 2993 assert(p2.name == `myRule`); 2994 // Here the original tree is not discarded: 2995 assert(p2.children[0] == p1); 2996 ---- 2997 */ 2998 template defined(alias r, string name) 2999 { 3000 ParseTree defined(ParseTree p) 3001 { 3002 ParseTree result = r(p); 3003 result.children = [result]; 3004 result.name = name; 3005 return result; 3006 } 3007 3008 ParseTree defined(string input) 3009 { 3010 return .defined!(r,name)(ParseTree("",false,[],input)); 3011 } 3012 3013 string defined(GetName g) 3014 { 3015 return name; 3016 } 3017 } 3018 3019 unittest // 'defined' unit test 3020 { 3021 alias or!(literal!("abc"), charRange!('0','9')) rule; 3022 alias defined!(rule, "myRule") myRule; 3023 3024 assert(getName!(rule)() == `or!(literal!("abc"), charRange!('0','9'))`); 3025 assert(getName!(myRule)() == "myRule"); 3026 3027 // Equality on success (except for the name) 3028 ParseTree result = rule("abc0"); 3029 ParseTree myResult = myRule("abc0"); 3030 3031 assert(myResult.successful && result.successful); 3032 assert(myResult.name == "myRule"); 3033 assert(myResult.matches == result.matches); 3034 assert(myResult.begin == result.begin); 3035 assert(myResult.end == result.end); 3036 assert(myResult.children[0] == result); 3037 3038 // Equality on failure (except for the name) 3039 result = rule("_abc"); 3040 myResult = myRule("_abc"); 3041 3042 assert(!myResult.successful && !result.successful); 3043 assert(myResult.name == "myRule"); 3044 assert(myResult.matches == result.matches); 3045 assert(myResult.begin == result.begin); 3046 assert(myResult.end == result.end); 3047 assert(myResult.children[0] == result); 3048 } 3049 3050 /** 3051 Low-level representation for the expression 'r {act}'. That is, it applies rule 'r' 3052 on the input and then calls 'act' on the resulting ParseTree. 3053 */ 3054 template action(alias r, alias act) 3055 { 3056 ParseTree action(ParseTree p) 3057 { 3058 return act(r(p)); 3059 } 3060 3061 ParseTree action(string input) 3062 { 3063 return .action!(r,act)(ParseTree("",false,[],input)); 3064 } 3065 3066 string action(GetName g) 3067 { 3068 enum name = "action!("~ getName!(r)() ~ ", " ~ __traits(identifier, act) ~ ")"; 3069 return name; 3070 } 3071 } 3072 3073 unittest // 'action' unit test 3074 { 3075 ParseTree foo(ParseTree p) 3076 { 3077 p.matches ~= p.matches; // doubling matches 3078 return p; 3079 } 3080 3081 alias literal!("abc") abc; 3082 3083 alias action!(abc, foo) abcfoo; 3084 3085 assert(getName!(abcfoo) == `action!(literal!("abc"), foo)`); 3086 3087 ParseTree result = abcfoo("abc"); 3088 ParseTree reference = abc("abc"); 3089 3090 assert(result.successful == reference.successful); 3091 assert(result.successful); 3092 assert(result.matches == reference.matches ~ reference.matches); 3093 assert(result.begin == reference.begin); 3094 assert(result.end == reference.end); 3095 assert(result.children == reference.children); 3096 3097 // On failure 3098 result = abcfoo("_abc"); 3099 reference = abc("_abc"); 3100 3101 assert(result.successful == reference.successful); 3102 assert(!result.successful); 3103 assert(result.matches == reference.matches ~ reference.matches); 3104 assert(result.begin == reference.begin); 3105 assert(result.end == reference.end); 3106 assert(result.children == reference.children); 3107 } 3108 3109 /** 3110 Concatenates a ParseTree's matches as one match and discards its children. Equivalent to the expression '~r'. 3111 */ 3112 template fuse(alias r) 3113 { 3114 ParseTree fuse(ParseTree p) 3115 { 3116 p = r(p); 3117 if(p.successful) 3118 { 3119 if (p.matches.length != 0) 3120 p.matches = [std.array.join(p.matches)]; 3121 3122 p.children = null; // also discard children 3123 } 3124 return p; 3125 } 3126 3127 ParseTree fuse(string input) 3128 { 3129 return .fuse!(r)(ParseTree("",false,[],input)); 3130 } 3131 3132 string fuse(GetName g) 3133 { 3134 enum name = "fuse!(" ~ getName!(r)() ~ ")"; 3135 return name; 3136 } 3137 } 3138 3139 unittest // 'fuse' unit test 3140 { 3141 alias oneOrMore!(literal!("abc")) abcs; 3142 3143 alias fuse!(abcs) f; 3144 3145 assert(getName!(f) == `fuse!(oneOrMore!(literal!("abc")))`); 3146 3147 ParseTree result = f("abcabcabc"); 3148 ParseTree reference = abcs("abcabcabc"); 3149 3150 assert(result.successful == reference.successful); 3151 assert(result.successful); 3152 assert(reference.matches == ["abc", "abc", "abc"]); 3153 assert(result.matches == ["abcabcabc"]); 3154 assert(result.begin == reference.begin); 3155 assert(result.end == reference.end); 3156 assert(result.children is null); 3157 3158 // On failure 3159 result = f("_abc"); 3160 reference = abcs("_abc"); 3161 3162 assert(result.successful == reference.successful); 3163 assert(!result.successful); 3164 assert(result.matches == reference.matches); 3165 assert(result.begin == reference.begin); 3166 assert(result.end == reference.end); 3167 assert(result.children == reference.children); 3168 3169 alias discard!(literal!("abc")) dabc; 3170 alias fuse!(dabc) f2; 3171 3172 result = f2("abcabc"); 3173 reference = dabc("abcabc"); 3174 3175 assert(result.successful); 3176 assert(result.matches is null); 3177 assert(result.begin == reference.begin); 3178 assert(result.end == reference.end); 3179 assert(result.children == reference.children); 3180 } 3181 3182 /** 3183 Calls 'r' on the input and then discards its children nodes. 3184 */ 3185 template discardChildren(alias r) 3186 { 3187 ParseTree discardChildren(ParseTree p) 3188 { 3189 p = r(p); 3190 p.children = null; 3191 return p; 3192 } 3193 3194 ParseTree discardChildren(string input) 3195 { 3196 return .discardChildren!(r)(ParseTree("",false,[],input)); 3197 } 3198 3199 string discardChildren(GetName g) 3200 { 3201 enum name = "discardChildren!(" ~ getName!(r)() ~ ")"; 3202 return name; 3203 } 3204 } 3205 3206 /** 3207 Calls 'r' on the input and then discards its matches. 3208 */ 3209 template discardMatches(alias r) 3210 { 3211 ParseTree discardMatches(ParseTree p) 3212 { 3213 p = r(p); 3214 if (p.successful) 3215 p.matches = null; 3216 return p; 3217 } 3218 3219 ParseTree discardMatches(string input) 3220 { 3221 return .discardMatches!(r)(ParseTree("",false,[],input)); 3222 } 3223 3224 string discardMatches(GetName g) 3225 { 3226 enum name = "discardMatches!(" ~ getName!(r)() ~ ")"; 3227 return name; 3228 } 3229 } 3230 3231 /** 3232 Calls 'r' on the input and then discard everything 'r' returned: no children, no match and index 3233 put at the end of the match. It's the low-level engine behind ':r'. 3234 */ 3235 template discard(alias r) 3236 { 3237 ParseTree discard(ParseTree p) 3238 { 3239 ParseTree result = r(p); 3240 result.name = "discard!(" ~ getName!(r)() ~ ")"; 3241 //result.begin = result.end; 3242 result.children = null; 3243 if (result.successful) 3244 result.matches = null;//to keep error messages, if any 3245 3246 return result; 3247 } 3248 3249 ParseTree discard(string input) 3250 { 3251 return .discard!(r)(ParseTree("",false,[],input)); 3252 } 3253 3254 string discard(GetName g) 3255 { 3256 return "discard!(" ~ getName!(r)() ~ ")"; 3257 } 3258 } 3259 3260 unittest // 'discard' unit test 3261 { 3262 alias literal!"abc" abc; 3263 alias oneOrMore!abc abcs; 3264 alias discard!(literal!("abc")) dabc; 3265 alias discard!(oneOrMore!(literal!("abc")))dabcs; 3266 3267 ParseTree reference = abc("abc"); 3268 ParseTree result =dabc("abc"); 3269 3270 assert(result.successful == reference.successful); 3271 assert(result.successful); 3272 assert(result.name == `discard!(literal!("abc"))`); 3273 assert(result.matches is null); 3274 assert(result.begin == reference.begin); 3275 assert(result.end == reference.end); 3276 assert(result.children is null); 3277 3278 reference = abcs("abcabcabc"); 3279 result = dabcs("abcabcabc"); 3280 3281 assert(result.successful == reference.successful); 3282 assert(result.successful); 3283 assert(result.name == `discard!(oneOrMore!(literal!("abc")))`); 3284 assert(result.matches is null); 3285 assert(result.begin == reference.begin); 3286 assert(result.end == reference.end); 3287 assert(result.children is null); 3288 3289 // On failure 3290 reference = abcs(""); 3291 result = dabcs(""); 3292 3293 assert(result.successful == reference.successful); 3294 assert(!result.successful); 3295 assert(result.name == `discard!(oneOrMore!(literal!("abc")))`); 3296 assert(result.matches == [`"abc"`], "discard error message."); 3297 assert(result.begin == reference.begin); 3298 assert(result.end == reference.end); 3299 assert(result.children is null); 3300 3301 // Action on 'and' 3302 alias and!(abc,dabc,abc) discardMiddle; 3303 3304 result = discardMiddle("abcabcabc"); 3305 assert(result.successful); 3306 assert(result.matches == ["abc", "abc"]); 3307 assert(result.begin == 0); 3308 assert(result.end == 3*3); 3309 assert(result.children.length == 2); 3310 assert(result.children == [abc("abcabcabc"), abc(abc(abc("abcabcabc")))]); 3311 } 3312 3313 /** 3314 Calls 'r' on the input and then discards everything 'r' did, except its matches (so that 3315 they propagate upwards). Equivalent to ';r'. 3316 */ 3317 template drop(alias r) 3318 { 3319 ParseTree drop(ParseTree p) 3320 { 3321 ParseTree result = r(p); 3322 //result.begin = result.end; 3323 result.children = null; 3324 if (result.successful) 3325 result.name = "drop!(" ~ getName!(r)() ~ ")"; 3326 return result; 3327 } 3328 3329 ParseTree drop(string input) 3330 { 3331 return .drop!(r)(ParseTree("",false,[],input)); 3332 } 3333 3334 string drop(GetName g) 3335 { 3336 return "drop!(" ~ getName!(r)() ~ ")"; 3337 } 3338 } 3339 3340 unittest // 'drop' unit test 3341 { 3342 alias literal!"abc" abc; 3343 alias oneOrMore!abc abcs; 3344 alias drop!(literal!("abc")) dabc; 3345 alias drop!(oneOrMore!(literal!("abc"))) dabcs; 3346 3347 ParseTree reference = abc("abc"); 3348 ParseTree result =dabc("abc"); 3349 3350 assert(result.successful == reference.successful); 3351 assert(result.successful); 3352 assert(result.name == `drop!(literal!("abc"))`); 3353 assert(result.matches == reference.matches); 3354 assert(result.begin == reference.begin); 3355 assert(result.end == reference.end); 3356 assert(result.children is null); 3357 3358 reference = abcs("abcabcabc"); 3359 result = dabcs("abcabcabc"); 3360 3361 assert(result.successful == reference.successful); 3362 assert(result.successful); 3363 assert(result.name == `drop!(oneOrMore!(literal!("abc")))`); 3364 assert(result.matches == reference.matches); 3365 assert(result.begin == reference.begin); 3366 assert(result.end == reference.end); 3367 assert(result.children is null); 3368 3369 // On failure 3370 reference = abcs(""); 3371 result = dabcs(""); 3372 3373 assert(result.successful == reference.successful); 3374 assert(!result.successful); 3375 assert(result.name == reference.name); 3376 assert(result.matches == [`"abc"`], "'drop' error message."); 3377 assert(result.begin == reference.begin); 3378 assert(result.end == reference.end); 3379 assert(result.children is null); 3380 3381 // Action on 'and' 3382 alias and!(abc,dabc,abc) discardMiddle; 3383 3384 result = discardMiddle("abcabcabc"); 3385 assert(result.successful); 3386 assert(result.matches == ["abc", "abc", "abc"], "3 matches."); 3387 assert(result.begin == 0); 3388 assert(result.end == 3*3); 3389 assert(result.children.length == 2, "but only 2 children."); 3390 assert(result.children == [abc("abcabcabc"), abc(abc(abc("abcabcabc")))]); 3391 } 3392 3393 /** 3394 Makes r disappear in a sequence, letting its children take its place. It's equivalent 3395 to the '%' operator. Given A <- B %C D and C <- E F, a successful parse for A will 3396 generate a three with four children: B, E, F and D parse trees. 3397 */ 3398 template propagate(alias r) 3399 { 3400 ParseTree propagate(ParseTree p) 3401 { 3402 ParseTree result = r(p); 3403 if (result.successful) 3404 result.name = "propagate!(" ~ getName!(r)() ~ ")"; 3405 return result; 3406 } 3407 3408 ParseTree propagate(string input) 3409 { 3410 return .propagate!(r)(ParseTree("",false,[],input)); 3411 } 3412 3413 string propagate(GetName g) 3414 { 3415 return "propagate!(" ~ getName!(r)() ~ ")"; 3416 } 3417 } 3418 3419 /** 3420 Makes 'r's result be kept when it would be discarded by the tree-decimation done by a grammar. 3421 Equivalent to '^r'. 3422 */ 3423 template keep(alias r) 3424 { 3425 ParseTree keep(ParseTree p) 3426 { 3427 ParseTree result = r(p); 3428 if (result.successful) 3429 { 3430 result.children = [result]; 3431 result.name = "keep!(" ~ getName!(r)() ~ ")"; 3432 } 3433 return result; 3434 } 3435 3436 ParseTree keep(string input) 3437 { 3438 return .keep!(r)(ParseTree("",false,[],input)); 3439 } 3440 3441 string keep(GetName g) 3442 { 3443 return "keep!(" ~ getName!(r)() ~ ")"; 3444 } 3445 } 3446 3447 unittest // 'keep' unit test 3448 { 3449 // Grammar mimicry 3450 struct KeepTest 3451 { 3452 static bool isRule(string s) 3453 { 3454 if (s == "A" || s == "KA") 3455 return true; 3456 else 3457 return false; 3458 } 3459 3460 mixin decimateTree; 3461 3462 // Equivalent to A <- 'a' 'b' 3463 static ParseTree A(string s) 3464 { 3465 return decimateTree(named!(and!(literal!"a", literal!"b"), "A")(s)); 3466 } 3467 3468 // Here we use keep to protect 'b' 3469 // Equivalent to KA <- 'a' ^'b' 3470 static ParseTree KA(string s) 3471 { 3472 return decimateTree(named!(and!(literal!"a", keep!(literal!"b")), "KA")(s)); 3473 } 3474 } 3475 3476 ParseTree reference = KeepTest.A("abc"); 3477 ParseTree result = KeepTest.KA("abc"); 3478 3479 assert(result.successful == reference.successful); 3480 assert(result.matches == reference.matches); 3481 assert(result.matches == ["a","b"]); 3482 assert(result.begin == reference.begin); 3483 assert(result.end == reference.end); 3484 assert(reference.children.length == 0); 3485 assert(result.children.length == 1); 3486 assert(result.children == [literal!("b")(literal!("a")("abc"))], "'b' node was kept."); 3487 } 3488 3489 /* ****************** predefined rules ******************** */ 3490 3491 alias named!(or!(literal!("\r\n"), literal!("\n"), literal!("\r")), "endOfLine") endOfLine; /// predefined end-of-line parser 3492 alias endOfLine eol; /// helper alias. 3493 3494 alias or!(literal!(" "), literal!("\t"), literal!("\v")) space; /// predefined space-recognizing parser (space or tabulation). 3495 alias named!(literal!"\t", "tab") tab; /// A parser recognizing \t (tabulation) 3496 alias named!(fuse!(discardChildren!(oneOrMore!space)), 3497 "spaces") spaces; /// aka '~space+' 3498 alias or!(space, endOfLine) blank; /// Any blank char (spaces or end of line). 3499 alias named!(discard!(zeroOrMore!blank), 3500 "spacing") spacing; /// The basic space-management parser: discard zero or more blank spaces. 3501 3502 alias charRange!('0', '9') digit; /// Decimal digit: [0-9] 3503 alias named!(fuse!(discardChildren!(oneOrMore!digit)), "digits") digits; /// [0-9]+ 3504 3505 alias or!(charRange!('0','9'), charRange!('a','f'), charRange!('A', 'F')) hexDigit; /// Hexadecimal digit: [0-9a-fA-F] 3506 3507 alias charRange!('a', 'z') alpha; /// [a-z] 3508 alias charRange!('A', 'Z') Alpha; /// [A-Z] 3509 3510 alias and!(oneOrMore!(or!(alpha, Alpha, literal!("_"))), zeroOrMore!(or!(digit, alpha, Alpha, literal!("_")))) ident; 3511 alias named!(fuse!(discardChildren!ident), 3512 "identifier") identifier; /// [a-zA-Z_][a-zA-Z_0-9]*, the basic C-family identifier 3513 alias named!(fuse!(discardChildren!(and!(identifier, zeroOrMore!(and!(literal!".", identifier))))), 3514 "qualifiedIdentifier") qualifiedIdentifier; /// qualified identifiers (ids separated by dots: abd.def.g). 3515 3516 alias named!(literal!"/", "slash") slash; /// A parser recognizing '/' 3517 alias named!(literal!"\\", "backslash") backslash; /// A parser recognizing '\' 3518 alias named!(literal!"'", "quote") quote; /// A parser recognizing ' (single quote) 3519 alias named!(literal!"\"", "doublequote") doublequote; /// A parser recognizing " (double quote) 3520 alias named!(literal!"`", "backquote") backquote; /// A parser recognizing $(BACKTICK) (backquote) 3521 3522 /// A list of elem's separated by sep's. One element minimum. 3523 template list(alias elem, alias sep) 3524 { 3525 alias named!(spaceAnd!(oneOrMore!blank, and!(elem, zeroOrMore!(spaceAnd!(discardMatches!(sep), elem)))), "list") list; 3526 } 3527 3528 /// A list of elem's separated by sep's. The empty list (no elem, no sep) is OK. 3529 template list0(alias elem, alias sep) 3530 { 3531 alias named!(spaceAnd!(oneOrMore!blank, option!(and!(elem, zeroOrMore!(spaceAnd!(discardMatches!(sep), elem))))), "list0") list0; 3532 } 3533 3534 template AddSpace(alias sp) 3535 { 3536 template AddSpace(alias r) 3537 { 3538 alias TypeTuple!(r, discard!sp) AddSpace; 3539 } 3540 } 3541 3542 /** 3543 The engine formerly behind the '< ' Pegged rule: all sequence subelements of a rule are interspersed 3544 with a space-consuming rule, given as the first template parameter. It's not used by Pegged anymore 3545 but can be useful for low-level code. It might become deprecated, but it's not there yet. 3546 3547 ---- 3548 alias and!(literal!"abc", literal!"def") rule1; // "abc" "def", equivalent to "abcdef" 3549 alias spaceAnd!(oneOrMore!blank, literal!"abc", literal!"def") rule2; // "abc" "def", but with spaces in-between. 3550 3551 string input1 = "abcdef"; 3552 string input2 = " abc 3553 3554 def "; // with spaces and end of line markers. 3555 3556 assert(rule1(input1).successful); // OK 3557 assert(!rule1(input2).successful); // NOK, couldn't find "def" after "abc" 3558 3559 assert(rule2(input1).successful); // OK 3560 assert(rule2(input2).successful); // Still OK 3561 assert(rule2(input2).matches == ["abc","def"]);// rule2 finds the literals among the spaces 3562 ---- 3563 3564 As you can see on the previous line, spaceAnd discards the matched spaces 3565 and returns matches only for the 'real' subrules. 3566 3567 Note: by using a non-space rule as the first template argument, 3568 you can use spaceAnd as a generic 'find these patterns, possibly separated by this pattern' rule. 3569 3570 For example, using digits as separators: 3571 ---- 3572 alias spaceAnd!(digit, literal!"abc", litera!"def") rule3; 3573 3574 ParseTree result = rule3("123abc45def67890"); 3575 assert(rule3.successful); 3576 assert(rule3.matches == ["abc", "def"]); 3577 assert(rule3.children.length == 2); 3578 3579 assert(rule3.begin == 0;) 3580 assert(rule3.end == "123abc45def67890".length); 3581 ---- 3582 */ 3583 template spaceAnd(alias sp, rules...) 3584 { 3585 alias and!(discard!(zeroOrMore!sp), staticMap!(AddSpace!(zeroOrMore!sp), rules)) spaceAnd; 3586 } 3587 3588 unittest // 'spaceAnd' unit test 3589 { 3590 alias literal!"a" a; 3591 alias literal!"b" b; 3592 3593 //Basic use 3594 alias and!(a,b) ab; 3595 alias spaceAnd!(oneOrMore!blank, a, b) a_b; 3596 3597 ParseTree reference = ab("ab"); 3598 ParseTree result = a_b("ab"); 3599 3600 assert(reference.successful); 3601 assert(result.successful); 3602 assert(result.matches == reference.matches); 3603 assert(result.begin == reference.begin); 3604 assert(result.end == reference.end); 3605 assert(result.children == reference.children); 3606 3607 reference = ab(" a b "); 3608 result = a_b( " a b "); 3609 3610 assert(!reference.successful); 3611 assert(result.successful); 3612 assert(result.matches == ["a","b"]); 3613 assert(result.begin == 0); 3614 assert(result.end == " a b ".length); 3615 assert(result.children.length == 2); 3616 3617 reference = ab("ac"); 3618 result = a_b("ac"); 3619 3620 assert(!reference.successful); 3621 assert(!result.successful); 3622 3623 reference = ab(" a c "); 3624 result = a_b(" a c "); 3625 3626 assert(!reference.successful); 3627 assert(!result.successful); 3628 3629 // With another separator than spaces 3630 alias spaceAnd!(digit, a, b) a0b; 3631 3632 assert(a0b("ab").successful); 3633 assert(!a0b(" a b ").successful); 3634 3635 result = a0b("012a34b567"); 3636 3637 assert(result.successful); 3638 assert(result.matches == ["a", "b"]); 3639 assert(result.begin == 0); 3640 assert(result.end == "012a34b567".length); 3641 assert(result.children.length == 2); 3642 } 3643 3644 /// Mixin to simplify a parse tree inside a grammar 3645 mixin template decimateTree() 3646 { 3647 static ParseTree decimateTree(ParseTree p) 3648 { 3649 if(p.children.length == 0) return p; 3650 3651 bool parseFailed = !p.successful; 3652 3653 ParseTree[] filterChildren(ParseTree pt) 3654 { 3655 ParseTree[] result; 3656 foreach(child; pt.children) 3657 { 3658 import std.algorithm : startsWith; 3659 3660 if ( (isRule(child.name) && (child.matches.length != 0 || parseFailed)) 3661 || (!child.successful && child.children.length == 0) 3662 || (!child.successful && child.name.startsWith("or!") && child.children.length > 1) 3663 || (!pt.successful && child.successful && child.children.length == 0 && child.failedChild.length > 0)) 3664 { 3665 child.children = filterChildren(child); 3666 result ~= child; 3667 } 3668 else if (child.name.startsWith("keep!(")) // 'keep' node are never discarded. 3669 // They have only one child, the node to keep 3670 { 3671 result ~= child.children[0]; 3672 } 3673 else // discard this node, but see if its children contain nodes to keep 3674 { 3675 result ~= filterChildren(child); 3676 } 3677 } 3678 return result; 3679 } 3680 void filterFailedChildren(ref ParseTree pt) 3681 { 3682 foreach(ref child; pt.children) 3683 { 3684 filterFailedChildren(child); 3685 import std.algorithm : startsWith; 3686 3687 if ( (isRule(child.name) && (child.matches.length != 0 || parseFailed)) 3688 || (!child.successful && child.children.length == 0) 3689 || (!child.successful && child.name.startsWith("or!") && child.children.length > 1) 3690 || (!pt.successful && child.successful && child.children.length == 0 && child.failedChild.length > 0)) 3691 { 3692 } 3693 else if (child.name.startsWith("keep!(")) // 'keep' node are never discarded. 3694 // They have only one child, the node to keep 3695 { 3696 } 3697 else if (child.failedChild.length > 0)// discard this node, but see if its children contain nodes to keep 3698 { 3699 pt.failedChild ~= child.failedChild; 3700 child.failedChild = []; 3701 } 3702 } 3703 foreach(ref child; pt.failedChild) 3704 { 3705 filterFailedChildren(child); 3706 child.children = filterChildren(child); 3707 } 3708 } 3709 if (!p.successful) 3710 filterFailedChildren(p); 3711 p.children = filterChildren(p); 3712 return p; 3713 } 3714 } 3715 3716 /** 3717 Discard one-child nodes and replace them with their children. 3718 Most grammar tend to produce long one-child/one-child branches, 3719 simplifyTree compacts these. 3720 */ 3721 ParseTree simplifyTree(ParseTree p) 3722 { 3723 foreach(ref child; p.children) 3724 child = simplifyTree(child); 3725 3726 if (p.children.length != 1) 3727 return p; 3728 else // linear tree 3729 return p.children[0]; 3730 } 3731 3732 /** 3733 Returns: the number of nodes in a parse tree. 3734 */ 3735 size_t size(ParseTree p) 3736 { 3737 size_t result = 1; 3738 foreach(child; p.children) 3739 result += size(child); 3740 return result; 3741 } 3742 3743 /** 3744 Generic ParseTree modifier: 3745 predicate must be callable with a ParseTree and return a boolean. 3746 modifier must be callable with a ParseTree and return a ParseTree. 3747 3748 If predicate is true on input, modify calls modifier on input and return the result. 3749 If not, it continues with the children. 3750 */ 3751 ParseTree modify(alias predicate, alias modifier)(ParseTree input) 3752 { 3753 foreach(ref child; input.children) 3754 child = modify!(predicate, modifier)(child); 3755 3756 if (predicate(input)) 3757 input = modifier(input); 3758 3759 return input; 3760 }