1 /** 2 This module contains the engine behind Pegged, the expression templates building blocks to create a top-down 3 recursive-descent parser. 4 5 The terminals and non-terminals described here are meant to be used inside a Pegged grammar. 6 As such, they are a bit less user-friendly than what's output by pegged.grammar. 7 For example they take a ParseTree as input, not a string. 8 9 See the /docs directory for the full documentation as markdown files. 10 */ 11 module pegged.peg; 12 13 /* 14 NOTE: 15 Do not use the GrammarTester for unittesting in this module. This module needs 16 to be able to pass its unittests before the GrammarTester is even trustable. 17 Writing tests the long way is preferred here, as it will avoid the circular 18 dependency. 19 */ 20 21 import std.algorithm: equal, map, startsWith, max, countUntil, maxElement, filter; 22 import std.uni : isAlpha, icmp; 23 import std.array; 24 import std.conv; 25 import std.string: strip; 26 import std.typetuple; 27 28 @safe: 29 30 // Returns quoted and escaped version of the input, but if the input is null, then return `"end of input"`. 31 package string stringified(string inp) pure 32 { 33 import std.format : format; 34 35 if (inp is null) 36 return `"end of input"`; 37 38 string[1] shell = [inp]; 39 /* TODO: replace `format` with call to substitute!(isASCIIControlChar) that 40 * only does escaping of control characters to make this `nothrow`. Reuse 41 * http://mir-algorithm.libmir.org/mir_format.html#.printEscaped. */ 42 return "%(%s%)".format(shell); 43 } 44 45 unittest // Run- & Compile-time. 46 { 47 static bool doTest() 48 { 49 const caseSuit1 = [ 50 "" : `""`, 51 52 "\0" : `"\0"`, 53 "\n" : `"\n"`, 54 "\t" : `"\t"`, 55 56 `"` : `"\""`, 57 "`" : q"("`")", 58 "'" : `"'"`, 59 60 "42" : `"42"`, 61 "\"some text\"\n" : `"\"some text\"\n"` 62 ]; 63 64 const caseSuit2 = [cast(string)null : `"end of input"`]; 65 66 const auto testSuit = [caseSuit1, caseSuit2]; 67 68 foreach (const ref suit; testSuit) 69 { 70 foreach (const ref value, const ref expected; suit) 71 { 72 import std.format : format; 73 74 const result = value.stringified; 75 assert(result == expected, "from %s expected %s but got %s".format(value, expected, result)); 76 } 77 } 78 79 return true; 80 } 81 82 // Result is always true, but here, we just force CTFE-mode. 83 static assert(doTest); // Compile-time. 84 doTest; // Run-time. 85 } 86 87 version (tracer) 88 { 89 import std.experimental.logger; 90 import std.algorithm.comparison : min; 91 92 // Function pointers. 93 private static bool function(string ruleName, const ref ParseTree p) @safe traceConditionFunction; 94 private static bool delegate(string ruleName, const ref ParseTree p) @safe traceConditionDelegate; 95 private static int traceLevel; 96 private static bool traceBlocked; 97 private static bool logTraceLevel = false; 98 99 private void incTraceLevel() 100 { 101 if (!__ctfe) 102 traceLevel++; 103 } 104 105 private void decTraceLevel() 106 { 107 if (!__ctfe) 108 traceLevel--; 109 } 110 111 private bool shouldTrace(string ruleName, const ref ParseTree p) 112 { 113 if (__ctfe || traceBlocked) 114 return false; 115 if (traceConditionDelegate != null) 116 return traceConditionDelegate(ruleName, p); 117 if (traceConditionFunction != null) 118 return traceConditionFunction(ruleName, p); 119 return false; 120 } 121 122 static this() 123 { 124 traceLevel = 0; 125 traceNothing(); 126 traceBlocked = false; 127 } 128 129 /++ Supply a function to dynamically switch tracing on and off based on the rule name. 130 + 131 + Example: 132 + --- 133 + /* Exclude build-in parsers, only trace parsers generated from MyGrammar. */ 134 + setTraceConditionFunction(ruleName => ruleName.startsWith("MyGrammar")); 135 + --- 136 +/ 137 void setTraceConditionFunction(bool delegate(string ruleName, const ref ParseTree p) @safe condition) 138 { 139 traceConditionDelegate = condition; 140 traceConditionFunction = null; 141 } 142 143 /// ditto 144 void setTraceConditionFunction(bool function(string ruleName, const ref ParseTree p) @safe condition) 145 { 146 traceConditionFunction = condition; 147 traceConditionDelegate = null; 148 } 149 150 /** Trace all rules. 151 * 152 * This can produce a lot of output. 153 */ 154 void traceAll() 155 { 156 setTraceConditionFunction(function(string ruleName, const ref ParseTree p) {return true;}); 157 } 158 159 /** Do not trace any rules. */ 160 void traceNothing() 161 { 162 traceConditionFunction = null; 163 traceConditionDelegate = null; 164 } 165 166 private string traceMsg(ParseTree p, string expression, string name) 167 { 168 import std.format; 169 Position pos = position(p); 170 enum inputLength = 15; 171 string result; 172 for (auto i = 1; i <= traceLevel; i++) 173 result ~= format("%d|", i); 174 result ~= format(" (l:%d, c:%d, i:%d)\t", pos.line + 1, pos.col + 1, pos.index) ~ 175 expression.stringified ~ " considering rule " ~ name.stringified ~ " on " ~ 176 p.input[p.end .. min(p.input.length, p.end + inputLength)].stringified ~ 177 (p.end + inputLength > p.input.length ? "" : "..."); 178 return result; 179 } 180 181 private string traceResultMsg(ParseTree p, string name) 182 { 183 import std.format; 184 import std.range: chain; 185 import std.algorithm.iteration: joiner; 186 Position pos = position(p); 187 enum inputLength = 15; 188 string result; 189 for (auto i = 1; i <= traceLevel; i++) 190 result ~= format("%d|", i); 191 if (p.successful) 192 { 193 string consumed; 194 foreach (match; p.matches) 195 consumed ~= match; 196 result ~= format(" (l:%d, c:%d, i:%d)\t", pos.line + 1, pos.col + 1, pos.index) ~ name.stringified ~ " SUCCEEDED on " ~ 197 consumed.stringified; 198 } 199 else 200 result ~= format(" (l:%d, c:%d, i:%d)\t", pos.line + 1, pos.col + 1, pos.index) ~ name.stringified ~ " FAILED on " ~ 201 p.input[p.end .. min(p.input.length, p.end + inputLength)].stringified ~ 202 (p.end + inputLength > p.input.length ? "" : "..."); 203 return result; 204 } 205 206 /** 207 Overrides FileLogger to remove the time stamp. 208 209 Example: 210 --- 211 sharedLog = new TraceLogger("TraceLog.txt"); 212 --- 213 */ 214 class TraceLogger : FileLogger 215 { 216 this(in string fn) @safe 217 { 218 super(fn); 219 } 220 import std.concurrency : Tid; 221 import std.datetime : SysTime; 222 override protected void beginLogMsg(string file, int line, string funcName, 223 string prettyFuncName, string moduleName, LogLevel logLevel, 224 Tid threadId, SysTime timestamp, Logger logger) 225 @safe 226 { 227 } 228 } 229 } 230 231 /** 232 CT Switch for testing 'keywords' implementations 233 */ 234 enum 235 { 236 IFCHAIN, 237 TRIE 238 } 239 enum KEYWORDS = IFCHAIN; 240 241 /** 242 The basic parse tree, as used throughout the project. 243 You can define your own parse tree node, but respect the basic layout. 244 */ 245 struct ParseTree 246 { 247 string name; /// The node name 248 bool successful; /// Indicates whether a parsing was successful or not 249 string[] matches; /// The matched input's parts. Some expressions match at more than one place, hence matches is an array. 250 251 string input; /// The input string that generated the parse tree. Stored here for the parse tree to be passed to other expressions, as input. 252 size_t begin, end; /// Indices for the matched part from the very beginning of the first match to the last char of the last match. 253 254 ParseTree[] children; /// The sub-trees created by sub-rules parsing. 255 256 size_t failEnd; // The furthest this tree could match the input (including !successful rules). 257 ParseTree[] failedChild; /// The !successful child that could still be partially parsed. 258 259 /** 260 Basic toString for easy pretty-printing. 261 */ 262 string toString(string tabs = "") const pure 263 { 264 string result = name; 265 266 string childrenString; 267 bool allChildrenSuccessful = true; 268 269 foreach(i,child; children) 270 { 271 childrenString ~= tabs ~ " +-" ~ child.toString(tabs ~ ((i < children.length -1 ) ? " | " : " ")); 272 if (!child.successful) { 273 allChildrenSuccessful = false; 274 } 275 } 276 result ~= this.toStringThisNode(allChildrenSuccessful); 277 return result ~ childrenString; 278 } 279 280 /** 281 * Basic toString of only this node, without the children 282 */ 283 private string toStringThisNode(bool allChildrenSuccessful) const pure 284 { 285 if (successful) { 286 return to!string([begin, end]) ~ to!string(matches) ~ "\n"; 287 } else { // some failure info is needed 288 if (allChildrenSuccessful) { // no one calculated the position yet 289 return " " ~ this.failMsg ~ "\n"; 290 } else { 291 return " (failure)\n"; 292 } 293 } 294 } 295 296 /** 297 * Generates a generic error when a node fails 298 * 299 * @param successMsg String returned when there isn't an error 300 * @param formatFailMsg Formating delegate function that generates the error message. 301 */ 302 string failMsg(string delegate(Position, string, string, const ParseTree) @safe pure formatFailMsg = defaultFormatFailMsg, 303 string successMsg = "Sucess") const @property pure 304 { 305 foreach(i, child; children) { 306 if (!child.successful) { 307 return child.failMsg(formatFailMsg, successMsg); 308 } 309 } 310 311 if (!successful) { 312 Position pos = position(this); 313 string left, right; 314 315 if (pos.index < 10) { 316 left = input[0 .. pos.index]; 317 } else { 318 left = input[pos.index - 10 .. pos.index]; 319 } 320 if (pos.index + 10 < input.length) { 321 right = input[pos.index .. pos.index + 10]; 322 } else { 323 right = input[pos.index .. $]; 324 } 325 return formatFailMsg(pos, left, right, this); 326 } 327 328 return successMsg; 329 } 330 331 ParseTree dup() const @property pure nothrow 332 { 333 ParseTree result; 334 result.name = name; 335 result.successful = successful; 336 result.matches = matches.dup; 337 result.input = input; 338 result.begin = begin; 339 result.end = end; 340 result.failEnd = failEnd; 341 result.failedChild = map!(p => p.dup)(failedChild).array(); 342 result.children = map!(p => p.dup)(children).array(); 343 return result; 344 } 345 346 immutable(ParseTree) idup() const @property @trusted pure nothrow 347 { 348 return cast(immutable)dup(); 349 } 350 351 // Override opIndex operators 352 ref inout(ParseTree) opIndex(size_t index) inout pure nothrow @nogc { 353 return children[index]; 354 } 355 356 ref inout(ParseTree[]) opIndex() inout return pure { 357 return children; 358 } 359 360 size_t opDollar(size_t pos)() const pure nothrow @nogc 361 { 362 return children.length; 363 } 364 365 inout(ParseTree)[] opSlice(size_t i, size_t j) inout pure nothrow @nogc { 366 return children[i..j]; 367 } 368 } 369 370 /** 371 * Default fail message formating function 372 */ 373 immutable defaultFormatFailMsg = delegate (Position pos, string left, string right, const ParseTree pt) pure 374 { 375 return "Failure at line " ~ to!string(pos.line) ~ ", col " ~ to!string(pos.col) ~ ", " 376 ~ (left.length > 0 ? "after " ~ left.stringified ~ " " : "") 377 ~ "expected " ~ (pt.matches.length > 0 ? pt.matches[$ - 1].stringified : "NO MATCH") 378 ~ `, but got ` ~ right.stringified; 379 }; 380 381 382 unittest // ParseTree testing 383 { 384 ParseTree p; 385 assert(p == p, "Self-identity on null tree."); 386 387 p = ParseTree("Name", true, ["abc", "", "def"], "input", 0, 1, null); 388 assert(p == p, "Self identity on non-null tree."); 389 390 ParseTree child = ParseTree("Child", true, ["abc", "", "def"], "input", 0, 1, null); 391 p.children = [child, child]; 392 393 assert(p.children[0] == p[0], "override opIndex allows to write less verbose code to navigate the tree"); 394 assert(p.children == p[] ); 395 assert(p.children[0..1] == p[0..1] ); 396 assert(p.children[0..$] == p[0..$] ); 397 398 ParseTree q = p; 399 assert(p == q, "Copying creates equal trees."); 400 401 assert(p.children == q.children); 402 p.children = [child, child, child]; 403 assert(q.children != p.children, "Disconnecting children."); 404 405 p = ParseTree("Name", true, ["abc", "", "def"], "input", 0, 1, null); 406 p.children = [child, child]; 407 408 q = p.dup; 409 assert(p == q, "Dupping creates equal trees."); 410 411 assert(p.children == q.children, "Equal children for dupped trees."); 412 p.children = null; 413 assert(q.children != p.children); 414 415 immutable iq = p.idup; 416 q = iq.dup; 417 assert(p == q, "Dupping to/from immutable creates equal trees."); 418 419 q.children = [p,p]; 420 assert(p != q, "Tree with different children are not equal."); 421 422 p.children = [p,p]; 423 assert(p == q, "Adding equivalent children is OK."); 424 425 p.matches = null; 426 assert(p != q, "Nulling matches makes trees unequal."); 427 428 p.matches = q.matches; 429 assert(p == q, "Copying matches makes equal trees."); 430 431 p.children[0].successful = false; 432 assert(p.children[0].failMsg == `Failure at line 0, col 1, after "i" expected "def", but got "nput"`); 433 assert(p.children[1].failMsg == "Sucess"); 434 assert(p.failMsg == `Failure at line 0, col 1, after "i" expected "def", but got "nput"`); 435 } 436 437 /// To compare two trees for content (not bothering with node names) 438 /// That's useful to compare the results from two different grammars. 439 bool softCompare(const ParseTree p1, const ParseTree p2) 440 { 441 return p1.successful == p2.successful 442 && p1.matches == p2.matches 443 && p1.begin == p2.begin 444 && p1.end == p2.end 445 && equal!(softCompare)(p1.children, p2.children); // the same for children 446 } 447 448 unittest // softCompare 449 { 450 ParseTree p = ParseTree("Name", true, ["abc", "", "def"], "input", 0, 1, null); 451 ParseTree child = ParseTree("Child", true, ["abc", "", "def"], "input", 0, 1, null); 452 p.children = [child, child]; 453 454 ParseTree q = p; 455 assert(p == q, "Copy => Equal trees."); 456 assert(softCompare(p,q), "Copy => Trees equal for softCompare."); 457 458 q.name = "Another One"; 459 assert(p != q, "Name change => non-equal trees."); 460 assert(softCompare(p,q), "Name change => Trees equal for softCompare."); 461 } 462 463 /// To record a position in a text 464 struct Position 465 { 466 size_t line;/// line number (starts at 0) 467 size_t col;/// column number (starts at 0) 468 size_t index;/// index (starts at 0) 469 } 470 471 /** 472 Given an input string, returns the position corresponding to the end of the string. 473 474 For example: 475 --- 476 assert(position("abc") == Position(0,3,3)); 477 assert(position("abc 478 ") == Position(1,0,4)); 479 assert(position("abc 480 481 ") == Position(2,4,8)); 482 --- 483 */ 484 Position position(string s) pure nothrow 485 { 486 size_t col, line, index, prev_i; 487 char prev_c; 488 foreach(i,c; s) 489 { 490 if ((c == '\n' && !(i == prev_i + 1 && prev_c == '\r')) || // new line except when directly following a carriage return. 491 c == '\r') 492 { 493 col = 0; 494 ++line; 495 ++index; 496 prev_i = i; 497 prev_c = c; 498 } 499 else 500 { 501 if (c != '\n') 502 ++col; 503 ++index; 504 } 505 } 506 507 return Position(line,col,index); 508 } 509 510 /** 511 Same as previous overload, but from the begin of P.input to p.end 512 */ 513 Position position(const ParseTree p) pure nothrow 514 { 515 return position(p.input[0..p.end]); 516 } 517 518 unittest 519 { 520 assert(position("") == Position(0,0,0), "Null string, position 0."); 521 assert(position("abc") == Position(0,3,3), "length 3 string, no line feed."); 522 assert(position("abc 523 ") == Position(1,0,4), "One end of line."); 524 assert(position("abc 525 526 ----") == Position(2,4,9), "Three lines (second one empty)."); 527 assert(position("abc 528 ---- 529 ----") == Position(2,4,13), "Three lines."); 530 assert(position(" 531 532 533 ") == Position(3,0,3), "Four lines, all empty."); 534 assert(position("one\r\ntwo\r\nthree") == Position(2, 5, 15), "Three lines, DOS line endings"); 535 } 536 537 string getName(alias expr)() @property @safe 538 { 539 static if (is(typeof( { expr(GetName()); }))) 540 return expr(GetName()); 541 else 542 return __traits(identifier, expr); 543 } 544 545 struct GetName {} 546 547 /** 548 Basic rule, that always fail without consuming. 549 */ 550 ParseTree fail(ParseTree p) pure nothrow @nogc 551 { 552 return ParseTree("fail", false, [], p.input, p.end, p.end, null); 553 } 554 555 /// ditto 556 ParseTree fail(string input) pure nothrow @nogc 557 { 558 return fail(ParseTree("", false, [], input)); 559 } 560 561 string fail(GetName g) pure nothrow @nogc 562 { 563 return "fail"; 564 } 565 566 unittest // 'fail' unit test 567 { 568 ParseTree input = ParseTree("input", true, [], "This is the input string.", 0,0, null); 569 ParseTree result = fail(input); 570 assert(result.name == "fail"); 571 assert(!result.successful, "'fail' fails."); 572 assert(result.matches is null, "'fail' makes no match."); 573 assert(result.input == input.input, "'fail' does not change the input."); 574 assert(result.end == input.end, "'fail' puts the index after the previous parse."); 575 assert(result.children is null, "'fail' has no children."); 576 577 result = fail("This is the input string."); 578 assert(!result.successful, "'fail' fails."); 579 } 580 581 /** 582 Matches the end of input. Fails if there is any character left. 583 */ 584 ParseTree eoi(ParseTree p) pure nothrow 585 { 586 if (p.end == p.input.length) 587 return ParseTree("eoi", true, [], p.input, p.end, p.end); 588 else 589 return ParseTree("eoi", false, ["end of input"], p.input, p.end, p.end); 590 } 591 592 /// ditto 593 ParseTree eoi(string input) pure nothrow 594 { 595 return eoi(ParseTree("", false, [], input)); 596 } 597 598 string eoi(GetName g) pure nothrow 599 { 600 return "eoi"; 601 } 602 603 alias eoi endOfInput; /// helper alias. 604 605 unittest // 'eoi' unit test 606 { 607 ParseTree input = ParseTree("input", true, [], "This is the input string.", 0,0, null); 608 ParseTree result = eoi(input); 609 assert(result.name == "eoi"); 610 assert(!result.successful, "'eoi' fails on non-null string."); 611 assert(result.matches == ["end of input"], "'eoi' error message."); 612 assert(result.input == input.input, "'eoi' does not change the input."); 613 assert(result.end == input.end, "'eoi' puts the index after the previous parse."); 614 assert(result.children is null, "'eoi' has no children."); 615 616 input = ParseTree("input", true, [], "", 0,0, null); 617 result = eoi(input); 618 assert(result.successful, "'eoi' succeeds on strings of length 0."); 619 result = eoi(""); 620 assert(result.successful, "'eoi' succeeds on strings of length 0."); 621 result = eoi(null); 622 assert(result.successful, "'eoi' succeeds on null strings"); 623 } 624 625 /** 626 Match any character. As long as there is at least a character left in the input, it succeeds. 627 Conversely, it fails only if called at the end of the input. 628 */ 629 ParseTree any(ParseTree p) pure nothrow 630 { 631 if (p.end < p.input.length) 632 return ParseTree("any", true, [p.input[p.end..p.end+1]], p.input, p.end, p.end+1); 633 else 634 return ParseTree("any", false, ["any char"], p.input, p.end, p.end); 635 } 636 637 /// ditto 638 ParseTree any(string input) pure nothrow 639 { 640 return any(ParseTree("", false, [], input)); 641 } 642 643 string any(GetName g) pure nothrow 644 { 645 return "any"; 646 } 647 648 unittest // 'any' unit test 649 { 650 ParseTree input = ParseTree("input", true, [], "This is the input string.", 0,0, null); 651 ParseTree result = any(input); 652 653 assert(result.name == "any"); 654 assert(result.successful, "'any' succeeds on non-null strings."); 655 assert(result.matches == ["T"], "'any' matches the first char in an input."); 656 assert(result.input == input.input, "'any' does not change the input."); 657 assert(result.end == input.end+1, "'any' advances the index by one position."); 658 assert(result.children is null, "'any' has no children."); 659 660 result = any("a"); 661 assert(result.successful, "'any' matches on strings of length one."); 662 assert(result.matches == ["a"], "'any' matches the first char in an input."); 663 assert(result.input == "a", "'any' does not change the input."); 664 assert(result.end == 1, "'any' advances the index by one position."); 665 assert(result.children is null, "'any' has no children."); 666 667 input = ParseTree("input", true, [], "", 0,0, null); 668 669 result = any(input); 670 assert(!result.successful, "'any' fails on strings of length 0."); 671 assert(result.matches == ["any char"], "'any' error message on strings of length 0."); 672 assert(result.end == 0, "'any' does not advance the index."); 673 674 result = any(""); 675 assert(!result.successful, "'any' fails on strings of length 0."); 676 assert(result.matches == ["any char"], "'any' error message on strings of length 0."); 677 assert(result.end == 0, "'any' does not advance the index."); 678 679 result = any(null); 680 assert(!result.successful, "'any' fails on null strings."); 681 assert(result.matches == ["any char"], "'any' error message on strings of length 0."); 682 assert(result.end == 0, "'any' does not advance the index."); 683 } 684 685 /** 686 Predefined parser: matches word boundaries, as \b for regexes. 687 */ 688 ParseTree wordBoundary(ParseTree p) pure // TODO: nothrow? 689 { 690 // TODO: I added more indexing guards and now this could probably use 691 // some simplification. Too tired to write it better. --Chad 692 bool matched = (p.end == 0 && isAlpha(p.input.front())) 693 || (p.end == p.input.length && isAlpha(p.input.back())) 694 || (p.end > 0 && isAlpha(p.input[p.end-1]) && p.end < p.input.length && !isAlpha(p.input[p.end])) 695 || (p.end > 0 && !isAlpha(p.input[p.end-1]) && p.end < p.input.length && isAlpha(p.input[p.end])); 696 if (matched) 697 return ParseTree("wordBoundary", matched, [], p.input, p.end, p.end, null); 698 else 699 return ParseTree("wordBoundary", matched, ["word boundary"], p.input, p.end, p.end, null); 700 } 701 702 /// ditto 703 ParseTree wordBoundary(string input) pure // TODO: nothrow? 704 { 705 return ParseTree("wordBoundary", isAlpha(input.front()), [], input, 0,0, null); 706 } 707 708 string wordBoundary(GetName g) pure nothrow 709 { 710 return "wordBoundary"; 711 } 712 713 unittest // word boundary 714 { 715 ParseTree input = ParseTree("", false, [], "This is a word."); 716 auto wb = [// "This" 717 cast(size_t)(0):true, 1:false, 2:false, 3:false, 4: true, 718 // "is" 719 5: true, 6:false, 7: true, 720 // "a" 721 8: true, 9:true, 722 // "word" 723 10:true, 11:false, 12:false, 13:false, 14:true, 724 // "." 725 15:false 726 ]; 727 728 foreach(size_t index; 0 .. input.input.length) 729 { 730 input.end = index; 731 ParseTree result = wordBoundary(input); 732 733 assert(result.name == "wordBoundary"); 734 assert(result.successful == wb[index]); // true, false, ... 735 // for errors, there is an error message 736 assert(result.successful && result.matches is null || !result.successful); 737 assert(result.begin == input.end); 738 assert(result.end == input.end); 739 assert(result.children is null); 740 } 741 } 742 743 /** 744 Represents a literal in a PEG, like "abc" or 'abc' (or even ''). 745 It succeeds if a prefix of the input is equal to its template parameter and fails otherwise. 746 */ 747 template literal(string s) 748 { 749 enum name = "literal!(\""~s~"\")"; 750 751 ParseTree literal(ParseTree p) 752 { 753 enum lit = "\"" ~ s ~ "\""; 754 755 if (p.end+s.length <= p.input.length && p.input[p.end..p.end+s.length] == s) 756 return ParseTree(name, true, [s], p.input, p.end, p.end+s.length); 757 else { 758 import std.algorithm : commonPrefix; 759 import std.utf : byCodeUnit; 760 auto prefix = p.input[p.end..$].byCodeUnit.commonPrefix(s.byCodeUnit); 761 return ParseTree(name, false, [lit], p.input, p.end, p.end, null, p.end + prefix.length); 762 } 763 } 764 765 ParseTree literal(string input) 766 { 767 return .literal!(s)(ParseTree("", false, [], input)); 768 } 769 770 771 string literal(GetName g) 772 { 773 return name; 774 } 775 776 } 777 778 unittest // 'literal' unit test 779 { 780 ParseTree input = ParseTree("input", true, [], "abcdef", 0,0, null); 781 782 alias literal!"a" a; 783 alias literal!"abc" abc; 784 alias literal!"" empty; 785 786 ParseTree result = a(input); 787 788 assert(result.name == `literal!("a")`, "Literal name test."); 789 assert(result.successful, "'a' succeeds on inputs beginning with 'a'."); 790 assert(result.matches == ["a"], "'a' matches the 'a' at the beginning."); 791 assert(result.input == input.input, "'a' does not change the input."); 792 assert(result.end == input.end+1, "'a' advances the index by one position."); 793 assert(result.children is null, "'a' has no children."); 794 795 result = a("abcdef"); 796 797 assert(result.successful, "'a' succeeds on inputs beginning with 'a'."); 798 assert(result.matches == ["a"], "'a' matches the 'a' at the beginning."); 799 assert(result.input == input.input, "'a' does not change the input."); 800 assert(result.end == input.end+1, "'a' advances the index by one position."); 801 assert(result.children is null, "'a' has no children."); 802 803 result = abc(input); 804 805 assert(result.name == `literal!("abc")`, "Literal name test."); 806 assert(result.successful, "'abc' succeeds on inputs beginning with 'abc'."); 807 assert(result.matches == ["abc"], "'abc' matches 'abc' at the beginning."); 808 assert(result.input == input.input, "'abc' does not change the input."); 809 assert(result.end == input.end+3, "'abc' advances the index by 3 positions."); 810 assert(result.children is null, "'abc' has no children."); 811 812 result = abc("abcdef"); 813 814 assert(result.successful, "'abc' succeeds on inputs beginning with 'abc'."); 815 assert(result.matches == ["abc"], "'abc' matches 'abc' at the beginning."); 816 assert(result.input == input.input, "'abc' does not change the input."); 817 assert(result.end == input.end+3, "'abc' advances the index by 3 positions."); 818 assert(result.children is null, "'abc' has no children."); 819 820 result = empty(input); 821 822 assert(result.name == `literal!("")`, "Literal name test."); 823 assert(result.successful, "'' succeeds on non-null inputs."); 824 assert(result.matches == [""], "'' matches '' at the beginning."); 825 assert(result.input == input.input, "'' does not change the input."); 826 assert(result.end == input.end+0, "'' does not advance the index."); 827 assert(result.children is null, "'' has no children."); 828 829 result = empty("abcdef"); 830 831 assert(result.successful, "'' succeeds on non-null inputs."); 832 assert(result.matches == [""], "'' matches '' at the beginning."); 833 assert(result.input == input.input, "'' does not change the input."); 834 assert(result.end == input.end+0, "'' does not advance the index."); 835 assert(result.children is null, "'' has no children."); 836 837 input.input = "bcdef"; 838 839 result = a(input); 840 841 assert(!result.successful, "'a' fails on inputs not beginning with 'a'."); 842 assert(result.matches == ["\"a\""], "'a' makes no match on 'bcdef'."); 843 assert(result.input == input.input, "'a' does not change the input."); 844 assert(result.end == input.end, "'a' does not advances the index on 'bcdef'."); 845 assert(result.children is null, "'a' has no children."); 846 847 result = abc(input); 848 849 assert(!result.successful, "'abc' fails on inputs not beginning with 'abc'."); 850 assert(result.matches == ["\"abc\""], "'abc' does no match on 'bcdef'."); 851 assert(result.input == input.input, "'abc' does not change the input."); 852 assert(result.end == input.end, "'abc' does not advance the index on 'bcdef'."); 853 assert(result.children is null, "'abc' has no children."); 854 855 result = empty(input); 856 857 assert(result.successful, "'' succeeds on non-null inputs."); 858 assert(result.matches == [""], "'' matches '' at the beginning."); 859 assert(result.input == input.input, "'' does not change the input."); 860 assert(result.end == input.end+0, "'' does not advance the index."); 861 assert(result.children is null, "'' has no children."); 862 863 input.input = ""; 864 865 result = a(input); 866 867 assert(!result.successful, "'a' fails on empty strings."); 868 assert(result.matches == ["\"a\""], "'a' does not match ''."); 869 assert(result.input == input.input, "'a' does not change the input."); 870 assert(result.end == input.end, "'a' does not advance the index on 'bcdef'."); 871 assert(result.children is null, "'a' has no children."); 872 873 result = abc(input); 874 875 assert(!result.successful, "'abc' fails on empty strings."); 876 assert(result.matches == ["\"abc\""], "'abc' does not match ''."); 877 assert(result.input == input.input, "'abc' does not change the input."); 878 assert(result.end == input.end, "'abc' does not advance the index on 'bcdef'."); 879 assert(result.children is null, "'abc' has no children."); 880 881 result = empty(input); 882 883 assert(result.successful, "'' succeeds on empty strings."); 884 assert(result.matches == [""], "'' matches '' at the beginning, even on empty strings."); 885 assert(result.input == input.input, "'' does not change the input."); 886 assert(result.end == input.end+0, "'' does not advance the index."); 887 assert(result.children is null, "'' has no children."); 888 } 889 890 /** 891 Represents a case insensitive literal in a PEG, like "abc"i or 'abc'i (or even ''i). 892 It succeeds if a case insensitive comparison of a prefix of the input and its template 893 parameter yields no difference and fails otherwise. 894 */ 895 template caseInsensitiveLiteral(string s) 896 { 897 enum name = "caseInsensitiveLiteral!(\""~s~"\")"; 898 899 ParseTree caseInsensitiveLiteral(ParseTree p) 900 { 901 enum lit = "\"" ~ s ~ "\""; 902 if (p.end+s.length <= p.input.length && icmp(p.input[p.end..p.end+s.length], s) == 0) 903 return ParseTree(name, true, [s], p.input, p.end, p.end+s.length); 904 else 905 return ParseTree(name, false, [lit], p.input, p.end, p.end); 906 } 907 908 ParseTree caseInsensitiveLiteral(string input) 909 { 910 return .caseInsensitiveLiteral!(s)(ParseTree("", false, [], input)); 911 } 912 913 914 string caseInsensitiveLiteral(GetName g) 915 { 916 return name; 917 } 918 919 } 920 921 unittest // 'caseInsensitiveLiteral' unit test 922 { 923 ParseTree input = ParseTree("input", true, [], "AbCdEf", 0,0, null); 924 925 alias caseInsensitiveLiteral!"a" a; 926 alias caseInsensitiveLiteral!"aBC" abc; 927 alias caseInsensitiveLiteral!"" empty; 928 929 ParseTree result = a(input); 930 931 assert(result.name == `caseInsensitiveLiteral!("a")`, "caseInsensitiveLiteral name test."); 932 assert(result.successful, "'a' succeeds on inputs beginning with 'A'."); 933 assert(result.matches == ["a"], "'a' matches the 'A' at the beginning."); 934 assert(result.input == input.input, "'a' does not change the input."); 935 assert(result.end == input.end+1, "'a' advances the index by one position."); 936 assert(result.children is null, "'a' has no children."); 937 938 result = a("AbCdEf"); 939 940 assert(result.successful, "'a' succeeds on inputs beginning with 'A'."); 941 assert(result.matches == ["a"], "'a' matches the 'A' at the beginning."); 942 assert(result.input == input.input, "'a' does not change the input."); 943 assert(result.end == input.end+1, "'a' advances the index by one position."); 944 assert(result.children is null, "'a' has no children."); 945 946 result = abc(input); 947 948 assert(result.name == `caseInsensitiveLiteral!("aBC")`, "caseInsensitiveLiteral name test."); 949 assert(result.successful, "'aBC' succeeds on inputs beginning with 'AbC'."); 950 assert(result.matches == ["aBC"], "'AbC' matches 'aBC' at the beginning."); 951 assert(result.input == input.input, "'aBC' does not change the input."); 952 assert(result.end == input.end+3, "'aBC' advances the index by 3 positions."); 953 assert(result.children is null, "'aBC' has no children."); 954 955 result = abc("AbCdEf"); 956 957 assert(result.successful, "'aBC' succeeds on inputs beginning with 'AbC'."); 958 assert(result.matches == ["aBC"], "'AbC' matches 'aBC' at the beginning."); 959 assert(result.input == input.input, "'aBC' does not change the input."); 960 assert(result.end == input.end+3, "'aBC' advances the index by 3 positions."); 961 assert(result.children is null, "'aBC' has no children."); 962 963 result = empty(input); 964 965 assert(result.name == `caseInsensitiveLiteral!("")`, "caseInsensitiveLiteral name test."); 966 assert(result.successful, "'' succeeds on non-null inputs."); 967 assert(result.matches == [""], "'' matches '' at the beginning."); 968 assert(result.input == input.input, "'' does not change the input."); 969 assert(result.end == input.end+0, "'' does not advance the index."); 970 assert(result.children is null, "'' has no children."); 971 972 result = empty("AbCdEf"); 973 974 assert(result.successful, "'' succeeds on non-null inputs."); 975 assert(result.matches == [""], "'' matches '' at the beginning."); 976 assert(result.input == input.input, "'' does not change the input."); 977 assert(result.end == input.end+0, "'' does not advance the index."); 978 assert(result.children is null, "'' has no children."); 979 980 input.input = "bCdEf"; 981 982 result = a(input); 983 984 assert(!result.successful, "'a' fails on inputs not beginning with 'a' or 'A'."); 985 assert(result.matches == ["\"a\""], "'a' makes no match on 'bCdEf'."); 986 assert(result.input == input.input, "'a' does not change the input."); 987 assert(result.end == input.end, "'a' does not advances the index on 'bCdEf'."); 988 assert(result.children is null, "'a' has no children."); 989 990 result = abc(input); 991 992 assert(!result.successful, "'aBC' fails on inputs beginning with 'bCdEf'."); 993 assert(result.matches == ["\"aBC\""], "'aBC' does no match on 'bCdEf'."); 994 assert(result.input == input.input, "'aBC' does not change the input."); 995 assert(result.end == input.end, "'aBC' does not advance the index on 'bCdEf'."); 996 assert(result.children is null, "'aBC' has no children."); 997 998 result = empty(input); 999 1000 assert(result.successful, "'' succeeds on non-null inputs."); 1001 assert(result.matches == [""], "'' matches '' at the beginning."); 1002 assert(result.input == input.input, "'' does not change the input."); 1003 assert(result.end == input.end+0, "'' does not advance the index."); 1004 assert(result.children is null, "'' has no children."); 1005 1006 input.input = ""; 1007 1008 result = a(input); 1009 1010 assert(!result.successful, "'a' fails on empty strings."); 1011 assert(result.matches == ["\"a\""], "'a' does not match ''."); 1012 assert(result.input == input.input, "'a' does not change the input."); 1013 assert(result.end == input.end, "'a' does not advance the index on 'bCdEf'."); 1014 assert(result.children is null, "'a' has no children."); 1015 1016 result = abc(input); 1017 1018 assert(!result.successful, "'aBC' fails on empty strings."); 1019 assert(result.matches == ["\"aBC\""], "'aBC' does not match ''."); 1020 assert(result.input == input.input, "'aBC' does not change the input."); 1021 assert(result.end == input.end, "'aBC' does not advance the index on 'bCdEf'."); 1022 assert(result.children is null, "'aBC' has no children."); 1023 1024 result = empty(input); 1025 1026 assert(result.successful, "'' succeeds on empty strings."); 1027 assert(result.matches == [""], "'' matches '' at the beginning, even on empty strings."); 1028 assert(result.input == input.input, "'' does not change the input."); 1029 assert(result.end == input.end+0, "'' does not advance the index."); 1030 assert(result.children is null, "'' has no children."); 1031 1032 input.input = "ÄöÜæØå"; 1033 result = caseInsensitiveLiteral!"äÖüÆøÅ"(input); 1034 1035 assert(result.successful, "Unicode characters are matched case insensitively."); 1036 } 1037 1038 /** 1039 Represents a range of chars, from begin to end, included. So charRange!('a','z') matches 1040 all English lowercase letters. If fails if the input is empty or does not begin with a character 1041 between begin and end. 1042 1043 If begin == end, it will match one char (begin... or end). 1044 1045 begin > end is non-legal. 1046 */ 1047 template charRange(dchar begin, dchar end) if (begin <= end) 1048 { 1049 enum name = "charRange!('"~to!string(begin)~"','" ~ to!string(end) ~ "')"; 1050 1051 ParseTree charRange(ParseTree p) 1052 { 1053 enum longname = "a char between '"~to!string(begin)~"' and '"~to!string(end)~"'"; 1054 if (p.end < p.input.length && p.input[p.end] >= begin && p.input[p.end] <= end) 1055 return ParseTree(name, true, [p.input[p.end..p.end+1]], p.input, p.end, p.end+1); 1056 else 1057 return ParseTree(name, false, [longname], p.input, p.end, p.end); 1058 } 1059 1060 ParseTree charRange(string input) 1061 { 1062 return .charRange!(begin,end)(ParseTree("",false,[],input)); 1063 } 1064 1065 string charRange(GetName g) 1066 { 1067 return name; 1068 } 1069 } 1070 1071 unittest // 'charRange' unit test 1072 { 1073 ParseTree input = ParseTree("input", true, [], "abcdef", 0,0, null); 1074 1075 alias charRange!('a','a') aa; 1076 alias charRange!('a','b') ab; 1077 alias charRange!('a','z') az; 1078 alias charRange!(dchar.min,dchar.max) allChars; 1079 alias charRange!('\U00000000','\U000000FF') ASCII; 1080 1081 static assert(!__traits(compiles, {alias charRange!('z','a') za;})); 1082 1083 ParseTree result = aa(input); 1084 1085 assert(result.name == "charRange!('a','a')", "charRange name test."); 1086 assert(result.successful, "'a-a' succeeds on inputs beginning with 'a'."); 1087 assert(result.matches == ["a"], "'a-a' matches the 'a' at the beginning."); 1088 assert(result.input == input.input, "'a-a' does not change the input."); 1089 assert(result.end == input.end+1, "'a-a' advances the index by one position."); 1090 assert(result.children is null, "'a-a' has no children."); 1091 1092 result = ab("abcdef"); 1093 1094 assert(result.name == "charRange!('a','b')", "charRange name test."); 1095 assert(result.successful, "'a-b' succeeds on inputs beginning with 'a'."); 1096 assert(result.matches == ["a"], "'a-b' matches the 'a' at the beginning."); 1097 assert(result.input == input.input, "'a-b' does not change the input."); 1098 assert(result.end == input.end+1, "'a-b' advances the index by one position."); 1099 assert(result.children is null, "'a-b' has no children."); 1100 1101 result = az(input); 1102 1103 assert(result.name == "charRange!('a','z')", "charRange name test."); 1104 assert(result.successful, "'a-z' succeeds on inputs beginning with 'abc'."); 1105 assert(result.matches == ["a"], "'a-z' matches 'a' at the beginning."); 1106 assert(result.input == input.input, "'a-z' does not change the input."); 1107 assert(result.end == input.end+1, "'a-z' advances the index by one position."); 1108 assert(result.children is null, "'a-z' has no children."); 1109 1110 input.input = "bcdef"; 1111 1112 result = aa(input); 1113 1114 assert(!result.successful, "'a-a' fails on inputs not beginning with 'a'."); 1115 assert(result.matches == ["a char between 'a' and 'a'"], "'a-a' makes no match on 'bcdef'."); 1116 assert(result.input == input.input, "'a-a' does not change the input."); 1117 assert(result.end == input.end, "'a-a' does not advances the index on 'bcdef'."); 1118 assert(result.children is null, "'a-a' has no children."); 1119 1120 result = ab(input); 1121 1122 assert(result.successful, "'a-b' succeeds on inputs beginning with 'b'."); 1123 assert(result.matches == ["b"], "'a-b' matches on 'bcdef'."); 1124 assert(result.input == input.input, "'a-b' does not change the input."); 1125 assert(result.end == input.end+1, "'a-b' advances the index by one position'."); 1126 assert(result.children is null, "'a-b' has no children."); 1127 1128 result = az(input); 1129 1130 assert(result.successful, "'a-z' succeeds on 'bcdef'."); 1131 assert(result.matches == ["b"], "'a-z' matches 'b' at the beginning of 'bcdef'."); 1132 assert(result.input == input.input, "'a-z' does not change the input."); 1133 assert(result.end == input.end+1, "'a-z' advances the index by one position."); 1134 assert(result.children is null, "'a-z' has no children."); 1135 1136 input.input = ""; 1137 1138 result = aa(input); 1139 1140 assert(!result.successful, "'a-a' fails on empty strings."); 1141 assert(result.matches == ["a char between 'a' and 'a'"], "'a-a' does not match ''."); 1142 assert(result.input == input.input, "'a-a' does not change the input."); 1143 assert(result.end == input.end, "'a-a' does not advance the index on ''."); 1144 assert(result.children is null, "'a-a' has no children."); 1145 1146 result = ab(input); 1147 1148 assert(!result.successful, "'a-b' fails on empty strings."); 1149 assert(result.matches == ["a char between 'a' and 'b'"], "'a-b' does not match ''."); 1150 assert(result.input == input.input, "'a-b' does not change the input."); 1151 assert(result.end == input.end, "'a-b' does not advance the index on ''."); 1152 assert(result.children is null, "'a-b' has no children."); 1153 1154 result = az(input); 1155 1156 assert(!result.successful, "'a-z' fails on empty strings."); 1157 assert(result.matches == ["a char between 'a' and 'z'"], "'a-z' does not match ''."); 1158 assert(result.input == input.input, "'a-z' does not change the input."); 1159 assert(result.end == input.end, "'a-z' does not advance the index on ''."); 1160 assert(result.children is null, "'a-z' has no children."); 1161 1162 input.input = "123"; 1163 1164 result = aa(input); 1165 1166 assert(!result.successful, "'a-a' fails on '123'."); 1167 assert(result.matches == ["a char between 'a' and 'a'"], "'a-a' does not match '123'."); 1168 assert(result.input == input.input, "'a-a' does not change the input."); 1169 assert(result.end == input.end, "'a-a' does not advance the index on '123'."); 1170 assert(result.children is null, "'a-a' has no children."); 1171 1172 result = ab(input); 1173 1174 assert(!result.successful, "'a-b' fails on '123'."); 1175 assert(result.matches == ["a char between 'a' and 'b'"], "'a-b' does not match '123'."); 1176 assert(result.input == input.input, "'a-b' does not change the input."); 1177 assert(result.end == input.end, "'a-b' does not advance the index on '123'."); 1178 assert(result.children is null, "'a-b' has no children."); 1179 1180 result = az(input); 1181 1182 assert(!result.successful, "'a-z' fails on '123'."); 1183 assert(result.matches == ["a char between 'a' and 'z'"], "'a-z' does not match '123'."); 1184 assert(result.input == input.input, "'a-z' does not change the input."); 1185 assert(result.end == input.end, "'a-z' does not advance the index on '123'."); 1186 assert(result.children is null, "'a-z' has no children."); 1187 1188 1189 foreach(dchar ch; 0..256*(128+64+16+8)) 1190 assert(allChars(to!string(ch)).successful); 1191 1192 assert(!allChars("").successful); 1193 } 1194 1195 /** 1196 eps matches the empty string (usually denoted by the Greek letter 'epsilon') and always succeeds. 1197 It's equivalent to literal!"" (for example, it creates a match of [""]: one match, the empty string). 1198 */ 1199 ParseTree eps(ParseTree p) pure nothrow 1200 { 1201 return ParseTree("eps", true, [""], p.input, p.end, p.end); 1202 } 1203 1204 ParseTree eps(string input) pure nothrow 1205 { 1206 return eps(ParseTree("",false,[], input)); 1207 } 1208 1209 string eps(GetName g) pure nothrow 1210 { 1211 return "eps"; 1212 } 1213 1214 unittest // 'eps' unit test 1215 { 1216 ParseTree input = ParseTree("input", true, [], "abcdef", 0,0, null); 1217 1218 ParseTree result = eps(input); 1219 1220 assert(result.name == "eps"); 1221 assert(result.successful, "'eps' succeeds on non-null inputs."); 1222 assert(result.matches == [""], "'eps' matches '' at the beginning."); 1223 assert(result.input == input.input, "'eps' does not change the input."); 1224 assert(result.end == input.end+0, "'eps' does not advance the index."); 1225 assert(result.children is null, "'eps' has no children."); 1226 1227 input.input = ""; 1228 1229 result = eps(input); 1230 assert(result.name == "eps"); 1231 assert(result.successful, "'eps' succeeds on empty strings."); 1232 assert(result.matches == [""], "'eps' matches '' at the beginning, even on empty strings."); 1233 assert(result.input == input.input, "'eps' does not change the input."); 1234 assert(result.end == input.end+0, "'eps' does not advance the index."); 1235 assert(result.children is null, "'eps' has no children."); 1236 } 1237 1238 1239 /** 1240 Basic operator: it matches if all its subrules (stored in the rules template parameter tuple) match 1241 the input successively. Its subrules parse trees are stored as its children and its matches field 1242 will contain all its subrules matches, in order. 1243 1244 ---- 1245 alias and!(literal!"abc", charRange!('a','z')) rule; // abc followed by any letter between a and z. 1246 ParseTree input = ParseTree("",false,[],"abcd"); // low-level plumbing, 1247 // the rules described here act on ParseTree's not strings. 1248 // It's equivalent to "abcd" as input 1249 auto result = rule(input); 1250 1251 assert(result.successful); // OK, 'abc' followed by 'd' 1252 assert(result.matches == ["abc", "d"]); // stores the matches 1253 assert(result.children.length == 2); // two children, the result of "abc" on "abcd" and the result of [a-z] on "d" 1254 1255 input.input = "abc"; // changing the input string; 1256 assert(!rule(input)).successful); // NOK, abc alone 1257 input.input = "ab"; 1258 assert(!rule(input)).successful); // NOK, does not begin by abc 1259 ---- 1260 1261 If it fails, the last children will contain the failed node. That way, when printing, as sort of diagnostic is given: 1262 1263 ---- 1264 alias and!(literal!"abc", charRange!('a','z')) rule; // 'abc[a-z]', aka 'abc' followed by any letter between 'a' and 'z'. 1265 ParseTree input = ParseTree("",false,[],"abc1"); // equivalent to "abc1" 1266 1267 auto failure = rule(input); 1268 writeln(failure); 1269 /+ 1270 writes: 1271 and (failure) 1272 +-literal(abc) [0, 3]["abc"] 1273 +-charRange(a,z) failure at line 0, col 3, after "abc" a char between 'a' and 'z', but got "1" 1274 +/ 1275 ---- 1276 1277 So we know the global 'and' failed, that the first sub-rule ('abc') succeeded on input[0..3] with "abc" 1278 and that the second subrule ('[a-z]') failed at position 3 (so, on '1'). 1279 */ 1280 template and(rules...) if (rules.length > 0) 1281 { 1282 string ctfeGetNameAnd() 1283 { 1284 string name = "and!("; 1285 foreach(i,rule; rules) 1286 name ~= __traits(identifier, rule) // because using getName!(rule) causes an infinite loop during compilation 1287 // for recursive rules 1288 ~ (i < rules.length -1 ? ", " : ""); 1289 name ~= ")"; 1290 return name; 1291 } 1292 1293 enum name = ctfeGetNameAnd(); 1294 1295 ParseTree and(ParseTree p) 1296 { 1297 bool keepNode(ParseTree node) 1298 { 1299 return node.name.startsWith("keep!(") 1300 || ( !node.name.startsWith("discard!(") 1301 //&& !node.name.startsWith("drop!(") 1302 && (node.matches !is null 1303 || node.failEnd >= node.end) 1304 //&& node.begin != node.end 1305 ); 1306 } 1307 1308 version (tracer) 1309 { 1310 incTraceLevel(); 1311 } 1312 1313 ParseTree result = ParseTree(name, false, [], p.input, p.end, p.end, []); 1314 1315 foreach(i,r; rules) 1316 { 1317 version (tracer) 1318 { 1319 if (shouldTrace(getName!(r)(), p)) 1320 trace(traceMsg(result, name, getName!(r)())); 1321 } 1322 ParseTree temp = r(result); 1323 result.end = temp.end; 1324 result.failEnd = max(result.failEnd, temp.failEnd); 1325 if (temp.successful) 1326 { 1327 if (keepNode(temp)) 1328 { 1329 result.matches ~= temp.matches; 1330 if (temp.name.startsWith("drop!(")) 1331 {} 1332 else if (temp.name.startsWith("propagate!(")) 1333 result.children ~= temp.children; 1334 else 1335 result.children ~= temp; 1336 } 1337 } 1338 else 1339 { 1340 auto firstLongestFailedMatch = result.children.firstLongestFailedMatch(temp.end); 1341 if (firstLongestFailedMatch == -1) { 1342 result.children ~= temp;// add the failed node, to indicate which failed 1343 if (temp.matches.length > 0) 1344 result.matches ~= temp.matches[$-1]; 1345 } else { 1346 // don't add the failed node because a previous one already failed further back 1347 result.children = result.children[0 .. firstLongestFailedMatch+1]; // discard any intermediate correct nodes 1348 // This current 'and' rule has failed parsing and there is a successful child 1349 // that had a longer failing match. We now want to revisit that child and modify it 1350 // so that it is no longer successful and we want to move its failedChild into its children. 1351 failedChildFixup(result.children[firstLongestFailedMatch], result.children[firstLongestFailedMatch].failEnd); 1352 } 1353 result.end = result.children.maxEnd(); 1354 result.failEnd = result.children.maxFailEnd(); 1355 version (tracer) 1356 { 1357 if (shouldTrace(getName!(r)(), p)) 1358 trace(traceResultMsg(result, getName!(r)())); 1359 decTraceLevel(); 1360 } 1361 return result; // and end the parsing attempt right there 1362 } 1363 } 1364 result.successful = true; 1365 version (tracer) 1366 { 1367 foreach(i, r; rules) 1368 if (shouldTrace(getName!(r)(), p)) 1369 { 1370 trace(traceResultMsg(result, name)); 1371 break; 1372 } 1373 decTraceLevel(); 1374 } 1375 return result; 1376 } 1377 1378 ParseTree and(string input) 1379 { 1380 return .and!(rules)(ParseTree("",false,[],input)); 1381 } 1382 1383 string and(GetName g) 1384 { 1385 return name; 1386 } 1387 1388 } 1389 1390 auto firstLongestFailedMatch(ParseTree[] children, size_t threshold) pure nothrow { 1391 return children.countUntil!(c => c.failEnd > threshold); 1392 } 1393 1394 auto maxFailEnd(ParseTree[] children) pure nothrow 1395 { 1396 return children.map!(c => c.failEnd).maxElement; 1397 } 1398 1399 auto maxEnd(ParseTree[] children) pure nothrow 1400 { 1401 return children.map!(c => c.end).maxElement; 1402 } 1403 1404 // A child ParseTree has kept track of an alternate ParseTree (in failedChild) that matches longer. 1405 // whenever the 'and' rule fails we want to rewrite that child so that the failedChild is 1406 // moved into its children, the successful is set to false, the end is set the its failEnd, 1407 // the failEnd is reset, and all that info is propagated upwards the tree so intermediate 1408 // nodes reflect the proper state. 1409 bool failedChildFixup(ref ParseTree p, size_t failEnd) pure nothrow 1410 { 1411 if (p.failedChild.length > 0) 1412 { 1413 p.children ~= p.failedChild[0]; 1414 p.failedChild = []; 1415 p.successful = false; 1416 p.end = p.failEnd; 1417 p.failEnd = p.children.map!(c => c.failEnd).maxElement(); 1418 return true; 1419 } 1420 else 1421 { 1422 bool result = false; 1423 foreach (ref c; p.children) 1424 { 1425 if (c.failEnd != failEnd) 1426 continue; 1427 if (failedChildFixup(c, failEnd)) 1428 { 1429 p.end = c.end; 1430 p.successful = false; 1431 p.failEnd = p.children.map!(c => c.failEnd).maxElement(); 1432 result = true; 1433 } 1434 } 1435 return result; 1436 } 1437 } 1438 1439 unittest // 'and' unit test 1440 { 1441 alias literal!"abc" abc; 1442 alias literal!"de" de; 1443 alias literal!"f" f; 1444 1445 alias and!(abc) abcAnd; 1446 alias and!(abc,de) abcde; 1447 alias and!(abc,de,f) abcdef; 1448 alias and!(eps, abc, eps, de, eps, f, eps) withEps; 1449 1450 //assert(getName!(abcAnd)() == `and!(literal!("abc"))`); 1451 //assert(getName!(abcde)() == `and!(literal!("abc"), literal!("de"))`); 1452 //assert(getName!(abcdef)() == `and!(literal!("abc"), literal!("de"), literal!("f"))`); 1453 1454 ParseTree input = ParseTree("",false,[], "abcdefghi"); 1455 1456 ParseTree result = abcAnd(input); 1457 1458 assert(result.successful, "and!('abc') parses 'abcdefghi'"); 1459 assert(result.matches == ["abc"], "and!('abc') matches 'abc' at the beginning of 'abcdefghi'"); 1460 assert(result.end == input.end+3, "and!('abc') advances the index by 'abc' size (3)."); 1461 assert(result.children == [abc(input)], "and!('abc') has one child: the one created by 'abc'."); 1462 1463 result = abcde(input); 1464 1465 assert(result.successful, "and!('abc','de') parses 'abcdefghi'"); 1466 assert(result.matches == ["abc","de"], 1467 "and!('abc','de') matches 'abc' and 'de' at the beginning of 'abcdefghi'"); 1468 assert(result.end == input.end+5, "and!('abc','de') advances the index by 3+2 positions."); 1469 assert(result.children == [abc(input), de(abc(input))], 1470 "and!('abc','de') has two children, created by 'abc' and 'de'."); 1471 1472 result = abcdef(input); 1473 1474 assert(result.successful, "and!('abc','de','f') parses 'abcdefghi'"); 1475 assert(result.matches == ["abc","de","f"], 1476 "and!('abc','de','f') matches 'abcdef' at the beginning of 'abcdefghi'"); 1477 assert(result.end == input.end+6, "and!('abc','de','f') advances the index by 3+2+1 positions."); 1478 assert(result.children == [abc(input), de(abc(input)), f(de(abc(input)))] 1479 , "and!('abc','de') has two children, created by 'abc' and 'de'."); 1480 1481 result = withEps(input); 1482 1483 assert(result.successful, "and!('','abc','','de','','f','') parses 'abcdefghi'"); 1484 assert(result.matches == ["","abc","","de","","f",""], 1485 "and!('','abc','','de','','f','') matches 'abcdef' at the beginning of 'abcdefghi'"); 1486 assert(result.end == input.end+6, 1487 "and!('','abc','','de','','f','') advances the index by 0+3+0+2+0+1+0 positions."); 1488 1489 input.input = "bcdefghi"; 1490 1491 result = abcdef(input); 1492 1493 assert(!result.successful, "'abc' 'de' 'f' fails on 'bcdefghi'"); 1494 //assert(result.matches is null, "'abc' 'de' 'f' has no match on 'bcdefghi'"); 1495 assert(result.end == input.end); 1496 assert(result.children == [abc(input)], "'abc' 'de' 'f' has one child (a failure) on 'bcdefghi'"); 1497 1498 input.input = "abc_efghi"; 1499 1500 result = abcdef(input); 1501 1502 assert(!result.successful, "'abc' 'de' 'f' fails on 'abc_efghi'"); 1503 //assert(result.matches == ["abc"], "Only the first match, from 'abc'."); 1504 assert(result.end == input.end+3, "Advances by 3 positions, due to 'abc'"); 1505 assert(result.children == [abc(input), de(abc(input))] 1506 , "'abc' 'de' 'f' has two child on 'abc_efghi', the one from 'abc' (success) and the one from 'de' (failure)."); 1507 } 1508 1509 version (unittest) { 1510 static ParseTree getError(ref ParseTree p) pure nothrow @nogc { 1511 if (p.children.length > 0) 1512 return getError(p.children[$-1]); 1513 return p; 1514 } 1515 } 1516 1517 unittest // 'and' unit test with zeroOrMore and longest failing match 1518 { 1519 alias literal!"abc" A; 1520 alias literal!"def" B; 1521 alias literal!"ghi" C; 1522 1523 alias and!(zeroOrMore!(and!(A,B)), C) Thing; 1524 1525 ParseTree input = ParseTree("",false,[], "abc"); 1526 ParseTree result = Thing(input); 1527 1528 assert(!result.successful); 1529 assert(getError(result).matches[$-1] == "\"def\"", "and!(zeroOrMore!(and!(literal!\"abc\", literal!\"def\")), literal!\"ghi\") should expected def when input is \"abc\""); 1530 assert(result.matches == []); 1531 } 1532 1533 unittest // 'and' unit test with option and longest failing match 1534 { 1535 alias literal!"abc" A; 1536 alias literal!"def" B; 1537 alias literal!"ghi" C; 1538 1539 alias and!(option!(and!(A,B)), C) Thing; 1540 1541 ParseTree input = ParseTree("",false,[], "abc"); 1542 ParseTree result = Thing(input); 1543 1544 assert(!result.successful); 1545 assert(getError(result).matches[$-1] == "\"def\"", "and!(option!(and!(literal!\"abc\", literal!\"def\")), literal!\"ghi\") should expected def when input is \"abc\""); 1546 assert(result.matches == []); 1547 } 1548 1549 unittest // 'and' unit test with oneOrMore and longest failing match 1550 { 1551 alias literal!"abc" A; 1552 alias literal!"def" B; 1553 alias literal!"ghi" C; 1554 1555 alias and!(oneOrMore!(and!(A,B)), C) Thing; 1556 1557 ParseTree input = ParseTree("",false,[], "abcdefabc"); 1558 ParseTree result = Thing(input); 1559 1560 assert(!result.successful); 1561 assert(getError(result).matches[$-1] == "\"def\"", "and!(oneOrMore!(and!(literal!\"abc\", literal!\"def\")), literal!\"ghi\") should expected def when input is \"abcdefabc\""); 1562 assert(result.matches == ["abc", "def"]); 1563 } 1564 1565 template wrapAround(alias before, alias target, alias after) 1566 { 1567 ParseTree wrapAround(ParseTree p) @safe 1568 { 1569 ParseTree temp = before(p); 1570 if (!temp.successful) 1571 return temp; 1572 1573 ParseTree result = target(temp); 1574 if (!result.successful) 1575 return result; 1576 result.begin = temp.begin; 1577 1578 temp = after(result); 1579 if (!temp.successful) 1580 return temp; 1581 1582 result.end = temp.end; 1583 return result; 1584 } 1585 1586 ParseTree wrapAround(string input) @safe 1587 { 1588 return .wrapAround!(before, target, after)(ParseTree("",false,[],input)); 1589 } 1590 1591 string wrapAround(GetName g) @safe 1592 { 1593 return "wrapAround!(" ~ getName!(before)() ~ 1594 ", " ~ getName!(target)() ~ 1595 ", " ~ getName!(after)() ~ ")"; 1596 } 1597 } 1598 1599 /** 1600 Basic operator: it matches if one of its subrules (stored in the rules template parameter tuple) match 1601 the input. The subrules are tested in order, from rules[0] to rules[$-1]. 1602 1603 The matching subrule parse trees is stored as its only child and its matches field 1604 will contain all the subrule matches, in order. 1605 1606 ---- 1607 alias or!(literal!"abc", charRange!('a','z')) rule; // abc or, failing that, any letter between a and z. 1608 ParseTree input = ParseTree("",false,[],"defg"); // low-level plumbing, the rules described here act on ParseTree's not strings. 1609 // It's equivalent to "defg" as input 1610 auto result = rule(input); 1611 1612 assert(result.successful); // OK 1613 assert(result.matches == ["d"]); // stores the (in this case) only match 1614 assert(result.children.length == 1); // one child, the result of "abc" or [a-z], depending on which rule succeeded. 1615 1616 input.input = "abc"; // changing the input string; 1617 assert(rule(input)).successful); // Still OK 1618 input.input = "1abc"; 1619 assert(!rule(input)).successful); // NOK, does not begin by abc nor by [a-z] 1620 ---- 1621 1622 If it fails, the last children will contain the failed node that matched furthest (longes match). 1623 That way, when printing, as sort of diagnostic is given: 1624 1625 ---- 1626 alias or!(literal!"abc", and!(literal!"ab", charRange!('0','9'))) rule; // 'abc' or 'ab[0-9]' 1627 ParseTree input = ParseTree("",false,[],"abd"); // equivalent to "abd" 1628 1629 auto failure = rule(input); 1630 writeln(failure); 1631 /+ 1632 or (failure) 1633 +-and (failure) 1634 +-literal(ab) [0, 2]["ab"] 1635 +-charRange(0,9) failure at line 0, col 2, after "ab" expected a char between '0' and '9', but got "d" 1636 +/ 1637 ---- 1638 1639 So we know 'or' failed, that the 'and' sub-rule had the longest match, matching 'ab' and failing for [0-9] on index 2. 1640 */ 1641 template or(rules...) if (rules.length > 0) 1642 { 1643 string ctfeGetNameOr() 1644 { 1645 string name = "or!("; 1646 foreach(i,rule; rules) 1647 name ~= getName!(rule) 1648 ~ (i < rules.length -1 ? ", " : ""); 1649 name ~= ")"; 1650 return name; 1651 } 1652 1653 enum name = ctfeGetNameOr(); 1654 1655 ParseTree or(ParseTree p) 1656 { 1657 // error-management 1658 ParseTree longestFail = ParseTree(name, false, [], p.input, p.end, 0); 1659 string[] errorStrings; 1660 size_t errorStringChars; 1661 string orErrorString; 1662 1663 ParseTree[rules.length] results; 1664 string[rules.length] names; 1665 size_t[rules.length] failedLength; 1666 size_t maxFailedLength; 1667 1668 version (tracer) 1669 { 1670 incTraceLevel(); 1671 } 1672 1673 // Real 'or' loop 1674 foreach(i,r; rules) 1675 { 1676 version (tracer) 1677 { 1678 if (shouldTrace(getName!(r)(), p)) 1679 trace(traceMsg(p, name, getName!(r)())); 1680 } 1681 ParseTree temp = r(p); 1682 if (temp.successful) 1683 { 1684 temp.children = [temp]; 1685 temp.name = name; 1686 // if there is a child that failed but parsed more 1687 if (longestFail.failEnd > temp.end) { 1688 temp.failEnd = longestFail.failEnd; 1689 temp.failedChild = [longestFail]; 1690 } 1691 version (tracer) 1692 { 1693 if (shouldTrace(getName!(r)(), p)) 1694 trace(traceResultMsg(temp, getName!(r)())); 1695 decTraceLevel(); 1696 } 1697 return temp; 1698 } 1699 else 1700 { 1701 version (tracer) 1702 { 1703 if (shouldTrace(getName!(r)(), p)) 1704 trace(traceResultMsg(temp, getName!(r)())); 1705 } 1706 enum errName = " (" ~ getName!(r)() ~")"; 1707 failedLength[i] = temp.end; 1708 if (temp.end >= longestFail.end) 1709 { 1710 if (temp.end == longestFail.end) 1711 errorStringChars += (temp.matches.length > 0 ? temp.matches[$-1].length : 0) + errName.length + 4; 1712 else 1713 errorStringChars = (temp.matches.length > 0 ? temp.matches[$-1].length : 0) + errName.length + 4; 1714 maxFailedLength = temp.end; 1715 longestFail = temp; 1716 names[i] = errName; 1717 results[i] = temp; 1718 1719 } 1720 // Else, this error parsed less input than another one: we discard it. 1721 } 1722 } 1723 version (tracer) 1724 { 1725 decTraceLevel(); 1726 } 1727 1728 1729 // All subrules failed, we will take the longest match as the result 1730 // If more than one node failed at the same (farthest) position, we concatenate their error messages 1731 1732 1733 char[] errString;// = new char[](errorStringChars); 1734 errString.length = errorStringChars; 1735 uint start = 0; 1736 foreach(i; 0..rules.length) 1737 { 1738 if (failedLength[i] == maxFailedLength && results[i].matches.length > 0) 1739 { 1740 auto temp = results[i]; 1741 auto len = temp.matches[$-1].length; 1742 auto nlen = names[i].length; 1743 errString[start .. start+len] = temp.matches[$-1][]; 1744 errString[start+len .. start+len+names[i].length] = names[i][]; 1745 errString[start+len+nlen .. start+len+nlen+4] = " or "; 1746 start += len + names[i].length + 4; 1747 } 1748 } 1749 () @trusted { orErrorString = cast(string)(errString[0..$-4]); } (); 1750 1751 longestFail.matches = longestFail.matches.length == 0 ? [orErrorString] : 1752 longestFail.matches[0..$-1] // discarding longestFail error message 1753 ~ [orErrorString]; // and replacing it by the new, concatenated one. 1754 auto children = results[].getUpto(maxFailedLength); 1755 return ParseTree(name, false, longestFail.matches, p.input, p.end, longestFail.end, children, children.maxFailEnd); 1756 } 1757 1758 ParseTree or(string input) 1759 { 1760 return .or!(rules)(ParseTree("",false,[],input)); 1761 } 1762 1763 string or(GetName g) 1764 { 1765 return name; 1766 } 1767 } 1768 1769 auto getUpto(ParseTree[] children, size_t minFailedLength) pure nothrow { 1770 return children.filter!(r => max(r.end, r.failEnd) >= minFailedLength).array(); 1771 } 1772 1773 unittest // 'or' unit test 1774 { 1775 alias charRange!('a','b') ab; 1776 alias charRange!('c','d') cd; 1777 1778 alias or!(ab) abOr; 1779 alias or!(ab,cd) abOrcd; 1780 1781 assert(getName!(ab)() == "charRange!('a','b')"); 1782 assert(getName!(cd)() == "charRange!('c','d')"); 1783 1784 ParseTree input = ParseTree("",false,[], "abcdefghi"); 1785 1786 ParseTree result = abOr(input); 1787 1788 assert(result.name == "or!(charRange!('a','b'))", "or name test."); 1789 assert(result.successful, "or!([a-b]) parses 'abcdefghi'"); 1790 assert(result.matches == ["a"], "or!([a-b]) matches 'a' at the beginning of 'abcdefghi'"); 1791 assert(result.end == input.end+1, "or!([a-b]) advances the index by 'a' size (1)."); 1792 assert(result.children == [ab(input)], "or!([a-b]) has one child: the one created by '[a-b]'."); 1793 1794 result = abOrcd(input); 1795 1796 assert(result.name == "or!(charRange!('a','b'), charRange!('c','d'))", "or name test."); 1797 assert(result.successful, "or!([a-b],[c-d]) parses 'abcdefghi'"); 1798 assert(result.matches == ["a"], "or!([a-b],[c-d]) matches 'a' at the beginning of 'abcdefghi'"); 1799 assert(result.end == input.end+1, "or!([a-b],[c-d]) advances the index by 1 position."); 1800 1801 assert(result.children == [ab(input)], "or!([a-b],[c-d]) has one child, created by [a-b]."); 1802 1803 input.input = "cdefghi"; 1804 1805 result = abOrcd(input); 1806 1807 assert(result.name == "or!(charRange!('a','b'), charRange!('c','d'))", "or name test."); 1808 assert(result.successful, "or!([a-b],[c-d]) parses 'cdefghi'"); 1809 assert(result.matches == ["c"], "or!([a-b],[c-d]) matches 'c' at the beginning of 'cdefghi'"); 1810 assert(result.end == input.end+1, "or!([a-b],[c-d]) advances the index by 1 position."); 1811 1812 assert(result.children == [cd(input)], "or!([a-b],[c-d]) has one child, created by [c-d]."); 1813 1814 input.input = "_abcdefghi"; 1815 1816 result = abOrcd(input); 1817 1818 assert(!result.successful, "or!([a-b],[c-d]) fails on '_abcdefghi'"); 1819 assert(result.end == input.end+0, "or!([a-b],[c-d]) does not advance the index."); 1820 assert(result.matches == 1821 [ "a char between 'a' and 'b' (charRange!('a','b')) or a char between 'c' and 'd' (charRange!('c','d'))"] 1822 , "or!([a-b],[c-d]) error message. |" ~ result.matches[0]~ "|"); 1823 1824 input.input = ""; 1825 1826 result = abOrcd(input); 1827 1828 assert(!result.successful, "or!([a-b],[c-d]) fails on and empty input"); 1829 assert(result.end == input.end+0, "or!([a-b],[c-d]) does not advance the index."); 1830 assert(result.matches == 1831 [ "a char between 'a' and 'b' (charRange!('a','b')) or a char between 'c' and 'd' (charRange!('c','d'))"] 1832 , "or!([a-b],[c-d]) error message."); 1833 } 1834 1835 /** 1836 Basic operator: it matches if one of its subrules (stored in the rules template parameter tuple) match 1837 the input. All subrules are tested and the one producing the longest match is taken. 1838 1839 The longest matching subrule's parse tree is stored as its only child and its matches field 1840 will contain all the subrule matches, in order. 1841 */ 1842 template longest_match(rules...) if (rules.length > 0) 1843 { 1844 string ctfeGetNameOr() 1845 { 1846 string name = "longest_match!("; 1847 foreach(i,rule; rules) 1848 name ~= getName!(rule) 1849 ~ (i < rules.length -1 ? ", " : ""); 1850 name ~= ")"; 1851 return name; 1852 } 1853 1854 enum name = ctfeGetNameOr(); 1855 1856 ParseTree longest_match(ParseTree p) 1857 { 1858 // error-management 1859 ParseTree longest, longestFail = ParseTree(name, false, [], p.input, p.end, 0); 1860 string[] errorStrings; 1861 size_t errorStringChars; 1862 string orErrorString; 1863 1864 ParseTree[rules.length] results; 1865 string[rules.length] names; 1866 size_t[rules.length] failedLength; 1867 size_t maxFailedLength; 1868 1869 version (tracer) 1870 { 1871 incTraceLevel(); 1872 } 1873 1874 // Real 'longest_match' loop 1875 foreach(i,r; rules) 1876 { 1877 version (tracer) 1878 { 1879 if (shouldTrace(getName!(r)(), p)) 1880 trace(traceMsg(p, name, getName!(r)())); 1881 } 1882 ParseTree temp = r(p); 1883 version (tracer) 1884 { 1885 if (shouldTrace(getName!(r)(), p)) 1886 trace(traceResultMsg(temp, getName!(r)())); 1887 } 1888 1889 if (temp.successful) 1890 { 1891 if (temp.end > longest.end) 1892 longest = temp; 1893 // Else, this rule parsed less input than another one: we discard it. 1894 } 1895 else 1896 { 1897 enum errName = " (" ~ getName!(r)() ~")"; 1898 failedLength[i] = temp.end; 1899 if (temp.end >= longestFail.end) 1900 { 1901 maxFailedLength = temp.end; 1902 longestFail = temp; 1903 names[i] = errName; 1904 results[i] = temp; 1905 1906 if (temp.end == longestFail.end) 1907 errorStringChars += (temp.matches.length > 0 ? temp.matches[$-1].length : 0) + errName.length + 4; 1908 else 1909 errorStringChars = (temp.matches.length > 0 ? temp.matches[$-1].length : 0) + errName.length + 4; 1910 } 1911 // Else, this error parsed less input than another one: we discard it. 1912 } 1913 } 1914 version (tracer) 1915 { 1916 decTraceLevel(); 1917 } 1918 if (longest.successful) 1919 { 1920 longest.children = [longest]; 1921 longest.name = name; 1922 return longest; 1923 } 1924 1925 // All subrules failed, we will take the longest match as the result 1926 // If more than one node failed at the same (farthest) position, we concatenate their error messages 1927 1928 char[] errString;// = new char[](errorStringChars); 1929 errString.length = errorStringChars; 1930 uint start = 0; 1931 foreach(i; 0..rules.length) 1932 { 1933 if (failedLength[i] == maxFailedLength && results[i].matches.length > 0) 1934 { 1935 auto temp = results[i]; 1936 auto len = temp.matches[$-1].length; 1937 auto nlen = names[i].length; 1938 errString[start .. start+len] = temp.matches[$-1][]; 1939 errString[start+len .. start+len+names[i].length] = names[i][]; 1940 errString[start+len+nlen .. start+len+nlen+4] = " or "; 1941 start += len + names[i].length + 4; 1942 } 1943 } 1944 () @trusted { orErrorString = cast(string)(errString[0..$-4]); } (); 1945 1946 longestFail.matches = longestFail.matches.length == 0 ? [orErrorString] : 1947 longestFail.matches[0..$-1] // discarding longestFail error message 1948 ~ [orErrorString]; // and replacing it by the new, concatenated one. 1949 longestFail.name = name; 1950 longestFail.begin = p.end; 1951 return longestFail; 1952 } 1953 1954 ParseTree longest_match(string input) 1955 { 1956 return .or!(rules)(ParseTree("",false,[],input)); 1957 } 1958 1959 string longest_match(GetName g) 1960 { 1961 return name; 1962 } 1963 } 1964 1965 unittest // 'longest_match' unit test 1966 { 1967 alias charRange!('a','b') ab; 1968 alias charRange!('c','d') cd; 1969 1970 alias longest_match!(ab) abOr; 1971 alias longest_match!(ab,cd) abOrcd; 1972 1973 assert(getName!(ab)() == "charRange!('a','b')"); 1974 assert(getName!(cd)() == "charRange!('c','d')"); 1975 1976 // These are the same tests as for 'or': 1977 1978 ParseTree input = ParseTree("",false,[], "abcdefghi"); 1979 1980 ParseTree result = abOr(input); 1981 1982 assert(result.name == "longest_match!(charRange!('a','b'))", "or name test."); 1983 assert(result.successful, "longest_match!([a-b]) parses 'abcdefghi'"); 1984 assert(result.matches == ["a"], "longest_match!([a-b]) matches 'a' at the beginning of 'abcdefghi'"); 1985 assert(result.end == input.end+1, "longest_match!([a-b]) advances the index by 'a' size (1)."); 1986 assert(result.children == [ab(input)], "longest_match!([a-b]) has one child: the one created by '[a-b]'."); 1987 1988 result = abOrcd(input); 1989 1990 assert(result.name == "longest_match!(charRange!('a','b'), charRange!('c','d'))", "or name test."); 1991 assert(result.successful, "longest_match!([a-b],[c-d]) parses 'abcdefghi'"); 1992 assert(result.matches == ["a"], "longest_match!([a-b],[c-d]) matches 'a' at the beginning of 'abcdefghi'"); 1993 assert(result.end == input.end+1, "longest_match!([a-b],[c-d]) advances the index by 1 position."); 1994 1995 assert(result.children == [ab(input)], "longest_match!([a-b],[c-d]) has one child, created by [a-b]."); 1996 1997 input.input = "cdefghi"; 1998 1999 result = abOrcd(input); 2000 2001 assert(result.name == "longest_match!(charRange!('a','b'), charRange!('c','d'))", "or name test."); 2002 assert(result.successful, "longest_match!([a-b],[c-d]) parses 'cdefghi'"); 2003 assert(result.matches == ["c"], "longest_match!([a-b],[c-d]) matches 'c' at the beginning of 'cdefghi'"); 2004 assert(result.end == input.end+1, "longest_match!([a-b],[c-d]) advances the index by 1 position."); 2005 2006 assert(result.children == [cd(input)], "longest_match!([a-b],[c-d]) has one child, created by [c-d]."); 2007 2008 input.input = "_abcdefghi"; 2009 2010 result = abOrcd(input); 2011 2012 assert(!result.successful, "longest_match!([a-b],[c-d]) fails on '_abcdefghi'"); 2013 assert(result.end == input.end+0, "longest_match!([a-b],[c-d]) does not advance the index."); 2014 assert(result.matches == 2015 [ "a char between 'a' and 'b' (charRange!('a','b')) or a char between 'c' and 'd' (charRange!('c','d'))"] 2016 , "longest_match!([a-b],[c-d]) error message. |" ~ result.matches[0]~ "|"); 2017 2018 input.input = ""; 2019 2020 result = abOrcd(input); 2021 2022 assert(!result.successful, "longest_match!([a-b],[c-d]) fails on and empty input"); 2023 assert(result.end == input.end+0, "longest_match!([a-b],[c-d]) does not advance the index."); 2024 assert(result.matches == 2025 [ "a char between 'a' and 'b' (charRange!('a','b')) or a char between 'c' and 'd' (charRange!('c','d'))"] 2026 , "longest_match!([a-b],[c-d]) error message."); 2027 2028 // Now test longest match behaviour: 2029 2030 input.input = "aaaccccb"; 2031 result = or!(oneOrMore!(literal!("a")), and!(oneOrMore!(literal!("a")), literal!("cc")))(input); 2032 assert(result.matches == ["a", "a", "a"], "or! takes the first matching rule."); 2033 result = longest_match!(oneOrMore!(literal!("a")), and!(oneOrMore!(literal!("a")), literal!("cc")))(input); 2034 assert(result.matches == ["a", "a", "a", "cc"], "longest_match! takes the longest matching rule."); 2035 } 2036 2037 2038 /** 2039 Compile-time switch trie from Brian Schott 2040 */ 2041 class Trie(V) : TrieNode!(V) 2042 { 2043 /** 2044 * Adds the given value to the trie with the given key 2045 */ 2046 void add(string key, V value) pure 2047 { 2048 TrieNode!(V) current = this; 2049 foreach(dchar keyPart; key) 2050 { 2051 if ((keyPart in current.children) is null) 2052 { 2053 auto node = new TrieNode!(V); 2054 current.children[keyPart] = node; 2055 current = node; 2056 } 2057 else 2058 current = current.children[keyPart]; 2059 } 2060 current.value = value; 2061 } 2062 } 2063 2064 class TrieNode(V) 2065 { 2066 V value; 2067 TrieNode!(V)[dchar] children; 2068 } 2069 2070 string printCaseStatements(V)(TrieNode!(V) node, string indentString) 2071 { 2072 string s = ""; 2073 string idnt = indentString; 2074 2075 void incIndent() { idnt ~= " "; } 2076 void decIndent() { idnt = idnt[2..$]; } 2077 2078 void put(string k) { s ~= idnt ~ k; } 2079 void append(string k) { s ~= k;} 2080 2081 foreach(dchar k, TrieNode!(V) v; node.children) 2082 { 2083 put("case '"); 2084 switch(k) 2085 { 2086 case '\n': append("\\n"); break; 2087 case '\t': append("\\t"); break; 2088 case '\r': append("\\r"); break; 2089 case 92: append("\\"); break; 2090 default: append(to!string(k)); 2091 } 2092 append("':\n"); 2093 incIndent(); 2094 2095 put("temp.end++;\n"); 2096 if (v.children.length > 0) 2097 { 2098 put("if (temp.end >= temp.input.length)\n"); 2099 put("{\n"); 2100 incIndent(); 2101 2102 if (node.children[k].value.length != 0) 2103 put("return ParseTree(name, true, [`" ~ node.children[k].value ~ "`], temp.input, p.end, temp.end)"); 2104 else 2105 put("return ParseTree(name, false, [failString], p.input, p.end, p.end)"); 2106 2107 append(";\n"); 2108 decIndent(); 2109 2110 put("}\n"); 2111 put("switch (temp.input[temp.end])\n"); 2112 put("{\n"); 2113 2114 incIndent(); 2115 append(printCaseStatements(v, idnt)); 2116 2117 put("default:\n"); 2118 incIndent(); 2119 if (v.value.length != 0) 2120 put("return ParseTree(name, true, [`" ~ v.value ~ "`], temp.input, p.end, temp.end)"); 2121 else 2122 put("return ParseTree(name, false, [failString], p.input, p.end, p.end)"); 2123 append(";\n"); 2124 decIndent(); 2125 decIndent(); 2126 put("}\n"); 2127 } 2128 else 2129 { 2130 if (v.value.length != 0) 2131 put("return ParseTree(name, true, [`" ~ v.value ~ "`], temp.input, p.end, temp.end)"); 2132 else 2133 put("return ParseTree(name, false, [failString], p.input, p.end, p.end)"); 2134 append(";\n"); 2135 } 2136 } 2137 return s; 2138 } 2139 2140 string generateCaseTrie(string[] args ...) 2141 { 2142 auto t = new Trie!(string); 2143 foreach(arg; args) 2144 { 2145 t.add(arg, arg); 2146 } 2147 return printCaseStatements(t, ""); 2148 } 2149 2150 /** 2151 or special case for literal list ("abstract"/"alias"/...) 2152 */ 2153 template keywords(kws...) if (kws.length > 0) 2154 { 2155 string ctfeGetNameKeywords() 2156 { 2157 string name= "keywords!("; 2158 foreach(i,kw;kws) 2159 name ~= "\"" ~ kw ~ "\""~ (i < kws.length -1 ? ", " : ""); 2160 name ~= ")"; 2161 return name; 2162 } 2163 2164 string ctfeConcatKeywords() 2165 { 2166 string s = "["; 2167 foreach(i, k; kws) 2168 { 2169 s ~= "\"" ~ k ~ "\""; 2170 if (i < kws.length - 1) 2171 s ~= ", "; 2172 } 2173 return s ~= "]"; 2174 } 2175 2176 enum name = ctfeGetNameKeywords(); 2177 enum failString = "one among " ~ ctfeConcatKeywords(); 2178 2179 ParseTree keywords(ParseTree p) 2180 { 2181 string keywordCode(string[] keywords) 2182 { 2183 string result; 2184 foreach(kw; keywords) 2185 result ~= "if (p.end+"~to!string(kw.length) ~ " <= p.input.length " 2186 ~" && p.input[p.end..p.end+"~to!string(kw.length)~"]==`" 2187 ~kw~"`) return ParseTree(`" 2188 ~name~"`,true,[`"~kw~"`],p.input,p.end,p.end+" 2189 ~to!string(kw.length)~");\n"; 2190 2191 result ~= "return ParseTree(`"~name~"`,false,[`" ~ failString ~ "`],p.input,p.end,p.end);"; 2192 2193 return result; 2194 } 2195 2196 static if (KEYWORDS == IFCHAIN) 2197 { 2198 mixin(keywordCode([kws])); 2199 } 2200 else static if (KEYWORDS == TRIE) 2201 { 2202 auto temp = p; 2203 if (p.end < p.input.length) // is this conditional required? 2204 { 2205 switch(p.input[p.end]) 2206 { 2207 mixin(generateCaseTrie([kws])); 2208 mixin("default: return ParseTree(`"~name~"`,false,[`" ~ failString ~ "`],p.input,p.end,p.end);"); 2209 } 2210 } 2211 else 2212 { 2213 mixin("return ParseTree(`"~name~"`,false,[`" ~ failString ~ "`],p.input,p.end,p.end);"); 2214 } 2215 } 2216 } 2217 2218 ParseTree keywords(string input) 2219 { 2220 return .keywords!(kws)(ParseTree("",false,[],input)); 2221 } 2222 2223 string keywords(GetName g) 2224 { 2225 return name; 2226 } 2227 } 2228 2229 unittest 2230 { 2231 alias keywords!("abc","de","f") kw; 2232 2233 assert(getName!(kw)() == `keywords!("abc", "de", "f")`); 2234 2235 ParseTree input = ParseTree("",false,[],"abcd"); 2236 2237 ParseTree result = kw(input); 2238 2239 assert(result.name == `keywords!("abc", "de", "f")`, "keywords name test."); 2240 assert(result.successful, "keywords success on `abcd`"); 2241 assert(result.matches == ["abc"], "keywords matches `abc` on `abcd`"); 2242 assert(result.end == input.end+3, "keywords advances the index by 3 positions."); 2243 assert(result.children is null, "No children for `keywords`."); 2244 2245 input.input = "def"; 2246 2247 result = kw(input); 2248 2249 assert(result.successful, "keywords success on `def`"); 2250 assert(result.matches == ["de"], "keywords matches `de` on `def`"); 2251 assert(result.end == input.end+2, "keywords advances the index by 2 positions."); 2252 assert(result.children is null, "No children for `keywords`."); 2253 2254 2255 input.input = "ab_def"; 2256 2257 result = kw(input); 2258 2259 assert(!result.successful, "keywords fails on `ab_def`."); 2260 assert(result.matches == [`one among ["abc", "de", "f"]`], "keywords error message." ~ result.matches[0]); 2261 assert(result.end == input.end, "keywords does not advance the index."); 2262 assert(result.children is null, "No children for `keywords`."); 2263 2264 input.input = ""; 2265 2266 result = kw(input); 2267 2268 assert(!result.successful, "keywords fails on an empty input."); 2269 assert(result.matches == [`one among ["abc", "de", "f"]`], "keywords error message."); 2270 assert(result.end == input.end, "keywords does not advance the index."); 2271 assert(result.children is null, "No children for `keywords`."); 2272 } 2273 2274 2275 /** 2276 Tries to match subrule 'r' zero or more times. It always succeeds, since if 'r' fails 2277 from the very beginning, it matched 'r' zero times... 2278 2279 Its matches are those of its subrules (they might be different for each match) and its 2280 children are all the parse trees returned by the successive application of 'r'. 2281 2282 ---- 2283 alias zeroOrMore!(or!(literal!"abc", literal!"d")) rule; // in PEG-speak: '("abc" / "d")*' 2284 ParseTree input = ParseTree("",false,[], "abcdabce"); 2285 2286 ParseTree result = rule(input); 2287 2288 assert(result.successful); 2289 assert(result.matches == ["abc", "d", "abc"]); 2290 assert(result.end == 7); // matched "abcdabce"[0..7] => "abcdabc". "e" at the end is not part of the parse tree. 2291 assert(result.children.length == 3); 2292 writeln(result); 2293 /+ 2294 writes: 2295 zeroOrMore [0, 7]["abc", "d", "abc"] 2296 +-or [0, 3]["abc"] 2297 | +-literal(abc) [0, 3]["abc"] 2298 +-or [3, 4]["d"] 2299 | +-literal(d) [3, 4]["d"] 2300 +-or [4, 7]["abc"] 2301 +-literal(abc) [4, 7]["abc"] 2302 +/ 2303 ---- 2304 2305 So we know the first child used the 'literal!"abc"' sub-rule and matched input[0..3]. 2306 The second matched input[3..4] and the third input[4..7]. 2307 2308 ---- 2309 input = ParseTree("",false,[], "efgh"); 2310 result = rule(input); 2311 assert(result.successful); // succeed, even though all patterns failed. 2312 assert(result.children.length == 0); 2313 ---- 2314 */ 2315 template zeroOrMore(alias r) 2316 { 2317 enum name = "zeroOrMore!(" ~ getName!(r) ~ ")"; 2318 2319 ParseTree zeroOrMore(ParseTree p) 2320 { 2321 auto result = ParseTree(name, true, [], p.input, p.end, p.end); 2322 version (tracer) 2323 { 2324 incTraceLevel(); 2325 if (shouldTrace(getName!(r)(), p)) 2326 trace(traceMsg(result, name, getName!(r)())); 2327 } 2328 auto temp = r(result); 2329 while(temp.successful 2330 && (temp.begin < temp.end // To avoid infinite loops on epsilon-matching rules 2331 || temp.name.startsWith("discard!("))) 2332 { 2333 result.matches ~= temp.matches; 2334 result.children ~= temp; 2335 result.end = temp.end; 2336 result.failEnd = max(result.failEnd, temp.failEnd); 2337 version (tracer) 2338 { 2339 if (shouldTrace(getName!(r)(), p)) 2340 trace(traceMsg(result, name, getName!(r)())); 2341 } 2342 temp = r(result); 2343 } 2344 auto maxFail = max(temp.failEnd, temp.end); 2345 if (maxFail > result.failEnd && maxFail > result.end) { 2346 result.failedChild = [temp]; 2347 result.failEnd = maxFail; 2348 } 2349 result.successful = true; 2350 version (tracer) 2351 { 2352 if (shouldTrace(getName!(r)(), p)) 2353 trace(traceResultMsg(result, getName!(r)())); 2354 decTraceLevel(); 2355 } 2356 return result; 2357 } 2358 2359 ParseTree zeroOrMore(string input) 2360 { 2361 return .zeroOrMore!(r)(ParseTree("",false,[],input)); 2362 } 2363 2364 string zeroOrMore(GetName g) 2365 { 2366 return name; 2367 } 2368 } 2369 2370 unittest // 'zeroOrMore' unit test 2371 { 2372 alias literal!"a" a; 2373 alias literal!"abc" abc; 2374 alias charRange!('a','z') az; 2375 2376 alias zeroOrMore!(a) as; 2377 alias zeroOrMore!(abc) abcs; 2378 alias zeroOrMore!(az) azs; 2379 2380 assert(getName!(as)() == `zeroOrMore!(literal!("a"))`); 2381 assert(getName!(abcs)() == `zeroOrMore!(literal!("abc"))`); 2382 assert(getName!(azs)() == `zeroOrMore!(charRange!('a','z'))`); 2383 2384 assert(as("").successful); 2385 assert(as("a").successful); 2386 assert(as("aa").successful); 2387 assert(as("aaa").successful); 2388 assert(as("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa").successful); 2389 assert(as("b").successful); 2390 2391 ParseTree result = as("aaa"); 2392 2393 assert(result.name == `zeroOrMore!(literal!("a"))`); 2394 assert(result.successful); 2395 assert(result.matches == ["a","a","a"]); 2396 assert(result.begin == 0); 2397 assert(result.end == 3); 2398 assert(result.children.length == 3); 2399 assert(result.children == [ a("aaa"), a(a("aaa")), a(a(a("aaa")))]); 2400 2401 assert(abcs("").successful); 2402 assert(abcs("abc").successful); 2403 assert(abcs("abcabc").successful); 2404 assert(abcs("abcabcabc").successful); 2405 assert(abcs("abcabcabcabcabcabcabcabcabcabcabcabcabcabcabc").successful); 2406 assert(abcs("ab").successful); 2407 2408 result = abcs("abcabcabc"); 2409 2410 assert(result.name == `zeroOrMore!(literal!("abc"))`); 2411 assert(result.successful); 2412 assert(result.matches == ["abc","abc","abc"]); 2413 assert(result.begin == 0); 2414 assert(result.end == 3*3); 2415 assert(result.children.length == 3); 2416 assert(result.children == [ abc("abcabcabc"), abc(abc("abcabcabc")), abc(abc(abc("abcabcabc")))]); 2417 2418 assert(azs("").successful); 2419 assert(azs("a").successful); 2420 assert(azs("abc").successful); 2421 assert(azs("abcdefghijklmnoqrstuvwxyz").successful); 2422 assert(azs("abcdefghijklmnoqrstuvwxyz 1234567890").successful); 2423 2424 result = azs("abc"); 2425 2426 assert(result.name == `zeroOrMore!(charRange!('a','z'))`); 2427 assert(result.successful); 2428 assert(result.matches == ["a","b","c"]); 2429 assert(result.begin == 0); 2430 assert(result.end == 3); 2431 assert(result.children.length == 3); 2432 assert(result.children == [ az("abc"), az(az("abc")), az(az(az("abc")))]); 2433 } 2434 2435 /** 2436 Tries to match subrule 'r' one or more times. If 'r' fails 2437 from the very beginning, it fails and else succeeds. 2438 2439 Its matches are those of its subrules (they might be different for each match) and its 2440 children are all the parse trees returned by the successive application of 'r'. 2441 2442 ---- 2443 alias oneOrMore!(or!(literal!"abc", literal!"d")) rule; // in PEG-speak: '("abc" / "d")*' 2444 ParseTree input = ParseTree("",false,[], "abcdabce"); 2445 2446 ParseTree result = rule(input); 2447 2448 assert(result.successful); 2449 assert(result.matches == ["abc", "d", "abc"]); 2450 assert(result.end == 7); // matched "abcdabce"[0..7] => "abcdabc". "e" at the end is not part of the parse tree. 2451 assert(result.children.length == 3); 2452 writeln(result); 2453 /+ 2454 writes: 2455 oneOrMore [0, 7]["abc", "d", "abc"] 2456 +-or [0, 3]["abc"] 2457 | +-literal(abc) [0, 3]["abc"] 2458 +-or [3, 4]["d"] 2459 | +-literal(d) [3, 4]["d"] 2460 +-or [4, 7]["abc"] 2461 +-literal(abc) [4, 7]["abc"] 2462 +/ 2463 ---- 2464 2465 So we know the first child used the 'literal!"abc"' sub-rule and matched input[0..3]. 2466 The second matched input[3..4] and the third input[4..7]. 2467 2468 ---- 2469 input = ParseTree("",false,[], "efgh"); 2470 result = rule(input); 2471 assert(!result.successful); // fails, since it failed on the first try. 2472 ---- 2473 */ 2474 template oneOrMore(alias r) 2475 { 2476 enum name = "oneOrMore!(" ~ getName!(r) ~ ")"; 2477 2478 ParseTree oneOrMore(ParseTree p) 2479 { 2480 auto result = ParseTree(name, false, [], p.input, p.end, p.end); 2481 version (tracer) 2482 { 2483 incTraceLevel(); 2484 if (shouldTrace(getName!(r)(), p)) 2485 trace(traceMsg(result, name, getName!(r)())); 2486 } 2487 auto temp = r(result); 2488 2489 if (!temp.successful) 2490 { 2491 result.matches = temp.matches; 2492 result.children = [temp]; 2493 result.end = temp.end; 2494 } 2495 else 2496 { 2497 while( temp.successful 2498 && (temp.begin < temp.end // To avoid infinite loops on epsilon-matching rules 2499 || temp.name.startsWith("discard!("))) 2500 { 2501 result.matches ~= temp.matches; 2502 result.children ~= temp; 2503 result.end = temp.end; 2504 result.failEnd = max(result.failEnd, temp.failEnd); 2505 version (tracer) 2506 { 2507 if (shouldTrace(getName!(r)(), p)) 2508 trace(traceMsg(result, name, getName!(r)())); 2509 } 2510 temp = r(result); 2511 } 2512 auto maxFail = max(temp.failEnd, temp.end); 2513 if (maxFail > result.failEnd && maxFail > result.end) { 2514 result.failedChild = [temp]; 2515 result.failEnd = maxFail; 2516 } 2517 result.successful = true; 2518 } 2519 version (tracer) 2520 { 2521 if (shouldTrace(getName!(r)(), p)) 2522 trace(traceResultMsg(result, getName!(r)())); 2523 decTraceLevel(); 2524 } 2525 return result; 2526 } 2527 2528 ParseTree oneOrMore(string input) 2529 { 2530 return .oneOrMore!(r)(ParseTree("",false,[],input)); 2531 } 2532 2533 string oneOrMore(GetName g) 2534 { 2535 return name; 2536 } 2537 } 2538 2539 unittest // 'oneOrMore' unit test 2540 { 2541 alias literal!"a" a; 2542 alias literal!"abc" abc; 2543 alias charRange!('a','z') az; 2544 2545 alias oneOrMore!(a) as; 2546 alias oneOrMore!(abc) abcs; 2547 alias oneOrMore!(az) azs; 2548 2549 assert(getName!(as)() == `oneOrMore!(literal!("a"))`); 2550 assert(getName!(abcs)() == `oneOrMore!(literal!("abc"))`); 2551 assert(getName!(azs)() == `oneOrMore!(charRange!('a','z'))`); 2552 2553 assert(!as("").successful); 2554 assert(as("a").successful); 2555 assert(as("aa").successful); 2556 assert(as("aaa").successful); 2557 assert(as("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa").successful); 2558 assert(!as("b").successful); 2559 2560 ParseTree result = as("aaa"); 2561 2562 assert(result.name == `oneOrMore!(literal!("a"))`); 2563 assert(result.successful); 2564 assert(result.matches == ["a","a","a"]); 2565 assert(result.begin == 0); 2566 assert(result.end == 3); 2567 assert(result.children.length == 3); 2568 assert(result.children == [ a("aaa"), a(a("aaa")), a(a(a("aaa")))]); 2569 2570 assert(!abcs("").successful); 2571 assert(abcs("abc").successful); 2572 assert(abcs("abcabc").successful); 2573 assert(abcs("abcabcabc").successful); 2574 assert(abcs("abcabcabcabcabcabcabcabcabcabcabcabcabcabcabc").successful); 2575 assert(!abcs("ab").successful); 2576 2577 result = abcs("abcabcabc"); 2578 2579 assert(result.name == `oneOrMore!(literal!("abc"))`); 2580 assert(result.successful); 2581 assert(result.matches == ["abc","abc","abc"]); 2582 assert(result.begin == 0); 2583 assert(result.end == 3*3); 2584 assert(result.children.length == 3); 2585 assert(result.children == [ abc("abcabcabc"), abc(abc("abcabcabc")), abc(abc(abc("abcabcabc")))]); 2586 2587 assert(!azs("").successful); 2588 assert(azs("a").successful); 2589 assert(azs("abc").successful); 2590 assert(azs("abcdefghijklmnoqrstuvwxyz").successful); 2591 assert(azs("abcdefghijklmnoqrstuvwxyz 1234567890").successful); 2592 assert(!azs(".").successful); 2593 2594 result = azs("abc"); 2595 2596 assert(result.name == `oneOrMore!(charRange!('a','z'))`); 2597 assert(result.successful); 2598 assert(result.matches == ["a","b","c"]); 2599 assert(result.begin == 0); 2600 assert(result.end == 3); 2601 assert(result.children.length == 3); 2602 assert(result.children == [ az("abc"), az(az("abc")), az(az(az("abc")))]); 2603 } 2604 2605 /** 2606 Given a subrule 'r', represents the expression 'r?'. It tries to match 'r' and if this matches 2607 successfully, it returns this match. If 'r' failed, 'r?' is still a success, but without any child nor match. 2608 2609 ---- 2610 alias option!(literal!"abc") rule; // Aka '"abc"?' 2611 ParseTree input = ParseTree("",false,[],"abcd"); 2612 2613 ParseTree result = rule(input); 2614 assert(result.successful); 2615 assert(result.matches == ["abc"]); 2616 assert(result.children.length == 1); 2617 assert(result.children[0] == literal!"abc"(input)); 2618 ---- 2619 */ 2620 template option(alias r) 2621 { 2622 enum name = "option!(" ~ getName!(r) ~ ")"; 2623 2624 ParseTree option(ParseTree p) 2625 { 2626 version (tracer) 2627 { 2628 if (shouldTrace(getName!(r)(), p)) 2629 trace(traceMsg(p, name, getName!(r)())); 2630 } 2631 ParseTree result = r(p); 2632 if (result.successful) 2633 return ParseTree(name, true, result.matches, result.input, result.begin, result.end, [result], result.failEnd); 2634 else 2635 return ParseTree(name, true, [], p.input, p.end, p.end, null, max(result.end,result.failEnd), [result]); 2636 } 2637 2638 ParseTree option(string input) 2639 { 2640 return .option!(r)(ParseTree("",false,[],input)); 2641 } 2642 2643 string option(GetName g) 2644 { 2645 return name; 2646 } 2647 } 2648 2649 unittest // 'option' unit test 2650 { 2651 alias literal!"a" a; 2652 alias literal!"abc" abc; 2653 2654 alias option!(a) a_; 2655 alias option!(abc) abc_; 2656 2657 assert(getName!(a_)() == `option!(literal!("a"))`); 2658 assert(getName!(abc_)() == `option!(literal!("abc"))`); 2659 2660 assert(a_("").successful); 2661 assert(a_("a").successful); 2662 assert(a_("aa").successful); 2663 assert(a_("b").successful); 2664 2665 ParseTree result = a_("a"); 2666 2667 assert(result.name == `option!(literal!("a"))`); 2668 assert(result.successful); 2669 assert(result.matches == ["a"]); 2670 assert(result.begin == 0); 2671 assert(result.end == 1); 2672 assert(result.children.length == 1); 2673 assert(result.children == [ a("a")]); 2674 2675 result = a_(""); 2676 2677 assert(result.name == `option!(literal!("a"))`); 2678 assert(result.successful); 2679 assert(result.matches is null); 2680 assert(result.begin == 0); 2681 assert(result.end == 0); 2682 assert(result.children.length == 0); 2683 2684 2685 assert(abc_("").successful); 2686 assert(abc_("abc").successful); 2687 assert(abc_("abcabc").successful); 2688 assert(abc_("ab").successful); 2689 2690 result = abc_("abcdef"); 2691 2692 assert(result.name == `option!(literal!("abc"))`); 2693 assert(result.successful); 2694 assert(result.matches == ["abc"]); 2695 assert(result.begin == 0); 2696 assert(result.end == 3); 2697 assert(result.children.length == 1); 2698 assert(result.children == [ abc("abcdef")]); 2699 2700 result = abc_("def"); 2701 2702 assert(result.name == `option!(literal!("abc"))`); 2703 assert(result.successful); 2704 assert(result.matches is null); 2705 assert(result.begin == 0); 2706 assert(result.end == 0); 2707 assert(result.children.length == 0); 2708 } 2709 2710 /** 2711 Tries 'r' on the input. If it succeeds, the rule also succeeds, without consuming any input. 2712 If 'r' fails, then posLookahead!r also fails. Low-level implementation of '&r'. 2713 */ 2714 template posLookahead(alias r) 2715 { 2716 enum name = "posLookahead!(" ~ getName!(r) ~ ")"; 2717 2718 ParseTree posLookahead(ParseTree p) 2719 { 2720 version (tracer) 2721 { 2722 if (shouldTrace(getName!(r)(), p)) 2723 trace(traceMsg(p, name, getName!(r)())); 2724 } 2725 ParseTree temp = r(p); 2726 if (temp.successful) 2727 return ParseTree(name, temp.successful, [], p.input, p.end, p.end); 2728 else 2729 return ParseTree(name, temp.successful, [temp.matches[$-1]], p.input, p.end, p.end); 2730 } 2731 2732 ParseTree posLookahead(string input) 2733 { 2734 return .posLookahead!(r)(ParseTree("",false,[],input)); 2735 } 2736 2737 string posLookahead(GetName g) 2738 { 2739 return name; 2740 } 2741 } 2742 2743 unittest // 'posLookahead' unit test 2744 { 2745 alias literal!"a" a; 2746 alias literal!"abc" abc; 2747 2748 alias posLookahead!(a) a_; 2749 alias posLookahead!(abc) abc_; 2750 2751 assert(getName!(a_)() == `posLookahead!(literal!("a"))`); 2752 assert(getName!(abc_)() == `posLookahead!(literal!("abc"))`); 2753 2754 assert(!a_("").successful); 2755 assert(a_("a").successful); 2756 assert(a_("aa").successful); 2757 assert(!a_("b").successful); 2758 2759 ParseTree result = a_("a"); 2760 2761 assert(result.name == `posLookahead!(literal!("a"))`); 2762 assert(result.successful); 2763 assert(result.matches is null); 2764 assert(result.begin == 0); 2765 assert(result.end == 0); 2766 assert(result.children.length == 0); 2767 2768 result = a_(""); 2769 2770 assert(result.name == `posLookahead!(literal!("a"))`); 2771 assert(!result.successful); 2772 assert(result.matches == [`"a"`], "posLookahead error message."); 2773 assert(result.begin == 0); 2774 assert(result.end == 0); 2775 assert(result.children.length == 0); 2776 2777 assert(!abc_("").successful); 2778 assert(abc_("abc").successful); 2779 assert(abc_("abcabc").successful); 2780 assert(!abc_("ab").successful); 2781 2782 result = abc_("abcdef"); 2783 2784 assert(result.name == `posLookahead!(literal!("abc"))`); 2785 assert(result.successful); 2786 assert(result.matches is null); 2787 assert(result.begin == 0); 2788 assert(result.end == 0); 2789 assert(result.children.length == 0); 2790 2791 result = abc_("def"); 2792 2793 assert(result.name == `posLookahead!(literal!("abc"))`); 2794 assert(!result.successful); 2795 assert(result.matches == [`"abc"`], "posLookahead error message."); 2796 assert(result.begin == 0); 2797 assert(result.end == 0); 2798 assert(result.children.length == 0); 2799 } 2800 2801 /** 2802 Tries 'r' on the input. If it fails, the rule succeeds, without consuming any input. 2803 If 'r' succeeds, then negLookahead!r fails. Low-level implementation of '!r'. 2804 */ 2805 template negLookahead(alias r) 2806 { 2807 enum name = "negLookahead!(" ~ getName!(r) ~ ")"; 2808 2809 ParseTree negLookahead(ParseTree p) 2810 { 2811 version (tracer) 2812 { 2813 if (shouldTrace(getName!(r)(), p)) 2814 trace(traceMsg(p, name, getName!(r)())); 2815 } 2816 ParseTree temp = r(p); 2817 if (temp.successful) 2818 return ParseTree(name, false, ["anything but \"" ~ p.input[temp.begin..temp.end] ~ "\""], p.input, p.end, p.end); 2819 else 2820 return ParseTree(name, true, [], p.input, p.end, p.end); 2821 } 2822 2823 ParseTree negLookahead(string input) 2824 { 2825 return .negLookahead!(r)(ParseTree("",false,[],input)); 2826 } 2827 2828 string negLookahead(GetName g) 2829 { 2830 return name; 2831 } 2832 } 2833 2834 unittest // 'negLookahead' unit test 2835 { 2836 alias literal!"a" a; 2837 alias literal!"abc" abc; 2838 2839 alias negLookahead!(a) a_; 2840 alias negLookahead!(abc) abc_; 2841 2842 assert(getName!(a_)() == `negLookahead!(literal!("a"))`); 2843 assert(getName!(abc_)() == `negLookahead!(literal!("abc"))`); 2844 2845 assert(a_("").successful); 2846 assert(!a_("a").successful); 2847 assert(!a_("aa").successful); 2848 assert(a_("b").successful); 2849 2850 ParseTree result = a_("a"); 2851 2852 assert(result.name == `negLookahead!(literal!("a"))`); 2853 assert(!result.successful); 2854 assert(result.matches == [`anything but "a"`]); 2855 assert(result.begin == 0); 2856 assert(result.end == 0); 2857 assert(result.children.length == 0); 2858 2859 result = a_(""); 2860 2861 assert(result.name == `negLookahead!(literal!("a"))`); 2862 assert(result.successful); 2863 assert(result.matches is null); 2864 assert(result.begin == 0); 2865 assert(result.end == 0); 2866 assert(result.children.length == 0); 2867 2868 assert(abc_("").successful); 2869 assert(!abc_("abc").successful); 2870 assert(!abc_("abcabc").successful); 2871 assert(abc_("ab").successful); 2872 2873 result = abc_("abcdef"); 2874 2875 assert(result.name == `negLookahead!(literal!("abc"))`); 2876 assert(!result.successful); 2877 assert(result.matches == [`anything but "abc"`]); 2878 assert(result.begin == 0); 2879 assert(result.end == 0); 2880 assert(result.children.length == 0); 2881 2882 result = abc_("def"); 2883 2884 assert(result.name == `negLookahead!(literal!("abc"))`); 2885 assert(result.successful); 2886 assert(result.matches is null); 2887 assert(result.begin == 0); 2888 assert(result.end == 0); 2889 assert(result.children.length == 0); 2890 } 2891 2892 /** 2893 Internal helper template, to get a parse tree node with a name. For example, given: 2894 ---- 2895 alias or!(literal!("abc"), charRange!('0','9')) myRule; 2896 ---- 2897 2898 myRule gives nodes named "or", since its the parent rule. If you want nodes to be named "myRule", 2899 use named. named just overwrites the original root node name, the children are left untouched. 2900 2901 See_Also: defined. 2902 2903 ---- 2904 alias or!(literal!("abc"), charRange!('0','9')) rule; 2905 alias named!(rule, "myRule") myRule; 2906 2907 auto input = "abc3"; 2908 auto p1 = rule(input); 2909 auto p2 = myRule(input); 2910 2911 // They are both successful 2912 assert(p1.successful && p2.successful); 2913 assert(p1.matches == p2.matches); 2914 // But the names are different 2915 assert(p1.name == `or!(literal!("abc"), charRange!('0','9'))`); 2916 assert(p2.name == `myRule`); 2917 // Same children: 2918 assert(p2.children == p1.children); 2919 2920 ---- 2921 */ 2922 template named(alias r, string name) 2923 { 2924 ParseTree named(ParseTree p) 2925 { 2926 ParseTree result = r(p); 2927 result.name = name; 2928 return result; 2929 } 2930 2931 ParseTree named(string input) 2932 { 2933 return .named!(r,name)(ParseTree("",false,[],input)); 2934 } 2935 2936 string named(GetName g) 2937 { 2938 return name; 2939 } 2940 } 2941 2942 unittest // 'named' unit test 2943 { 2944 alias or!(literal!("abc"), charRange!('0','9')) rule; 2945 alias named!(rule, "myRule") myRule; 2946 2947 assert(getName!(rule)() == `or!(literal!("abc"), charRange!('0','9'))`); 2948 assert(getName!(myRule)() == "myRule"); 2949 2950 // Equality on success (except for the name) 2951 ParseTree result = rule("abc0"); 2952 ParseTree myResult = myRule("abc0"); 2953 2954 assert(myResult.successful == result.successful); 2955 assert(myResult.name == "myRule"); 2956 assert(myResult.matches == result.matches); 2957 assert(myResult.begin == result.begin); 2958 assert(myResult.end == result.end); 2959 assert(myResult.children == result.children); 2960 2961 // Equality on failure (except for the name) 2962 result = rule("_abc"); 2963 myResult = myRule("_abc"); 2964 2965 assert(myResult.successful == result.successful); 2966 assert(myResult.name == "myRule"); 2967 assert(myResult.matches == result.matches); 2968 assert(myResult.begin == result.begin); 2969 assert(myResult.end == result.end); 2970 assert(myResult.children == result.children); 2971 } 2972 2973 /** 2974 Internal helper template, to get a parse tree node with a name, while keeping the original node (see also named). 2975 For example, given: 2976 ---- 2977 alias or!(literal!("abc"), charRange!('0','9')) myRule; 2978 ---- 2979 2980 myRule gives nodes named "or", since its the parent rule. If you want nodes to be named "myRule", 2981 use defined. Contrary to named (see before), the original node is pushed as the child. 2982 2983 See_Also: named. 2984 2985 ---- 2986 alias or!(literal!("abc"), charRange!('0','9')) rule; 2987 alias defined!(rule, "myRule") myRule; 2988 2989 auto input = "abc3"; 2990 auto p1 = rule(input); 2991 auto p2 = myRule(input); 2992 2993 // They are both successful 2994 assert(p1.successful && p2.successful); 2995 assert(p1.matches == p2.matches); 2996 // But the names are different 2997 assert(p1.name == `or!(literal!("abc"), charRange!('0','9'))`); 2998 assert(p2.name == `myRule`); 2999 // Here the original tree is not discarded: 3000 assert(p2.children[0] == p1); 3001 ---- 3002 */ 3003 template defined(alias r, string name) 3004 { 3005 ParseTree defined(ParseTree p) 3006 { 3007 ParseTree result = r(p); 3008 result.children = [result]; 3009 result.name = name; 3010 return result; 3011 } 3012 3013 ParseTree defined(string input) 3014 { 3015 return .defined!(r,name)(ParseTree("",false,[],input)); 3016 } 3017 3018 string defined(GetName g) 3019 { 3020 return name; 3021 } 3022 } 3023 3024 unittest // 'defined' unit test 3025 { 3026 alias or!(literal!("abc"), charRange!('0','9')) rule; 3027 alias defined!(rule, "myRule") myRule; 3028 3029 assert(getName!(rule)() == `or!(literal!("abc"), charRange!('0','9'))`); 3030 assert(getName!(myRule)() == "myRule"); 3031 3032 // Equality on success (except for the name) 3033 ParseTree result = rule("abc0"); 3034 ParseTree myResult = myRule("abc0"); 3035 3036 assert(myResult.successful && result.successful); 3037 assert(myResult.name == "myRule"); 3038 assert(myResult.matches == result.matches); 3039 assert(myResult.begin == result.begin); 3040 assert(myResult.end == result.end); 3041 assert(myResult.children[0] == result); 3042 3043 // Equality on failure (except for the name) 3044 result = rule("_abc"); 3045 myResult = myRule("_abc"); 3046 3047 assert(!myResult.successful && !result.successful); 3048 assert(myResult.name == "myRule"); 3049 assert(myResult.matches == result.matches); 3050 assert(myResult.begin == result.begin); 3051 assert(myResult.end == result.end); 3052 assert(myResult.children[0] == result); 3053 } 3054 3055 /** 3056 Low-level representation for the expression 'r {act}'. That is, it applies rule 'r' 3057 on the input and then calls 'act' on the resulting ParseTree. 3058 */ 3059 template action(alias r, alias act) 3060 { 3061 ParseTree action(ParseTree p) 3062 { 3063 return act(r(p)); 3064 } 3065 3066 ParseTree action(string input) 3067 { 3068 return .action!(r,act)(ParseTree("",false,[],input)); 3069 } 3070 3071 string action(GetName g) 3072 { 3073 enum name = "action!("~ getName!(r)() ~ ", " ~ __traits(identifier, act) ~ ")"; 3074 return name; 3075 } 3076 } 3077 3078 unittest // 'action' unit test 3079 { 3080 ParseTree foo(ParseTree p) 3081 { 3082 p.matches ~= p.matches; // doubling matches 3083 return p; 3084 } 3085 3086 alias literal!("abc") abc; 3087 3088 alias action!(abc, foo) abcfoo; 3089 3090 assert(getName!(abcfoo) == `action!(literal!("abc"), foo)`); 3091 3092 ParseTree result = abcfoo("abc"); 3093 ParseTree reference = abc("abc"); 3094 3095 assert(result.successful == reference.successful); 3096 assert(result.successful); 3097 assert(result.matches == reference.matches ~ reference.matches); 3098 assert(result.begin == reference.begin); 3099 assert(result.end == reference.end); 3100 assert(result.children == reference.children); 3101 3102 // On failure 3103 result = abcfoo("_abc"); 3104 reference = abc("_abc"); 3105 3106 assert(result.successful == reference.successful); 3107 assert(!result.successful); 3108 assert(result.matches == reference.matches ~ reference.matches); 3109 assert(result.begin == reference.begin); 3110 assert(result.end == reference.end); 3111 assert(result.children == reference.children); 3112 } 3113 3114 /** 3115 Concatenates a ParseTree's matches as one match and discards its children. Equivalent to the expression '~r'. 3116 */ 3117 template fuse(alias r) 3118 { 3119 ParseTree fuse(ParseTree p) 3120 { 3121 p = r(p); 3122 if(p.successful) 3123 { 3124 if (p.matches.length != 0) 3125 p.matches = [std.array.join(p.matches)]; 3126 3127 p.children = null; // also discard children 3128 } 3129 return p; 3130 } 3131 3132 ParseTree fuse(string input) 3133 { 3134 return .fuse!(r)(ParseTree("",false,[],input)); 3135 } 3136 3137 string fuse(GetName g) 3138 { 3139 enum name = "fuse!(" ~ getName!(r)() ~ ")"; 3140 return name; 3141 } 3142 } 3143 3144 unittest // 'fuse' unit test 3145 { 3146 alias oneOrMore!(literal!("abc")) abcs; 3147 3148 alias fuse!(abcs) f; 3149 3150 assert(getName!(f) == `fuse!(oneOrMore!(literal!("abc")))`); 3151 3152 ParseTree result = f("abcabcabc"); 3153 ParseTree reference = abcs("abcabcabc"); 3154 3155 assert(result.successful == reference.successful); 3156 assert(result.successful); 3157 assert(reference.matches == ["abc", "abc", "abc"]); 3158 assert(result.matches == ["abcabcabc"]); 3159 assert(result.begin == reference.begin); 3160 assert(result.end == reference.end); 3161 assert(result.children is null); 3162 3163 // On failure 3164 result = f("_abc"); 3165 reference = abcs("_abc"); 3166 3167 assert(result.successful == reference.successful); 3168 assert(!result.successful); 3169 assert(result.matches == reference.matches); 3170 assert(result.begin == reference.begin); 3171 assert(result.end == reference.end); 3172 assert(result.children == reference.children); 3173 3174 alias discard!(literal!("abc")) dabc; 3175 alias fuse!(dabc) f2; 3176 3177 result = f2("abcabc"); 3178 reference = dabc("abcabc"); 3179 3180 assert(result.successful); 3181 assert(result.matches is null); 3182 assert(result.begin == reference.begin); 3183 assert(result.end == reference.end); 3184 assert(result.children == reference.children); 3185 } 3186 3187 /** 3188 Calls 'r' on the input and then discards its children nodes. 3189 */ 3190 template discardChildren(alias r) 3191 { 3192 ParseTree discardChildren(ParseTree p) 3193 { 3194 p = r(p); 3195 p.children = null; 3196 return p; 3197 } 3198 3199 ParseTree discardChildren(string input) 3200 { 3201 return .discardChildren!(r)(ParseTree("",false,[],input)); 3202 } 3203 3204 string discardChildren(GetName g) 3205 { 3206 enum name = "discardChildren!(" ~ getName!(r)() ~ ")"; 3207 return name; 3208 } 3209 } 3210 3211 /** 3212 Calls 'r' on the input and then discards its matches. 3213 */ 3214 template discardMatches(alias r) 3215 { 3216 ParseTree discardMatches(ParseTree p) 3217 { 3218 p = r(p); 3219 if (p.successful) 3220 p.matches = null; 3221 return p; 3222 } 3223 3224 ParseTree discardMatches(string input) 3225 { 3226 return .discardMatches!(r)(ParseTree("",false,[],input)); 3227 } 3228 3229 string discardMatches(GetName g) 3230 { 3231 enum name = "discardMatches!(" ~ getName!(r)() ~ ")"; 3232 return name; 3233 } 3234 } 3235 3236 /** 3237 Calls 'r' on the input and then discard everything 'r' returned: no children, no match and index 3238 put at the end of the match. It's the low-level engine behind ':r'. 3239 */ 3240 template discard(alias r) 3241 { 3242 ParseTree discard(ParseTree p) 3243 { 3244 ParseTree result = r(p); 3245 result.name = "discard!(" ~ getName!(r)() ~ ")"; 3246 //result.begin = result.end; 3247 result.children = null; 3248 if (result.successful) 3249 result.matches = null;//to keep error messages, if any 3250 3251 return result; 3252 } 3253 3254 ParseTree discard(string input) 3255 { 3256 return .discard!(r)(ParseTree("",false,[],input)); 3257 } 3258 3259 string discard(GetName g) 3260 { 3261 return "discard!(" ~ getName!(r)() ~ ")"; 3262 } 3263 } 3264 3265 unittest // 'discard' unit test 3266 { 3267 alias literal!"abc" abc; 3268 alias oneOrMore!abc abcs; 3269 alias discard!(literal!("abc")) dabc; 3270 alias discard!(oneOrMore!(literal!("abc")))dabcs; 3271 3272 ParseTree reference = abc("abc"); 3273 ParseTree result =dabc("abc"); 3274 3275 assert(result.successful == reference.successful); 3276 assert(result.successful); 3277 assert(result.name == `discard!(literal!("abc"))`); 3278 assert(result.matches is null); 3279 assert(result.begin == reference.begin); 3280 assert(result.end == reference.end); 3281 assert(result.children is null); 3282 3283 reference = abcs("abcabcabc"); 3284 result = dabcs("abcabcabc"); 3285 3286 assert(result.successful == reference.successful); 3287 assert(result.successful); 3288 assert(result.name == `discard!(oneOrMore!(literal!("abc")))`); 3289 assert(result.matches is null); 3290 assert(result.begin == reference.begin); 3291 assert(result.end == reference.end); 3292 assert(result.children is null); 3293 3294 // On failure 3295 reference = abcs(""); 3296 result = dabcs(""); 3297 3298 assert(result.successful == reference.successful); 3299 assert(!result.successful); 3300 assert(result.name == `discard!(oneOrMore!(literal!("abc")))`); 3301 assert(result.matches == [`"abc"`], "discard error message."); 3302 assert(result.begin == reference.begin); 3303 assert(result.end == reference.end); 3304 assert(result.children is null); 3305 3306 // Action on 'and' 3307 alias and!(abc,dabc,abc) discardMiddle; 3308 3309 result = discardMiddle("abcabcabc"); 3310 assert(result.successful); 3311 assert(result.matches == ["abc", "abc"]); 3312 assert(result.begin == 0); 3313 assert(result.end == 3*3); 3314 assert(result.children.length == 2); 3315 assert(result.children == [abc("abcabcabc"), abc(abc(abc("abcabcabc")))]); 3316 } 3317 3318 /** 3319 Calls 'r' on the input and then discards everything 'r' did, except its matches (so that 3320 they propagate upwards). Equivalent to ';r'. 3321 */ 3322 template drop(alias r) 3323 { 3324 ParseTree drop(ParseTree p) 3325 { 3326 ParseTree result = r(p); 3327 //result.begin = result.end; 3328 result.children = null; 3329 if (result.successful) 3330 result.name = "drop!(" ~ getName!(r)() ~ ")"; 3331 return result; 3332 } 3333 3334 ParseTree drop(string input) 3335 { 3336 return .drop!(r)(ParseTree("",false,[],input)); 3337 } 3338 3339 string drop(GetName g) 3340 { 3341 return "drop!(" ~ getName!(r)() ~ ")"; 3342 } 3343 } 3344 3345 unittest // 'drop' unit test 3346 { 3347 alias literal!"abc" abc; 3348 alias oneOrMore!abc abcs; 3349 alias drop!(literal!("abc")) dabc; 3350 alias drop!(oneOrMore!(literal!("abc"))) dabcs; 3351 3352 ParseTree reference = abc("abc"); 3353 ParseTree result =dabc("abc"); 3354 3355 assert(result.successful == reference.successful); 3356 assert(result.successful); 3357 assert(result.name == `drop!(literal!("abc"))`); 3358 assert(result.matches == reference.matches); 3359 assert(result.begin == reference.begin); 3360 assert(result.end == reference.end); 3361 assert(result.children is null); 3362 3363 reference = abcs("abcabcabc"); 3364 result = dabcs("abcabcabc"); 3365 3366 assert(result.successful == reference.successful); 3367 assert(result.successful); 3368 assert(result.name == `drop!(oneOrMore!(literal!("abc")))`); 3369 assert(result.matches == reference.matches); 3370 assert(result.begin == reference.begin); 3371 assert(result.end == reference.end); 3372 assert(result.children is null); 3373 3374 // On failure 3375 reference = abcs(""); 3376 result = dabcs(""); 3377 3378 assert(result.successful == reference.successful); 3379 assert(!result.successful); 3380 assert(result.name == reference.name); 3381 assert(result.matches == [`"abc"`], "'drop' error message."); 3382 assert(result.begin == reference.begin); 3383 assert(result.end == reference.end); 3384 assert(result.children is null); 3385 3386 // Action on 'and' 3387 alias and!(abc,dabc,abc) discardMiddle; 3388 3389 result = discardMiddle("abcabcabc"); 3390 assert(result.successful); 3391 assert(result.matches == ["abc", "abc", "abc"], "3 matches."); 3392 assert(result.begin == 0); 3393 assert(result.end == 3*3); 3394 assert(result.children.length == 2, "but only 2 children."); 3395 assert(result.children == [abc("abcabcabc"), abc(abc(abc("abcabcabc")))]); 3396 } 3397 3398 /** 3399 Makes r disappear in a sequence, letting its children take its place. It's equivalent 3400 to the '%' operator. Given A <- B %C D and C <- E F, a successful parse for A will 3401 generate a three with four children: B, E, F and D parse trees. 3402 */ 3403 template propagate(alias r) 3404 { 3405 ParseTree propagate(ParseTree p) 3406 { 3407 ParseTree result = r(p); 3408 if (result.successful) 3409 result.name = "propagate!(" ~ getName!(r)() ~ ")"; 3410 return result; 3411 } 3412 3413 ParseTree propagate(string input) 3414 { 3415 return .propagate!(r)(ParseTree("",false,[],input)); 3416 } 3417 3418 string propagate(GetName g) 3419 { 3420 return "propagate!(" ~ getName!(r)() ~ ")"; 3421 } 3422 } 3423 3424 /** 3425 Makes 'r's result be kept when it would be discarded by the tree-decimation done by a grammar. 3426 Equivalent to '^r'. 3427 */ 3428 template keep(alias r) 3429 { 3430 ParseTree keep(ParseTree p) 3431 { 3432 ParseTree result = r(p); 3433 if (result.successful) 3434 { 3435 result.children = [result]; 3436 result.name = "keep!(" ~ getName!(r)() ~ ")"; 3437 } 3438 return result; 3439 } 3440 3441 ParseTree keep(string input) 3442 { 3443 return .keep!(r)(ParseTree("",false,[],input)); 3444 } 3445 3446 string keep(GetName g) 3447 { 3448 return "keep!(" ~ getName!(r)() ~ ")"; 3449 } 3450 } 3451 3452 unittest // 'keep' unit test 3453 { 3454 // Grammar mimicry 3455 struct KeepTest 3456 { 3457 static bool isRule(string s) 3458 { 3459 if (s == "A" || s == "KA") 3460 return true; 3461 else 3462 return false; 3463 } 3464 3465 mixin decimateTree; 3466 3467 // Equivalent to A <- 'a' 'b' 3468 static ParseTree A(string s) 3469 { 3470 return decimateTree(named!(and!(literal!"a", literal!"b"), "A")(s)); 3471 } 3472 3473 // Here we use keep to protect 'b' 3474 // Equivalent to KA <- 'a' ^'b' 3475 static ParseTree KA(string s) 3476 { 3477 return decimateTree(named!(and!(literal!"a", keep!(literal!"b")), "KA")(s)); 3478 } 3479 } 3480 3481 ParseTree reference = KeepTest.A("abc"); 3482 ParseTree result = KeepTest.KA("abc"); 3483 3484 assert(result.successful == reference.successful); 3485 assert(result.matches == reference.matches); 3486 assert(result.matches == ["a","b"]); 3487 assert(result.begin == reference.begin); 3488 assert(result.end == reference.end); 3489 assert(reference.children.length == 0); 3490 assert(result.children.length == 1); 3491 assert(result.children == [literal!("b")(literal!("a")("abc"))], "'b' node was kept."); 3492 } 3493 3494 /* ****************** predefined rules ******************** */ 3495 3496 alias named!(or!(literal!("\r\n"), literal!("\n"), literal!("\r")), "endOfLine") endOfLine; /// predefined end-of-line parser 3497 alias endOfLine eol; /// helper alias. 3498 3499 alias or!(literal!(" "), literal!("\t"), literal!("\v")) space; /// predefined space-recognizing parser (space or tabulation). 3500 alias named!(literal!"\t", "tab") tab; /// A parser recognizing \t (tabulation) 3501 alias named!(fuse!(discardChildren!(oneOrMore!space)), 3502 "spaces") spaces; /// aka '~space+' 3503 alias or!(space, endOfLine) blank; /// Any blank char (spaces or end of line). 3504 alias named!(discard!(zeroOrMore!blank), 3505 "spacing") spacing; /// The basic space-management parser: discard zero or more blank spaces. 3506 3507 alias charRange!('0', '9') digit; /// Decimal digit: [0-9] 3508 alias named!(fuse!(discardChildren!(oneOrMore!digit)), "digits") digits; /// [0-9]+ 3509 3510 alias or!(charRange!('0','9'), charRange!('a','f'), charRange!('A', 'F')) hexDigit; /// Hexadecimal digit: [0-9a-fA-F] 3511 3512 alias charRange!('a', 'z') alpha; /// [a-z] 3513 alias charRange!('A', 'Z') Alpha; /// [A-Z] 3514 3515 alias and!(oneOrMore!(or!(alpha, Alpha, literal!("_"))), zeroOrMore!(or!(digit, alpha, Alpha, literal!("_")))) ident; 3516 alias named!(fuse!(discardChildren!ident), 3517 "identifier") identifier; /// [a-zA-Z_][a-zA-Z_0-9]*, the basic C-family identifier 3518 alias named!(fuse!(discardChildren!(and!(identifier, zeroOrMore!(and!(literal!".", identifier))))), 3519 "qualifiedIdentifier") qualifiedIdentifier; /// qualified identifiers (ids separated by dots: abd.def.g). 3520 3521 alias named!(literal!"/", "slash") slash; /// A parser recognizing '/' 3522 alias named!(literal!"\\", "backslash") backslash; /// A parser recognizing '\' 3523 alias named!(literal!"'", "quote") quote; /// A parser recognizing ' (single quote) 3524 alias named!(literal!"\"", "doublequote") doublequote; /// A parser recognizing " (double quote) 3525 alias named!(literal!"`", "backquote") backquote; /// A parser recognizing $(BACKTICK) (backquote) 3526 3527 /// A list of elem's separated by sep's. One element minimum. 3528 template list(alias elem, alias sep) 3529 { 3530 alias named!(spaceAnd!(oneOrMore!blank, and!(elem, zeroOrMore!(spaceAnd!(discardMatches!(sep), elem)))), "list") list; 3531 } 3532 3533 /// A list of elem's separated by sep's. The empty list (no elem, no sep) is OK. 3534 template list0(alias elem, alias sep) 3535 { 3536 alias named!(spaceAnd!(oneOrMore!blank, option!(and!(elem, zeroOrMore!(spaceAnd!(discardMatches!(sep), elem))))), "list0") list0; 3537 } 3538 3539 template AddSpace(alias sp) 3540 { 3541 template AddSpace(alias r) 3542 { 3543 alias TypeTuple!(r, discard!sp) AddSpace; 3544 } 3545 } 3546 3547 /** 3548 The engine formerly behind the '< ' Pegged rule: all sequence subelements of a rule are interspersed 3549 with a space-consuming rule, given as the first template parameter. It's not used by Pegged anymore 3550 but can be useful for low-level code. It might become deprecated, but it's not there yet. 3551 3552 ---- 3553 alias and!(literal!"abc", literal!"def") rule1; // "abc" "def", equivalent to "abcdef" 3554 alias spaceAnd!(oneOrMore!blank, literal!"abc", literal!"def") rule2; // "abc" "def", but with spaces in-between. 3555 3556 string input1 = "abcdef"; 3557 string input2 = " abc 3558 3559 def "; // with spaces and end of line markers. 3560 3561 assert(rule1(input1).successful); // OK 3562 assert(!rule1(input2).successful); // NOK, couldn't find "def" after "abc" 3563 3564 assert(rule2(input1).successful); // OK 3565 assert(rule2(input2).successful); // Still OK 3566 assert(rule2(input2).matches == ["abc","def"]);// rule2 finds the literals among the spaces 3567 ---- 3568 3569 As you can see on the previous line, spaceAnd discards the matched spaces 3570 and returns matches only for the 'real' subrules. 3571 3572 Note: by using a non-space rule as the first template argument, 3573 you can use spaceAnd as a generic 'find these patterns, possibly separated by this pattern' rule. 3574 3575 For example, using digits as separators: 3576 ---- 3577 alias spaceAnd!(digit, literal!"abc", litera!"def") rule3; 3578 3579 ParseTree result = rule3("123abc45def67890"); 3580 assert(rule3.successful); 3581 assert(rule3.matches == ["abc", "def"]); 3582 assert(rule3.children.length == 2); 3583 3584 assert(rule3.begin == 0;) 3585 assert(rule3.end == "123abc45def67890".length); 3586 ---- 3587 */ 3588 template spaceAnd(alias sp, rules...) 3589 { 3590 alias and!(discard!(zeroOrMore!sp), staticMap!(AddSpace!(zeroOrMore!sp), rules)) spaceAnd; 3591 } 3592 3593 unittest // 'spaceAnd' unit test 3594 { 3595 alias literal!"a" a; 3596 alias literal!"b" b; 3597 3598 //Basic use 3599 alias and!(a,b) ab; 3600 alias spaceAnd!(oneOrMore!blank, a, b) a_b; 3601 3602 ParseTree reference = ab("ab"); 3603 ParseTree result = a_b("ab"); 3604 3605 assert(reference.successful); 3606 assert(result.successful); 3607 assert(result.matches == reference.matches); 3608 assert(result.begin == reference.begin); 3609 assert(result.end == reference.end); 3610 assert(result.children == reference.children); 3611 3612 reference = ab(" a b "); 3613 result = a_b( " a b "); 3614 3615 assert(!reference.successful); 3616 assert(result.successful); 3617 assert(result.matches == ["a","b"]); 3618 assert(result.begin == 0); 3619 assert(result.end == " a b ".length); 3620 assert(result.children.length == 2); 3621 3622 reference = ab("ac"); 3623 result = a_b("ac"); 3624 3625 assert(!reference.successful); 3626 assert(!result.successful); 3627 3628 reference = ab(" a c "); 3629 result = a_b(" a c "); 3630 3631 assert(!reference.successful); 3632 assert(!result.successful); 3633 3634 // With another separator than spaces 3635 alias spaceAnd!(digit, a, b) a0b; 3636 3637 assert(a0b("ab").successful); 3638 assert(!a0b(" a b ").successful); 3639 3640 result = a0b("012a34b567"); 3641 3642 assert(result.successful); 3643 assert(result.matches == ["a", "b"]); 3644 assert(result.begin == 0); 3645 assert(result.end == "012a34b567".length); 3646 assert(result.children.length == 2); 3647 } 3648 3649 /// Mixin to simplify a parse tree inside a grammar 3650 mixin template decimateTree() 3651 { 3652 static ParseTree decimateTree(ParseTree p) 3653 { 3654 if(p.children.length == 0) return p; 3655 3656 bool parseFailed = !p.successful; 3657 3658 ParseTree[] filterChildren(ParseTree pt) 3659 { 3660 ParseTree[] result; 3661 foreach(child; pt.children) 3662 { 3663 import std.algorithm : startsWith; 3664 3665 if ( (isRule(child.name) && (child.matches.length != 0 || parseFailed)) 3666 || (!child.successful && child.children.length == 0) 3667 || (!child.successful && child.name.startsWith("or!") && child.children.length > 1) 3668 || (!pt.successful && child.successful && child.children.length == 0 && child.failedChild.length > 0)) 3669 { 3670 child.children = filterChildren(child); 3671 result ~= child; 3672 } 3673 else if (child.name.startsWith("keep!(")) // 'keep' node are never discarded. 3674 // They have only one child, the node to keep 3675 { 3676 result ~= child.children[0]; 3677 } 3678 else // discard this node, but see if its children contain nodes to keep 3679 { 3680 result ~= filterChildren(child); 3681 } 3682 } 3683 return result; 3684 } 3685 void filterFailedChildren(ref ParseTree pt) 3686 { 3687 foreach(ref child; pt.children) 3688 { 3689 filterFailedChildren(child); 3690 import std.algorithm : startsWith; 3691 3692 if ( (isRule(child.name) && (child.matches.length != 0 || parseFailed)) 3693 || (!child.successful && child.children.length == 0) 3694 || (!child.successful && child.name.startsWith("or!") && child.children.length > 1) 3695 || (!pt.successful && child.successful && child.children.length == 0 && child.failedChild.length > 0)) 3696 { 3697 } 3698 else if (child.name.startsWith("keep!(")) // 'keep' node are never discarded. 3699 // They have only one child, the node to keep 3700 { 3701 } 3702 else if (child.failedChild.length > 0)// discard this node, but see if its children contain nodes to keep 3703 { 3704 pt.failedChild ~= child.failedChild; 3705 child.failedChild = []; 3706 } 3707 } 3708 foreach(ref child; pt.failedChild) 3709 { 3710 filterFailedChildren(child); 3711 child.children = filterChildren(child); 3712 } 3713 } 3714 if (!p.successful) 3715 filterFailedChildren(p); 3716 p.children = filterChildren(p); 3717 return p; 3718 } 3719 } 3720 3721 /** 3722 Discard one-child nodes and replace them with their children. 3723 Most grammar tend to produce long one-child/one-child branches, 3724 simplifyTree compacts these. 3725 */ 3726 ParseTree simplifyTree(ParseTree p) pure nothrow @nogc 3727 { 3728 foreach(ref child; p.children) 3729 child = simplifyTree(child); 3730 3731 if (p.children.length != 1) 3732 return p; 3733 else // linear tree 3734 return p.children[0]; 3735 } 3736 3737 /** 3738 Returns: the number of nodes in a parse tree. 3739 */ 3740 size_t size(ParseTree p) pure nothrow @nogc 3741 { 3742 size_t result = 1; 3743 foreach(child; p.children) 3744 result += size(child); 3745 return result; 3746 } 3747 3748 /** 3749 Generic ParseTree modifier: 3750 predicate must be callable with a ParseTree and return a boolean. 3751 modifier must be callable with a ParseTree and return a ParseTree. 3752 3753 If predicate is true on input, modify calls modifier on input and return the result. 3754 If not, it continues with the children. 3755 */ 3756 ParseTree modify(alias predicate, alias modifier)(ParseTree input) 3757 { 3758 foreach(ref child; input.children) 3759 child = modify!(predicate, modifier)(child); 3760 3761 if (predicate(input)) 3762 input = modifier(input); 3763 3764 return input; 3765 }