1 /** 2 This module contains the engine behind Pegged, the expression templates building blocks to create a top-down 3 recursive-descent parser. 4 5 The terminals and non-terminals described here are meant to be used inside a Pegged grammar. 6 As such, they are a bit less user-friendly than what's output by pegged.grammar. 7 For example they take a ParseTree as input, not a string. 8 9 See the /docs directory for the full documentation as markdown files. 10 */ 11 module pegged.peg; 12 13 /* 14 NOTE: 15 Do not use the GrammarTester for unittesting in this module. This module needs 16 to be able to pass its unittests before the GrammarTester is even trustable. 17 Writing tests the long way is preferred here, as it will avoid the circular 18 dependency. 19 */ 20 21 import std.algorithm: equal, map, startsWith; 22 import std.uni : isAlpha, icmp; 23 import std.array; 24 import std.conv; 25 import std..string: strip; 26 import std.typetuple; 27 28 // Returns quoted and escaped version of the input, but if the input is null, then return `"end of input"`. 29 package string stringified(string inp) @safe 30 { 31 import std.format : format; 32 33 if (inp is null) 34 return `"end of input"`; 35 36 string[1] shell = [inp]; 37 return "%(%s%)".format(shell); 38 } 39 40 @safe unittest // Run- & Compile-time. 41 { 42 static bool doTest() 43 { 44 const caseSuit1 = [ 45 "" : `""`, 46 47 "\0" : `"\0"`, 48 "\n" : `"\n"`, 49 "\t" : `"\t"`, 50 51 `"` : `"\""`, 52 "`" : q"("`")", 53 "'" : `"'"`, 54 55 "42" : `"42"`, 56 "\"some text\"\n" : `"\"some text\"\n"` 57 ]; 58 59 const caseSuit2 = [cast(string)null : `"end of input"`]; 60 61 const auto testSuit = [caseSuit1, caseSuit2]; 62 63 foreach (const ref suit; testSuit) 64 { 65 foreach (const ref value, const ref expected; suit) 66 { 67 import std.format : format; 68 69 const result = value.stringified; 70 assert(result == expected, "from %s expected %s but got %s".format(value, expected, result)); 71 } 72 } 73 74 return true; 75 } 76 77 // Result is always true, but here, we just force CTFE-mode. 78 static assert(doTest); // Compile-time. 79 doTest; // Run-time. 80 } 81 82 version (tracer) 83 { 84 import std.experimental.logger; 85 import std.algorithm.comparison : min; 86 87 // Function pointers. 88 private static bool function(string ruleName, const ref ParseTree p) traceConditionFunction; 89 private static bool delegate(string ruleName, const ref ParseTree p) traceConditionDelegate; 90 private static int traceLevel; 91 private static bool traceBlocked; 92 private static bool logTraceLevel = false; 93 94 private void incTraceLevel() 95 { 96 if (!__ctfe) 97 traceLevel++; 98 } 99 100 private void decTraceLevel() 101 { 102 if (!__ctfe) 103 traceLevel--; 104 } 105 106 private bool shouldTrace(string ruleName, const ref ParseTree p) 107 { 108 if (__ctfe || traceBlocked) 109 return false; 110 if (traceConditionDelegate != null) 111 return traceConditionDelegate(ruleName, p); 112 if (traceConditionFunction != null) 113 return traceConditionFunction(ruleName, p); 114 return false; 115 } 116 117 static this() 118 { 119 traceLevel = 0; 120 traceNothing(); 121 traceBlocked = false; 122 } 123 124 /++ Supply a function to dynamically switch tracing on and off based on the rule name. 125 + 126 + Example: 127 + --- 128 + /* Exclude build-in parsers, only trace parsers generated from MyGrammar. */ 129 + setTraceConditionFunction(ruleName => ruleName.startsWith("MyGrammar")); 130 + --- 131 +/ 132 void setTraceConditionFunction(bool delegate(string ruleName, const ref ParseTree p) condition) 133 { 134 traceConditionDelegate = condition; 135 traceConditionFunction = null; 136 } 137 138 /// ditto 139 void setTraceConditionFunction(bool function(string ruleName, const ref ParseTree p) condition) 140 { 141 traceConditionFunction = condition; 142 traceConditionDelegate = null; 143 } 144 145 /** Trace all rules. 146 * 147 * This can produce a lot of output. 148 */ 149 void traceAll() 150 { 151 setTraceConditionFunction(function(string ruleName, const ref ParseTree p) {return true;}); 152 } 153 154 /** Do not trace any rules. */ 155 void traceNothing() 156 { 157 traceConditionFunction = null; 158 traceConditionDelegate = null; 159 } 160 161 private string traceMsg(ParseTree p, string expression, string name) 162 { 163 import std.format; 164 Position pos = position(p); 165 enum inputLength = 15; 166 string result; 167 for (auto i = 1; i <= traceLevel; i++) 168 result ~= format("%d|", i); 169 result ~= format(" (l:%d, c:%d, i:%d)\t", pos.line + 1, pos.col + 1, pos.index) ~ 170 expression.stringified ~ " considering rule " ~ name.stringified ~ " on " ~ 171 p.input[p.end .. min(p.input.length, p.end + inputLength)].stringified ~ 172 (p.end + inputLength > p.input.length ? "" : "..."); 173 return result; 174 } 175 176 private string traceResultMsg(ParseTree p, string name) 177 { 178 import std.format; 179 import std.range: chain; 180 import std.algorithm.iteration: joiner; 181 Position pos = position(p); 182 enum inputLength = 15; 183 string result; 184 for (auto i = 1; i <= traceLevel; i++) 185 result ~= format("%d|", i); 186 if (p.successful) 187 { 188 string consumed; 189 foreach (match; p.matches) 190 consumed ~= match; 191 result ~= format(" (l:%d, c:%d, i:%d)\t", pos.line + 1, pos.col + 1, pos.index) ~ name.stringified ~ " SUCCEEDED on " ~ 192 consumed.stringified; 193 } 194 else 195 result ~= format(" (l:%d, c:%d, i:%d)\t", pos.line + 1, pos.col + 1, pos.index) ~ name.stringified ~ " FAILED on " ~ 196 p.input[p.end .. min(p.input.length, p.end + inputLength)].stringified ~ 197 (p.end + inputLength > p.input.length ? "" : "..."); 198 return result; 199 } 200 201 /** 202 Overrides FileLogger to remove the time stamp. 203 204 Example: 205 --- 206 sharedLog = new TraceLogger("TraceLog.txt"); 207 --- 208 */ 209 class TraceLogger : FileLogger 210 { 211 this(in string fn) @safe 212 { 213 super(fn); 214 } 215 import std.concurrency : Tid; 216 import std.datetime : SysTime; 217 override protected void beginLogMsg(string file, int line, string funcName, 218 string prettyFuncName, string moduleName, LogLevel logLevel, 219 Tid threadId, SysTime timestamp, Logger logger) 220 @safe 221 { 222 } 223 } 224 } 225 226 /** 227 CT Switch for testing 'keywords' implementations 228 */ 229 enum 230 { 231 IFCHAIN, 232 TRIE 233 } 234 enum KEYWORDS = IFCHAIN; 235 236 /** 237 The basic parse tree, as used throughout the project. 238 You can define your own parse tree node, but respect the basic layout. 239 */ 240 struct ParseTree 241 { 242 string name; /// The node name 243 bool successful; /// Indicates whether a parsing was successful or not 244 string[] matches; /// The matched input's parts. Some expressions match at more than one place, hence matches is an array. 245 246 string input; /// The input string that generated the parse tree. Stored here for the parse tree to be passed to other expressions, as input. 247 size_t begin, end; /// Indices for the matched part (from the very beginning of the first match to the last char of the last match. 248 249 ParseTree[] children; /// The sub-trees created by sub-rules parsing. 250 251 /** 252 Basic toString for easy pretty-printing. 253 */ 254 string toString(string tabs = "") const 255 { 256 string result = name; 257 258 string childrenString; 259 bool allChildrenSuccessful = true; 260 261 foreach(i,child; children) 262 { 263 childrenString ~= tabs ~ " +-" ~ child.toString(tabs ~ ((i < children.length -1 ) ? " | " : " ")); 264 if (!child.successful) 265 allChildrenSuccessful = false; 266 } 267 268 if (successful) 269 { 270 result ~= " " ~ to!string([begin, end]) ~ to!string(matches) ~ "\n"; 271 } 272 else // some failure info is needed 273 { 274 if (allChildrenSuccessful) // no one calculated the position yet 275 { 276 Position pos = position(this); 277 string left, right; 278 279 if (pos.index < 10) 280 left = input[0 .. pos.index]; 281 else 282 left = input[pos.index-10 .. pos.index]; 283 //left = strip(left); 284 285 if (pos.index + 10 < input.length) 286 right = input[pos.index .. pos.index + 10]; 287 else 288 right = input[pos.index .. $]; 289 //right = strip(right); 290 291 result ~= " failure at line " ~ to!string(pos.line) ~ ", col " ~ to!string(pos.col) ~ ", " 292 ~ (left.length > 0 ? "after " ~ left.stringified ~ " " : "") 293 ~ "expected "~ (matches.length > 0 ? matches[$-1].stringified : "NO MATCH") 294 ~ ", but got " ~ right.stringified ~ "\n"; 295 } 296 else 297 { 298 result ~= " (failure)\n"; 299 } 300 } 301 302 return result ~ childrenString; 303 } 304 305 @property string failMsg() 306 { 307 foreach(i, child; children) 308 { 309 if (!child.successful) 310 return child.failMsg; 311 } 312 313 if (!successful) 314 { 315 Position pos = position(this); 316 string left, right; 317 318 if (pos.index < 10) 319 left = input[0 .. pos.index]; 320 else 321 left = input[pos.index - 10 .. pos.index]; 322 323 if (pos.index + 10 < input.length) 324 right = input[pos.index .. pos.index + 10]; 325 else 326 right = input[pos.index .. $]; 327 328 return "Failure at line " ~ to!string(pos.line) ~ ", col " ~ to!string(pos.col) ~ ", " 329 ~ (left.length > 0 ? "after " ~ left.stringified ~ " " : "") 330 ~ "expected " ~ (matches.length > 0 ? matches[$ - 1].stringified : "NO MATCH") 331 ~ `, but got ` ~ right.stringified; 332 } 333 334 return "Success"; 335 } 336 337 ParseTree dup() @property 338 { 339 ParseTree result = this; 340 result.matches = result.matches.dup; 341 result.children = map!(p => p.dup)(result.children).array(); 342 return result; 343 } 344 } 345 346 347 unittest // ParseTree testing 348 { 349 ParseTree p; 350 assert(p == p, "Self-identity on null tree."); 351 352 p = ParseTree("Name", true, ["abc", "", "def"], "input", 0, 1, null); 353 assert(p == p, "Self identity on non-null tree."); 354 355 ParseTree child = ParseTree("Child", true, ["abc", "", "def"], "input", 0, 1, null); 356 p.children = [child, child]; 357 358 ParseTree q = p; 359 assert(p == q, "Copying creates equal trees."); 360 361 assert(p.children == q.children); 362 p.children = [child, child, child]; 363 assert(q.children != p.children, "Disconnecting children."); 364 365 p = ParseTree("Name", true, ["abc", "", "def"], "input", 0, 1, null); 366 p.children = [child, child]; 367 368 q = p.dup; 369 assert(p == q, "Dupping creates equal trees."); 370 371 assert(p.children == q.children, "Equal children for dupped trees."); 372 p.children = null; 373 assert(q.children != p.children); 374 375 q.children = [p,p]; 376 assert(p != q, "Tree with different children are not equal."); 377 378 p.children = [p,p]; 379 assert(p == q, "Adding equivalent children is OK."); 380 381 p.matches = null; 382 assert(p != q, "Nulling matches makes trees unequal."); 383 384 p.matches = q.matches; 385 assert(p == q, "Copying matches makes equal trees."); 386 } 387 388 /// To compare two trees for content (not bothering with node names) 389 /// That's useful to compare the results from two different grammars. 390 bool softCompare(ParseTree p1, ParseTree p2) 391 { 392 return p1.successful == p2.successful 393 && p1.matches == p2.matches 394 && p1.begin == p2.begin 395 && p1.end == p2.end 396 && equal!(softCompare)(p1.children, p2.children); // the same for children 397 } 398 399 unittest // softCompare 400 { 401 ParseTree p = ParseTree("Name", true, ["abc", "", "def"], "input", 0, 1, null); 402 ParseTree child = ParseTree("Child", true, ["abc", "", "def"], "input", 0, 1, null); 403 p.children = [child, child]; 404 405 ParseTree q = p; 406 assert(p == q, "Copy => Equal trees."); 407 assert(softCompare(p,q), "Copy => Trees equal for softCompare."); 408 409 q.name = "Another One"; 410 assert(p != q, "Name change => non-equal trees."); 411 assert(softCompare(p,q), "Name change => Trees equal for softCompare."); 412 } 413 414 /// To record a position in a text 415 struct Position 416 { 417 size_t line;/// line number (starts at 0) 418 size_t col;/// column number (starts at 0) 419 size_t index;/// index (starts at 0) 420 } 421 422 /** 423 Given an input string, returns the position corresponding to the end of the string. 424 425 For example: 426 --- 427 assert(position("abc") == Position(0,3,3)); 428 assert(position("abc 429 ") == Position(1,0,4)); 430 assert(position("abc 431 432 ") == Position(2,4,8)); 433 --- 434 */ 435 Position position(string s) 436 { 437 size_t col, line, index, prev_i; 438 char prev_c; 439 foreach(i,c; s) 440 { 441 if ((c == '\n' && !(i == prev_i + 1 && prev_c == '\r')) || // new line except when directly following a carriage return. 442 c == '\r') 443 { 444 col = 0; 445 ++line; 446 ++index; 447 prev_i = i; 448 prev_c = c; 449 } 450 else 451 { 452 if (c != '\n') 453 ++col; 454 ++index; 455 } 456 } 457 458 return Position(line,col,index); 459 } 460 461 /** 462 Same as previous overload, but from the begin of P.input to p.end 463 */ 464 Position position(const ParseTree p) 465 { 466 return position(p.input[0..p.end]); 467 } 468 469 unittest 470 { 471 assert(position("") == Position(0,0,0), "Null string, position 0."); 472 assert(position("abc") == Position(0,3,3), "length 3 string, no line feed."); 473 assert(position("abc 474 ") == Position(1,0,4), "One end of line."); 475 assert(position("abc 476 477 ----") == Position(2,4,9), "Three lines (second one empty)."); 478 assert(position("abc 479 ---- 480 ----") == Position(2,4,13), "Three lines."); 481 assert(position(" 482 483 484 ") == Position(3,0,3), "Four lines, all empty."); 485 assert(position("one\r\ntwo\r\nthree") == Position(2, 5, 15), "Three lines, DOS line endings"); 486 } 487 488 string getName(alias expr)() @property 489 { 490 static if (is(typeof( { expr(GetName()); }))) 491 return expr(GetName()); 492 else 493 return __traits(identifier, expr); 494 } 495 496 struct GetName {} 497 498 /** 499 Basic rule, that always fail without consuming. 500 */ 501 ParseTree fail(ParseTree p) 502 { 503 return ParseTree("fail", false, [], p.input, p.end, p.end, null); 504 } 505 506 /// ditto 507 ParseTree fail(string input) 508 { 509 return fail(ParseTree("", false, [], input)); 510 } 511 512 string fail(GetName g) 513 { 514 return "fail"; 515 } 516 517 unittest // 'fail' unit test 518 { 519 ParseTree input = ParseTree("input", true, [], "This is the input string.", 0,0, null); 520 ParseTree result = fail(input); 521 assert(result.name == "fail"); 522 assert(!result.successful, "'fail' fails."); 523 assert(result.matches is null, "'fail' makes no match."); 524 assert(result.input == input.input, "'fail' does not change the input."); 525 assert(result.end == input.end, "'fail' puts the index after the previous parse."); 526 assert(result.children is null, "'fail' has no children."); 527 528 result = fail("This is the input string."); 529 assert(!result.successful, "'fail' fails."); 530 } 531 532 /** 533 Matches the end of input. Fails if there is any character left. 534 */ 535 ParseTree eoi(ParseTree p) 536 { 537 if (p.end == p.input.length) 538 return ParseTree("eoi", true, [], p.input, p.end, p.end); 539 else 540 return ParseTree("eoi", false, ["end of input"], p.input, p.end, p.end); 541 } 542 543 /// ditto 544 ParseTree eoi(string input) 545 { 546 return eoi(ParseTree("", false, [], input)); 547 } 548 549 string eoi(GetName g) 550 { 551 return "eoi"; 552 } 553 554 alias eoi endOfInput; /// helper alias. 555 556 unittest // 'eoi' unit test 557 { 558 ParseTree input = ParseTree("input", true, [], "This is the input string.", 0,0, null); 559 ParseTree result = eoi(input); 560 assert(result.name == "eoi"); 561 assert(!result.successful, "'eoi' fails on non-null string."); 562 assert(result.matches == ["end of input"], "'eoi' error message."); 563 assert(result.input == input.input, "'eoi' does not change the input."); 564 assert(result.end == input.end, "'eoi' puts the index after the previous parse."); 565 assert(result.children is null, "'eoi' has no children."); 566 567 input = ParseTree("input", true, [], "", 0,0, null); 568 result = eoi(input); 569 assert(result.successful, "'eoi' succeeds on strings of length 0."); 570 result = eoi(""); 571 assert(result.successful, "'eoi' succeeds on strings of length 0."); 572 result = eoi(null); 573 assert(result.successful, "'eoi' succeeds on null strings"); 574 } 575 576 /** 577 Match any character. As long as there is at least a character left in the input, it succeeds. 578 Conversely, it fails only if called at the end of the input. 579 */ 580 ParseTree any(ParseTree p) 581 { 582 if (p.end < p.input.length) 583 return ParseTree("any", true, [p.input[p.end..p.end+1]], p.input, p.end, p.end+1); 584 else 585 return ParseTree("any", false, ["any char"], p.input, p.end, p.end); 586 } 587 588 /// ditto 589 ParseTree any(string input) 590 { 591 return any(ParseTree("", false, [], input)); 592 } 593 594 string any(GetName g) 595 { 596 return "any"; 597 } 598 599 unittest // 'any' unit test 600 { 601 ParseTree input = ParseTree("input", true, [], "This is the input string.", 0,0, null); 602 ParseTree result = any(input); 603 604 assert(result.name == "any"); 605 assert(result.successful, "'any' succeeds on non-null strings."); 606 assert(result.matches == ["T"], "'any' matches the first char in an input."); 607 assert(result.input == input.input, "'any' does not change the input."); 608 assert(result.end == input.end+1, "'any' advances the index by one position."); 609 assert(result.children is null, "'any' has no children."); 610 611 result = any("a"); 612 assert(result.successful, "'any' matches on strings of length one."); 613 assert(result.matches == ["a"], "'any' matches the first char in an input."); 614 assert(result.input == "a", "'any' does not change the input."); 615 assert(result.end == 1, "'any' advances the index by one position."); 616 assert(result.children is null, "'any' has no children."); 617 618 input = ParseTree("input", true, [], "", 0,0, null); 619 620 result = any(input); 621 assert(!result.successful, "'any' fails on strings of length 0."); 622 assert(result.matches == ["any char"], "'any' error message on strings of length 0."); 623 assert(result.end == 0, "'any' does not advance the index."); 624 625 result = any(""); 626 assert(!result.successful, "'any' fails on strings of length 0."); 627 assert(result.matches == ["any char"], "'any' error message on strings of length 0."); 628 assert(result.end == 0, "'any' does not advance the index."); 629 630 result = any(null); 631 assert(!result.successful, "'any' fails on null strings."); 632 assert(result.matches == ["any char"], "'any' error message on strings of length 0."); 633 assert(result.end == 0, "'any' does not advance the index."); 634 } 635 636 /** 637 Predefined parser: matches word boundaries, as \b for regexes. 638 */ 639 ParseTree wordBoundary(ParseTree p) 640 { 641 // TODO: I added more indexing guards and now this could probably use 642 // some simplification. Too tired to write it better. --Chad 643 bool matched = (p.end == 0 && isAlpha(p.input.front())) 644 || (p.end == p.input.length && isAlpha(p.input.back())) 645 || (p.end > 0 && isAlpha(p.input[p.end-1]) && p.end < p.input.length && !isAlpha(p.input[p.end])) 646 || (p.end > 0 && !isAlpha(p.input[p.end-1]) && p.end < p.input.length && isAlpha(p.input[p.end])); 647 if (matched) 648 return ParseTree("wordBoundary", matched, [], p.input, p.end, p.end, null); 649 else 650 return ParseTree("wordBoundary", matched, ["word boundary"], p.input, p.end, p.end, null); 651 } 652 653 /// ditto 654 ParseTree wordBoundary(string input) 655 { 656 return ParseTree("wordBoundary", isAlpha(input.front()), [], input, 0,0, null); 657 } 658 659 string wordBoundary(GetName g) 660 { 661 return "wordBoundary"; 662 } 663 664 unittest // word boundary 665 { 666 ParseTree input = ParseTree("", false, [], "This is a word."); 667 auto wb = [// "This" 668 cast(size_t)(0):true, 1:false, 2:false, 3:false, 4: true, 669 // "is" 670 5: true, 6:false, 7: true, 671 // "a" 672 8: true, 9:true, 673 // "word" 674 10:true, 11:false, 12:false, 13:false, 14:true, 675 // "." 676 15:false 677 ]; 678 679 foreach(size_t index; 0 .. input.input.length) 680 { 681 input.end = index; 682 ParseTree result = wordBoundary(input); 683 684 assert(result.name == "wordBoundary"); 685 assert(result.successful == wb[index]); // true, false, ... 686 // for errors, there is an error message 687 assert(result.successful && result.matches is null || !result.successful); 688 assert(result.begin == input.end); 689 assert(result.end == input.end); 690 assert(result.children is null); 691 } 692 } 693 694 /** 695 Represents a literal in a PEG, like "abc" or 'abc' (or even ''). 696 It succeeds if a prefix of the input is equal to its template parameter and fails otherwise. 697 */ 698 template literal(string s) 699 { 700 enum name = "literal!(\""~s~"\")"; 701 702 ParseTree literal(ParseTree p) 703 { 704 enum lit = "\"" ~ s ~ "\""; 705 if (p.end+s.length <= p.input.length && p.input[p.end..p.end+s.length] == s) 706 return ParseTree(name, true, [s], p.input, p.end, p.end+s.length); 707 else 708 return ParseTree(name, false, [lit], p.input, p.end, p.end); 709 } 710 711 ParseTree literal(string input) 712 { 713 return .literal!(s)(ParseTree("", false, [], input)); 714 } 715 716 717 string literal(GetName g) 718 { 719 return name; 720 } 721 722 } 723 724 unittest // 'literal' unit test 725 { 726 ParseTree input = ParseTree("input", true, [], "abcdef", 0,0, null); 727 728 alias literal!"a" a; 729 alias literal!"abc" abc; 730 alias literal!"" empty; 731 732 ParseTree result = a(input); 733 734 assert(result.name == `literal!("a")`, "Literal name test."); 735 assert(result.successful, "'a' succeeds on inputs beginning with 'a'."); 736 assert(result.matches == ["a"], "'a' matches the 'a' at the beginning."); 737 assert(result.input == input.input, "'a' does not change the input."); 738 assert(result.end == input.end+1, "'a' advances the index by one position."); 739 assert(result.children is null, "'a' has no children."); 740 741 result = a("abcdef"); 742 743 assert(result.successful, "'a' succeeds on inputs beginning with 'a'."); 744 assert(result.matches == ["a"], "'a' matches the 'a' at the beginning."); 745 assert(result.input == input.input, "'a' does not change the input."); 746 assert(result.end == input.end+1, "'a' advances the index by one position."); 747 assert(result.children is null, "'a' has no children."); 748 749 result = abc(input); 750 751 assert(result.name == `literal!("abc")`, "Literal name test."); 752 assert(result.successful, "'abc' succeeds on inputs beginning with 'abc'."); 753 assert(result.matches == ["abc"], "'abc' matches 'abc' at the beginning."); 754 assert(result.input == input.input, "'abc' does not change the input."); 755 assert(result.end == input.end+3, "'abc' advances the index by 3 positions."); 756 assert(result.children is null, "'abc' has no children."); 757 758 result = abc("abcdef"); 759 760 assert(result.successful, "'abc' succeeds on inputs beginning with 'abc'."); 761 assert(result.matches == ["abc"], "'abc' matches 'abc' at the beginning."); 762 assert(result.input == input.input, "'abc' does not change the input."); 763 assert(result.end == input.end+3, "'abc' advances the index by 3 positions."); 764 assert(result.children is null, "'abc' has no children."); 765 766 result = empty(input); 767 768 assert(result.name == `literal!("")`, "Literal name test."); 769 assert(result.successful, "'' succeeds on non-null inputs."); 770 assert(result.matches == [""], "'' matches '' at the beginning."); 771 assert(result.input == input.input, "'' does not change the input."); 772 assert(result.end == input.end+0, "'' does not advance the index."); 773 assert(result.children is null, "'' has no children."); 774 775 result = empty("abcdef"); 776 777 assert(result.successful, "'' succeeds on non-null inputs."); 778 assert(result.matches == [""], "'' matches '' at the beginning."); 779 assert(result.input == input.input, "'' does not change the input."); 780 assert(result.end == input.end+0, "'' does not advance the index."); 781 assert(result.children is null, "'' has no children."); 782 783 input.input = "bcdef"; 784 785 result = a(input); 786 787 assert(!result.successful, "'a' fails on inputs not beginning with 'a'."); 788 assert(result.matches == ["\"a\""], "'a' makes no match on 'bcdef'."); 789 assert(result.input == input.input, "'a' does not change the input."); 790 assert(result.end == input.end, "'a' does not advances the index on 'bcdef'."); 791 assert(result.children is null, "'a' has no children."); 792 793 result = abc(input); 794 795 assert(!result.successful, "'abc' fails on inputs not beginning with 'abc'."); 796 assert(result.matches == ["\"abc\""], "'abc' does no match on 'bcdef'."); 797 assert(result.input == input.input, "'abc' does not change the input."); 798 assert(result.end == input.end, "'abc' does not advance the index on 'bcdef'."); 799 assert(result.children is null, "'abc' has no children."); 800 801 result = empty(input); 802 803 assert(result.successful, "'' succeeds on non-null inputs."); 804 assert(result.matches == [""], "'' matches '' at the beginning."); 805 assert(result.input == input.input, "'' does not change the input."); 806 assert(result.end == input.end+0, "'' does not advance the index."); 807 assert(result.children is null, "'' has no children."); 808 809 input.input = ""; 810 811 result = a(input); 812 813 assert(!result.successful, "'a' fails on empty strings."); 814 assert(result.matches == ["\"a\""], "'a' does not match ''."); 815 assert(result.input == input.input, "'a' does not change the input."); 816 assert(result.end == input.end, "'a' does not advance the index on 'bcdef'."); 817 assert(result.children is null, "'a' has no children."); 818 819 result = abc(input); 820 821 assert(!result.successful, "'abc' fails on empty strings."); 822 assert(result.matches == ["\"abc\""], "'abc' does not match ''."); 823 assert(result.input == input.input, "'abc' does not change the input."); 824 assert(result.end == input.end, "'abc' does not advance the index on 'bcdef'."); 825 assert(result.children is null, "'abc' has no children."); 826 827 result = empty(input); 828 829 assert(result.successful, "'' succeeds on empty strings."); 830 assert(result.matches == [""], "'' matches '' at the beginning, even on empty strings."); 831 assert(result.input == input.input, "'' does not change the input."); 832 assert(result.end == input.end+0, "'' does not advance the index."); 833 assert(result.children is null, "'' has no children."); 834 } 835 836 /** 837 Represents a case insensitive literal in a PEG, like "abc"i or 'abc'i (or even ''i). 838 It succeeds if a case insensitive comparison of a prefix of the input and its template 839 parameter yields no difference and fails otherwise. 840 */ 841 template caseInsensitiveLiteral(string s) 842 { 843 enum name = "caseInsensitiveLiteral!(\""~s~"\")"; 844 845 ParseTree caseInsensitiveLiteral(ParseTree p) 846 { 847 enum lit = "\"" ~ s ~ "\""; 848 if (p.end+s.length <= p.input.length && icmp(p.input[p.end..p.end+s.length], s) == 0) 849 return ParseTree(name, true, [s], p.input, p.end, p.end+s.length); 850 else 851 return ParseTree(name, false, [lit], p.input, p.end, p.end); 852 } 853 854 ParseTree caseInsensitiveLiteral(string input) 855 { 856 return .caseInsensitiveLiteral!(s)(ParseTree("", false, [], input)); 857 } 858 859 860 string caseInsensitiveLiteral(GetName g) 861 { 862 return name; 863 } 864 865 } 866 867 unittest // 'caseInsensitiveLiteral' unit test 868 { 869 ParseTree input = ParseTree("input", true, [], "AbCdEf", 0,0, null); 870 871 alias caseInsensitiveLiteral!"a" a; 872 alias caseInsensitiveLiteral!"aBC" abc; 873 alias caseInsensitiveLiteral!"" empty; 874 875 ParseTree result = a(input); 876 877 assert(result.name == `caseInsensitiveLiteral!("a")`, "caseInsensitiveLiteral name test."); 878 assert(result.successful, "'a' succeeds on inputs beginning with 'A'."); 879 assert(result.matches == ["a"], "'a' matches the 'A' at the beginning."); 880 assert(result.input == input.input, "'a' does not change the input."); 881 assert(result.end == input.end+1, "'a' advances the index by one position."); 882 assert(result.children is null, "'a' has no children."); 883 884 result = a("AbCdEf"); 885 886 assert(result.successful, "'a' succeeds on inputs beginning with 'A'."); 887 assert(result.matches == ["a"], "'a' matches the 'A' at the beginning."); 888 assert(result.input == input.input, "'a' does not change the input."); 889 assert(result.end == input.end+1, "'a' advances the index by one position."); 890 assert(result.children is null, "'a' has no children."); 891 892 result = abc(input); 893 894 assert(result.name == `caseInsensitiveLiteral!("aBC")`, "caseInsensitiveLiteral name test."); 895 assert(result.successful, "'aBC' succeeds on inputs beginning with 'AbC'."); 896 assert(result.matches == ["aBC"], "'AbC' matches 'aBC' at the beginning."); 897 assert(result.input == input.input, "'aBC' does not change the input."); 898 assert(result.end == input.end+3, "'aBC' advances the index by 3 positions."); 899 assert(result.children is null, "'aBC' has no children."); 900 901 result = abc("AbCdEf"); 902 903 assert(result.successful, "'aBC' succeeds on inputs beginning with 'AbC'."); 904 assert(result.matches == ["aBC"], "'AbC' matches 'aBC' at the beginning."); 905 assert(result.input == input.input, "'aBC' does not change the input."); 906 assert(result.end == input.end+3, "'aBC' advances the index by 3 positions."); 907 assert(result.children is null, "'aBC' has no children."); 908 909 result = empty(input); 910 911 assert(result.name == `caseInsensitiveLiteral!("")`, "caseInsensitiveLiteral name test."); 912 assert(result.successful, "'' succeeds on non-null inputs."); 913 assert(result.matches == [""], "'' matches '' at the beginning."); 914 assert(result.input == input.input, "'' does not change the input."); 915 assert(result.end == input.end+0, "'' does not advance the index."); 916 assert(result.children is null, "'' has no children."); 917 918 result = empty("AbCdEf"); 919 920 assert(result.successful, "'' succeeds on non-null inputs."); 921 assert(result.matches == [""], "'' matches '' at the beginning."); 922 assert(result.input == input.input, "'' does not change the input."); 923 assert(result.end == input.end+0, "'' does not advance the index."); 924 assert(result.children is null, "'' has no children."); 925 926 input.input = "bCdEf"; 927 928 result = a(input); 929 930 assert(!result.successful, "'a' fails on inputs not beginning with 'a' or 'A'."); 931 assert(result.matches == ["\"a\""], "'a' makes no match on 'bCdEf'."); 932 assert(result.input == input.input, "'a' does not change the input."); 933 assert(result.end == input.end, "'a' does not advances the index on 'bCdEf'."); 934 assert(result.children is null, "'a' has no children."); 935 936 result = abc(input); 937 938 assert(!result.successful, "'aBC' fails on inputs beginning with 'bCdEf'."); 939 assert(result.matches == ["\"aBC\""], "'aBC' does no match on 'bCdEf'."); 940 assert(result.input == input.input, "'aBC' does not change the input."); 941 assert(result.end == input.end, "'aBC' does not advance the index on 'bCdEf'."); 942 assert(result.children is null, "'aBC' has no children."); 943 944 result = empty(input); 945 946 assert(result.successful, "'' succeeds on non-null inputs."); 947 assert(result.matches == [""], "'' matches '' at the beginning."); 948 assert(result.input == input.input, "'' does not change the input."); 949 assert(result.end == input.end+0, "'' does not advance the index."); 950 assert(result.children is null, "'' has no children."); 951 952 input.input = ""; 953 954 result = a(input); 955 956 assert(!result.successful, "'a' fails on empty strings."); 957 assert(result.matches == ["\"a\""], "'a' does not match ''."); 958 assert(result.input == input.input, "'a' does not change the input."); 959 assert(result.end == input.end, "'a' does not advance the index on 'bCdEf'."); 960 assert(result.children is null, "'a' has no children."); 961 962 result = abc(input); 963 964 assert(!result.successful, "'aBC' fails on empty strings."); 965 assert(result.matches == ["\"aBC\""], "'aBC' does not match ''."); 966 assert(result.input == input.input, "'aBC' does not change the input."); 967 assert(result.end == input.end, "'aBC' does not advance the index on 'bCdEf'."); 968 assert(result.children is null, "'aBC' has no children."); 969 970 result = empty(input); 971 972 assert(result.successful, "'' succeeds on empty strings."); 973 assert(result.matches == [""], "'' matches '' at the beginning, even on empty strings."); 974 assert(result.input == input.input, "'' does not change the input."); 975 assert(result.end == input.end+0, "'' does not advance the index."); 976 assert(result.children is null, "'' has no children."); 977 978 input.input = "ÄöÜæØå"; 979 result = caseInsensitiveLiteral!"äÖüÆøÅ"(input); 980 981 assert(result.successful, "Unicode characters are matched case insensitively."); 982 } 983 984 /** 985 Represents a range of chars, from begin to end, included. So charRange!('a','z') matches 986 all English lowercase letters. If fails if the input is empty or does not begin with a character 987 between begin and end. 988 989 If begin == end, it will match one char (begin... or end). 990 991 begin > end is non-legal. 992 */ 993 template charRange(dchar begin, dchar end) if (begin <= end) 994 { 995 enum name = "charRange!('"~to!string(begin)~"','" ~ to!string(end) ~ "')"; 996 997 ParseTree charRange(ParseTree p) 998 { 999 enum longname = "a char between '"~to!string(begin)~"' and '"~to!string(end)~"'"; 1000 if (p.end < p.input.length && p.input[p.end] >= begin && p.input[p.end] <= end) 1001 return ParseTree(name, true, [p.input[p.end..p.end+1]], p.input, p.end, p.end+1); 1002 else 1003 return ParseTree(name, false, [longname], p.input, p.end, p.end); 1004 } 1005 1006 ParseTree charRange(string input) 1007 { 1008 return .charRange!(begin,end)(ParseTree("",false,[],input)); 1009 } 1010 1011 string charRange(GetName g) 1012 { 1013 return name; 1014 } 1015 } 1016 1017 unittest // 'charRange' unit test 1018 { 1019 ParseTree input = ParseTree("input", true, [], "abcdef", 0,0, null); 1020 1021 alias charRange!('a','a') aa; 1022 alias charRange!('a','b') ab; 1023 alias charRange!('a','z') az; 1024 alias charRange!(dchar.min,dchar.max) allChars; 1025 alias charRange!('\U00000000','\U000000FF') ASCII; 1026 1027 static assert(!__traits(compiles, {alias charRange!('z','a') za;})); 1028 1029 ParseTree result = aa(input); 1030 1031 assert(result.name == "charRange!('a','a')", "charRange name test."); 1032 assert(result.successful, "'a-a' succeeds on inputs beginning with 'a'."); 1033 assert(result.matches == ["a"], "'a-a' matches the 'a' at the beginning."); 1034 assert(result.input == input.input, "'a-a' does not change the input."); 1035 assert(result.end == input.end+1, "'a-a' advances the index by one position."); 1036 assert(result.children is null, "'a-a' has no children."); 1037 1038 result = ab("abcdef"); 1039 1040 assert(result.name == "charRange!('a','b')", "charRange name test."); 1041 assert(result.successful, "'a-b' succeeds on inputs beginning with 'a'."); 1042 assert(result.matches == ["a"], "'a-b' matches the 'a' at the beginning."); 1043 assert(result.input == input.input, "'a-b' does not change the input."); 1044 assert(result.end == input.end+1, "'a-b' advances the index by one position."); 1045 assert(result.children is null, "'a-b' has no children."); 1046 1047 result = az(input); 1048 1049 assert(result.name == "charRange!('a','z')", "charRange name test."); 1050 assert(result.successful, "'a-z' succeeds on inputs beginning with 'abc'."); 1051 assert(result.matches == ["a"], "'a-z' matches 'a' at the beginning."); 1052 assert(result.input == input.input, "'a-z' does not change the input."); 1053 assert(result.end == input.end+1, "'a-z' advances the index by one position."); 1054 assert(result.children is null, "'a-z' has no children."); 1055 1056 input.input = "bcdef"; 1057 1058 result = aa(input); 1059 1060 assert(!result.successful, "'a-a' fails on inputs not beginning with 'a'."); 1061 assert(result.matches == ["a char between 'a' and 'a'"], "'a-a' makes no match on 'bcdef'."); 1062 assert(result.input == input.input, "'a-a' does not change the input."); 1063 assert(result.end == input.end, "'a-a' does not advances the index on 'bcdef'."); 1064 assert(result.children is null, "'a-a' has no children."); 1065 1066 result = ab(input); 1067 1068 assert(result.successful, "'a-b' succeeds on inputs beginning with 'b'."); 1069 assert(result.matches == ["b"], "'a-b' matches on 'bcdef'."); 1070 assert(result.input == input.input, "'a-b' does not change the input."); 1071 assert(result.end == input.end+1, "'a-b' advances the index by one position'."); 1072 assert(result.children is null, "'a-b' has no children."); 1073 1074 result = az(input); 1075 1076 assert(result.successful, "'a-z' succeeds on 'bcdef'."); 1077 assert(result.matches == ["b"], "'a-z' matches 'b' at the beginning of 'bcdef'."); 1078 assert(result.input == input.input, "'a-z' does not change the input."); 1079 assert(result.end == input.end+1, "'a-z' advances the index by one position."); 1080 assert(result.children is null, "'a-z' has no children."); 1081 1082 input.input = ""; 1083 1084 result = aa(input); 1085 1086 assert(!result.successful, "'a-a' fails on empty strings."); 1087 assert(result.matches == ["a char between 'a' and 'a'"], "'a-a' does not match ''."); 1088 assert(result.input == input.input, "'a-a' does not change the input."); 1089 assert(result.end == input.end, "'a-a' does not advance the index on ''."); 1090 assert(result.children is null, "'a-a' has no children."); 1091 1092 result = ab(input); 1093 1094 assert(!result.successful, "'a-b' fails on empty strings."); 1095 assert(result.matches == ["a char between 'a' and 'b'"], "'a-b' does not match ''."); 1096 assert(result.input == input.input, "'a-b' does not change the input."); 1097 assert(result.end == input.end, "'a-b' does not advance the index on ''."); 1098 assert(result.children is null, "'a-b' has no children."); 1099 1100 result = az(input); 1101 1102 assert(!result.successful, "'a-z' fails on empty strings."); 1103 assert(result.matches == ["a char between 'a' and 'z'"], "'a-z' does not match ''."); 1104 assert(result.input == input.input, "'a-z' does not change the input."); 1105 assert(result.end == input.end, "'a-z' does not advance the index on ''."); 1106 assert(result.children is null, "'a-z' has no children."); 1107 1108 input.input = "123"; 1109 1110 result = aa(input); 1111 1112 assert(!result.successful, "'a-a' fails on '123'."); 1113 assert(result.matches == ["a char between 'a' and 'a'"], "'a-a' does not match '123'."); 1114 assert(result.input == input.input, "'a-a' does not change the input."); 1115 assert(result.end == input.end, "'a-a' does not advance the index on '123'."); 1116 assert(result.children is null, "'a-a' has no children."); 1117 1118 result = ab(input); 1119 1120 assert(!result.successful, "'a-b' fails on '123'."); 1121 assert(result.matches == ["a char between 'a' and 'b'"], "'a-b' does not match '123'."); 1122 assert(result.input == input.input, "'a-b' does not change the input."); 1123 assert(result.end == input.end, "'a-b' does not advance the index on '123'."); 1124 assert(result.children is null, "'a-b' has no children."); 1125 1126 result = az(input); 1127 1128 assert(!result.successful, "'a-z' fails on '123'."); 1129 assert(result.matches == ["a char between 'a' and 'z'"], "'a-z' does not match '123'."); 1130 assert(result.input == input.input, "'a-z' does not change the input."); 1131 assert(result.end == input.end, "'a-z' does not advance the index on '123'."); 1132 assert(result.children is null, "'a-z' has no children."); 1133 1134 1135 foreach(dchar ch; 0..256*(128+64+16+8)) 1136 assert(allChars(to!string(ch)).successful); 1137 1138 assert(!allChars("").successful); 1139 } 1140 1141 /** 1142 eps matches the empty string (usually denoted by the Greek letter 'epsilon') and always succeeds. 1143 It's equivalent to literal!"" (for example, it creates a match of [""]: one match, the empty string). 1144 */ 1145 ParseTree eps(ParseTree p) 1146 { 1147 return ParseTree("eps", true, [""], p.input, p.end, p.end); 1148 } 1149 1150 ParseTree eps(string input) 1151 { 1152 return eps(ParseTree("",false,[], input)); 1153 } 1154 1155 string eps(GetName g) 1156 { 1157 return "eps"; 1158 } 1159 1160 unittest // 'eps' unit test 1161 { 1162 ParseTree input = ParseTree("input", true, [], "abcdef", 0,0, null); 1163 1164 ParseTree result = eps(input); 1165 1166 assert(result.name == "eps"); 1167 assert(result.successful, "'eps' succeeds on non-null inputs."); 1168 assert(result.matches == [""], "'eps' matches '' at the beginning."); 1169 assert(result.input == input.input, "'eps' does not change the input."); 1170 assert(result.end == input.end+0, "'eps' does not advance the index."); 1171 assert(result.children is null, "'eps' has no children."); 1172 1173 input.input = ""; 1174 1175 result = eps(input); 1176 assert(result.name == "eps"); 1177 assert(result.successful, "'eps' succeeds on empty strings."); 1178 assert(result.matches == [""], "'eps' matches '' at the beginning, even on empty strings."); 1179 assert(result.input == input.input, "'eps' does not change the input."); 1180 assert(result.end == input.end+0, "'eps' does not advance the index."); 1181 assert(result.children is null, "'eps' has no children."); 1182 } 1183 1184 1185 /** 1186 Basic operator: it matches if all its subrules (stored in the rules template parameter tuple) match 1187 the input successively. Its subrules parse trees are stored as its children and its matches field 1188 will contain all its subrules matches, in order. 1189 1190 ---- 1191 alias and!(literal!"abc", charRange!('a','z')) rule; // abc followed by any letter between a and z. 1192 ParseTree input = ParseTree("",false,[],"abcd"); // low-level plumbing, 1193 // the rules described here act on ParseTree's not strings. 1194 // It's equivalent to "abcd" as input 1195 auto result = rule(input); 1196 1197 assert(result.successful); // OK, 'abc' followed by 'd' 1198 assert(result.matches == ["abc", "d"]); // stores the matches 1199 assert(result.children.length == 2); // two children, the result of "abc" on "abcd" and the result of [a-z] on "d" 1200 1201 input.input = "abc"; // changing the input string; 1202 assert(!rule(input)).successful); // NOK, abc alone 1203 input.input = "ab"; 1204 assert(!rule(input)).successful); // NOK, does not begin by abc 1205 ---- 1206 1207 If it fails, the last children will contain the failed node. That way, when printing, as sort of diagnostic is given: 1208 1209 ---- 1210 alias and!(literal!"abc", charRange!('a','z')) rule; // 'abc[a-z]', aka 'abc' followed by any letter between 'a' and 'z'. 1211 ParseTree input = ParseTree("",false,[],"abc1"); // equivalent to "abc1" 1212 1213 auto failure = rule(input); 1214 writeln(failure); 1215 /+ 1216 writes: 1217 and (failure) 1218 +-literal(abc) [0, 3]["abc"] 1219 +-charRange(a,z) failure at line 0, col 3, after "abc" a char between 'a' and 'z', but got "1" 1220 +/ 1221 ---- 1222 1223 So we know the global 'and' failed, that the first sub-rule ('abc') succeeded on input[0..3] with "abc" 1224 and that the second subrule ('[a-z]') failed at position 3 (so, on '1'). 1225 */ 1226 template and(rules...) if (rules.length > 0) 1227 { 1228 1229 string ctfeGetNameAnd() 1230 { 1231 string name = "and!("; 1232 foreach(i,rule; rules) 1233 name ~= __traits(identifier, rule) // because using getName!(rule) causes an infinite loop during compilation 1234 // for recursive rules 1235 ~ (i < rules.length -1 ? ", " : ""); 1236 name ~= ")"; 1237 return name; 1238 } 1239 1240 enum name = ctfeGetNameAnd(); 1241 1242 ParseTree and(ParseTree p) 1243 { 1244 bool keepNode(ParseTree node) 1245 { 1246 return node.name.startsWith("keep!(") 1247 || ( !node.name.startsWith("discard!(") 1248 //&& !node.name.startsWith("drop!(") 1249 && node.matches !is null 1250 //&& node.begin != node.end 1251 ); 1252 } 1253 1254 version (tracer) 1255 { 1256 incTraceLevel(); 1257 } 1258 1259 ParseTree result = ParseTree(name, false, [], p.input, p.end, p.end, []); 1260 1261 foreach(i,r; rules) 1262 { 1263 version (tracer) 1264 { 1265 if (shouldTrace(getName!(r)(), p)) 1266 trace(traceMsg(result, name, getName!(r)())); 1267 } 1268 ParseTree temp = r(result); 1269 result.end = temp.end; 1270 if (temp.successful) 1271 { 1272 if (keepNode(temp)) 1273 { 1274 result.matches ~= temp.matches; 1275 if (temp.name.startsWith("drop!(")) 1276 {} 1277 else if (temp.name.startsWith("propagate!(")) 1278 result.children ~= temp.children; 1279 else 1280 result.children ~= temp; 1281 } 1282 } 1283 else 1284 { 1285 result.children ~= temp;// add the failed node, to indicate which failed 1286 if (temp.matches.length > 0) 1287 result.matches ~= temp.matches[$-1]; 1288 version (tracer) 1289 { 1290 if (shouldTrace(getName!(r)(), p)) 1291 trace(traceResultMsg(result, getName!(r)())); 1292 decTraceLevel(); 1293 } 1294 return result; // and end the parsing attempt right there 1295 } 1296 } 1297 result.successful = true; 1298 version (tracer) 1299 { 1300 foreach(i, r; rules) 1301 if (shouldTrace(getName!(r)(), p)) 1302 { 1303 trace(traceResultMsg(result, name)); 1304 break; 1305 } 1306 decTraceLevel(); 1307 } 1308 return result; 1309 } 1310 1311 ParseTree and(string input) 1312 { 1313 return .and!(rules)(ParseTree("",false,[],input)); 1314 } 1315 1316 string and(GetName g) 1317 { 1318 return name; 1319 } 1320 } 1321 1322 unittest // 'and' unit test 1323 { 1324 alias literal!"abc" abc; 1325 alias literal!"de" de; 1326 alias literal!"f" f; 1327 1328 alias and!(abc) abcAnd; 1329 alias and!(abc,de) abcde; 1330 alias and!(abc,de,f) abcdef; 1331 alias and!(eps, abc, eps, de, eps, f, eps) withEps; 1332 1333 //assert(getName!(abcAnd)() == `and!(literal!("abc"))`); 1334 //assert(getName!(abcde)() == `and!(literal!("abc"), literal!("de"))`); 1335 //assert(getName!(abcdef)() == `and!(literal!("abc"), literal!("de"), literal!("f"))`); 1336 1337 ParseTree input = ParseTree("",false,[], "abcdefghi"); 1338 1339 ParseTree result = abcAnd(input); 1340 1341 assert(result.successful, "and!('abc') parses 'abcdefghi'"); 1342 assert(result.matches == ["abc"], "and!('abc') matches 'abc' at the beginning of 'abcdefghi'"); 1343 assert(result.end == input.end+3, "and!('abc') advances the index by 'abc' size (3)."); 1344 assert(result.children == [abc(input)], "and!('abc') has one child: the one created by 'abc'."); 1345 1346 result = abcde(input); 1347 1348 assert(result.successful, "and!('abc','de') parses 'abcdefghi'"); 1349 assert(result.matches == ["abc","de"], 1350 "and!('abc','de') matches 'abc' and 'de' at the beginning of 'abcdefghi'"); 1351 assert(result.end == input.end+5, "and!('abc','de') advances the index by 3+2 positions."); 1352 assert(result.children == [abc(input), de(abc(input))], 1353 "and!('abc','de') has two children, created by 'abc' and 'de'."); 1354 1355 result = abcdef(input); 1356 1357 assert(result.successful, "and!('abc','de','f') parses 'abcdefghi'"); 1358 assert(result.matches == ["abc","de","f"], 1359 "and!('abc','de','f') matches 'abcdef' at the beginning of 'abcdefghi'"); 1360 assert(result.end == input.end+6, "and!('abc','de','f') advances the index by 3+2+1 positions."); 1361 assert(result.children == [abc(input), de(abc(input)), f(de(abc(input)))] 1362 , "and!('abc','de') has two children, created by 'abc' and 'de'."); 1363 1364 result = withEps(input); 1365 1366 assert(result.successful, "and!('','abc','','de','','f','') parses 'abcdefghi'"); 1367 assert(result.matches == ["","abc","","de","","f",""], 1368 "and!('','abc','','de','','f','') matches 'abcdef' at the beginning of 'abcdefghi'"); 1369 assert(result.end == input.end+6, 1370 "and!('','abc','','de','','f','') advances the index by 0+3+0+2+0+1+0 positions."); 1371 1372 input.input = "bcdefghi"; 1373 1374 result = abcdef(input); 1375 1376 assert(!result.successful, "'abc' 'de' 'f' fails on 'bcdefghi'"); 1377 //assert(result.matches is null, "'abc' 'de' 'f' has no match on 'bcdefghi'"); 1378 assert(result.end == input.end); 1379 assert(result.children == [abc(input)], "'abc' 'de' 'f' has one child (a failure) on 'bcdefghi'"); 1380 1381 input.input = "abc_efghi"; 1382 1383 result = abcdef(input); 1384 1385 assert(!result.successful, "'abc' 'de' 'f' fails on 'abc_efghi'"); 1386 //assert(result.matches == ["abc"], "Only the first match, from 'abc'."); 1387 assert(result.end == input.end+3, "Advances by 3 positions, due to 'abc'"); 1388 assert(result.children == [abc(input), de(abc(input))] 1389 , "'abc' 'de' 'f' has two child on 'abc_efghi', the one from 'abc' (success) and the one from 'de' (failure)."); 1390 } 1391 1392 template wrapAround(alias before, alias target, alias after) 1393 { 1394 ParseTree wrapAround(ParseTree p) 1395 { 1396 ParseTree temp = before(p); 1397 if (!temp.successful) 1398 return temp; 1399 1400 ParseTree result = target(temp); 1401 if (!result.successful) 1402 return result; 1403 result.begin = temp.begin; 1404 1405 temp = after(result); 1406 if (!temp.successful) 1407 return temp; 1408 1409 result.end = temp.end; 1410 return result; 1411 } 1412 1413 ParseTree wrapAround(string input) 1414 { 1415 return .wrapAround!(before, target, after)(ParseTree("",false,[],input)); 1416 } 1417 1418 string wrapAround(GetName g) 1419 { 1420 return "wrapAround!(" ~ getName!(before)() ~ 1421 ", " ~ getName!(target)() ~ 1422 ", " ~ getName!(after)() ~ ")"; 1423 } 1424 } 1425 1426 /** 1427 Basic operator: it matches if one of its subrules (stored in the rules template parameter tuple) match 1428 the input. The subrules are tested in order, from rules[0] to rules[$-1]. 1429 1430 The matching subrule parse trees is stored as its only child and its matches field 1431 will contain all the subrule matches, in order. 1432 1433 ---- 1434 alias or!(literal!"abc", charRange!('a','z')) rule; // abc or, failing that, any letter between a and z. 1435 ParseTree input = ParseTree("",false,[],"defg"); // low-level plumbing, the rules described here act on ParseTree's not strings. 1436 // It's equivalent to "defg" as input 1437 auto result = rule(input); 1438 1439 assert(result.successful); // OK 1440 assert(result.matches == ["d"]); // stores the (in this case) only match 1441 assert(result.children.length == 1); // one child, the result of "abc" or [a-z], depending on which rule succeeded. 1442 1443 input.input = "abc"; // changing the input string; 1444 assert(rule(input)).successful); // Still OK 1445 input.input = "1abc"; 1446 assert(!rule(input)).successful); // NOK, does not begin by abc nor by [a-z] 1447 ---- 1448 1449 If it fails, the last children will contain the failed node that matched furthest (longes match). 1450 That way, when printing, as sort of diagnostic is given: 1451 1452 ---- 1453 alias or!(literal!"abc", and!(literal!"ab", charRange!('0','9'))) rule; // 'abc' or 'ab[0-9]' 1454 ParseTree input = ParseTree("",false,[],"abd"); // equivalent to "abd" 1455 1456 auto failure = rule(input); 1457 writeln(failure); 1458 /+ 1459 or (failure) 1460 +-and (failure) 1461 +-literal(ab) [0, 2]["ab"] 1462 +-charRange(0,9) failure at line 0, col 2, after "ab" expected a char between '0' and '9', but got "d" 1463 +/ 1464 ---- 1465 1466 So we know 'or' failed, that the 'and' sub-rule had the longest match, matching 'ab' and failing for [0-9] on index 2. 1467 */ 1468 template or(rules...) if (rules.length > 0) 1469 { 1470 string ctfeGetNameOr() 1471 { 1472 string name = "or!("; 1473 foreach(i,rule; rules) 1474 name ~= getName!(rule) 1475 ~ (i < rules.length -1 ? ", " : ""); 1476 name ~= ")"; 1477 return name; 1478 } 1479 1480 enum name = ctfeGetNameOr(); 1481 1482 ParseTree or(ParseTree p) 1483 { 1484 // error-management 1485 ParseTree longestFail = ParseTree(name, false, [], p.input, p.end, 0); 1486 string[] errorStrings; 1487 size_t errorStringChars; 1488 string orErrorString; 1489 1490 ParseTree[rules.length] results; 1491 string[rules.length] names; 1492 size_t[rules.length] failedLength; 1493 size_t maxFailedLength; 1494 1495 version (tracer) 1496 { 1497 incTraceLevel(); 1498 } 1499 1500 // Real 'or' loop 1501 foreach(i,r; rules) 1502 { 1503 version (tracer) 1504 { 1505 if (shouldTrace(getName!(r)(), p)) 1506 trace(traceMsg(p, name, getName!(r)())); 1507 } 1508 ParseTree temp = r(p); 1509 if (temp.successful) 1510 { 1511 temp.children = [temp]; 1512 temp.name = name; 1513 version (tracer) 1514 { 1515 if (shouldTrace(getName!(r)(), p)) 1516 trace(traceResultMsg(temp, getName!(r)())); 1517 decTraceLevel(); 1518 } 1519 return temp; 1520 } 1521 else 1522 { 1523 version (tracer) 1524 { 1525 if (shouldTrace(getName!(r)(), p)) 1526 trace(traceResultMsg(temp, getName!(r)())); 1527 } 1528 enum errName = " (" ~ getName!(r)() ~")"; 1529 failedLength[i] = temp.end; 1530 if (temp.end >= longestFail.end) 1531 { 1532 maxFailedLength = temp.end; 1533 longestFail = temp; 1534 names[i] = errName; 1535 results[i] = temp; 1536 1537 if (temp.end == longestFail.end) 1538 errorStringChars += (temp.matches.length > 0 ? temp.matches[$-1].length : 0) + errName.length + 4; 1539 else 1540 errorStringChars = (temp.matches.length > 0 ? temp.matches[$-1].length : 0) + errName.length + 4; 1541 } 1542 // Else, this error parsed less input than another one: we discard it. 1543 } 1544 } 1545 version (tracer) 1546 { 1547 decTraceLevel(); 1548 } 1549 1550 1551 // All subrules failed, we will take the longest match as the result 1552 // If more than one node failed at the same (farthest) position, we concatenate their error messages 1553 1554 1555 char[] errString;// = new char[](errorStringChars); 1556 errString.length = errorStringChars; 1557 uint start = 0; 1558 foreach(i; 0..rules.length) 1559 { 1560 if (failedLength[i] == maxFailedLength && results[i].matches.length > 0) 1561 { 1562 auto temp = results[i]; 1563 auto len = temp.matches[$-1].length; 1564 auto nlen = names[i].length; 1565 errString[start .. start+len] = temp.matches[$-1][]; 1566 errString[start+len .. start+len+names[i].length] = names[i][]; 1567 errString[start+len+nlen .. start+len+nlen+4] = " or "; 1568 start += len + names[i].length + 4; 1569 } 1570 } 1571 orErrorString = cast(string)(errString[0..$-4]); 1572 1573 longestFail.matches = longestFail.matches.length == 0 ? [orErrorString] : 1574 longestFail.matches[0..$-1] // discarding longestFail error message 1575 ~ [orErrorString]; // and replacing it by the new, concatenated one. 1576 longestFail.name = name; 1577 longestFail.begin = p.end; 1578 return longestFail; 1579 } 1580 1581 ParseTree or(string input) 1582 { 1583 return .or!(rules)(ParseTree("",false,[],input)); 1584 } 1585 1586 string or(GetName g) 1587 { 1588 return name; 1589 } 1590 } 1591 1592 unittest // 'or' unit test 1593 { 1594 alias charRange!('a','b') ab; 1595 alias charRange!('c','d') cd; 1596 1597 alias or!(ab) abOr; 1598 alias or!(ab,cd) abOrcd; 1599 1600 assert(getName!(ab)() == "charRange!('a','b')"); 1601 assert(getName!(cd)() == "charRange!('c','d')"); 1602 1603 ParseTree input = ParseTree("",false,[], "abcdefghi"); 1604 1605 ParseTree result = abOr(input); 1606 1607 assert(result.name == "or!(charRange!('a','b'))", "or name test."); 1608 assert(result.successful, "or!([a-b]) parses 'abcdefghi'"); 1609 assert(result.matches == ["a"], "or!([a-b]) matches 'a' at the beginning of 'abcdefghi'"); 1610 assert(result.end == input.end+1, "or!([a-b]) advances the index by 'a' size (1)."); 1611 assert(result.children == [ab(input)], "or!([a-b]) has one child: the one created by '[a-b]'."); 1612 1613 result = abOrcd(input); 1614 1615 assert(result.name == "or!(charRange!('a','b'), charRange!('c','d'))", "or name test."); 1616 assert(result.successful, "or!([a-b],[c-d]) parses 'abcdefghi'"); 1617 assert(result.matches == ["a"], "or!([a-b],[c-d]) matches 'a' at the beginning of 'abcdefghi'"); 1618 assert(result.end == input.end+1, "or!([a-b],[c-d]) advances the index by 1 position."); 1619 1620 assert(result.children == [ab(input)], "or!([a-b],[c-d]) has one child, created by [a-b]."); 1621 1622 input.input = "cdefghi"; 1623 1624 result = abOrcd(input); 1625 1626 assert(result.name == "or!(charRange!('a','b'), charRange!('c','d'))", "or name test."); 1627 assert(result.successful, "or!([a-b],[c-d]) parses 'cdefghi'"); 1628 assert(result.matches == ["c"], "or!([a-b],[c-d]) matches 'c' at the beginning of 'cdefghi'"); 1629 assert(result.end == input.end+1, "or!([a-b],[c-d]) advances the index by 1 position."); 1630 1631 assert(result.children == [cd(input)], "or!([a-b],[c-d]) has one child, created by [c-d]."); 1632 1633 input.input = "_abcdefghi"; 1634 1635 result = abOrcd(input); 1636 1637 assert(!result.successful, "or!([a-b],[c-d]) fails on '_abcdefghi'"); 1638 assert(result.end == input.end+0, "or!([a-b],[c-d]) does not advance the index."); 1639 assert(result.matches == 1640 [ "a char between 'a' and 'b' (charRange!('a','b')) or a char between 'c' and 'd' (charRange!('c','d'))"] 1641 , "or!([a-b],[c-d]) error message. |" ~ result.matches[0]~ "|"); 1642 1643 input.input = ""; 1644 1645 result = abOrcd(input); 1646 1647 assert(!result.successful, "or!([a-b],[c-d]) fails on and empty input"); 1648 assert(result.end == input.end+0, "or!([a-b],[c-d]) does not advance the index."); 1649 assert(result.matches == 1650 [ "a char between 'a' and 'b' (charRange!('a','b')) or a char between 'c' and 'd' (charRange!('c','d'))"] 1651 , "or!([a-b],[c-d]) error message."); 1652 } 1653 1654 /** 1655 Basic operator: it matches if one of its subrules (stored in the rules template parameter tuple) match 1656 the input. All subrules are tested and the one producing the longest match is taken. 1657 1658 The longest matching subrule's parse tree is stored as its only child and its matches field 1659 will contain all the subrule matches, in order. 1660 */ 1661 template longest_match(rules...) if (rules.length > 0) 1662 { 1663 string ctfeGetNameOr() 1664 { 1665 string name = "longest_match!("; 1666 foreach(i,rule; rules) 1667 name ~= getName!(rule) 1668 ~ (i < rules.length -1 ? ", " : ""); 1669 name ~= ")"; 1670 return name; 1671 } 1672 1673 enum name = ctfeGetNameOr(); 1674 1675 ParseTree longest_match(ParseTree p) 1676 { 1677 // error-management 1678 ParseTree longest, longestFail = ParseTree(name, false, [], p.input, p.end, 0); 1679 string[] errorStrings; 1680 size_t errorStringChars; 1681 string orErrorString; 1682 1683 ParseTree[rules.length] results; 1684 string[rules.length] names; 1685 size_t[rules.length] failedLength; 1686 size_t maxFailedLength; 1687 1688 version (tracer) 1689 { 1690 incTraceLevel(); 1691 } 1692 1693 // Real 'longest_match' loop 1694 foreach(i,r; rules) 1695 { 1696 version (tracer) 1697 { 1698 if (shouldTrace(getName!(r)(), p)) 1699 trace(traceMsg(p, name, getName!(r)())); 1700 } 1701 ParseTree temp = r(p); 1702 version (tracer) 1703 { 1704 if (shouldTrace(getName!(r)(), p)) 1705 trace(traceResultMsg(temp, getName!(r)())); 1706 } 1707 1708 if (temp.successful) 1709 { 1710 if (temp.end > longest.end) 1711 longest = temp; 1712 // Else, this rule parsed less input than another one: we discard it. 1713 } 1714 else 1715 { 1716 enum errName = " (" ~ getName!(r)() ~")"; 1717 failedLength[i] = temp.end; 1718 if (temp.end >= longestFail.end) 1719 { 1720 maxFailedLength = temp.end; 1721 longestFail = temp; 1722 names[i] = errName; 1723 results[i] = temp; 1724 1725 if (temp.end == longestFail.end) 1726 errorStringChars += (temp.matches.length > 0 ? temp.matches[$-1].length : 0) + errName.length + 4; 1727 else 1728 errorStringChars = (temp.matches.length > 0 ? temp.matches[$-1].length : 0) + errName.length + 4; 1729 } 1730 // Else, this error parsed less input than another one: we discard it. 1731 } 1732 } 1733 version (tracer) 1734 { 1735 decTraceLevel(); 1736 } 1737 if (longest.successful) 1738 { 1739 longest.children = [longest]; 1740 longest.name = name; 1741 return longest; 1742 } 1743 1744 // All subrules failed, we will take the longest match as the result 1745 // If more than one node failed at the same (farthest) position, we concatenate their error messages 1746 1747 char[] errString;// = new char[](errorStringChars); 1748 errString.length = errorStringChars; 1749 uint start = 0; 1750 foreach(i; 0..rules.length) 1751 { 1752 if (failedLength[i] == maxFailedLength && results[i].matches.length > 0) 1753 { 1754 auto temp = results[i]; 1755 auto len = temp.matches[$-1].length; 1756 auto nlen = names[i].length; 1757 errString[start .. start+len] = temp.matches[$-1][]; 1758 errString[start+len .. start+len+names[i].length] = names[i][]; 1759 errString[start+len+nlen .. start+len+nlen+4] = " or "; 1760 start += len + names[i].length + 4; 1761 } 1762 } 1763 orErrorString = cast(string)(errString[0..$-4]); 1764 1765 longestFail.matches = longestFail.matches.length == 0 ? [orErrorString] : 1766 longestFail.matches[0..$-1] // discarding longestFail error message 1767 ~ [orErrorString]; // and replacing it by the new, concatenated one. 1768 longestFail.name = name; 1769 longestFail.begin = p.end; 1770 return longestFail; 1771 } 1772 1773 ParseTree longest_match(string input) 1774 { 1775 return .or!(rules)(ParseTree("",false,[],input)); 1776 } 1777 1778 string longest_match(GetName g) 1779 { 1780 return name; 1781 } 1782 } 1783 1784 unittest // 'longest_match' unit test 1785 { 1786 alias charRange!('a','b') ab; 1787 alias charRange!('c','d') cd; 1788 1789 alias longest_match!(ab) abOr; 1790 alias longest_match!(ab,cd) abOrcd; 1791 1792 assert(getName!(ab)() == "charRange!('a','b')"); 1793 assert(getName!(cd)() == "charRange!('c','d')"); 1794 1795 // These are the same tests as for 'or': 1796 1797 ParseTree input = ParseTree("",false,[], "abcdefghi"); 1798 1799 ParseTree result = abOr(input); 1800 1801 assert(result.name == "longest_match!(charRange!('a','b'))", "or name test."); 1802 assert(result.successful, "longest_match!([a-b]) parses 'abcdefghi'"); 1803 assert(result.matches == ["a"], "longest_match!([a-b]) matches 'a' at the beginning of 'abcdefghi'"); 1804 assert(result.end == input.end+1, "longest_match!([a-b]) advances the index by 'a' size (1)."); 1805 assert(result.children == [ab(input)], "longest_match!([a-b]) has one child: the one created by '[a-b]'."); 1806 1807 result = abOrcd(input); 1808 1809 assert(result.name == "longest_match!(charRange!('a','b'), charRange!('c','d'))", "or name test."); 1810 assert(result.successful, "longest_match!([a-b],[c-d]) parses 'abcdefghi'"); 1811 assert(result.matches == ["a"], "longest_match!([a-b],[c-d]) matches 'a' at the beginning of 'abcdefghi'"); 1812 assert(result.end == input.end+1, "longest_match!([a-b],[c-d]) advances the index by 1 position."); 1813 1814 assert(result.children == [ab(input)], "longest_match!([a-b],[c-d]) has one child, created by [a-b]."); 1815 1816 input.input = "cdefghi"; 1817 1818 result = abOrcd(input); 1819 1820 assert(result.name == "longest_match!(charRange!('a','b'), charRange!('c','d'))", "or name test."); 1821 assert(result.successful, "longest_match!([a-b],[c-d]) parses 'cdefghi'"); 1822 assert(result.matches == ["c"], "longest_match!([a-b],[c-d]) matches 'c' at the beginning of 'cdefghi'"); 1823 assert(result.end == input.end+1, "longest_match!([a-b],[c-d]) advances the index by 1 position."); 1824 1825 assert(result.children == [cd(input)], "longest_match!([a-b],[c-d]) has one child, created by [c-d]."); 1826 1827 input.input = "_abcdefghi"; 1828 1829 result = abOrcd(input); 1830 1831 assert(!result.successful, "longest_match!([a-b],[c-d]) fails on '_abcdefghi'"); 1832 assert(result.end == input.end+0, "longest_match!([a-b],[c-d]) does not advance the index."); 1833 assert(result.matches == 1834 [ "a char between 'a' and 'b' (charRange!('a','b')) or a char between 'c' and 'd' (charRange!('c','d'))"] 1835 , "longest_match!([a-b],[c-d]) error message. |" ~ result.matches[0]~ "|"); 1836 1837 input.input = ""; 1838 1839 result = abOrcd(input); 1840 1841 assert(!result.successful, "longest_match!([a-b],[c-d]) fails on and empty input"); 1842 assert(result.end == input.end+0, "longest_match!([a-b],[c-d]) does not advance the index."); 1843 assert(result.matches == 1844 [ "a char between 'a' and 'b' (charRange!('a','b')) or a char between 'c' and 'd' (charRange!('c','d'))"] 1845 , "longest_match!([a-b],[c-d]) error message."); 1846 1847 // Now test longest match behaviour: 1848 1849 input.input = "aaaccccb"; 1850 result = or!(oneOrMore!(literal!("a")), and!(oneOrMore!(literal!("a")), literal!("cc")))(input); 1851 assert(result.matches == ["a", "a", "a"], "or! takes the first matching rule."); 1852 result = longest_match!(oneOrMore!(literal!("a")), and!(oneOrMore!(literal!("a")), literal!("cc")))(input); 1853 assert(result.matches == ["a", "a", "a", "cc"], "longest_match! takes the longest matching rule."); 1854 } 1855 1856 1857 /** 1858 Compile-time switch trie from Brian Schott 1859 */ 1860 class Trie(V) : TrieNode!(V) 1861 { 1862 /** 1863 * Adds the given value to the trie with the given key 1864 */ 1865 void add(string key, V value) pure 1866 { 1867 TrieNode!(V) current = this; 1868 foreach(dchar keyPart; key) 1869 { 1870 if ((keyPart in current.children) is null) 1871 { 1872 auto node = new TrieNode!(V); 1873 current.children[keyPart] = node; 1874 current = node; 1875 } 1876 else 1877 current = current.children[keyPart]; 1878 } 1879 current.value = value; 1880 } 1881 } 1882 1883 class TrieNode(V) 1884 { 1885 V value; 1886 TrieNode!(V)[dchar] children; 1887 } 1888 1889 string printCaseStatements(V)(TrieNode!(V) node, string indentString) 1890 { 1891 string s = ""; 1892 string idnt = indentString; 1893 1894 void incIndent() { idnt ~= " "; } 1895 void decIndent() { idnt = idnt[2..$]; } 1896 1897 void put(string k) { s ~= idnt ~ k; } 1898 void append(string k) { s ~= k;} 1899 1900 foreach(dchar k, TrieNode!(V) v; node.children) 1901 { 1902 put("case '"); 1903 switch(k) 1904 { 1905 case '\n': append("\\n"); break; 1906 case '\t': append("\\t"); break; 1907 case '\r': append("\\r"); break; 1908 case 92: append("\\"); break; 1909 default: append(to!string(k)); 1910 } 1911 append("':\n"); 1912 incIndent(); 1913 1914 put("temp.end++;\n"); 1915 if (v.children.length > 0) 1916 { 1917 put("if (temp.end >= temp.input.length)\n"); 1918 put("{\n"); 1919 incIndent(); 1920 1921 if (node.children[k].value.length != 0) 1922 put("return ParseTree(name, true, [`" ~ node.children[k].value ~ "`], temp.input, p.end, temp.end)"); 1923 else 1924 put("return ParseTree(name, false, [failString], p.input, p.end, p.end)"); 1925 1926 append(";\n"); 1927 decIndent(); 1928 1929 put("}\n"); 1930 put("switch (temp.input[temp.end])\n"); 1931 put("{\n"); 1932 1933 incIndent(); 1934 append(printCaseStatements(v, idnt)); 1935 1936 put("default:\n"); 1937 incIndent(); 1938 if (v.value.length != 0) 1939 put("return ParseTree(name, true, [`" ~ v.value ~ "`], temp.input, p.end, temp.end)"); 1940 else 1941 put("return ParseTree(name, false, [failString], p.input, p.end, p.end)"); 1942 append(";\n"); 1943 decIndent(); 1944 decIndent(); 1945 put("}\n"); 1946 } 1947 else 1948 { 1949 if (v.value.length != 0) 1950 put("return ParseTree(name, true, [`" ~ v.value ~ "`], temp.input, p.end, temp.end)"); 1951 else 1952 put("return ParseTree(name, false, [failString], p.input, p.end, p.end)"); 1953 append(";\n"); 1954 } 1955 } 1956 return s; 1957 } 1958 1959 string generateCaseTrie(string[] args ...) 1960 { 1961 auto t = new Trie!(string); 1962 foreach(arg; args) 1963 { 1964 t.add(arg, arg); 1965 } 1966 return printCaseStatements(t, ""); 1967 } 1968 1969 /** 1970 or special case for literal list ("abstract"/"alias"/...) 1971 */ 1972 template keywords(kws...) if (kws.length > 0) 1973 { 1974 string ctfeGetNameKeywords() 1975 { 1976 string name= "keywords!("; 1977 foreach(i,kw;kws) 1978 name ~= "\"" ~ kw ~ "\""~ (i < kws.length -1 ? ", " : ""); 1979 name ~= ")"; 1980 return name; 1981 } 1982 1983 string ctfeConcatKeywords() 1984 { 1985 string s = "["; 1986 foreach(i, k; kws) 1987 { 1988 s ~= "\"" ~ k ~ "\""; 1989 if (i < kws.length - 1) 1990 s ~= ", "; 1991 } 1992 return s ~= "]"; 1993 } 1994 1995 enum name = ctfeGetNameKeywords(); 1996 enum failString = "one among " ~ ctfeConcatKeywords(); 1997 1998 ParseTree keywords(ParseTree p) 1999 { 2000 string keywordCode(string[] keywords) 2001 { 2002 string result; 2003 foreach(kw; keywords) 2004 result ~= "if (p.end+"~to!string(kw.length) ~ " <= p.input.length " 2005 ~" && p.input[p.end..p.end+"~to!string(kw.length)~"]==`" 2006 ~kw~"`) return ParseTree(`" 2007 ~name~"`,true,[`"~kw~"`],p.input,p.end,p.end+" 2008 ~to!string(kw.length)~");\n"; 2009 2010 result ~= "return ParseTree(`"~name~"`,false,[`" ~ failString ~ "`],p.input,p.end,p.end);"; 2011 2012 return result; 2013 } 2014 2015 static if (KEYWORDS == IFCHAIN) 2016 { 2017 mixin(keywordCode([kws])); 2018 } 2019 else static if (KEYWORDS == TRIE) 2020 { 2021 auto temp = p; 2022 if (p.end < p.input.length) // is this conditional required? 2023 { 2024 switch(p.input[p.end]) 2025 { 2026 mixin(generateCaseTrie([kws])); 2027 mixin("default: return ParseTree(`"~name~"`,false,[`" ~ failString ~ "`],p.input,p.end,p.end);"); 2028 } 2029 } 2030 else 2031 { 2032 mixin("return ParseTree(`"~name~"`,false,[`" ~ failString ~ "`],p.input,p.end,p.end);"); 2033 } 2034 } 2035 } 2036 2037 ParseTree keywords(string input) 2038 { 2039 return .keywords!(kws)(ParseTree("",false,[],input)); 2040 } 2041 2042 string keywords(GetName g) 2043 { 2044 return name; 2045 } 2046 } 2047 2048 unittest 2049 { 2050 alias keywords!("abc","de","f") kw; 2051 2052 assert(getName!(kw)() == `keywords!("abc", "de", "f")`); 2053 2054 ParseTree input = ParseTree("",false,[],"abcd"); 2055 2056 ParseTree result = kw(input); 2057 2058 assert(result.name == `keywords!("abc", "de", "f")`, "keywords name test."); 2059 assert(result.successful, "keywords success on `abcd`"); 2060 assert(result.matches == ["abc"], "keywords matches `abc` on `abcd`"); 2061 assert(result.end == input.end+3, "keywords advances the index by 3 positions."); 2062 assert(result.children is null, "No children for `keywords`."); 2063 2064 input.input = "def"; 2065 2066 result = kw(input); 2067 2068 assert(result.successful, "keywords success on `def`"); 2069 assert(result.matches == ["de"], "keywords matches `de` on `def`"); 2070 assert(result.end == input.end+2, "keywords advances the index by 2 positions."); 2071 assert(result.children is null, "No children for `keywords`."); 2072 2073 2074 input.input = "ab_def"; 2075 2076 result = kw(input); 2077 2078 assert(!result.successful, "keywords fails on `ab_def`."); 2079 assert(result.matches == [`one among ["abc", "de", "f"]`], "keywords error message." ~ result.matches[0]); 2080 assert(result.end == input.end, "keywords does not advance the index."); 2081 assert(result.children is null, "No children for `keywords`."); 2082 2083 input.input = ""; 2084 2085 result = kw(input); 2086 2087 assert(!result.successful, "keywords fails on an empty input."); 2088 assert(result.matches == [`one among ["abc", "de", "f"]`], "keywords error message."); 2089 assert(result.end == input.end, "keywords does not advance the index."); 2090 assert(result.children is null, "No children for `keywords`."); 2091 } 2092 2093 2094 /** 2095 Tries to match subrule 'r' zero or more times. It always succeeds, since if 'r' fails 2096 from the very beginning, it matched 'r' zero times... 2097 2098 Its matches are those of its subrules (they might be different for each match) and its 2099 children are all the parse trees returned by the successive application of 'r'. 2100 2101 ---- 2102 alias zeroOrMore!(or!(literal!"abc", literal!"d")) rule; // in PEG-speak: '("abc" / "d")*' 2103 ParseTree input = ParseTree("",false,[], "abcdabce"); 2104 2105 ParseTree result = rule(input); 2106 2107 assert(result.successful); 2108 assert(result.matches == ["abc", "d", "abc"]); 2109 assert(result.end == 7); // matched "abcdabce"[0..7] => "abcdabc". "e" at the end is not part of the parse tree. 2110 assert(result.children.length == 3); 2111 writeln(result); 2112 /+ 2113 writes: 2114 zeroOrMore [0, 7]["abc", "d", "abc"] 2115 +-or [0, 3]["abc"] 2116 | +-literal(abc) [0, 3]["abc"] 2117 +-or [3, 4]["d"] 2118 | +-literal(d) [3, 4]["d"] 2119 +-or [4, 7]["abc"] 2120 +-literal(abc) [4, 7]["abc"] 2121 +/ 2122 ---- 2123 2124 So we know the first child used the 'literal!"abc"' sub-rule and matched input[0..3]. 2125 The second matched input[3..4] and the third input[4..7]. 2126 2127 ---- 2128 input = ParseTree("",false,[], "efgh"); 2129 result = rule(input); 2130 assert(result.successful); // succeed, even though all patterns failed. 2131 assert(result.children.length == 0); 2132 ---- 2133 */ 2134 template zeroOrMore(alias r) 2135 { 2136 enum name = "zeroOrMore!(" ~ getName!(r) ~ ")"; 2137 2138 ParseTree zeroOrMore(ParseTree p) 2139 { 2140 auto result = ParseTree(name, true, [], p.input, p.end, p.end); 2141 version (tracer) 2142 { 2143 incTraceLevel(); 2144 if (shouldTrace(getName!(r)(), p)) 2145 trace(traceMsg(result, name, getName!(r)())); 2146 } 2147 auto temp = r(result); 2148 while(temp.successful 2149 && (temp.begin < temp.end // To avoid infinite loops on epsilon-matching rules 2150 || temp.name.startsWith("discard!("))) 2151 { 2152 result.matches ~= temp.matches; 2153 result.children ~= temp; 2154 result.end = temp.end; 2155 version (tracer) 2156 { 2157 if (shouldTrace(getName!(r)(), p)) 2158 trace(traceMsg(result, name, getName!(r)())); 2159 } 2160 temp = r(result); 2161 } 2162 result.successful = true; 2163 version (tracer) 2164 { 2165 if (shouldTrace(getName!(r)(), p)) 2166 trace(traceResultMsg(result, getName!(r)())); 2167 decTraceLevel(); 2168 } 2169 return result; 2170 } 2171 2172 ParseTree zeroOrMore(string input) 2173 { 2174 return .zeroOrMore!(r)(ParseTree("",false,[],input)); 2175 } 2176 2177 string zeroOrMore(GetName g) 2178 { 2179 return name; 2180 } 2181 } 2182 2183 unittest // 'zeroOrMore' unit test 2184 { 2185 alias literal!"a" a; 2186 alias literal!"abc" abc; 2187 alias charRange!('a','z') az; 2188 2189 alias zeroOrMore!(a) as; 2190 alias zeroOrMore!(abc) abcs; 2191 alias zeroOrMore!(az) azs; 2192 2193 assert(getName!(as)() == `zeroOrMore!(literal!("a"))`); 2194 assert(getName!(abcs)() == `zeroOrMore!(literal!("abc"))`); 2195 assert(getName!(azs)() == `zeroOrMore!(charRange!('a','z'))`); 2196 2197 assert(as("").successful); 2198 assert(as("a").successful); 2199 assert(as("aa").successful); 2200 assert(as("aaa").successful); 2201 assert(as("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa").successful); 2202 assert(as("b").successful); 2203 2204 ParseTree result = as("aaa"); 2205 2206 assert(result.name == `zeroOrMore!(literal!("a"))`); 2207 assert(result.successful); 2208 assert(result.matches == ["a","a","a"]); 2209 assert(result.begin == 0); 2210 assert(result.end == 3); 2211 assert(result.children.length == 3); 2212 assert(result.children == [ a("aaa"), a(a("aaa")), a(a(a("aaa")))]); 2213 2214 assert(abcs("").successful); 2215 assert(abcs("abc").successful); 2216 assert(abcs("abcabc").successful); 2217 assert(abcs("abcabcabc").successful); 2218 assert(abcs("abcabcabcabcabcabcabcabcabcabcabcabcabcabcabc").successful); 2219 assert(abcs("ab").successful); 2220 2221 result = abcs("abcabcabc"); 2222 2223 assert(result.name == `zeroOrMore!(literal!("abc"))`); 2224 assert(result.successful); 2225 assert(result.matches == ["abc","abc","abc"]); 2226 assert(result.begin == 0); 2227 assert(result.end == 3*3); 2228 assert(result.children.length == 3); 2229 assert(result.children == [ abc("abcabcabc"), abc(abc("abcabcabc")), abc(abc(abc("abcabcabc")))]); 2230 2231 assert(azs("").successful); 2232 assert(azs("a").successful); 2233 assert(azs("abc").successful); 2234 assert(azs("abcdefghijklmnoqrstuvwxyz").successful); 2235 assert(azs("abcdefghijklmnoqrstuvwxyz 1234567890").successful); 2236 2237 result = azs("abc"); 2238 2239 assert(result.name == `zeroOrMore!(charRange!('a','z'))`); 2240 assert(result.successful); 2241 assert(result.matches == ["a","b","c"]); 2242 assert(result.begin == 0); 2243 assert(result.end == 3); 2244 assert(result.children.length == 3); 2245 assert(result.children == [ az("abc"), az(az("abc")), az(az(az("abc")))]); 2246 } 2247 2248 /** 2249 Tries to match subrule 'r' one or more times. If 'r' fails 2250 from the very beginning, it fails and else succeeds. 2251 2252 Its matches are those of its subrules (they might be different for each match) and its 2253 children are all the parse trees returned by the successive application of 'r'. 2254 2255 ---- 2256 alias oneOrMore!(or!(literal!"abc", literal!"d")) rule; // in PEG-speak: '("abc" / "d")*' 2257 ParseTree input = ParseTree("",false,[], "abcdabce"); 2258 2259 ParseTree result = rule(input); 2260 2261 assert(result.successful); 2262 assert(result.matches == ["abc", "d", "abc"]); 2263 assert(result.end == 7); // matched "abcdabce"[0..7] => "abcdabc". "e" at the end is not part of the parse tree. 2264 assert(result.children.length == 3); 2265 writeln(result); 2266 /+ 2267 writes: 2268 oneOrMore [0, 7]["abc", "d", "abc"] 2269 +-or [0, 3]["abc"] 2270 | +-literal(abc) [0, 3]["abc"] 2271 +-or [3, 4]["d"] 2272 | +-literal(d) [3, 4]["d"] 2273 +-or [4, 7]["abc"] 2274 +-literal(abc) [4, 7]["abc"] 2275 +/ 2276 ---- 2277 2278 So we know the first child used the 'literal!"abc"' sub-rule and matched input[0..3]. 2279 The second matched input[3..4] and the third input[4..7]. 2280 2281 ---- 2282 input = ParseTree("",false,[], "efgh"); 2283 result = rule(input); 2284 assert(!result.successful); // fails, since it failed on the first try. 2285 ---- 2286 */ 2287 template oneOrMore(alias r) 2288 { 2289 enum name = "oneOrMore!(" ~ getName!(r) ~ ")"; 2290 2291 ParseTree oneOrMore(ParseTree p) 2292 { 2293 auto result = ParseTree(name, false, [], p.input, p.end, p.end); 2294 version (tracer) 2295 { 2296 incTraceLevel(); 2297 if (shouldTrace(getName!(r)(), p)) 2298 trace(traceMsg(result, name, getName!(r)())); 2299 } 2300 auto temp = r(result); 2301 2302 if (!temp.successful) 2303 { 2304 result.matches = temp.matches; 2305 result.children = [temp]; 2306 result.end = temp.end; 2307 } 2308 else 2309 { 2310 while( temp.successful 2311 && (temp.begin < temp.end // To avoid infinite loops on epsilon-matching rules 2312 || temp.name.startsWith("discard!("))) 2313 { 2314 result.matches ~= temp.matches; 2315 result.children ~= temp; 2316 result.end = temp.end; 2317 version (tracer) 2318 { 2319 if (shouldTrace(getName!(r)(), p)) 2320 trace(traceMsg(result, name, getName!(r)())); 2321 } 2322 temp = r(result); 2323 } 2324 result.successful = true; 2325 } 2326 version (tracer) 2327 { 2328 if (shouldTrace(getName!(r)(), p)) 2329 trace(traceResultMsg(result, getName!(r)())); 2330 decTraceLevel(); 2331 } 2332 return result; 2333 } 2334 2335 ParseTree oneOrMore(string input) 2336 { 2337 return .oneOrMore!(r)(ParseTree("",false,[],input)); 2338 } 2339 2340 string oneOrMore(GetName g) 2341 { 2342 return name; 2343 } 2344 } 2345 2346 unittest // 'oneOrMore' unit test 2347 { 2348 alias literal!"a" a; 2349 alias literal!"abc" abc; 2350 alias charRange!('a','z') az; 2351 2352 alias oneOrMore!(a) as; 2353 alias oneOrMore!(abc) abcs; 2354 alias oneOrMore!(az) azs; 2355 2356 assert(getName!(as)() == `oneOrMore!(literal!("a"))`); 2357 assert(getName!(abcs)() == `oneOrMore!(literal!("abc"))`); 2358 assert(getName!(azs)() == `oneOrMore!(charRange!('a','z'))`); 2359 2360 assert(!as("").successful); 2361 assert(as("a").successful); 2362 assert(as("aa").successful); 2363 assert(as("aaa").successful); 2364 assert(as("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa").successful); 2365 assert(!as("b").successful); 2366 2367 ParseTree result = as("aaa"); 2368 2369 assert(result.name == `oneOrMore!(literal!("a"))`); 2370 assert(result.successful); 2371 assert(result.matches == ["a","a","a"]); 2372 assert(result.begin == 0); 2373 assert(result.end == 3); 2374 assert(result.children.length == 3); 2375 assert(result.children == [ a("aaa"), a(a("aaa")), a(a(a("aaa")))]); 2376 2377 assert(!abcs("").successful); 2378 assert(abcs("abc").successful); 2379 assert(abcs("abcabc").successful); 2380 assert(abcs("abcabcabc").successful); 2381 assert(abcs("abcabcabcabcabcabcabcabcabcabcabcabcabcabcabc").successful); 2382 assert(!abcs("ab").successful); 2383 2384 result = abcs("abcabcabc"); 2385 2386 assert(result.name == `oneOrMore!(literal!("abc"))`); 2387 assert(result.successful); 2388 assert(result.matches == ["abc","abc","abc"]); 2389 assert(result.begin == 0); 2390 assert(result.end == 3*3); 2391 assert(result.children.length == 3); 2392 assert(result.children == [ abc("abcabcabc"), abc(abc("abcabcabc")), abc(abc(abc("abcabcabc")))]); 2393 2394 assert(!azs("").successful); 2395 assert(azs("a").successful); 2396 assert(azs("abc").successful); 2397 assert(azs("abcdefghijklmnoqrstuvwxyz").successful); 2398 assert(azs("abcdefghijklmnoqrstuvwxyz 1234567890").successful); 2399 assert(!azs(".").successful); 2400 2401 result = azs("abc"); 2402 2403 assert(result.name == `oneOrMore!(charRange!('a','z'))`); 2404 assert(result.successful); 2405 assert(result.matches == ["a","b","c"]); 2406 assert(result.begin == 0); 2407 assert(result.end == 3); 2408 assert(result.children.length == 3); 2409 assert(result.children == [ az("abc"), az(az("abc")), az(az(az("abc")))]); 2410 } 2411 2412 /** 2413 Given a subrule 'r', represents the expression 'r?'. It tries to match 'r' and if this matches 2414 successfully, it returns this match. If 'r' failed, 'r?' is still a success, but without any child nor match. 2415 2416 ---- 2417 alias option!(literal!"abc") rule; // Aka '"abc"?' 2418 ParseTree input = ParseTree("",false,[],"abcd"); 2419 2420 ParseTree result = rule(input); 2421 assert(result.successful); 2422 assert(result.matches == ["abc"]); 2423 assert(result.children.length == 1); 2424 assert(result.children[0] == literal!"abc"(input)); 2425 ---- 2426 */ 2427 template option(alias r) 2428 { 2429 enum name = "option!(" ~ getName!(r) ~ ")"; 2430 2431 ParseTree option(ParseTree p) 2432 { 2433 version (tracer) 2434 { 2435 if (shouldTrace(getName!(r)(), p)) 2436 trace(traceMsg(p, name, getName!(r)())); 2437 } 2438 ParseTree result = r(p); 2439 if (result.successful) 2440 return ParseTree(name, true, result.matches, result.input, result.begin, result.end, [result]); 2441 else 2442 return ParseTree(name, true, [], p.input, p.end, p.end, null); 2443 } 2444 2445 ParseTree option(string input) 2446 { 2447 return .option!(r)(ParseTree("",false,[],input)); 2448 } 2449 2450 string option(GetName g) 2451 { 2452 return name; 2453 } 2454 } 2455 2456 unittest // 'option' unit test 2457 { 2458 alias literal!"a" a; 2459 alias literal!"abc" abc; 2460 2461 alias option!(a) a_; 2462 alias option!(abc) abc_; 2463 2464 assert(getName!(a_)() == `option!(literal!("a"))`); 2465 assert(getName!(abc_)() == `option!(literal!("abc"))`); 2466 2467 assert(a_("").successful); 2468 assert(a_("a").successful); 2469 assert(a_("aa").successful); 2470 assert(a_("b").successful); 2471 2472 ParseTree result = a_("a"); 2473 2474 assert(result.name == `option!(literal!("a"))`); 2475 assert(result.successful); 2476 assert(result.matches == ["a"]); 2477 assert(result.begin == 0); 2478 assert(result.end == 1); 2479 assert(result.children.length == 1); 2480 assert(result.children == [ a("a")]); 2481 2482 result = a_(""); 2483 2484 assert(result.name == `option!(literal!("a"))`); 2485 assert(result.successful); 2486 assert(result.matches is null); 2487 assert(result.begin == 0); 2488 assert(result.end == 0); 2489 assert(result.children.length == 0); 2490 2491 2492 assert(abc_("").successful); 2493 assert(abc_("abc").successful); 2494 assert(abc_("abcabc").successful); 2495 assert(abc_("ab").successful); 2496 2497 result = abc_("abcdef"); 2498 2499 assert(result.name == `option!(literal!("abc"))`); 2500 assert(result.successful); 2501 assert(result.matches == ["abc"]); 2502 assert(result.begin == 0); 2503 assert(result.end == 3); 2504 assert(result.children.length == 1); 2505 assert(result.children == [ abc("abcdef")]); 2506 2507 result = abc_("def"); 2508 2509 assert(result.name == `option!(literal!("abc"))`); 2510 assert(result.successful); 2511 assert(result.matches is null); 2512 assert(result.begin == 0); 2513 assert(result.end == 0); 2514 assert(result.children.length == 0); 2515 } 2516 2517 /** 2518 Tries 'r' on the input. If it succeeds, the rule also succeeds, without consuming any input. 2519 If 'r' fails, then posLookahead!r also fails. Low-level implementation of '&r'. 2520 */ 2521 template posLookahead(alias r) 2522 { 2523 enum name = "posLookahead!(" ~ getName!(r) ~ ")"; 2524 2525 ParseTree posLookahead(ParseTree p) 2526 { 2527 version (tracer) 2528 { 2529 if (shouldTrace(getName!(r)(), p)) 2530 trace(traceMsg(p, name, getName!(r)())); 2531 } 2532 ParseTree temp = r(p); 2533 if (temp.successful) 2534 return ParseTree(name, temp.successful, [], p.input, p.end, p.end); 2535 else 2536 return ParseTree(name, temp.successful, [temp.matches[$-1]], p.input, p.end, p.end); 2537 } 2538 2539 ParseTree posLookahead(string input) 2540 { 2541 return .posLookahead!(r)(ParseTree("",false,[],input)); 2542 } 2543 2544 string posLookahead(GetName g) 2545 { 2546 return name; 2547 } 2548 } 2549 2550 unittest // 'posLookahead' unit test 2551 { 2552 alias literal!"a" a; 2553 alias literal!"abc" abc; 2554 2555 alias posLookahead!(a) a_; 2556 alias posLookahead!(abc) abc_; 2557 2558 assert(getName!(a_)() == `posLookahead!(literal!("a"))`); 2559 assert(getName!(abc_)() == `posLookahead!(literal!("abc"))`); 2560 2561 assert(!a_("").successful); 2562 assert(a_("a").successful); 2563 assert(a_("aa").successful); 2564 assert(!a_("b").successful); 2565 2566 ParseTree result = a_("a"); 2567 2568 assert(result.name == `posLookahead!(literal!("a"))`); 2569 assert(result.successful); 2570 assert(result.matches is null); 2571 assert(result.begin == 0); 2572 assert(result.end == 0); 2573 assert(result.children.length == 0); 2574 2575 result = a_(""); 2576 2577 assert(result.name == `posLookahead!(literal!("a"))`); 2578 assert(!result.successful); 2579 assert(result.matches == [`"a"`], "posLookahead error message."); 2580 assert(result.begin == 0); 2581 assert(result.end == 0); 2582 assert(result.children.length == 0); 2583 2584 assert(!abc_("").successful); 2585 assert(abc_("abc").successful); 2586 assert(abc_("abcabc").successful); 2587 assert(!abc_("ab").successful); 2588 2589 result = abc_("abcdef"); 2590 2591 assert(result.name == `posLookahead!(literal!("abc"))`); 2592 assert(result.successful); 2593 assert(result.matches is null); 2594 assert(result.begin == 0); 2595 assert(result.end == 0); 2596 assert(result.children.length == 0); 2597 2598 result = abc_("def"); 2599 2600 assert(result.name == `posLookahead!(literal!("abc"))`); 2601 assert(!result.successful); 2602 assert(result.matches == [`"abc"`], "posLookahead error message."); 2603 assert(result.begin == 0); 2604 assert(result.end == 0); 2605 assert(result.children.length == 0); 2606 } 2607 2608 /** 2609 Tries 'r' on the input. If it fails, the rule succeeds, without consuming any input. 2610 If 'r' succeeds, then negLookahead!r fails. Low-level implementation of '!r'. 2611 */ 2612 template negLookahead(alias r) 2613 { 2614 enum name = "negLookahead!(" ~ getName!(r) ~ ")"; 2615 2616 ParseTree negLookahead(ParseTree p) 2617 { 2618 version (tracer) 2619 { 2620 if (shouldTrace(getName!(r)(), p)) 2621 trace(traceMsg(p, name, getName!(r)())); 2622 } 2623 ParseTree temp = r(p); 2624 if (temp.successful) 2625 return ParseTree(name, false, ["anything but \"" ~ p.input[temp.begin..temp.end] ~ "\""], p.input, p.end, p.end); 2626 else 2627 return ParseTree(name, true, [], p.input, p.end, p.end); 2628 } 2629 2630 ParseTree negLookahead(string input) 2631 { 2632 return .negLookahead!(r)(ParseTree("",false,[],input)); 2633 } 2634 2635 string negLookahead(GetName g) 2636 { 2637 return name; 2638 } 2639 } 2640 2641 unittest // 'negLookahead' unit test 2642 { 2643 alias literal!"a" a; 2644 alias literal!"abc" abc; 2645 2646 alias negLookahead!(a) a_; 2647 alias negLookahead!(abc) abc_; 2648 2649 assert(getName!(a_)() == `negLookahead!(literal!("a"))`); 2650 assert(getName!(abc_)() == `negLookahead!(literal!("abc"))`); 2651 2652 assert(a_("").successful); 2653 assert(!a_("a").successful); 2654 assert(!a_("aa").successful); 2655 assert(a_("b").successful); 2656 2657 ParseTree result = a_("a"); 2658 2659 assert(result.name == `negLookahead!(literal!("a"))`); 2660 assert(!result.successful); 2661 assert(result.matches == [`anything but "a"`]); 2662 assert(result.begin == 0); 2663 assert(result.end == 0); 2664 assert(result.children.length == 0); 2665 2666 result = a_(""); 2667 2668 assert(result.name == `negLookahead!(literal!("a"))`); 2669 assert(result.successful); 2670 assert(result.matches is null); 2671 assert(result.begin == 0); 2672 assert(result.end == 0); 2673 assert(result.children.length == 0); 2674 2675 assert(abc_("").successful); 2676 assert(!abc_("abc").successful); 2677 assert(!abc_("abcabc").successful); 2678 assert(abc_("ab").successful); 2679 2680 result = abc_("abcdef"); 2681 2682 assert(result.name == `negLookahead!(literal!("abc"))`); 2683 assert(!result.successful); 2684 assert(result.matches == [`anything but "abc"`]); 2685 assert(result.begin == 0); 2686 assert(result.end == 0); 2687 assert(result.children.length == 0); 2688 2689 result = abc_("def"); 2690 2691 assert(result.name == `negLookahead!(literal!("abc"))`); 2692 assert(result.successful); 2693 assert(result.matches is null); 2694 assert(result.begin == 0); 2695 assert(result.end == 0); 2696 assert(result.children.length == 0); 2697 } 2698 2699 /** 2700 Internal helper template, to get a parse tree node with a name. For example, given: 2701 ---- 2702 alias or!(literal!("abc"), charRange!('0','9')) myRule; 2703 ---- 2704 2705 myRule gives nodes named "or", since its the parent rule. If you want nodes to be named "myRule", 2706 use named. named just overwrites the original root node name, the children are left untouched. 2707 2708 See_Also: defined. 2709 2710 ---- 2711 alias or!(literal!("abc"), charRange!('0','9')) rule; 2712 alias named!(rule, "myRule") myRule; 2713 2714 auto input = "abc3"; 2715 auto p1 = rule(input); 2716 auto p2 = myRule(input); 2717 2718 // They are both successful 2719 assert(p1.successful && p2.successful); 2720 assert(p1.matches == p2.matches); 2721 // But the names are different 2722 assert(p1.name == `or!(literal!("abc"), charRange!('0','9'))`); 2723 assert(p2.name == `myRule`); 2724 // Same children: 2725 assert(p2.children == p1.children); 2726 2727 ---- 2728 */ 2729 template named(alias r, string name) 2730 { 2731 ParseTree named(ParseTree p) 2732 { 2733 ParseTree result = r(p); 2734 result.name = name; 2735 return result; 2736 } 2737 2738 ParseTree named(string input) 2739 { 2740 return .named!(r,name)(ParseTree("",false,[],input)); 2741 } 2742 2743 string named(GetName g) 2744 { 2745 return name; 2746 } 2747 } 2748 2749 unittest // 'named' unit test 2750 { 2751 alias or!(literal!("abc"), charRange!('0','9')) rule; 2752 alias named!(rule, "myRule") myRule; 2753 2754 assert(getName!(rule)() == `or!(literal!("abc"), charRange!('0','9'))`); 2755 assert(getName!(myRule)() == "myRule"); 2756 2757 // Equality on success (except for the name) 2758 ParseTree result = rule("abc0"); 2759 ParseTree myResult = myRule("abc0"); 2760 2761 assert(myResult.successful == result.successful); 2762 assert(myResult.name == "myRule"); 2763 assert(myResult.matches == result.matches); 2764 assert(myResult.begin == result.begin); 2765 assert(myResult.end == result.end); 2766 assert(myResult.children == result.children); 2767 2768 // Equality on failure (except for the name) 2769 result = rule("_abc"); 2770 myResult = myRule("_abc"); 2771 2772 assert(myResult.successful == result.successful); 2773 assert(myResult.name == "myRule"); 2774 assert(myResult.matches == result.matches); 2775 assert(myResult.begin == result.begin); 2776 assert(myResult.end == result.end); 2777 assert(myResult.children == result.children); 2778 } 2779 2780 /** 2781 Internal helper template, to get a parse tree node with a name, while keeping the original node (see also named). 2782 For example, given: 2783 ---- 2784 alias or!(literal!("abc"), charRange!('0','9')) myRule; 2785 ---- 2786 2787 myRule gives nodes named "or", since its the parent rule. If you want nodes to be named "myRule", 2788 use defined. Contrary to named (see before), the original node is pushed as the child. 2789 2790 See_Also: named. 2791 2792 ---- 2793 alias or!(literal!("abc"), charRange!('0','9')) rule; 2794 alias defined!(rule, "myRule") myRule; 2795 2796 auto input = "abc3"; 2797 auto p1 = rule(input); 2798 auto p2 = myRule(input); 2799 2800 // They are both successful 2801 assert(p1.successful && p2.successful); 2802 assert(p1.matches == p2.matches); 2803 // But the names are different 2804 assert(p1.name == `or!(literal!("abc"), charRange!('0','9'))`); 2805 assert(p2.name == `myRule`); 2806 // Here the original tree is not discarded: 2807 assert(p2.children[0] == p1); 2808 ---- 2809 */ 2810 template defined(alias r, string name) 2811 { 2812 ParseTree defined(ParseTree p) 2813 { 2814 ParseTree result = r(p); 2815 result.children = [result]; 2816 result.name = name; 2817 return result; 2818 } 2819 2820 ParseTree defined(string input) 2821 { 2822 return .defined!(r,name)(ParseTree("",false,[],input)); 2823 } 2824 2825 string defined(GetName g) 2826 { 2827 return name; 2828 } 2829 } 2830 2831 unittest // 'defined' unit test 2832 { 2833 alias or!(literal!("abc"), charRange!('0','9')) rule; 2834 alias defined!(rule, "myRule") myRule; 2835 2836 assert(getName!(rule)() == `or!(literal!("abc"), charRange!('0','9'))`); 2837 assert(getName!(myRule)() == "myRule"); 2838 2839 // Equality on success (except for the name) 2840 ParseTree result = rule("abc0"); 2841 ParseTree myResult = myRule("abc0"); 2842 2843 assert(myResult.successful && result.successful); 2844 assert(myResult.name == "myRule"); 2845 assert(myResult.matches == result.matches); 2846 assert(myResult.begin == result.begin); 2847 assert(myResult.end == result.end); 2848 assert(myResult.children[0] == result); 2849 2850 // Equality on failure (except for the name) 2851 result = rule("_abc"); 2852 myResult = myRule("_abc"); 2853 2854 assert(!myResult.successful && !result.successful); 2855 assert(myResult.name == "myRule"); 2856 assert(myResult.matches == result.matches); 2857 assert(myResult.begin == result.begin); 2858 assert(myResult.end == result.end); 2859 assert(myResult.children[0] == result); 2860 } 2861 2862 /** 2863 Low-level representation for the expression 'r {act}'. That is, it applies rule 'r' 2864 on the input and then calls 'act' on the resulting ParseTree. 2865 */ 2866 template action(alias r, alias act) 2867 { 2868 ParseTree action(ParseTree p) 2869 { 2870 return act(r(p)); 2871 } 2872 2873 ParseTree action(string input) 2874 { 2875 return .action!(r,act)(ParseTree("",false,[],input)); 2876 } 2877 2878 string action(GetName g) 2879 { 2880 enum name = "action!("~ getName!(r)() ~ ", " ~ __traits(identifier, act) ~ ")"; 2881 return name; 2882 } 2883 } 2884 2885 unittest // 'action' unit test 2886 { 2887 ParseTree foo(ParseTree p) 2888 { 2889 p.matches ~= p.matches; // doubling matches 2890 return p; 2891 } 2892 2893 alias literal!("abc") abc; 2894 2895 alias action!(abc, foo) abcfoo; 2896 2897 assert(getName!(abcfoo) == `action!(literal!("abc"), foo)`); 2898 2899 ParseTree result = abcfoo("abc"); 2900 ParseTree reference = abc("abc"); 2901 2902 assert(result.successful == reference.successful); 2903 assert(result.successful); 2904 assert(result.matches == reference.matches ~ reference.matches); 2905 assert(result.begin == reference.begin); 2906 assert(result.end == reference.end); 2907 assert(result.children == reference.children); 2908 2909 // On failure 2910 result = abcfoo("_abc"); 2911 reference = abc("_abc"); 2912 2913 assert(result.successful == reference.successful); 2914 assert(!result.successful); 2915 assert(result.matches == reference.matches ~ reference.matches); 2916 assert(result.begin == reference.begin); 2917 assert(result.end == reference.end); 2918 assert(result.children == reference.children); 2919 } 2920 2921 /** 2922 Concatenates a ParseTree's matches as one match and discards its children. Equivalent to the expression '~r'. 2923 */ 2924 template fuse(alias r) 2925 { 2926 ParseTree fuse(ParseTree p) 2927 { 2928 p = r(p); 2929 if(p.successful) 2930 { 2931 if (p.matches.length != 0) 2932 p.matches = [std.array.join(p.matches)]; 2933 2934 p.children = null; // also discard children 2935 } 2936 return p; 2937 } 2938 2939 ParseTree fuse(string input) 2940 { 2941 return .fuse!(r)(ParseTree("",false,[],input)); 2942 } 2943 2944 string fuse(GetName g) 2945 { 2946 enum name = "fuse!(" ~ getName!(r)() ~ ")"; 2947 return name; 2948 } 2949 } 2950 2951 unittest // 'fuse' unit test 2952 { 2953 alias oneOrMore!(literal!("abc")) abcs; 2954 2955 alias fuse!(abcs) f; 2956 2957 assert(getName!(f) == `fuse!(oneOrMore!(literal!("abc")))`); 2958 2959 ParseTree result = f("abcabcabc"); 2960 ParseTree reference = abcs("abcabcabc"); 2961 2962 assert(result.successful == reference.successful); 2963 assert(result.successful); 2964 assert(reference.matches == ["abc", "abc", "abc"]); 2965 assert(result.matches == ["abcabcabc"]); 2966 assert(result.begin == reference.begin); 2967 assert(result.end == reference.end); 2968 assert(result.children is null); 2969 2970 // On failure 2971 result = f("_abc"); 2972 reference = abcs("_abc"); 2973 2974 assert(result.successful == reference.successful); 2975 assert(!result.successful); 2976 assert(result.matches == reference.matches); 2977 assert(result.begin == reference.begin); 2978 assert(result.end == reference.end); 2979 assert(result.children == reference.children); 2980 2981 alias discard!(literal!("abc")) dabc; 2982 alias fuse!(dabc) f2; 2983 2984 result = f2("abcabc"); 2985 reference = dabc("abcabc"); 2986 2987 assert(result.successful); 2988 assert(result.matches is null); 2989 assert(result.begin == reference.begin); 2990 assert(result.end == reference.end); 2991 assert(result.children == reference.children); 2992 } 2993 2994 /** 2995 Calls 'r' on the input and then discards its children nodes. 2996 */ 2997 template discardChildren(alias r) 2998 { 2999 ParseTree discardChildren(ParseTree p) 3000 { 3001 p = r(p); 3002 p.children = null; 3003 return p; 3004 } 3005 3006 ParseTree discardChildren(string input) 3007 { 3008 return .discardChildren!(r)(ParseTree("",false,[],input)); 3009 } 3010 3011 string discardChildren(GetName g) 3012 { 3013 enum name = "discardChildren!(" ~ getName!(r)() ~ ")"; 3014 return name; 3015 } 3016 } 3017 3018 /** 3019 Calls 'r' on the input and then discards its matches. 3020 */ 3021 template discardMatches(alias r) 3022 { 3023 ParseTree discardMatches(ParseTree p) 3024 { 3025 p = r(p); 3026 if (p.successful) 3027 p.matches = null; 3028 return p; 3029 } 3030 3031 ParseTree discardMatches(string input) 3032 { 3033 return .discardMatches!(r)(ParseTree("",false,[],input)); 3034 } 3035 3036 string discardMatches(GetName g) 3037 { 3038 enum name = "discardMatches!(" ~ getName!(r)() ~ ")"; 3039 return name; 3040 } 3041 } 3042 3043 /** 3044 Calls 'r' on the input and then discard everything 'r' returned: no children, no match and index 3045 put at the end of the match. It's the low-level engine behind ':r'. 3046 */ 3047 template discard(alias r) 3048 { 3049 ParseTree discard(ParseTree p) 3050 { 3051 ParseTree result = r(p); 3052 result.name = "discard!(" ~ getName!(r)() ~ ")"; 3053 //result.begin = result.end; 3054 result.children = null; 3055 if (result.successful) 3056 result.matches = null;//to keep error messages, if any 3057 3058 return result; 3059 } 3060 3061 ParseTree discard(string input) 3062 { 3063 return .discard!(r)(ParseTree("",false,[],input)); 3064 } 3065 3066 string discard(GetName g) 3067 { 3068 return "discard!(" ~ getName!(r)() ~ ")"; 3069 } 3070 } 3071 3072 unittest // 'discard' unit test 3073 { 3074 alias literal!"abc" abc; 3075 alias oneOrMore!abc abcs; 3076 alias discard!(literal!("abc")) dabc; 3077 alias discard!(oneOrMore!(literal!("abc")))dabcs; 3078 3079 ParseTree reference = abc("abc"); 3080 ParseTree result =dabc("abc"); 3081 3082 assert(result.successful == reference.successful); 3083 assert(result.successful); 3084 assert(result.name == `discard!(literal!("abc"))`); 3085 assert(result.matches is null); 3086 assert(result.begin == reference.begin); 3087 assert(result.end == reference.end); 3088 assert(result.children is null); 3089 3090 reference = abcs("abcabcabc"); 3091 result = dabcs("abcabcabc"); 3092 3093 assert(result.successful == reference.successful); 3094 assert(result.successful); 3095 assert(result.name == `discard!(oneOrMore!(literal!("abc")))`); 3096 assert(result.matches is null); 3097 assert(result.begin == reference.begin); 3098 assert(result.end == reference.end); 3099 assert(result.children is null); 3100 3101 // On failure 3102 reference = abcs(""); 3103 result = dabcs(""); 3104 3105 assert(result.successful == reference.successful); 3106 assert(!result.successful); 3107 assert(result.name == `discard!(oneOrMore!(literal!("abc")))`); 3108 assert(result.matches == [`"abc"`], "discard error message."); 3109 assert(result.begin == reference.begin); 3110 assert(result.end == reference.end); 3111 assert(result.children is null); 3112 3113 // Action on 'and' 3114 alias and!(abc,dabc,abc) discardMiddle; 3115 3116 result = discardMiddle("abcabcabc"); 3117 assert(result.successful); 3118 assert(result.matches == ["abc", "abc"]); 3119 assert(result.begin == 0); 3120 assert(result.end == 3*3); 3121 assert(result.children.length == 2); 3122 assert(result.children == [abc("abcabcabc"), abc(abc(abc("abcabcabc")))]); 3123 } 3124 3125 /** 3126 Calls 'r' on the input and then discards everything 'r' did, except its matches (so that 3127 they propagate upwards). Equivalent to ';r'. 3128 */ 3129 template drop(alias r) 3130 { 3131 ParseTree drop(ParseTree p) 3132 { 3133 ParseTree result = r(p); 3134 //result.begin = result.end; 3135 result.children = null; 3136 if (result.successful) 3137 result.name = "drop!(" ~ getName!(r)() ~ ")"; 3138 return result; 3139 } 3140 3141 ParseTree drop(string input) 3142 { 3143 return .drop!(r)(ParseTree("",false,[],input)); 3144 } 3145 3146 string drop(GetName g) 3147 { 3148 return "drop!(" ~ getName!(r)() ~ ")"; 3149 } 3150 } 3151 3152 unittest // 'drop' unit test 3153 { 3154 alias literal!"abc" abc; 3155 alias oneOrMore!abc abcs; 3156 alias drop!(literal!("abc")) dabc; 3157 alias drop!(oneOrMore!(literal!("abc"))) dabcs; 3158 3159 ParseTree reference = abc("abc"); 3160 ParseTree result =dabc("abc"); 3161 3162 assert(result.successful == reference.successful); 3163 assert(result.successful); 3164 assert(result.name == `drop!(literal!("abc"))`); 3165 assert(result.matches == reference.matches); 3166 assert(result.begin == reference.begin); 3167 assert(result.end == reference.end); 3168 assert(result.children is null); 3169 3170 reference = abcs("abcabcabc"); 3171 result = dabcs("abcabcabc"); 3172 3173 assert(result.successful == reference.successful); 3174 assert(result.successful); 3175 assert(result.name == `drop!(oneOrMore!(literal!("abc")))`); 3176 assert(result.matches == reference.matches); 3177 assert(result.begin == reference.begin); 3178 assert(result.end == reference.end); 3179 assert(result.children is null); 3180 3181 // On failure 3182 reference = abcs(""); 3183 result = dabcs(""); 3184 3185 assert(result.successful == reference.successful); 3186 assert(!result.successful); 3187 assert(result.name == reference.name); 3188 assert(result.matches == [`"abc"`], "'drop' error message."); 3189 assert(result.begin == reference.begin); 3190 assert(result.end == reference.end); 3191 assert(result.children is null); 3192 3193 // Action on 'and' 3194 alias and!(abc,dabc,abc) discardMiddle; 3195 3196 result = discardMiddle("abcabcabc"); 3197 assert(result.successful); 3198 assert(result.matches == ["abc", "abc", "abc"], "3 matches."); 3199 assert(result.begin == 0); 3200 assert(result.end == 3*3); 3201 assert(result.children.length == 2, "but only 2 children."); 3202 assert(result.children == [abc("abcabcabc"), abc(abc(abc("abcabcabc")))]); 3203 } 3204 3205 /** 3206 Makes r disappear in a sequence, letting its children take its place. It's equivalent 3207 to the '%' operator. Given A <- B %C D and C <- E F, a successful parse for A will 3208 generate a three with four children: B, E, F and D parse trees. 3209 */ 3210 template propagate(alias r) 3211 { 3212 ParseTree propagate(ParseTree p) 3213 { 3214 ParseTree result = r(p); 3215 if (result.successful) 3216 result.name = "propagate!(" ~ getName!(r)() ~ ")"; 3217 return result; 3218 } 3219 3220 ParseTree propagate(string input) 3221 { 3222 return .propagate!(r)(ParseTree("",false,[],input)); 3223 } 3224 3225 string propagate(GetName g) 3226 { 3227 return "propagate!(" ~ getName!(r)() ~ ")"; 3228 } 3229 } 3230 3231 /** 3232 Makes 'r's result be kept when it would be discarded by the tree-decimation done by a grammar. 3233 Equivalent to '^r'. 3234 */ 3235 template keep(alias r) 3236 { 3237 ParseTree keep(ParseTree p) 3238 { 3239 ParseTree result = r(p); 3240 if (result.successful) 3241 { 3242 result.children = [result]; 3243 result.name = "keep!(" ~ getName!(r)() ~ ")"; 3244 } 3245 return result; 3246 } 3247 3248 ParseTree keep(string input) 3249 { 3250 return .keep!(r)(ParseTree("",false,[],input)); 3251 } 3252 3253 string keep(GetName g) 3254 { 3255 return "keep!(" ~ getName!(r)() ~ ")"; 3256 } 3257 } 3258 3259 unittest // 'keep' unit test 3260 { 3261 // Grammar mimicry 3262 struct KeepTest 3263 { 3264 static bool isRule(string s) 3265 { 3266 if (s == "A" || s == "KA") 3267 return true; 3268 else 3269 return false; 3270 } 3271 3272 mixin decimateTree; 3273 3274 // Equivalent to A <- 'a' 'b' 3275 static ParseTree A(string s) 3276 { 3277 return decimateTree(named!(and!(literal!"a", literal!"b"), "A")(s)); 3278 } 3279 3280 // Here we use keep to protect 'b' 3281 // Equivalent to KA <- 'a' ^'b' 3282 static ParseTree KA(string s) 3283 { 3284 return decimateTree(named!(and!(literal!"a", keep!(literal!"b")), "KA")(s)); 3285 } 3286 } 3287 3288 ParseTree reference = KeepTest.A("abc"); 3289 ParseTree result = KeepTest.KA("abc"); 3290 3291 assert(result.successful == reference.successful); 3292 assert(result.matches == reference.matches); 3293 assert(result.matches == ["a","b"]); 3294 assert(result.begin == reference.begin); 3295 assert(result.end == reference.end); 3296 assert(reference.children.length == 0); 3297 assert(result.children.length == 1); 3298 assert(result.children == [literal!("b")(literal!("a")("abc"))], "'b' node was kept."); 3299 } 3300 3301 /* ****************** predefined rules ******************** */ 3302 3303 alias named!(or!(literal!("\r\n"), literal!("\n"), literal!("\r")), "endOfLine") endOfLine; /// predefined end-of-line parser 3304 alias endOfLine eol; /// helper alias. 3305 3306 alias or!(literal!(" "), literal!("\t"), literal!("\v")) space; /// predefined space-recognizing parser (space or tabulation). 3307 alias named!(literal!"\t", "tab") tab; /// A parser recognizing \t (tabulation) 3308 alias named!(fuse!(discardChildren!(oneOrMore!space)), 3309 "spaces") spaces; /// aka '~space+' 3310 alias or!(space, endOfLine) blank; /// Any blank char (spaces or end of line). 3311 alias named!(discard!(zeroOrMore!blank), 3312 "spacing") spacing; /// The basic space-management parser: discard zero or more blank spaces. 3313 3314 alias charRange!('0', '9') digit; /// Decimal digit: [0-9] 3315 alias named!(fuse!(discardChildren!(oneOrMore!digit)), "digits") digits; /// [0-9]+ 3316 3317 alias or!(charRange!('0','9'), charRange!('a','f'), charRange!('A', 'F')) hexDigit; /// Hexadecimal digit: [0-9a-fA-F] 3318 3319 alias charRange!('a', 'z') alpha; /// [a-z] 3320 alias charRange!('A', 'Z') Alpha; /// [A-Z] 3321 3322 alias and!(oneOrMore!(or!(alpha, Alpha, literal!("_"))), zeroOrMore!(or!(digit, alpha, Alpha, literal!("_")))) ident; 3323 alias named!(fuse!(discardChildren!ident), 3324 "identifier") identifier; /// [a-zA-Z_][a-zA-Z_0-9]*, the basic C-family identifier 3325 alias named!(fuse!(discardChildren!(and!(identifier, zeroOrMore!(and!(literal!".", identifier))))), 3326 "qualifiedIdentifier") qualifiedIdentifier; /// qualified identifiers (ids separated by dots: abd.def.g). 3327 3328 alias named!(literal!"/", "slash") slash; /// A parser recognizing '/' 3329 alias named!(literal!"\\", "backslash") backslash; /// A parser recognizing '\' 3330 alias named!(literal!"'", "quote") quote; /// A parser recognizing ' (single quote) 3331 alias named!(literal!"\"", "doublequote") doublequote; /// A parser recognizing " (double quote) 3332 alias named!(literal!"`", "backquote") backquote; /// A parser recognizing $(BACKTICK) (backquote) 3333 3334 /// A list of elem's separated by sep's. One element minimum. 3335 template list(alias elem, alias sep) 3336 { 3337 alias named!(spaceAnd!(oneOrMore!blank, and!(elem, zeroOrMore!(spaceAnd!(discardMatches!(sep), elem)))), "list") list; 3338 } 3339 3340 /// A list of elem's separated by sep's. The empty list (no elem, no sep) is OK. 3341 template list0(alias elem, alias sep) 3342 { 3343 alias named!(spaceAnd!(oneOrMore!blank, option!(and!(elem, zeroOrMore!(spaceAnd!(discardMatches!(sep), elem))))), "list0") list0; 3344 } 3345 3346 template AddSpace(alias sp) 3347 { 3348 template AddSpace(alias r) 3349 { 3350 alias TypeTuple!(r, discard!sp) AddSpace; 3351 } 3352 } 3353 3354 /** 3355 The engine formerly behind the '< ' Pegged rule: all sequence subelements of a rule are interspersed 3356 with a space-consuming rule, given as the first template parameter. It's not used by Pegged anymore 3357 but can be useful for low-level code. It might become deprecated, but it's not there yet. 3358 3359 ---- 3360 alias and!(literal!"abc", literal!"def") rule1; // "abc" "def", equivalent to "abcdef" 3361 alias spaceAnd!(oneOrMore!blank, literal!"abc", literal!"def") rule2; // "abc" "def", but with spaces in-between. 3362 3363 string input1 = "abcdef"; 3364 string input2 = " abc 3365 3366 def "; // with spaces and end of line markers. 3367 3368 assert(rule1(input1).successful); // OK 3369 assert(!rule1(input2).successful); // NOK, couldn't find "def" after "abc" 3370 3371 assert(rule2(input1).successful); // OK 3372 assert(rule2(input2).successful); // Still OK 3373 assert(rule2(input2).matches == ["abc","def"]);// rule2 finds the literals among the spaces 3374 ---- 3375 3376 As you can see on the previous line, spaceAnd discards the matched spaces 3377 and returns matches only for the 'real' subrules. 3378 3379 Note: by using a non-space rule as the first template argument, 3380 you can use spaceAnd as a generic 'find these patterns, possibly separated by this pattern' rule. 3381 3382 For example, using digits as separators: 3383 ---- 3384 alias spaceAnd!(digit, literal!"abc", litera!"def") rule3; 3385 3386 ParseTree result = rule3("123abc45def67890"); 3387 assert(rule3.successful); 3388 assert(rule3.matches == ["abc", "def"]); 3389 assert(rule3.children.length == 2); 3390 3391 assert(rule3.begin == 0;) 3392 assert(rule3.end == "123abc45def67890".length); 3393 ---- 3394 */ 3395 template spaceAnd(alias sp, rules...) 3396 { 3397 alias and!(discard!(zeroOrMore!sp), staticMap!(AddSpace!(zeroOrMore!sp), rules)) spaceAnd; 3398 } 3399 3400 unittest // 'spaceAnd' unit test 3401 { 3402 alias literal!"a" a; 3403 alias literal!"b" b; 3404 3405 //Basic use 3406 alias and!(a,b) ab; 3407 alias spaceAnd!(oneOrMore!blank, a, b) a_b; 3408 3409 ParseTree reference = ab("ab"); 3410 ParseTree result = a_b("ab"); 3411 3412 assert(reference.successful); 3413 assert(result.successful); 3414 assert(result.matches == reference.matches); 3415 assert(result.begin == reference.begin); 3416 assert(result.end == reference.end); 3417 assert(result.children == reference.children); 3418 3419 reference = ab(" a b "); 3420 result = a_b( " a b "); 3421 3422 assert(!reference.successful); 3423 assert(result.successful); 3424 assert(result.matches == ["a","b"]); 3425 assert(result.begin == 0); 3426 assert(result.end == " a b ".length); 3427 assert(result.children.length == 2); 3428 3429 reference = ab("ac"); 3430 result = a_b("ac"); 3431 3432 assert(!reference.successful); 3433 assert(!result.successful); 3434 3435 reference = ab(" a c "); 3436 result = a_b(" a c "); 3437 3438 assert(!reference.successful); 3439 assert(!result.successful); 3440 3441 // With another separator than spaces 3442 alias spaceAnd!(digit, a, b) a0b; 3443 3444 assert(a0b("ab").successful); 3445 assert(!a0b(" a b ").successful); 3446 3447 result = a0b("012a34b567"); 3448 3449 assert(result.successful); 3450 assert(result.matches == ["a", "b"]); 3451 assert(result.begin == 0); 3452 assert(result.end == "012a34b567".length); 3453 assert(result.children.length == 2); 3454 } 3455 3456 /// Mixin to simplify a parse tree inside a grammar 3457 mixin template decimateTree() 3458 { 3459 static ParseTree decimateTree(ParseTree p) 3460 { 3461 if(p.children.length == 0) return p; 3462 3463 ParseTree[] filterChildren(ParseTree pt) 3464 { 3465 ParseTree[] result; 3466 foreach(child; pt.children) 3467 { 3468 import std.algorithm : startsWith; 3469 3470 if ( (isRule(child.name) && child.matches.length != 0) 3471 || !child.successful && child.children.length == 0) 3472 { 3473 child.children = filterChildren(child); 3474 result ~= child; 3475 } 3476 else if (child.name.startsWith("keep!(")) // 'keep' node are never discarded. 3477 // They have only one child, the node to keep 3478 { 3479 result ~= child.children[0]; 3480 } 3481 else // discard this node, but see if its children contain nodes to keep 3482 { 3483 result ~= filterChildren(child); 3484 } 3485 } 3486 return result; 3487 } 3488 p.children = filterChildren(p); 3489 return p; 3490 } 3491 } 3492 3493 /** 3494 Discard one-child nodes and replace them with their children. 3495 Most grammar tend to produce long one-child/one-child branches, 3496 simplifyTree compacts these. 3497 */ 3498 ParseTree simplifyTree(ParseTree p) 3499 { 3500 foreach(ref child; p.children) 3501 child = simplifyTree(child); 3502 3503 if (p.children.length != 1) 3504 return p; 3505 else // linear tree 3506 return p.children[0]; 3507 } 3508 3509 /** 3510 Returns: the number of nodes in a parse tree. 3511 */ 3512 size_t size(ParseTree p) 3513 { 3514 size_t result = 1; 3515 foreach(child; p.children) 3516 result += size(child); 3517 return result; 3518 } 3519 3520 /** 3521 Generic ParseTree modifier: 3522 predicate must be callable with a ParseTree and return a boolean. 3523 modifier must be callable with a ParseTree and return a ParseTree. 3524 3525 If predicate is true on input, modify calls modifier on input and return the result. 3526 If not, it continues with the children. 3527 */ 3528 ParseTree modify(alias predicate, alias modifier)(ParseTree input) 3529 { 3530 foreach(ref child; input.children) 3531 child = modify!(predicate, modifier)(child); 3532 3533 if (predicate(input)) 3534 input = modifier(input); 3535 3536 return input; 3537 }