1 /** 2 This module contains the engine behind Pegged, the expression templates building blocks to create a top-down 3 recursive-descent parser. 4 5 The terminals and non-terminals described here are meant to be used inside a Pegged grammar. 6 As such, they are a bit less user-friendly than what's output by pegged.grammar. 7 For example they take a ParseTree as input, not a string. 8 9 See the /docs directory for the full documentation as markdown files. 10 */ 11 module pegged.peg; 12 13 /* 14 NOTE: 15 Do not use the GrammarTester for unittesting in this module. This module needs 16 to be able to pass its unittests before the GrammarTester is even trustable. 17 Writing tests the long way is preferred here, as it will avoid the circular 18 dependency. 19 */ 20 21 import std.algorithm: map, startsWith; 22 import std.uni : isAlpha; 23 import std.array; 24 import std.conv; 25 import std.range: equal; 26 import std..string: strip; 27 import std.typetuple; 28 import std.uni: isAlpha; 29 30 /** 31 CT Switch for testing 'keywords' implementations 32 */ 33 enum 34 { 35 IFCHAIN, 36 TRIE 37 } 38 enum KEYWORDS = IFCHAIN; 39 40 /** 41 The basic parse tree, as used throughout the project. 42 You can define your own parse tree node, but respect the basic layout. 43 */ 44 struct ParseTree 45 { 46 string name; /// The node name 47 bool successful; /// Indicates whether a parsing was successful or not 48 string[] matches; /// The matched input's parts. Some expressions match at more than one place, hence matches is an array. 49 50 string input; /// The input string that generated the parse tree. Stored here for the parse tree to be passed to other expressions, as input. 51 size_t begin, end; /// Indices for the matched part (from the very beginning of the first match to the last char of the last match. 52 53 ParseTree[] children; /// The sub-trees created by sub-rules parsing. 54 55 /** 56 Basic toString for easy pretty-printing. 57 */ 58 string toString(string tabs = "") const 59 { 60 string result = name; 61 62 string childrenString; 63 bool allChildrenSuccessful = true; 64 65 foreach(i,child; children) 66 { 67 childrenString ~= tabs ~ " +-" ~ child.toString(tabs ~ ((i < children.length -1 ) ? " | " : " ")); 68 if (!child.successful) 69 allChildrenSuccessful = false; 70 } 71 72 if (successful) 73 { 74 result ~= " " ~ to!string([begin, end]) ~ to!string(matches) ~ "\n"; 75 } 76 else // some failure info is needed 77 { 78 if (allChildrenSuccessful) // no one calculated the position yet 79 { 80 Position pos = position(this); 81 string left, right; 82 83 if (pos.index < 10) 84 left = input[0 .. pos.index]; 85 else 86 left = input[pos.index-10 .. pos.index]; 87 //left = strip(left); 88 89 if (pos.index + 10 < input.length) 90 right = input[pos.index .. pos.index + 10]; 91 else 92 right = input[pos.index .. $]; 93 //right = strip(right); 94 95 result ~= " failure at line " ~ to!string(pos.line) ~ ", col " ~ to!string(pos.col) ~ ", " 96 ~ (left.length > 0 ? "after \"" ~ left ~ "\" " : "") 97 ~ "expected "~ (matches.length > 0 ? matches[$-1] : "NO MATCH") 98 ~ ", but got \"" ~ right ~ "\"\n"; 99 } 100 else 101 { 102 result ~= " (failure)\n"; 103 } 104 } 105 106 return result ~ childrenString; 107 } 108 109 @property string failMsg() 110 { 111 foreach(i, child; children) 112 { 113 if (!child.successful) 114 return child.failMsg; 115 } 116 117 if (!successful) 118 { 119 Position pos = position(this); 120 string left, right; 121 122 if (pos.index < 10) 123 left = input[0 .. pos.index]; 124 else 125 left = input[pos.index - 10 .. pos.index]; 126 left = left.replace("\n", `\n`).replace("\t", `\t`); 127 128 if (pos.index + 10 < input.length) 129 right = input[pos.index .. pos.index + 10]; 130 else 131 right = input[pos.index .. $]; 132 right = right.replace("\n", `\n`).replace("\t", `\t`); 133 134 return "Failure at line " ~ to!string(pos.line) ~ ", col " ~ to!string(pos.col) ~ ", " 135 ~ (left.length > 0 ? "after \"" ~ left ~ "\" " : "") 136 ~ "expected " ~ (matches.length > 0 ? matches[$ - 1] : "NO MATCH") 137 ~ `, but got "` ~ right ~ `"`; 138 } 139 140 return "Success"; 141 } 142 143 ParseTree dup() @property 144 { 145 ParseTree result = this; 146 result.matches = result.matches.dup; 147 result.children = map!(p => p.dup)(result.children).array(); 148 return result; 149 } 150 } 151 152 153 unittest // ParseTree testing 154 { 155 ParseTree p; 156 assert(p == p, "Self-identity on null tree."); 157 158 p = ParseTree("Name", true, ["abc", "", "def"], "input", 0, 1, null); 159 assert(p == p, "Self identity on non-null tree."); 160 161 ParseTree child = ParseTree("Child", true, ["abc", "", "def"], "input", 0, 1, null); 162 p.children = [child, child]; 163 164 ParseTree q = p; 165 assert(p == q, "Copying creates equal trees."); 166 167 assert(p.children == q.children); 168 p.children = [child, child, child]; 169 assert(q.children != p.children, "Disconnecting children."); 170 171 p = ParseTree("Name", true, ["abc", "", "def"], "input", 0, 1, null); 172 p.children = [child, child]; 173 174 q = p.dup; 175 assert(p == q, "Dupping creates equal trees."); 176 177 assert(p.children == q.children, "Equal children for dupped trees."); 178 p.children = null; 179 assert(q.children != p.children); 180 181 q.children = [p,p]; 182 assert(p != q, "Tree with different children are not equal."); 183 184 p.children = [p,p]; 185 assert(p == q, "Adding equivalent children is OK."); 186 187 p.matches = null; 188 assert(p != q, "Nulling matches makes trees unequal."); 189 190 p.matches = q.matches; 191 assert(p == q, "Copying matches makes equal trees."); 192 } 193 194 /// To compare two trees for content (not bothering with node names) 195 /// That's useful to compare the results from two different grammars. 196 bool softCompare(ParseTree p1, ParseTree p2) 197 { 198 return p1.successful == p2.successful 199 && p1.matches == p2.matches 200 && p1.begin == p2.begin 201 && p1.end == p2.end 202 && std.algorithm.equal!(softCompare)(p1.children, p2.children); // the same for children 203 } 204 205 unittest // softCompare 206 { 207 ParseTree p = ParseTree("Name", true, ["abc", "", "def"], "input", 0, 1, null); 208 ParseTree child = ParseTree("Child", true, ["abc", "", "def"], "input", 0, 1, null); 209 p.children = [child, child]; 210 211 ParseTree q = p; 212 assert(p == q, "Copy => Equal trees."); 213 assert(softCompare(p,q), "Copy => Trees equal for softCompare."); 214 215 q.name = "Another One"; 216 assert(p != q, "Name change => non-equal trees."); 217 assert(softCompare(p,q), "Name change => Trees equal for softCompare."); 218 } 219 220 /// To record a position in a text 221 struct Position 222 { 223 size_t line;/// line number (starts at 0) 224 size_t col;/// column number (starts at 0) 225 size_t index;/// index (starts at 0) 226 } 227 228 /** 229 Given an input string, returns the position corresponding to the end of the string. 230 231 For example: 232 --- 233 assert(position("abc") == Position(0,3,3)); 234 assert(position("abc 235 ") == Position(1,0,4)); 236 assert(position("abc 237 238 ") == Position(2,4,8)); 239 --- 240 */ 241 Position position(string s) 242 { 243 size_t col, line, index; 244 foreach(i,c; s) 245 { 246 if (eol(ParseTree("", false, [], s, 0,i)).successful) 247 { 248 col = 0; 249 ++line; 250 ++index; 251 } 252 else 253 { 254 ++col; 255 ++index; 256 } 257 } 258 259 return Position(line,col,index); 260 } 261 262 /** 263 Same as previous overload, but from the begin of P.input to p.end 264 */ 265 Position position(const ParseTree p) 266 { 267 return position(p.input[0..p.end]); 268 } 269 270 unittest 271 { 272 assert(position("") == Position(0,0,0), "Null string, position 0."); 273 assert(position("abc") == Position(0,3,3), "length 3 string, no line feed."); 274 assert(position("abc 275 ") == Position(1,0,4), "One end of line."); 276 assert(position("abc 277 278 ----") == Position(2,4,9), "Three lines (second one empty)."); 279 assert(position("abc 280 ---- 281 ----") == Position(2,4,13), "Three lines."); 282 assert(position(" 283 284 285 ") == Position(3,0,3), "Four lines, all empty."); 286 } 287 288 string getName(alias expr)() @property 289 { 290 static if (is(typeof( { expr(GetName()); }))) 291 return expr(GetName()); 292 else 293 return __traits(identifier, expr); 294 } 295 296 struct GetName {} 297 298 /** 299 Basic rule, that always fail without consuming. 300 */ 301 ParseTree fail(ParseTree p) 302 { 303 return ParseTree("fail", false, [], p.input, p.end, p.end, null); 304 } 305 306 /// ditto 307 ParseTree fail(string input) 308 { 309 return fail(ParseTree("", false, [], input)); 310 } 311 312 string fail(GetName g) 313 { 314 return "fail"; 315 } 316 317 unittest // 'fail' unit test 318 { 319 ParseTree input = ParseTree("input", true, [], "This is the input string.", 0,0, null); 320 ParseTree result = fail(input); 321 assert(result.name == "fail"); 322 assert(!result.successful, "'fail' fails."); 323 assert(result.matches is null, "'fail' makes no match."); 324 assert(result.input == input.input, "'fail' does not change the input."); 325 assert(result.end == input.end, "'fail' puts the index after the previous parse."); 326 assert(result.children is null, "'fail' has no children."); 327 328 result = fail("This is the input string."); 329 assert(!result.successful, "'fail' fails."); 330 } 331 332 /** 333 Matches the end of input. Fails if there is any character left. 334 */ 335 ParseTree eoi(ParseTree p) 336 { 337 if (p.end == p.input.length) 338 return ParseTree("eoi", true, [], p.input, p.end, p.end); 339 else 340 return ParseTree("eoi", false, ["end of input"], p.input, p.end, p.end); 341 } 342 343 /// ditto 344 ParseTree eoi(string input) 345 { 346 return eoi(ParseTree("", false, [], input)); 347 } 348 349 string eoi(GetName g) 350 { 351 return "eoi"; 352 } 353 354 alias eoi endOfInput; /// helper alias. 355 356 unittest // 'eoi' unit test 357 { 358 ParseTree input = ParseTree("input", true, [], "This is the input string.", 0,0, null); 359 ParseTree result = eoi(input); 360 assert(result.name == "eoi"); 361 assert(!result.successful, "'eoi' fails on non-null string."); 362 assert(result.matches == ["end of input"], "'eoi' error message."); 363 assert(result.input == input.input, "'eoi' does not change the input."); 364 assert(result.end == input.end, "'eoi' puts the index after the previous parse."); 365 assert(result.children is null, "'eoi' has no children."); 366 367 input = ParseTree("input", true, [], "", 0,0, null); 368 result = eoi(input); 369 assert(result.successful, "'eoi' succeeds on strings of length 0."); 370 result = eoi(""); 371 assert(result.successful, "'eoi' succeeds on strings of length 0."); 372 result = eoi(null); 373 assert(result.successful, "'eoi' succeeds on null strings"); 374 } 375 376 /** 377 Match any character. As long as there is at least a character left in the input, it succeeds. 378 Conversely, it fails only if called at the end of the input. 379 */ 380 ParseTree any(ParseTree p) 381 { 382 if (p.end < p.input.length) 383 return ParseTree("any", true, [p.input[p.end..p.end+1]], p.input, p.end, p.end+1); 384 else 385 return ParseTree("any", false, ["any char"], p.input, p.end, p.end); 386 } 387 388 /// ditto 389 ParseTree any(string input) 390 { 391 return any(ParseTree("", false, [], input)); 392 } 393 394 string any(GetName g) 395 { 396 return "any"; 397 } 398 399 unittest // 'any' unit test 400 { 401 ParseTree input = ParseTree("input", true, [], "This is the input string.", 0,0, null); 402 ParseTree result = any(input); 403 404 assert(result.name == "any"); 405 assert(result.successful, "'any' succeeds on non-null strings."); 406 assert(result.matches == ["T"], "'any' matches the first char in an input."); 407 assert(result.input == input.input, "'any' does not change the input."); 408 assert(result.end == input.end+1, "'any' advances the index by one position."); 409 assert(result.children is null, "'any' has no children."); 410 411 result = any("a"); 412 assert(result.successful, "'any' matches on strings of length one."); 413 assert(result.matches == ["a"], "'any' matches the first char in an input."); 414 assert(result.input == "a", "'any' does not change the input."); 415 assert(result.end == 1, "'any' advances the index by one position."); 416 assert(result.children is null, "'any' has no children."); 417 418 input = ParseTree("input", true, [], "", 0,0, null); 419 420 result = any(input); 421 assert(!result.successful, "'any' fails on strings of length 0."); 422 assert(result.matches == ["any char"], "'any' error message on strings of length 0."); 423 assert(result.end == 0, "'any' does not advance the index."); 424 425 result = any(""); 426 assert(!result.successful, "'any' fails on strings of length 0."); 427 assert(result.matches == ["any char"], "'any' error message on strings of length 0."); 428 assert(result.end == 0, "'any' does not advance the index."); 429 430 result = any(null); 431 assert(!result.successful, "'any' fails on null strings."); 432 assert(result.matches == ["any char"], "'any' error message on strings of length 0."); 433 assert(result.end == 0, "'any' does not advance the index."); 434 } 435 436 /** 437 Predefined parser: matches word boundaries, as \b for regexes. 438 */ 439 ParseTree wordBoundary(ParseTree p) 440 { 441 // TODO: I added more indexing guards and now this could probably use 442 // some simplification. Too tired to write it better. --Chad 443 bool matched = (p.end == 0 && isAlpha(p.input.front())) 444 || (p.end == p.input.length && isAlpha(p.input.back())) 445 || (p.end > 0 && isAlpha(p.input[p.end-1]) && p.end < p.input.length && !isAlpha(p.input[p.end])) 446 || (p.end > 0 && !isAlpha(p.input[p.end-1]) && p.end < p.input.length && isAlpha(p.input[p.end])); 447 if (matched) 448 return ParseTree("wordBoundary", matched, [], p.input, p.end, p.end, null); 449 else 450 return ParseTree("wordBoundary", matched, ["word boundary"], p.input, p.end, p.end, null); 451 } 452 453 /// ditto 454 ParseTree wordBoundary(string input) 455 { 456 return ParseTree("wordBoundary", isAlpha(input.front()), [], input, 0,0, null); 457 } 458 459 string wordBoundary(GetName g) 460 { 461 return "wordBoundary"; 462 } 463 464 unittest // word boundary 465 { 466 ParseTree input = ParseTree("", false, [], "This is a word."); 467 auto wb = [// "This" 468 cast(size_t)(0):true, 1:false, 2:false, 3:false, 4: true, 469 // "is" 470 5: true, 6:false, 7: true, 471 // "a" 472 8: true, 9:true, 473 // "word" 474 10:true, 11:false, 12:false, 13:false, 14:true, 475 // "." 476 15:false 477 ]; 478 479 foreach(size_t index; 0 .. input.input.length) 480 { 481 input.end = index; 482 ParseTree result = wordBoundary(input); 483 484 assert(result.name == "wordBoundary"); 485 assert(result.successful == wb[index]); // true, false, ... 486 // for errors, there is an error message 487 assert(result.successful && result.matches is null || !result.successful); 488 assert(result.begin == input.end); 489 assert(result.end == input.end); 490 assert(result.children is null); 491 } 492 } 493 494 /** 495 Represents a literal in a PEG, like "abc" or 'abc' (or even ''). 496 It succeeds if a prefix of the input is equal to its template parameter and fails otherwise. 497 */ 498 template literal(string s) 499 { 500 enum name = "literal!(\""~s~"\")"; 501 502 ParseTree literal(ParseTree p) 503 { 504 enum lit = "\"" ~ s ~ "\""; 505 if (p.end+s.length <= p.input.length && p.input[p.end..p.end+s.length] == s) 506 return ParseTree(name, true, [s], p.input, p.end, p.end+s.length); 507 else 508 return ParseTree(name, false, [lit], p.input, p.end, p.end); 509 } 510 511 ParseTree literal(string input) 512 { 513 return .literal!(s)(ParseTree("", false, [], input)); 514 } 515 516 517 string literal(GetName g) 518 { 519 return name; 520 } 521 522 } 523 524 unittest // 'literal' unit test 525 { 526 ParseTree input = ParseTree("input", true, [], "abcdef", 0,0, null); 527 528 alias literal!"a" a; 529 alias literal!"abc" abc; 530 alias literal!"" empty; 531 532 ParseTree result = a(input); 533 534 assert(result.name == `literal!("a")`, "Literal name test."); 535 assert(result.successful, "'a' succeeds on inputs beginning with 'a'."); 536 assert(result.matches == ["a"], "'a' matches the 'a' at the beginning."); 537 assert(result.input == input.input, "'a' does not change the input."); 538 assert(result.end == input.end+1, "'a' advances the index by one position."); 539 assert(result.children is null, "'a' has no children."); 540 541 result = a("abcdef"); 542 543 assert(result.successful, "'a' succeeds on inputs beginning with 'a'."); 544 assert(result.matches == ["a"], "'a' matches the 'a' at the beginning."); 545 assert(result.input == input.input, "'a' does not change the input."); 546 assert(result.end == input.end+1, "'a' advances the index by one position."); 547 assert(result.children is null, "'a' has no children."); 548 549 result = abc(input); 550 551 assert(result.name == `literal!("abc")`, "Literal name test."); 552 assert(result.successful, "'abc' succeeds on inputs beginning with 'abc'."); 553 assert(result.matches == ["abc"], "'abc' matches 'abc' at the beginning."); 554 assert(result.input == input.input, "'abc' does not change the input."); 555 assert(result.end == input.end+3, "'abc' advances the index by 3 positions."); 556 assert(result.children is null, "'abc' has no children."); 557 558 result = abc("abcdef"); 559 560 assert(result.successful, "'abc' succeeds on inputs beginning with 'abc'."); 561 assert(result.matches == ["abc"], "'abc' matches 'abc' at the beginning."); 562 assert(result.input == input.input, "'abc' does not change the input."); 563 assert(result.end == input.end+3, "'abc' advances the index by 3 positions."); 564 assert(result.children is null, "'abc' has no children."); 565 566 result = empty(input); 567 568 assert(result.name == `literal!("")`, "Literal name test."); 569 assert(result.successful, "'' succeeds on non-null inputs."); 570 assert(result.matches == [""], "'' matches '' at the beginning."); 571 assert(result.input == input.input, "'' does not change the input."); 572 assert(result.end == input.end+0, "'' does not advance the index."); 573 assert(result.children is null, "'' has no children."); 574 575 result = empty("abcdef"); 576 577 assert(result.successful, "'' succeeds on non-null inputs."); 578 assert(result.matches == [""], "'' matches '' at the beginning."); 579 assert(result.input == input.input, "'' does not change the input."); 580 assert(result.end == input.end+0, "'' does not advance the index."); 581 assert(result.children is null, "'' has no children."); 582 583 input.input = "bcdef"; 584 585 result = a(input); 586 587 assert(!result.successful, "'a' fails on inputs not beginning with 'a'."); 588 assert(result.matches == ["\"a\""], "'a' makes no match on 'bcdef'."); 589 assert(result.input == input.input, "'a' does not change the input."); 590 assert(result.end == input.end, "'a' does not advances the index on 'bcdef'."); 591 assert(result.children is null, "'a' has no children."); 592 593 result = abc(input); 594 595 assert(!result.successful, "'abc' fails on inputs not beginning with 'abc'."); 596 assert(result.matches == ["\"abc\""], "'abc' does no match on 'bcdef'."); 597 assert(result.input == input.input, "'abc' does not change the input."); 598 assert(result.end == input.end, "'abc' does not advance the index on 'bcdef'."); 599 assert(result.children is null, "'abc' has no children."); 600 601 result = empty(input); 602 603 assert(result.successful, "'' succeeds on non-null inputs."); 604 assert(result.matches == [""], "'' matches '' at the beginning."); 605 assert(result.input == input.input, "'' does not change the input."); 606 assert(result.end == input.end+0, "'' does not advance the index."); 607 assert(result.children is null, "'' has no children."); 608 609 input.input = ""; 610 611 result = a(input); 612 613 assert(!result.successful, "'a' fails on empty strings."); 614 assert(result.matches == ["\"a\""], "'a' does not match ''."); 615 assert(result.input == input.input, "'a' does not change the input."); 616 assert(result.end == input.end, "'a' does not advance the index on 'bcdef'."); 617 assert(result.children is null, "'a' has no children."); 618 619 result = abc(input); 620 621 assert(!result.successful, "'abc' fails on empty strings."); 622 assert(result.matches == ["\"abc\""], "'abc' does not match ''."); 623 assert(result.input == input.input, "'abc' does not change the input."); 624 assert(result.end == input.end, "'abc' does not advance the index on 'bcdef'."); 625 assert(result.children is null, "'abc' has no children."); 626 627 result = empty(input); 628 629 assert(result.successful, "'' succeeds on empty strings."); 630 assert(result.matches == [""], "'' matches '' at the beginning, even on empty strings."); 631 assert(result.input == input.input, "'' does not change the input."); 632 assert(result.end == input.end+0, "'' does not advance the index."); 633 assert(result.children is null, "'' has no children."); 634 } 635 636 /** 637 Represents a range of chars, from begin to end, included. So charRange!('a','z') matches 638 all English lowercase letters. If fails if the input is empty or does not begin with a character 639 between begin and end. 640 641 If begin == end, it will match one char (begin... or end). 642 643 begin > end is non-legal. 644 */ 645 template charRange(dchar begin, dchar end) if (begin <= end) 646 { 647 enum name = "charRange!('"~to!string(begin)~"','" ~ to!string(end) ~ "')"; 648 649 ParseTree charRange(ParseTree p) 650 { 651 enum longname = "a char between '"~to!string(begin)~"' and '"~to!string(end)~"'"; 652 if (p.end < p.input.length && p.input[p.end] >= begin && p.input[p.end] <= end) 653 return ParseTree(name, true, [p.input[p.end..p.end+1]], p.input, p.end, p.end+1); 654 else 655 return ParseTree(name, false, [longname], p.input, p.end, p.end); 656 } 657 658 ParseTree charRange(string input) 659 { 660 return .charRange!(begin,end)(ParseTree("",false,[],input)); 661 } 662 663 string charRange(GetName g) 664 { 665 return name; 666 } 667 } 668 669 unittest // 'charRange' unit test 670 { 671 ParseTree input = ParseTree("input", true, [], "abcdef", 0,0, null); 672 673 alias charRange!('a','a') aa; 674 alias charRange!('a','b') ab; 675 alias charRange!('a','z') az; 676 alias charRange!(dchar.min,dchar.max) allChars; 677 alias charRange!('\U00000000','\U000000FF') ASCII; 678 679 static assert(!__traits(compiles, {alias charRange!('z','a') za;})); 680 681 ParseTree result = aa(input); 682 683 assert(result.name == "charRange!('a','a')", "charRange name test."); 684 assert(result.successful, "'a-a' succeeds on inputs beginning with 'a'."); 685 assert(result.matches == ["a"], "'a-a' matches the 'a' at the beginning."); 686 assert(result.input == input.input, "'a-a' does not change the input."); 687 assert(result.end == input.end+1, "'a-a' advances the index by one position."); 688 assert(result.children is null, "'a-a' has no children."); 689 690 result = ab("abcdef"); 691 692 assert(result.name == "charRange!('a','b')", "charRange name test."); 693 assert(result.successful, "'a-b' succeeds on inputs beginning with 'a'."); 694 assert(result.matches == ["a"], "'a-b' matches the 'a' at the beginning."); 695 assert(result.input == input.input, "'a-b' does not change the input."); 696 assert(result.end == input.end+1, "'a-b' advances the index by one position."); 697 assert(result.children is null, "'a-b' has no children."); 698 699 result = az(input); 700 701 assert(result.name == "charRange!('a','z')", "charRange name test."); 702 assert(result.successful, "'a-z' succeeds on inputs beginning with 'abc'."); 703 assert(result.matches == ["a"], "'a-z' matches 'a' at the beginning."); 704 assert(result.input == input.input, "'a-z' does not change the input."); 705 assert(result.end == input.end+1, "'a-z' advances the index by one position."); 706 assert(result.children is null, "'a-z' has no children."); 707 708 input.input = "bcdef"; 709 710 result = aa(input); 711 712 assert(!result.successful, "'a-a' fails on inputs not beginning with 'a'."); 713 assert(result.matches == ["a char between 'a' and 'a'"], "'a-a' makes no match on 'bcdef'."); 714 assert(result.input == input.input, "'a-a' does not change the input."); 715 assert(result.end == input.end, "'a-a' does not advances the index on 'bcdef'."); 716 assert(result.children is null, "'a-a' has no children."); 717 718 result = ab(input); 719 720 assert(result.successful, "'a-b' succeeds on inputs beginning with 'b'."); 721 assert(result.matches == ["b"], "'a-b' matches on 'bcdef'."); 722 assert(result.input == input.input, "'a-b' does not change the input."); 723 assert(result.end == input.end+1, "'a-b' advances the index by one position'."); 724 assert(result.children is null, "'a-b' has no children."); 725 726 result = az(input); 727 728 assert(result.successful, "'a-z' succeeds on 'bcdef'."); 729 assert(result.matches == ["b"], "'a-z' matches 'b' at the beginning of 'bcdef'."); 730 assert(result.input == input.input, "'a-z' does not change the input."); 731 assert(result.end == input.end+1, "'a-z' advances the index by one position."); 732 assert(result.children is null, "'a-z' has no children."); 733 734 input.input = ""; 735 736 result = aa(input); 737 738 assert(!result.successful, "'a-a' fails on empty strings."); 739 assert(result.matches == ["a char between 'a' and 'a'"], "'a-a' does not match ''."); 740 assert(result.input == input.input, "'a-a' does not change the input."); 741 assert(result.end == input.end, "'a-a' does not advance the index on ''."); 742 assert(result.children is null, "'a-a' has no children."); 743 744 result = ab(input); 745 746 assert(!result.successful, "'a-b' fails on empty strings."); 747 assert(result.matches == ["a char between 'a' and 'b'"], "'a-b' does not match ''."); 748 assert(result.input == input.input, "'a-b' does not change the input."); 749 assert(result.end == input.end, "'a-b' does not advance the index on ''."); 750 assert(result.children is null, "'a-b' has no children."); 751 752 result = az(input); 753 754 assert(!result.successful, "'a-z' fails on empty strings."); 755 assert(result.matches == ["a char between 'a' and 'z'"], "'a-z' does not match ''."); 756 assert(result.input == input.input, "'a-z' does not change the input."); 757 assert(result.end == input.end, "'a-z' does not advance the index on ''."); 758 assert(result.children is null, "'a-z' has no children."); 759 760 input.input = "123"; 761 762 result = aa(input); 763 764 assert(!result.successful, "'a-a' fails on '123'."); 765 assert(result.matches == ["a char between 'a' and 'a'"], "'a-a' does not match '123'."); 766 assert(result.input == input.input, "'a-a' does not change the input."); 767 assert(result.end == input.end, "'a-a' does not advance the index on '123'."); 768 assert(result.children is null, "'a-a' has no children."); 769 770 result = ab(input); 771 772 assert(!result.successful, "'a-b' fails on '123'."); 773 assert(result.matches == ["a char between 'a' and 'b'"], "'a-b' does not match '123'."); 774 assert(result.input == input.input, "'a-b' does not change the input."); 775 assert(result.end == input.end, "'a-b' does not advance the index on '123'."); 776 assert(result.children is null, "'a-b' has no children."); 777 778 result = az(input); 779 780 assert(!result.successful, "'a-z' fails on '123'."); 781 assert(result.matches == ["a char between 'a' and 'z'"], "'a-z' does not match '123'."); 782 assert(result.input == input.input, "'a-z' does not change the input."); 783 assert(result.end == input.end, "'a-z' does not advance the index on '123'."); 784 assert(result.children is null, "'a-z' has no children."); 785 786 787 foreach(dchar ch; 0..256*(128+64+16+8)) 788 assert(allChars(to!string(ch)).successful); 789 790 assert(!allChars("").successful); 791 } 792 793 /** 794 eps matches the empty string (usually denoted by the Greek letter 'epsilon') and always succeeds. 795 It's equivalent to literal!"" (for example, it creates a match of [""]: one match, the empty string). 796 */ 797 ParseTree eps(ParseTree p) 798 { 799 return ParseTree("eps", true, [""], p.input, p.end, p.end); 800 } 801 802 ParseTree eps(string input) 803 { 804 return eps(ParseTree("",false,[], input)); 805 } 806 807 string eps(GetName g) 808 { 809 return "eps"; 810 } 811 812 unittest // 'eps' unit test 813 { 814 ParseTree input = ParseTree("input", true, [], "abcdef", 0,0, null); 815 816 ParseTree result = eps(input); 817 818 assert(result.name == "eps"); 819 assert(result.successful, "'eps' succeeds on non-null inputs."); 820 assert(result.matches == [""], "'eps' matches '' at the beginning."); 821 assert(result.input == input.input, "'eps' does not change the input."); 822 assert(result.end == input.end+0, "'eps' does not advance the index."); 823 assert(result.children is null, "'eps' has no children."); 824 825 input.input = ""; 826 827 result = eps(input); 828 assert(result.name == "eps"); 829 assert(result.successful, "'eps' succeeds on empty strings."); 830 assert(result.matches == [""], "'eps' matches '' at the beginning, even on empty strings."); 831 assert(result.input == input.input, "'eps' does not change the input."); 832 assert(result.end == input.end+0, "'eps' does not advance the index."); 833 assert(result.children is null, "'eps' has no children."); 834 } 835 836 837 /** 838 Basic operator: it matches if all its subrules (stored in the rules template parameter tuple) match 839 the input successively. Its subrules parse trees are stored as its children and its matches field 840 will contain all its subrules matches, in order. 841 842 ---- 843 alias and!(literal!"abc", charRange!('a','z')) rule; // abc followed by any letter between a and z. 844 ParseTree input = ParseTree("",false,[],"abcd"); // low-level plumbing, 845 // the rules described here act on ParseTree's not strings. 846 // It's equivalent to "abcd" as input 847 auto result = rule(input); 848 849 assert(result.successful); // OK, 'abc' followed by 'd' 850 assert(result.matches == ["abc", "d"]); // stores the matches 851 assert(result.children.length == 2); // two children, the result of "abc" on "abcd" and the result of [a-z] on "d" 852 853 input.input = "abc"; // changing the input string; 854 assert(!rule(input)).successful); // NOK, abc alone 855 input.input = "ab"; 856 assert(!rule(input)).successful); // NOK, does not begin by abc 857 ---- 858 859 If it fails, the last children will contain the failed node. That way, when printing, as sort of diagnostic is given: 860 861 ---- 862 alias and!(literal!"abc", charRange!('a','z')) rule; // 'abc[a-z]', aka 'abc' followed by any letter between 'a' and 'z'. 863 ParseTree input = ParseTree("",false,[],"abc1"); // equivalent to "abc1" 864 865 auto failure = rule(input); 866 writeln(failure); 867 /+ 868 writes: 869 and (failure) 870 +-literal(abc) [0, 3]["abc"] 871 +-charRange(a,z) failure at line 0, col 3, after "abc" a char between 'a' and 'z', but got "1" 872 +/ 873 ---- 874 875 So we know the global 'and' failed, that the first sub-rule ('abc') succeeded on input[0..3] with "abc" 876 and that the second subrule ('[a-z]') failed at position 3 (so, on '1'). 877 */ 878 template and(rules...) if (rules.length > 0) 879 { 880 881 string ctfeGetNameAnd() 882 { 883 string name = "and!("; 884 foreach(i,rule; rules) 885 name ~= __traits(identifier, rule) // because using getName!(rule) causes an infinite loop during compilation 886 // for recursive rules 887 ~ (i < rules.length -1 ? ", " : ""); 888 name ~= ")"; 889 return name; 890 } 891 892 enum name = ctfeGetNameAnd(); 893 894 ParseTree and(ParseTree p) 895 { 896 bool keepNode(ParseTree node) 897 { 898 return node.name.startsWith("keep!(") 899 || ( !node.name.startsWith("discard!(") 900 //&& !node.name.startsWith("drop!(") 901 && node.matches !is null 902 //&& node.begin != node.end 903 ); 904 } 905 906 ParseTree result = ParseTree(name, false, [], p.input, p.end, p.end, []); 907 908 foreach(i,r; rules) 909 { 910 ParseTree temp = r(result); 911 result.end = temp.end; 912 if (temp.successful) 913 { 914 if (keepNode(temp)) 915 { 916 result.matches ~= temp.matches; 917 if (temp.name.startsWith("drop!(")) 918 {} 919 else if (temp.name.startsWith("propagate!(")) 920 result.children ~= temp.children; 921 else 922 result.children ~= temp; 923 } 924 } 925 else 926 { 927 result.children ~= temp;// add the failed node, to indicate which failed 928 if (temp.matches.length > 0) 929 result.matches ~= temp.matches[$-1]; 930 return result; // and end the parsing attempt right there 931 } 932 } 933 result.successful = true; 934 return result; 935 } 936 937 ParseTree and(string input) 938 { 939 return .and!(rules)(ParseTree("",false,[],input)); 940 } 941 942 string and(GetName g) 943 { 944 return name; 945 } 946 } 947 948 unittest // 'and' unit test 949 { 950 alias literal!"abc" abc; 951 alias literal!"de" de; 952 alias literal!"f" f; 953 954 alias and!(abc) abcAnd; 955 alias and!(abc,de) abcde; 956 alias and!(abc,de,f) abcdef; 957 alias and!(eps, abc, eps, de, eps, f, eps) withEps; 958 959 //assert(getName!(abcAnd)() == `and!(literal!("abc"))`); 960 //assert(getName!(abcde)() == `and!(literal!("abc"), literal!("de"))`); 961 //assert(getName!(abcdef)() == `and!(literal!("abc"), literal!("de"), literal!("f"))`); 962 963 ParseTree input = ParseTree("",false,[], "abcdefghi"); 964 965 ParseTree result = abcAnd(input); 966 967 assert(result.successful, "and!('abc') parses 'abcdefghi'"); 968 assert(result.matches == ["abc"], "and!('abc') matches 'abc' at the beginning of 'abcdefghi'"); 969 assert(result.end == input.end+3, "and!('abc') advances the index by 'abc' size (3)."); 970 assert(result.children == [abc(input)], "and!('abc') has one child: the one created by 'abc'."); 971 972 result = abcde(input); 973 974 assert(result.successful, "and!('abc','de') parses 'abcdefghi'"); 975 assert(result.matches == ["abc","de"], 976 "and!('abc','de') matches 'abc' and 'de' at the beginning of 'abcdefghi'"); 977 assert(result.end == input.end+5, "and!('abc','de') advances the index by 3+2 positions."); 978 assert(result.children == [abc(input), de(abc(input))], 979 "and!('abc','de') has two children, created by 'abc' and 'de'."); 980 981 result = abcdef(input); 982 983 assert(result.successful, "and!('abc','de','f') parses 'abcdefghi'"); 984 assert(result.matches == ["abc","de","f"], 985 "and!('abc','de','f') matches 'abcdef' at the beginning of 'abcdefghi'"); 986 assert(result.end == input.end+6, "and!('abc','de','f') advances the index by 3+2+1 positions."); 987 assert(result.children == [abc(input), de(abc(input)), f(de(abc(input)))] 988 , "and!('abc','de') has two children, created by 'abc' and 'de'."); 989 990 result = withEps(input); 991 992 assert(result.successful, "and!('','abc','','de','','f','') parses 'abcdefghi'"); 993 assert(result.matches == ["","abc","","de","","f",""], 994 "and!('','abc','','de','','f','') matches 'abcdef' at the beginning of 'abcdefghi'"); 995 assert(result.end == input.end+6, 996 "and!('','abc','','de','','f','') advances the index by 0+3+0+2+0+1+0 positions."); 997 998 input.input = "bcdefghi"; 999 1000 result = abcdef(input); 1001 1002 assert(!result.successful, "'abc' 'de' 'f' fails on 'bcdefghi'"); 1003 //assert(result.matches is null, "'abc' 'de' 'f' has no match on 'bcdefghi'"); 1004 assert(result.end == input.end); 1005 assert(result.children == [abc(input)], "'abc' 'de' 'f' has one child (a failure) on 'bcdefghi'"); 1006 1007 input.input = "abc_efghi"; 1008 1009 result = abcdef(input); 1010 1011 assert(!result.successful, "'abc' 'de' 'f' fails on 'abc_efghi'"); 1012 //assert(result.matches == ["abc"], "Only the first match, from 'abc'."); 1013 assert(result.end == input.end+3, "Advances by 3 positions, due to 'abc'"); 1014 assert(result.children == [abc(input), de(abc(input))] 1015 , "'abc' 'de' 'f' has two child on 'abc_efghi', the one from 'abc' (success) and the one from 'de' (failure)."); 1016 } 1017 1018 template wrapAround(alias before, alias target, alias after) 1019 { 1020 ParseTree wrapAround(ParseTree p) 1021 { 1022 ParseTree temp = before(p); 1023 if (!temp.successful) 1024 return temp; 1025 1026 ParseTree result = target(temp); 1027 if (!result.successful) 1028 return result; 1029 result.begin = temp.begin; 1030 1031 temp = after(result); 1032 if (!temp.successful) 1033 return temp; 1034 1035 result.end = temp.end; 1036 return result; 1037 } 1038 1039 ParseTree wrapAround(string input) 1040 { 1041 return .wrapAround!(before, target, after)(ParseTree("",false,[],input)); 1042 } 1043 1044 string wrapAround(GetName g) 1045 { 1046 return "wrapAround!(" ~ getName!(before)() ~ 1047 ", " ~ getName!(target)() ~ 1048 ", " ~ getName!(after)() ~ ")"; 1049 } 1050 } 1051 1052 /** 1053 Basic operator: it matches if one of its subrules (stored in the rules template parameter tuple) match 1054 the input. The subrules are tested in order, from rules[0] to rules[$-1]. 1055 1056 The matching subrule parse trees is stored as its only child and its matches field 1057 will contain all the subrule matches, in order. 1058 1059 ---- 1060 alias or!(literal!"abc", charRange!('a','z')) rule; // abc or, failing that, any letter between a and z. 1061 ParseTree input = ParseTree("",false,[],"defg"); // low-level plumbing, the rules described here act on ParseTree's not strings. 1062 // It's equivalent to "defg" as input 1063 auto result = rule(input); 1064 1065 assert(result.successful); // OK 1066 assert(result.matches == ["d"]); // stores the (in this case) only match 1067 assert(result.children.length == 1); // one child, the result of "abc" or [a-z], depending on which rule succeeded. 1068 1069 input.input = "abc"; // changing the input string; 1070 assert(rule(input)).successful); // Still OK 1071 input.input = "1abc"; 1072 assert(!rule(input)).successful); // NOK, does not begin by abc nor by [a-z] 1073 ---- 1074 1075 If it fails, the last children will contain the failed node that matched furthest (longes match). 1076 That way, when printing, as sort of diagnostic is given: 1077 1078 ---- 1079 alias or!(literal!"abc", and!(literal!"ab", charRange!('0','9'))) rule; // 'abc' or 'ab[0-9]' 1080 ParseTree input = ParseTree("",false,[],"abd"); // equivalent to "abd" 1081 1082 auto failure = rule(input); 1083 writeln(failure); 1084 /+ 1085 or (failure) 1086 +-and (failure) 1087 +-literal(ab) [0, 2]["ab"] 1088 +-charRange(0,9) failure at line 0, col 2, after "ab" expected a char between '0' and '9', but got "d" 1089 +/ 1090 ---- 1091 1092 So we know 'or' failed, that the 'and' sub-rule had the longest match, matching 'ab' and failing for [0-9] on index 2. 1093 */ 1094 template or(rules...) if (rules.length > 0) 1095 { 1096 string ctfeGetNameOr() 1097 { 1098 string name = "or!("; 1099 foreach(i,rule; rules) 1100 name ~= getName!(rule) 1101 ~ (i < rules.length -1 ? ", " : ""); 1102 name ~= ")"; 1103 return name; 1104 } 1105 1106 enum name = ctfeGetNameOr(); 1107 1108 ParseTree or(ParseTree p) 1109 { 1110 // error-management 1111 ParseTree longestFail = ParseTree(name, false, [], p.input, p.end, 0); 1112 string[] errorStrings; 1113 size_t errorStringChars; 1114 string orErrorString; 1115 1116 ParseTree[rules.length] results; 1117 string[rules.length] names; 1118 size_t[rules.length] failedLength; 1119 size_t maxFailedLength; 1120 1121 // Real 'or' loop 1122 foreach(i,r; rules) 1123 { 1124 ParseTree temp = r(p); 1125 if (temp.successful) 1126 { 1127 temp.children = [temp]; 1128 temp.name = name; 1129 return temp; 1130 } 1131 else 1132 { 1133 enum errName = " (" ~ getName!(r)() ~")"; 1134 failedLength[i] = temp.end; 1135 if (temp.end >= longestFail.end) 1136 { 1137 maxFailedLength = temp.end; 1138 longestFail = temp; 1139 names[i] = errName; 1140 results[i] = temp; 1141 1142 if (temp.end == longestFail.end) 1143 errorStringChars += temp.matches[$-1].length + errName.length + 4; 1144 else 1145 errorStringChars = temp.matches[$-1].length + errName.length + 4; 1146 } 1147 // Else, this error parsed less input than another one: we discard it. 1148 } 1149 } 1150 1151 1152 // All subrules failed, we will take the longest match as the result 1153 // If more than one node failed at the same (farthest) position, we concatenate their error messages 1154 1155 1156 char[] errString;// = new char[](errorStringChars); 1157 errString.length = errorStringChars; 1158 uint start = 0; 1159 foreach(i; 0..rules.length) 1160 { 1161 if (failedLength[i] == maxFailedLength) 1162 { 1163 auto temp = results[i]; 1164 auto len = temp.matches[$-1].length; 1165 auto nlen = names[i].length; 1166 errString[start .. start+len] = temp.matches[$-1][]; 1167 errString[start+len .. start+len+names[i].length] = names[i][]; 1168 errString[start+len+nlen .. start+len+nlen+4] = " or "; 1169 start += len + names[i].length + 4; 1170 } 1171 } 1172 orErrorString = cast(string)(errString[0..$-4]); 1173 1174 longestFail.matches = longestFail.matches[0..$-1] // discarding longestFail error message 1175 ~ [orErrorString]; // and replacing it by the new, concatenated one. 1176 longestFail.name = name; 1177 longestFail.begin = p.end; 1178 return longestFail; 1179 } 1180 1181 ParseTree or(string input) 1182 { 1183 return .or!(rules)(ParseTree("",false,[],input)); 1184 } 1185 1186 string or(GetName g) 1187 { 1188 return name; 1189 } 1190 } 1191 1192 unittest // 'or' unit test 1193 { 1194 alias charRange!('a','b') ab; 1195 alias charRange!('c','d') cd; 1196 1197 alias or!(ab) abOr; 1198 alias or!(ab,cd) abOrcd; 1199 1200 assert(getName!(ab)() == "charRange!('a','b')"); 1201 assert(getName!(cd)() == "charRange!('c','d')"); 1202 1203 ParseTree input = ParseTree("",false,[], "abcdefghi"); 1204 1205 ParseTree result = abOr(input); 1206 1207 assert(result.name == "or!(charRange!('a','b'))", "or name test."); 1208 assert(result.successful, "or!([a-b]) parses 'abcdefghi'"); 1209 assert(result.matches == ["a"], "or!([a-b]) matches 'a' at the beginning of 'abcdefghi'"); 1210 assert(result.end == input.end+1, "or!([a-b]) advances the index by 'a' size (1)."); 1211 assert(result.children == [ab(input)], "or!([a-b]) has one child: the one created by '[a-b]'."); 1212 1213 result = abOrcd(input); 1214 1215 assert(result.name == "or!(charRange!('a','b'), charRange!('c','d'))", "or name test."); 1216 assert(result.successful, "or!([a-b],[c-d]) parses 'abcdefghi'"); 1217 assert(result.matches == ["a"], "or!([a-b],[c-d]) matches 'a' at the beginning of 'abcdefghi'"); 1218 assert(result.end == input.end+1, "or!([a-b],[c-d]) advances the index by 1 position."); 1219 1220 assert(result.children == [ab(input)], "or!([a-b],[c-d]) has one child, created by [a-b]."); 1221 1222 input.input = "cdefghi"; 1223 1224 result = abOrcd(input); 1225 1226 assert(result.name == "or!(charRange!('a','b'), charRange!('c','d'))", "or name test."); 1227 assert(result.successful, "or!([a-b],[c-d]) parses 'cdefghi'"); 1228 assert(result.matches == ["c"], "or!([a-b],[c-d]) matches 'c' at the beginning of 'cdefghi'"); 1229 assert(result.end == input.end+1, "or!([a-b],[c-d]) advances the index by 1 position."); 1230 1231 assert(result.children == [cd(input)], "or!([a-b],[c-d]) has one child, created by [c-d]."); 1232 1233 input.input = "_abcdefghi"; 1234 1235 result = abOrcd(input); 1236 1237 assert(!result.successful, "or!([a-b],[c-d]) fails on '_abcdefghi'"); 1238 assert(result.end == input.end+0, "or!([a-b],[c-d]) does not advance the index."); 1239 assert(result.matches == 1240 [ "a char between 'a' and 'b' (charRange!('a','b')) or a char between 'c' and 'd' (charRange!('c','d'))"] 1241 , "or!([a-b],[c-d]) error message. |" ~ result.matches[0]~ "|"); 1242 1243 input.input = ""; 1244 1245 result = abOrcd(input); 1246 1247 assert(!result.successful, "or!([a-b],[c-d]) fails on and empty input"); 1248 assert(result.end == input.end+0, "or!([a-b],[c-d]) does not advance the index."); 1249 assert(result.matches == 1250 [ "a char between 'a' and 'b' (charRange!('a','b')) or a char between 'c' and 'd' (charRange!('c','d'))"] 1251 , "or!([a-b],[c-d]) error message."); 1252 } 1253 1254 1255 /** 1256 Compile-time switch trie from Brian Schott 1257 */ 1258 class Trie(V) : TrieNode!(V) 1259 { 1260 /** 1261 * Adds the given value to the trie with the given key 1262 */ 1263 void add(string key, V value) pure 1264 { 1265 TrieNode!(V) current = this; 1266 foreach(dchar keyPart; key) 1267 { 1268 if ((keyPart in current.children) is null) 1269 { 1270 auto node = new TrieNode!(V); 1271 current.children[keyPart] = node; 1272 current = node; 1273 } 1274 else 1275 current = current.children[keyPart]; 1276 } 1277 current.value = value; 1278 } 1279 } 1280 1281 class TrieNode(V) 1282 { 1283 V value; 1284 TrieNode!(V)[dchar] children; 1285 } 1286 1287 string printCaseStatements(V)(TrieNode!(V) node, string indentString) 1288 { 1289 string s = ""; 1290 string idnt = indentString; 1291 1292 void incIndent() { idnt ~= " "; } 1293 void decIndent() { idnt = idnt[2..$]; } 1294 1295 void put(string k) { s ~= idnt ~ k; } 1296 void append(string k) { s ~= k;} 1297 1298 foreach(dchar k, TrieNode!(V) v; node.children) 1299 { 1300 put("case '"); 1301 switch(k) 1302 { 1303 case '\n': append("\\n"); break; 1304 case '\t': append("\\t"); break; 1305 case '\r': append("\\r"); break; 1306 case 92: append("\\"); break; 1307 default: append(to!string(k)); 1308 } 1309 append("':\n"); 1310 incIndent(); 1311 1312 put("temp.end++;\n"); 1313 if (v.children.length > 0) 1314 { 1315 put("if (temp.end >= temp.input.length)\n"); 1316 put("{\n"); 1317 incIndent(); 1318 1319 if (node.children[k].value.length != 0) 1320 put("return ParseTree(name, true, [`" ~ node.children[k].value ~ "`], temp.input, p.end, temp.end)"); 1321 else 1322 put("return ParseTree(name, false, [failString], p.input, p.end, p.end)"); 1323 1324 append(";\n"); 1325 decIndent(); 1326 1327 put("}\n"); 1328 put("switch (temp.input[temp.end])\n"); 1329 put("{\n"); 1330 1331 incIndent(); 1332 append(printCaseStatements(v, idnt)); 1333 1334 put("default:\n"); 1335 incIndent(); 1336 if (v.value.length != 0) 1337 put("return ParseTree(name, true, [`" ~ v.value ~ "`], temp.input, p.end, temp.end)"); 1338 else 1339 put("return ParseTree(name, false, [failString], p.input, p.end, p.end)"); 1340 append(";\n"); 1341 decIndent(); 1342 decIndent(); 1343 put("}\n"); 1344 } 1345 else 1346 { 1347 if (v.value.length != 0) 1348 put("return ParseTree(name, true, [`" ~ v.value ~ "`], temp.input, p.end, temp.end)"); 1349 else 1350 put("return ParseTree(name, false, [failString], p.input, p.end, p.end)"); 1351 append(";\n"); 1352 } 1353 } 1354 return s; 1355 } 1356 1357 string generateCaseTrie(string[] args ...) 1358 { 1359 auto t = new Trie!(string); 1360 foreach(arg; args) 1361 { 1362 t.add(arg, arg); 1363 } 1364 return printCaseStatements(t, ""); 1365 } 1366 1367 /** 1368 or special case for literal list ("abstract"/"alias"/...) 1369 */ 1370 template keywords(kws...) if (kws.length > 0) 1371 { 1372 string ctfeGetNameKeywords() 1373 { 1374 string name= "keywords!("; 1375 foreach(i,kw;kws) 1376 name ~= "\"" ~ kw ~ "\""~ (i < kws.length -1 ? ", " : ""); 1377 name ~= ")"; 1378 return name; 1379 } 1380 1381 string ctfeConcatKeywords() 1382 { 1383 string s = "["; 1384 foreach(i, k; kws) 1385 { 1386 s ~= "\"" ~ k ~ "\""; 1387 if (i < kws.length - 1) 1388 s ~= ", "; 1389 } 1390 return s ~= "]"; 1391 } 1392 1393 enum name = ctfeGetNameKeywords(); 1394 enum failString = "one among " ~ ctfeConcatKeywords(); 1395 1396 ParseTree keywords(ParseTree p) 1397 { 1398 string keywordCode(string[] keywords) 1399 { 1400 string result; 1401 foreach(kw; keywords) 1402 result ~= "if (p.end+"~to!string(kw.length) ~ " <= p.input.length " 1403 ~" && p.input[p.end..p.end+"~to!string(kw.length)~"]==`" 1404 ~kw~"`) return ParseTree(`" 1405 ~name~"`,true,[`"~kw~"`],p.input,p.end,p.end+" 1406 ~to!string(kw.length)~");\n"; 1407 1408 result ~= "return ParseTree(`"~name~"`,false,[`" ~ failString ~ "`],p.input,p.end,p.end);"; 1409 1410 return result; 1411 } 1412 1413 static if (KEYWORDS == IFCHAIN) 1414 { 1415 mixin(keywordCode([kws])); 1416 } 1417 else static if (KEYWORDS == TRIE) 1418 { 1419 auto temp = p; 1420 if (p.end < p.input.length) // is this conditional required? 1421 { 1422 switch(p.input[p.end]) 1423 { 1424 mixin(generateCaseTrie([kws])); 1425 mixin("default: return ParseTree(`"~name~"`,false,[`" ~ failString ~ "`],p.input,p.end,p.end);"); 1426 } 1427 } 1428 else 1429 { 1430 mixin("return ParseTree(`"~name~"`,false,[`" ~ failString ~ "`],p.input,p.end,p.end);"); 1431 } 1432 } 1433 } 1434 1435 ParseTree keywords(string input) 1436 { 1437 return .keywords!(kws)(ParseTree("",false,[],input)); 1438 } 1439 1440 string keywords(GetName g) 1441 { 1442 return name; 1443 } 1444 } 1445 1446 unittest 1447 { 1448 alias keywords!("abc","de","f") kw; 1449 1450 assert(getName!(kw)() == `keywords!("abc", "de", "f")`); 1451 1452 ParseTree input = ParseTree("",false,[],"abcd"); 1453 1454 ParseTree result = kw(input); 1455 1456 assert(result.name == `keywords!("abc", "de", "f")`, "keywords name test."); 1457 assert(result.successful, "keywords success on `abcd`"); 1458 assert(result.matches == ["abc"], "keywords matches `abc` on `abcd`"); 1459 assert(result.end == input.end+3, "keywords advances the index by 3 positions."); 1460 assert(result.children is null, "No children for `keywords`."); 1461 1462 input.input = "def"; 1463 1464 result = kw(input); 1465 1466 assert(result.successful, "keywords success on `def`"); 1467 assert(result.matches == ["de"], "keywords matches `de` on `def`"); 1468 assert(result.end == input.end+2, "keywords advances the index by 2 positions."); 1469 assert(result.children is null, "No children for `keywords`."); 1470 1471 1472 input.input = "ab_def"; 1473 1474 result = kw(input); 1475 1476 assert(!result.successful, "keywords fails on `ab_def`."); 1477 assert(result.matches == [`one among ["abc", "de", "f"]`], "keywords error message." ~ result.matches[0]); 1478 assert(result.end == input.end, "keywords does not advance the index."); 1479 assert(result.children is null, "No children for `keywords`."); 1480 1481 input.input = ""; 1482 1483 result = kw(input); 1484 1485 assert(!result.successful, "keywords fails on an empty input."); 1486 assert(result.matches == [`one among ["abc", "de", "f"]`], "keywords error message."); 1487 assert(result.end == input.end, "keywords does not advance the index."); 1488 assert(result.children is null, "No children for `keywords`."); 1489 } 1490 1491 1492 /** 1493 Tries to match subrule 'r' zero or more times. It always succeeds, since if 'r' fails 1494 from the very beginning, it matched 'r' zero times... 1495 1496 Its matches are those of its subrules (they might be different for each match) and its 1497 children are all the parse trees returned by the successive application of 'r'. 1498 1499 ---- 1500 alias zeroOrMore!(or!(literal!"abc", literal!"d")) rule; // in PEG-speak: '("abc" / "d")*' 1501 ParseTree input = ParseTree("",false,[], "abcdabce"); 1502 1503 ParseTree result = rule(input); 1504 1505 assert(result.successful); 1506 assert(result.matches == ["abc", "d", "abc"]); 1507 assert(result.end == 7); // matched "abcdabce"[0..7] => "abcdabc". "e" at the end is not part of the parse tree. 1508 assert(result.children.length == 3); 1509 writeln(result); 1510 /+ 1511 writes: 1512 zeroOrMore [0, 7]["abc", "d", "abc"] 1513 +-or [0, 3]["abc"] 1514 | +-literal(abc) [0, 3]["abc"] 1515 +-or [3, 4]["d"] 1516 | +-literal(d) [3, 4]["d"] 1517 +-or [4, 7]["abc"] 1518 +-literal(abc) [4, 7]["abc"] 1519 +/ 1520 ---- 1521 1522 So we know the first child used the 'literal!"abc"' sub-rule and matched input[0..3]. 1523 The second matched input[3..4] and the third input[4..7]. 1524 1525 ---- 1526 input = ParseTree("",false,[], "efgh"); 1527 result = rule(input); 1528 assert(result.successful); // succeed, even though all patterns failed. 1529 assert(result.children.length == 0); 1530 ---- 1531 */ 1532 template zeroOrMore(alias r) 1533 { 1534 enum name = "zeroOrMore!(" ~ getName!(r) ~ ")"; 1535 1536 ParseTree zeroOrMore(ParseTree p) 1537 { 1538 auto result = ParseTree(name, true, [], p.input, p.end, p.end); 1539 auto temp = r(result); 1540 while(temp.successful 1541 && (temp.begin < temp.end // To avoid infinite loops on epsilon-matching rules 1542 || temp.name.startsWith("discard!("))) 1543 { 1544 result.matches ~= temp.matches; 1545 result.children ~= temp; 1546 result.end = temp.end; 1547 temp = r(result); 1548 } 1549 result.successful = true; 1550 return result; 1551 } 1552 1553 ParseTree zeroOrMore(string input) 1554 { 1555 return .zeroOrMore!(r)(ParseTree("",false,[],input)); 1556 } 1557 1558 string zeroOrMore(GetName g) 1559 { 1560 return name; 1561 } 1562 } 1563 1564 unittest // 'zeroOrMore' unit test 1565 { 1566 alias literal!"a" a; 1567 alias literal!"abc" abc; 1568 alias charRange!('a','z') az; 1569 1570 alias zeroOrMore!(a) as; 1571 alias zeroOrMore!(abc) abcs; 1572 alias zeroOrMore!(az) azs; 1573 1574 assert(getName!(as)() == `zeroOrMore!(literal!("a"))`); 1575 assert(getName!(abcs)() == `zeroOrMore!(literal!("abc"))`); 1576 assert(getName!(azs)() == `zeroOrMore!(charRange!('a','z'))`); 1577 1578 assert(as("").successful); 1579 assert(as("a").successful); 1580 assert(as("aa").successful); 1581 assert(as("aaa").successful); 1582 assert(as("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa").successful); 1583 assert(as("b").successful); 1584 1585 ParseTree result = as("aaa"); 1586 1587 assert(result.name == `zeroOrMore!(literal!("a"))`); 1588 assert(result.successful); 1589 assert(result.matches == ["a","a","a"]); 1590 assert(result.begin == 0); 1591 assert(result.end == 3); 1592 assert(result.children.length == 3); 1593 assert(result.children == [ a("aaa"), a(a("aaa")), a(a(a("aaa")))]); 1594 1595 assert(abcs("").successful); 1596 assert(abcs("abc").successful); 1597 assert(abcs("abcabc").successful); 1598 assert(abcs("abcabcabc").successful); 1599 assert(abcs("abcabcabcabcabcabcabcabcabcabcabcabcabcabcabc").successful); 1600 assert(abcs("ab").successful); 1601 1602 result = abcs("abcabcabc"); 1603 1604 assert(result.name == `zeroOrMore!(literal!("abc"))`); 1605 assert(result.successful); 1606 assert(result.matches == ["abc","abc","abc"]); 1607 assert(result.begin == 0); 1608 assert(result.end == 3*3); 1609 assert(result.children.length == 3); 1610 assert(result.children == [ abc("abcabcabc"), abc(abc("abcabcabc")), abc(abc(abc("abcabcabc")))]); 1611 1612 assert(azs("").successful); 1613 assert(azs("a").successful); 1614 assert(azs("abc").successful); 1615 assert(azs("abcdefghijklmnoqrstuvwxyz").successful); 1616 assert(azs("abcdefghijklmnoqrstuvwxyz 1234567890").successful); 1617 1618 result = azs("abc"); 1619 1620 assert(result.name == `zeroOrMore!(charRange!('a','z'))`); 1621 assert(result.successful); 1622 assert(result.matches == ["a","b","c"]); 1623 assert(result.begin == 0); 1624 assert(result.end == 3); 1625 assert(result.children.length == 3); 1626 assert(result.children == [ az("abc"), az(az("abc")), az(az(az("abc")))]); 1627 } 1628 1629 /** 1630 Tries to match subrule 'r' one or more times. If 'r' fails 1631 from the very beginning, it fails and else succeeds. 1632 1633 Its matches are those of its subrules (they might be different for each match) and its 1634 children are all the parse trees returned by the successive application of 'r'. 1635 1636 ---- 1637 alias oneOrMore!(or!(literal!"abc", literal!"d")) rule; // in PEG-speak: '("abc" / "d")*' 1638 ParseTree input = ParseTree("",false,[], "abcdabce"); 1639 1640 ParseTree result = rule(input); 1641 1642 assert(result.successful); 1643 assert(result.matches == ["abc", "d", "abc"]); 1644 assert(result.end == 7); // matched "abcdabce"[0..7] => "abcdabc". "e" at the end is not part of the parse tree. 1645 assert(result.children.length == 3); 1646 writeln(result); 1647 /+ 1648 writes: 1649 oneOrMore [0, 7]["abc", "d", "abc"] 1650 +-or [0, 3]["abc"] 1651 | +-literal(abc) [0, 3]["abc"] 1652 +-or [3, 4]["d"] 1653 | +-literal(d) [3, 4]["d"] 1654 +-or [4, 7]["abc"] 1655 +-literal(abc) [4, 7]["abc"] 1656 +/ 1657 ---- 1658 1659 So we know the first child used the 'literal!"abc"' sub-rule and matched input[0..3]. 1660 The second matched input[3..4] and the third input[4..7]. 1661 1662 ---- 1663 input = ParseTree("",false,[], "efgh"); 1664 result = rule(input); 1665 assert(!result.successful); // fails, since it failed on the first try. 1666 ---- 1667 */ 1668 template oneOrMore(alias r) 1669 { 1670 enum name = "oneOrMore!(" ~ getName!(r) ~ ")"; 1671 1672 ParseTree oneOrMore(ParseTree p) 1673 { 1674 auto result = ParseTree(name, false, [], p.input, p.end, p.end); 1675 auto temp = r(result); 1676 1677 if (!temp.successful) 1678 { 1679 result.matches = temp.matches; 1680 result.children = [temp]; 1681 result.end = temp.end; 1682 } 1683 else 1684 { 1685 while( temp.successful 1686 && (temp.begin < temp.end // To avoid infinite loops on epsilon-matching rules 1687 || temp.name.startsWith("discard!("))) 1688 { 1689 result.matches ~= temp.matches; 1690 result.children ~= temp; 1691 result.end = temp.end; 1692 temp = r(result); 1693 } 1694 result.successful = true; 1695 } 1696 return result; 1697 } 1698 1699 ParseTree oneOrMore(string input) 1700 { 1701 return .oneOrMore!(r)(ParseTree("",false,[],input)); 1702 } 1703 1704 string oneOrMore(GetName g) 1705 { 1706 return name; 1707 } 1708 } 1709 1710 unittest // 'oneOrMore' unit test 1711 { 1712 alias literal!"a" a; 1713 alias literal!"abc" abc; 1714 alias charRange!('a','z') az; 1715 1716 alias oneOrMore!(a) as; 1717 alias oneOrMore!(abc) abcs; 1718 alias oneOrMore!(az) azs; 1719 1720 assert(getName!(as)() == `oneOrMore!(literal!("a"))`); 1721 assert(getName!(abcs)() == `oneOrMore!(literal!("abc"))`); 1722 assert(getName!(azs)() == `oneOrMore!(charRange!('a','z'))`); 1723 1724 assert(!as("").successful); 1725 assert(as("a").successful); 1726 assert(as("aa").successful); 1727 assert(as("aaa").successful); 1728 assert(as("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa").successful); 1729 assert(!as("b").successful); 1730 1731 ParseTree result = as("aaa"); 1732 1733 assert(result.name == `oneOrMore!(literal!("a"))`); 1734 assert(result.successful); 1735 assert(result.matches == ["a","a","a"]); 1736 assert(result.begin == 0); 1737 assert(result.end == 3); 1738 assert(result.children.length == 3); 1739 assert(result.children == [ a("aaa"), a(a("aaa")), a(a(a("aaa")))]); 1740 1741 assert(!abcs("").successful); 1742 assert(abcs("abc").successful); 1743 assert(abcs("abcabc").successful); 1744 assert(abcs("abcabcabc").successful); 1745 assert(abcs("abcabcabcabcabcabcabcabcabcabcabcabcabcabcabc").successful); 1746 assert(!abcs("ab").successful); 1747 1748 result = abcs("abcabcabc"); 1749 1750 assert(result.name == `oneOrMore!(literal!("abc"))`); 1751 assert(result.successful); 1752 assert(result.matches == ["abc","abc","abc"]); 1753 assert(result.begin == 0); 1754 assert(result.end == 3*3); 1755 assert(result.children.length == 3); 1756 assert(result.children == [ abc("abcabcabc"), abc(abc("abcabcabc")), abc(abc(abc("abcabcabc")))]); 1757 1758 assert(!azs("").successful); 1759 assert(azs("a").successful); 1760 assert(azs("abc").successful); 1761 assert(azs("abcdefghijklmnoqrstuvwxyz").successful); 1762 assert(azs("abcdefghijklmnoqrstuvwxyz 1234567890").successful); 1763 assert(!azs(".").successful); 1764 1765 result = azs("abc"); 1766 1767 assert(result.name == `oneOrMore!(charRange!('a','z'))`); 1768 assert(result.successful); 1769 assert(result.matches == ["a","b","c"]); 1770 assert(result.begin == 0); 1771 assert(result.end == 3); 1772 assert(result.children.length == 3); 1773 assert(result.children == [ az("abc"), az(az("abc")), az(az(az("abc")))]); 1774 } 1775 1776 /** 1777 Given a subrule 'r', represents the expression 'r?'. It tries to match 'r' and if this matches 1778 successfully, it returns this match. If 'r' failed, 'r?' is still a success, but without any child nor match. 1779 1780 ---- 1781 alias option!(literal!"abc") rule; // Aka '"abc"?' 1782 ParseTree input = ParseTree("",false,[],"abcd"); 1783 1784 ParseTree result = rule(input); 1785 assert(result.successful); 1786 assert(result.matches == ["abc"]); 1787 assert(result.children.length == 1); 1788 assert(result.children[0] == literal!"abc"(input)); 1789 ---- 1790 */ 1791 template option(alias r) 1792 { 1793 enum name = "option!(" ~ getName!(r) ~ ")"; 1794 1795 ParseTree option(ParseTree p) 1796 { 1797 ParseTree result = r(p); 1798 if (result.successful) 1799 return ParseTree(name, true, result.matches, result.input, result.begin, result.end, [result]); 1800 else 1801 return ParseTree(name, true, [], p.input, p.end, p.end, null); 1802 } 1803 1804 ParseTree option(string input) 1805 { 1806 return .option!(r)(ParseTree("",false,[],input)); 1807 } 1808 1809 string option(GetName g) 1810 { 1811 return name; 1812 } 1813 } 1814 1815 unittest // 'option' unit test 1816 { 1817 alias literal!"a" a; 1818 alias literal!"abc" abc; 1819 1820 alias option!(a) a_; 1821 alias option!(abc) abc_; 1822 1823 assert(getName!(a_)() == `option!(literal!("a"))`); 1824 assert(getName!(abc_)() == `option!(literal!("abc"))`); 1825 1826 assert(a_("").successful); 1827 assert(a_("a").successful); 1828 assert(a_("aa").successful); 1829 assert(a_("b").successful); 1830 1831 ParseTree result = a_("a"); 1832 1833 assert(result.name == `option!(literal!("a"))`); 1834 assert(result.successful); 1835 assert(result.matches == ["a"]); 1836 assert(result.begin == 0); 1837 assert(result.end == 1); 1838 assert(result.children.length == 1); 1839 assert(result.children == [ a("a")]); 1840 1841 result = a_(""); 1842 1843 assert(result.name == `option!(literal!("a"))`); 1844 assert(result.successful); 1845 assert(result.matches is null); 1846 assert(result.begin == 0); 1847 assert(result.end == 0); 1848 assert(result.children.length == 0); 1849 1850 1851 assert(abc_("").successful); 1852 assert(abc_("abc").successful); 1853 assert(abc_("abcabc").successful); 1854 assert(abc_("ab").successful); 1855 1856 result = abc_("abcdef"); 1857 1858 assert(result.name == `option!(literal!("abc"))`); 1859 assert(result.successful); 1860 assert(result.matches == ["abc"]); 1861 assert(result.begin == 0); 1862 assert(result.end == 3); 1863 assert(result.children.length == 1); 1864 assert(result.children == [ abc("abcdef")]); 1865 1866 result = abc_("def"); 1867 1868 assert(result.name == `option!(literal!("abc"))`); 1869 assert(result.successful); 1870 assert(result.matches is null); 1871 assert(result.begin == 0); 1872 assert(result.end == 0); 1873 assert(result.children.length == 0); 1874 } 1875 1876 /** 1877 Tries 'r' on the input. If it succeeds, the rule also succeeds, without consuming any input. 1878 If 'r' fails, then posLookahead!r also fails. Low-level implementation of '&r'. 1879 */ 1880 template posLookahead(alias r) 1881 { 1882 enum name = "posLookahead!(" ~ getName!(r) ~ ")"; 1883 1884 ParseTree posLookahead(ParseTree p) 1885 { 1886 ParseTree temp = r(p); 1887 if (temp.successful) 1888 return ParseTree(name, temp.successful, [], p.input, p.end, p.end); 1889 else 1890 return ParseTree(name, temp.successful, [temp.matches[$-1]], p.input, p.end, p.end); 1891 } 1892 1893 ParseTree posLookahead(string input) 1894 { 1895 return .posLookahead!(r)(ParseTree("",false,[],input)); 1896 } 1897 1898 string posLookahead(GetName g) 1899 { 1900 return name; 1901 } 1902 } 1903 1904 unittest // 'posLookahead' unit test 1905 { 1906 alias literal!"a" a; 1907 alias literal!"abc" abc; 1908 1909 alias posLookahead!(a) a_; 1910 alias posLookahead!(abc) abc_; 1911 1912 assert(getName!(a_)() == `posLookahead!(literal!("a"))`); 1913 assert(getName!(abc_)() == `posLookahead!(literal!("abc"))`); 1914 1915 assert(!a_("").successful); 1916 assert(a_("a").successful); 1917 assert(a_("aa").successful); 1918 assert(!a_("b").successful); 1919 1920 ParseTree result = a_("a"); 1921 1922 assert(result.name == `posLookahead!(literal!("a"))`); 1923 assert(result.successful); 1924 assert(result.matches is null); 1925 assert(result.begin == 0); 1926 assert(result.end == 0); 1927 assert(result.children.length == 0); 1928 1929 result = a_(""); 1930 1931 assert(result.name == `posLookahead!(literal!("a"))`); 1932 assert(!result.successful); 1933 assert(result.matches == [`"a"`], "posLookahead error message."); 1934 assert(result.begin == 0); 1935 assert(result.end == 0); 1936 assert(result.children.length == 0); 1937 1938 assert(!abc_("").successful); 1939 assert(abc_("abc").successful); 1940 assert(abc_("abcabc").successful); 1941 assert(!abc_("ab").successful); 1942 1943 result = abc_("abcdef"); 1944 1945 assert(result.name == `posLookahead!(literal!("abc"))`); 1946 assert(result.successful); 1947 assert(result.matches is null); 1948 assert(result.begin == 0); 1949 assert(result.end == 0); 1950 assert(result.children.length == 0); 1951 1952 result = abc_("def"); 1953 1954 assert(result.name == `posLookahead!(literal!("abc"))`); 1955 assert(!result.successful); 1956 assert(result.matches == [`"abc"`], "posLookahead error message."); 1957 assert(result.begin == 0); 1958 assert(result.end == 0); 1959 assert(result.children.length == 0); 1960 } 1961 1962 /** 1963 Tries 'r' on the input. If it fails, the rule succeeds, without consuming any input. 1964 If 'r' succeeds, then negLookahead!r fails. Low-level implementation of '!r'. 1965 */ 1966 template negLookahead(alias r) 1967 { 1968 enum name = "negLookahead!(" ~ getName!(r) ~ ")"; 1969 1970 ParseTree negLookahead(ParseTree p) 1971 { 1972 ParseTree temp = r(p); 1973 if (temp.successful) 1974 return ParseTree(name, false, ["anything but \"" ~ p.input[temp.begin..temp.end] ~ "\""], p.input, p.end, p.end); 1975 else 1976 return ParseTree(name, true, [], p.input, p.end, p.end); 1977 } 1978 1979 ParseTree negLookahead(string input) 1980 { 1981 return .negLookahead!(r)(ParseTree("",false,[],input)); 1982 } 1983 1984 string negLookahead(GetName g) 1985 { 1986 return name; 1987 } 1988 } 1989 1990 unittest // 'negLookahead' unit test 1991 { 1992 alias literal!"a" a; 1993 alias literal!"abc" abc; 1994 1995 alias negLookahead!(a) a_; 1996 alias negLookahead!(abc) abc_; 1997 1998 assert(getName!(a_)() == `negLookahead!(literal!("a"))`); 1999 assert(getName!(abc_)() == `negLookahead!(literal!("abc"))`); 2000 2001 assert(a_("").successful); 2002 assert(!a_("a").successful); 2003 assert(!a_("aa").successful); 2004 assert(a_("b").successful); 2005 2006 ParseTree result = a_("a"); 2007 2008 assert(result.name == `negLookahead!(literal!("a"))`); 2009 assert(!result.successful); 2010 assert(result.matches == [`anything but "a"`]); 2011 assert(result.begin == 0); 2012 assert(result.end == 0); 2013 assert(result.children.length == 0); 2014 2015 result = a_(""); 2016 2017 assert(result.name == `negLookahead!(literal!("a"))`); 2018 assert(result.successful); 2019 assert(result.matches is null); 2020 assert(result.begin == 0); 2021 assert(result.end == 0); 2022 assert(result.children.length == 0); 2023 2024 assert(abc_("").successful); 2025 assert(!abc_("abc").successful); 2026 assert(!abc_("abcabc").successful); 2027 assert(abc_("ab").successful); 2028 2029 result = abc_("abcdef"); 2030 2031 assert(result.name == `negLookahead!(literal!("abc"))`); 2032 assert(!result.successful); 2033 assert(result.matches == [`anything but "abc"`]); 2034 assert(result.begin == 0); 2035 assert(result.end == 0); 2036 assert(result.children.length == 0); 2037 2038 result = abc_("def"); 2039 2040 assert(result.name == `negLookahead!(literal!("abc"))`); 2041 assert(result.successful); 2042 assert(result.matches is null); 2043 assert(result.begin == 0); 2044 assert(result.end == 0); 2045 assert(result.children.length == 0); 2046 } 2047 2048 /** 2049 Internal helper template, to get a parse tree node with a name. For example, given: 2050 ---- 2051 alias or!(literal!("abc"), charRange!('0','9')) myRule; 2052 ---- 2053 2054 myRule gives nodes named "or", since its the parent rule. If you want nodes to be named "myRule", 2055 use named. named just overwrites the original root node name, the children are left untouched. 2056 2057 See_Also: defined. 2058 2059 ---- 2060 alias or!(literal!("abc"), charRange!('0','9')) rule; 2061 alias named!(rule, "myRule") myRule; 2062 2063 auto input = "abc3"; 2064 auto p1 = rule(input); 2065 auto p2 = myRule(input); 2066 2067 // They are both successful 2068 assert(p1.successful && p2.successful); 2069 assert(p1.matches == p2.matches); 2070 // But the names are different 2071 assert(p1.name == `or!(literal!("abc"), charRange!('0','9'))`); 2072 assert(p2.name == `myRule`); 2073 // Same children: 2074 assert(p2.children == p1.children); 2075 2076 ---- 2077 */ 2078 template named(alias r, string name) 2079 { 2080 ParseTree named(ParseTree p) 2081 { 2082 ParseTree result = r(p); 2083 result.name = name; 2084 return result; 2085 } 2086 2087 ParseTree named(string input) 2088 { 2089 return .named!(r,name)(ParseTree("",false,[],input)); 2090 } 2091 2092 string named(GetName g) 2093 { 2094 return name; 2095 } 2096 } 2097 2098 unittest // 'named' unit test 2099 { 2100 alias or!(literal!("abc"), charRange!('0','9')) rule; 2101 alias named!(rule, "myRule") myRule; 2102 2103 assert(getName!(rule)() == `or!(literal!("abc"), charRange!('0','9'))`); 2104 assert(getName!(myRule)() == "myRule"); 2105 2106 // Equality on success (except for the name) 2107 ParseTree result = rule("abc0"); 2108 ParseTree myResult = myRule("abc0"); 2109 2110 assert(myResult.successful == result.successful); 2111 assert(myResult.name == "myRule"); 2112 assert(myResult.matches == result.matches); 2113 assert(myResult.begin == result.begin); 2114 assert(myResult.end == result.end); 2115 assert(myResult.children == result.children); 2116 2117 // Equality on failure (except for the name) 2118 result = rule("_abc"); 2119 myResult = myRule("_abc"); 2120 2121 assert(myResult.successful == result.successful); 2122 assert(myResult.name == "myRule"); 2123 assert(myResult.matches == result.matches); 2124 assert(myResult.begin == result.begin); 2125 assert(myResult.end == result.end); 2126 assert(myResult.children == result.children); 2127 } 2128 2129 /** 2130 Internal helper template, to get a parse tree node with a name, while keeping the original node (see also named). 2131 For example, given: 2132 ---- 2133 alias or!(literal!("abc"), charRange!('0','9')) myRule; 2134 ---- 2135 2136 myRule gives nodes named "or", since its the parent rule. If you want nodes to be named "myRule", 2137 use defined. Contrary to named (see before), the original node is pushed as the child. 2138 2139 See_Also: named. 2140 2141 ---- 2142 alias or!(literal!("abc"), charRange!('0','9')) rule; 2143 alias defined!(rule, "myRule") myRule; 2144 2145 auto input = "abc3"; 2146 auto p1 = rule(input); 2147 auto p2 = myRule(input); 2148 2149 // They are both successful 2150 assert(p1.successful && p2.successful); 2151 assert(p1.matches == p2.matches); 2152 // But the names are different 2153 assert(p1.name == `or!(literal!("abc"), charRange!('0','9'))`); 2154 assert(p2.name == `myRule`); 2155 // Here the original tree is not discarded: 2156 assert(p2.children[0] == p1); 2157 ---- 2158 */ 2159 template defined(alias r, string name) 2160 { 2161 ParseTree defined(ParseTree p) 2162 { 2163 ParseTree result = r(p); 2164 result.children = [result]; 2165 result.name = name; 2166 return result; 2167 } 2168 2169 ParseTree defined(string input) 2170 { 2171 return .defined!(r,name)(ParseTree("",false,[],input)); 2172 } 2173 2174 string defined(GetName g) 2175 { 2176 return name; 2177 } 2178 } 2179 2180 unittest // 'defined' unit test 2181 { 2182 alias or!(literal!("abc"), charRange!('0','9')) rule; 2183 alias defined!(rule, "myRule") myRule; 2184 2185 assert(getName!(rule)() == `or!(literal!("abc"), charRange!('0','9'))`); 2186 assert(getName!(myRule)() == "myRule"); 2187 2188 // Equality on success (except for the name) 2189 ParseTree result = rule("abc0"); 2190 ParseTree myResult = myRule("abc0"); 2191 2192 assert(myResult.successful && result.successful); 2193 assert(myResult.name == "myRule"); 2194 assert(myResult.matches == result.matches); 2195 assert(myResult.begin == result.begin); 2196 assert(myResult.end == result.end); 2197 assert(myResult.children[0] == result); 2198 2199 // Equality on failure (except for the name) 2200 result = rule("_abc"); 2201 myResult = myRule("_abc"); 2202 2203 assert(!myResult.successful && !result.successful); 2204 assert(myResult.name == "myRule"); 2205 assert(myResult.matches == result.matches); 2206 assert(myResult.begin == result.begin); 2207 assert(myResult.end == result.end); 2208 assert(myResult.children[0] == result); 2209 } 2210 2211 /** 2212 Low-level representation for the expression 'r {act}'. That is, it applies rule 'r' 2213 on the input and then calls 'act' on the resulting ParseTree. 2214 */ 2215 template action(alias r, alias act) 2216 { 2217 ParseTree action(ParseTree p) 2218 { 2219 return act(r(p)); 2220 } 2221 2222 ParseTree action(string input) 2223 { 2224 return .action!(r,act)(ParseTree("",false,[],input)); 2225 } 2226 2227 string action(GetName g) 2228 { 2229 enum name = "action!("~ getName!(r)() ~ ", " ~ __traits(identifier, act) ~ ")"; 2230 return name; 2231 } 2232 } 2233 2234 unittest // 'action' unit test 2235 { 2236 ParseTree foo(ParseTree p) 2237 { 2238 p.matches ~= p.matches; // doubling matches 2239 return p; 2240 } 2241 2242 alias literal!("abc") abc; 2243 2244 alias action!(abc, foo) abcfoo; 2245 2246 assert(getName!(abcfoo) == `action!(literal!("abc"), foo)`); 2247 2248 ParseTree result = abcfoo("abc"); 2249 ParseTree reference = abc("abc"); 2250 2251 assert(result.successful == reference.successful); 2252 assert(result.successful); 2253 assert(result.matches == reference.matches ~ reference.matches); 2254 assert(result.begin == reference.begin); 2255 assert(result.end == reference.end); 2256 assert(result.children == reference.children); 2257 2258 // On failure 2259 result = abcfoo("_abc"); 2260 reference = abc("_abc"); 2261 2262 assert(result.successful == reference.successful); 2263 assert(!result.successful); 2264 assert(result.matches == reference.matches ~ reference.matches); 2265 assert(result.begin == reference.begin); 2266 assert(result.end == reference.end); 2267 assert(result.children == reference.children); 2268 } 2269 2270 /** 2271 Concatenates a ParseTree's matches as one match and discards its children. Equivalent to the expression '~r'. 2272 */ 2273 template fuse(alias r) 2274 { 2275 ParseTree fuse(ParseTree p) 2276 { 2277 p = r(p); 2278 if(p.successful) 2279 { 2280 if (p.matches.length != 0) 2281 p.matches = [std.array.join(p.matches)]; 2282 2283 p.children = null; // also discard children 2284 } 2285 return p; 2286 } 2287 2288 ParseTree fuse(string input) 2289 { 2290 return .fuse!(r)(ParseTree("",false,[],input)); 2291 } 2292 2293 string fuse(GetName g) 2294 { 2295 enum name = "fuse!(" ~ getName!(r)() ~ ")"; 2296 return name; 2297 } 2298 } 2299 2300 unittest // 'fuse' unit test 2301 { 2302 alias oneOrMore!(literal!("abc")) abcs; 2303 2304 alias fuse!(abcs) f; 2305 2306 assert(getName!(f) == `fuse!(oneOrMore!(literal!("abc")))`); 2307 2308 ParseTree result = f("abcabcabc"); 2309 ParseTree reference = abcs("abcabcabc"); 2310 2311 assert(result.successful == reference.successful); 2312 assert(result.successful); 2313 assert(reference.matches == ["abc", "abc", "abc"]); 2314 assert(result.matches == ["abcabcabc"]); 2315 assert(result.begin == reference.begin); 2316 assert(result.end == reference.end); 2317 assert(result.children is null); 2318 2319 // On failure 2320 result = f("_abc"); 2321 reference = abcs("_abc"); 2322 2323 assert(result.successful == reference.successful); 2324 assert(!result.successful); 2325 assert(result.matches == reference.matches); 2326 assert(result.begin == reference.begin); 2327 assert(result.end == reference.end); 2328 assert(result.children == reference.children); 2329 2330 alias discard!(literal!("abc")) dabc; 2331 alias fuse!(dabc) f2; 2332 2333 result = f2("abcabc"); 2334 reference = dabc("abcabc"); 2335 2336 assert(result.successful); 2337 assert(result.matches is null); 2338 assert(result.begin == reference.begin); 2339 assert(result.end == reference.end); 2340 assert(result.children == reference.children); 2341 } 2342 2343 /** 2344 Calls 'r' on the input and then discards its children nodes. 2345 */ 2346 template discardChildren(alias r) 2347 { 2348 ParseTree discardChildren(ParseTree p) 2349 { 2350 p = r(p); 2351 p.children = null; 2352 return p; 2353 } 2354 2355 ParseTree discardChildren(string input) 2356 { 2357 return .discardChildren!(r)(ParseTree("",false,[],input)); 2358 } 2359 2360 string discardChildren(GetName g) 2361 { 2362 enum name = "discardChildren!(" ~ getName!(r)() ~ ")"; 2363 return name; 2364 } 2365 } 2366 2367 /** 2368 Calls 'r' on the input and then discards its matches. 2369 */ 2370 template discardMatches(alias r) 2371 { 2372 ParseTree discardMatches(ParseTree p) 2373 { 2374 p = r(p); 2375 if (p.successful) 2376 p.matches = null; 2377 return p; 2378 } 2379 2380 ParseTree discardMatches(string input) 2381 { 2382 return .discardMatches!(r)(ParseTree("",false,[],input)); 2383 } 2384 2385 string discardMatches(GetName g) 2386 { 2387 enum name = "discardMatches!(" ~ getName!(r)() ~ ")"; 2388 return name; 2389 } 2390 } 2391 2392 /** 2393 Calls 'r' on the input and then discard everything 'r' returned: no children, no match and index 2394 put at the end of the match. It's the low-level engine behind ':r'. 2395 */ 2396 template discard(alias r) 2397 { 2398 ParseTree discard(ParseTree p) 2399 { 2400 ParseTree result = r(p); 2401 result.name = "discard!(" ~ getName!(r)() ~ ")"; 2402 //result.begin = result.end; 2403 result.children = null; 2404 if (result.successful) 2405 result.matches = null;//to keep error messages, if any 2406 2407 return result; 2408 } 2409 2410 ParseTree discard(string input) 2411 { 2412 return .discard!(r)(ParseTree("",false,[],input)); 2413 } 2414 2415 string discard(GetName g) 2416 { 2417 return "discard!(" ~ getName!(r)() ~ ")"; 2418 } 2419 } 2420 2421 unittest // 'discard' unit test 2422 { 2423 alias literal!"abc" abc; 2424 alias oneOrMore!abc abcs; 2425 alias discard!(literal!("abc")) dabc; 2426 alias discard!(oneOrMore!(literal!("abc")))dabcs; 2427 2428 ParseTree reference = abc("abc"); 2429 ParseTree result =dabc("abc"); 2430 2431 assert(result.successful == reference.successful); 2432 assert(result.successful); 2433 assert(result.name == `discard!(literal!("abc"))`); 2434 assert(result.matches is null); 2435 assert(result.begin == reference.begin); 2436 assert(result.end == reference.end); 2437 assert(result.children is null); 2438 2439 reference = abcs("abcabcabc"); 2440 result = dabcs("abcabcabc"); 2441 2442 assert(result.successful == reference.successful); 2443 assert(result.successful); 2444 assert(result.name == `discard!(oneOrMore!(literal!("abc")))`); 2445 assert(result.matches is null); 2446 assert(result.begin == reference.begin); 2447 assert(result.end == reference.end); 2448 assert(result.children is null); 2449 2450 // On failure 2451 reference = abcs(""); 2452 result = dabcs(""); 2453 2454 assert(result.successful == reference.successful); 2455 assert(!result.successful); 2456 assert(result.name == `discard!(oneOrMore!(literal!("abc")))`); 2457 assert(result.matches == [`"abc"`], "discard error message."); 2458 assert(result.begin == reference.begin); 2459 assert(result.end == reference.end); 2460 assert(result.children is null); 2461 2462 // Action on 'and' 2463 alias and!(abc,dabc,abc) discardMiddle; 2464 2465 result = discardMiddle("abcabcabc"); 2466 assert(result.successful); 2467 assert(result.matches == ["abc", "abc"]); 2468 assert(result.begin == 0); 2469 assert(result.end == 3*3); 2470 assert(result.children.length == 2); 2471 assert(result.children == [abc("abcabcabc"), abc(abc(abc("abcabcabc")))]); 2472 } 2473 2474 /** 2475 Calls 'r' on the input and then discards everything 'r' did, except its matches (so that 2476 they propagate upwards). Equivalent to ';r'. 2477 */ 2478 template drop(alias r) 2479 { 2480 ParseTree drop(ParseTree p) 2481 { 2482 ParseTree result = r(p); 2483 //result.begin = result.end; 2484 result.children = null; 2485 if (result.successful) 2486 result.name = "drop!(" ~ getName!(r)() ~ ")"; 2487 return result; 2488 } 2489 2490 ParseTree drop(string input) 2491 { 2492 return .drop!(r)(ParseTree("",false,[],input)); 2493 } 2494 2495 string drop(GetName g) 2496 { 2497 return "drop!(" ~ getName!(r)() ~ ")"; 2498 } 2499 } 2500 2501 unittest // 'drop' unit test 2502 { 2503 alias literal!"abc" abc; 2504 alias oneOrMore!abc abcs; 2505 alias drop!(literal!("abc")) dabc; 2506 alias drop!(oneOrMore!(literal!("abc"))) dabcs; 2507 2508 ParseTree reference = abc("abc"); 2509 ParseTree result =dabc("abc"); 2510 2511 assert(result.successful == reference.successful); 2512 assert(result.successful); 2513 assert(result.name == `drop!(literal!("abc"))`); 2514 assert(result.matches == reference.matches); 2515 assert(result.begin == reference.begin); 2516 assert(result.end == reference.end); 2517 assert(result.children is null); 2518 2519 reference = abcs("abcabcabc"); 2520 result = dabcs("abcabcabc"); 2521 2522 assert(result.successful == reference.successful); 2523 assert(result.successful); 2524 assert(result.name == `drop!(oneOrMore!(literal!("abc")))`); 2525 assert(result.matches == reference.matches); 2526 assert(result.begin == reference.begin); 2527 assert(result.end == reference.end); 2528 assert(result.children is null); 2529 2530 // On failure 2531 reference = abcs(""); 2532 result = dabcs(""); 2533 2534 assert(result.successful == reference.successful); 2535 assert(!result.successful); 2536 assert(result.name == reference.name); 2537 assert(result.matches == [`"abc"`], "'drop' error message."); 2538 assert(result.begin == reference.begin); 2539 assert(result.end == reference.end); 2540 assert(result.children is null); 2541 2542 // Action on 'and' 2543 alias and!(abc,dabc,abc) discardMiddle; 2544 2545 result = discardMiddle("abcabcabc"); 2546 assert(result.successful); 2547 assert(result.matches == ["abc", "abc", "abc"], "3 matches."); 2548 assert(result.begin == 0); 2549 assert(result.end == 3*3); 2550 assert(result.children.length == 2, "but only 2 children."); 2551 assert(result.children == [abc("abcabcabc"), abc(abc(abc("abcabcabc")))]); 2552 } 2553 2554 /** 2555 Makes r disappear in a sequence, letting its children take its place. It's equivalent 2556 to the '%' operator. Given A <- B %C D and C <- E F, a successful parse for A will 2557 generate a three with four children: B, E, F and D parse trees. 2558 */ 2559 template propagate(alias r) 2560 { 2561 ParseTree propagate(ParseTree p) 2562 { 2563 ParseTree result = r(p); 2564 if (result.successful) 2565 result.name = "propagate!(" ~ getName!(r)() ~ ")"; 2566 return result; 2567 } 2568 2569 ParseTree propagate(string input) 2570 { 2571 return .propagate!(r)(ParseTree("",false,[],input)); 2572 } 2573 2574 string propagate(GetName g) 2575 { 2576 return "propagate!(" ~ getName!(r)() ~ ")"; 2577 } 2578 } 2579 2580 /** 2581 Makes 'r's result be kept when it would be discarded by the tree-decimation done by a grammar. 2582 Equivalent to '^r'. 2583 */ 2584 template keep(alias r) 2585 { 2586 ParseTree keep(ParseTree p) 2587 { 2588 ParseTree result = r(p); 2589 if (result.successful) 2590 { 2591 result.children = [result]; 2592 result.name = "keep!(" ~ getName!(r)() ~ ")"; 2593 } 2594 return result; 2595 } 2596 2597 ParseTree keep(string input) 2598 { 2599 return .keep!(r)(ParseTree("",false,[],input)); 2600 } 2601 2602 string keep(GetName g) 2603 { 2604 return "keep!(" ~ getName!(r)() ~ ")"; 2605 } 2606 } 2607 2608 unittest // 'keep' unit test 2609 { 2610 // Grammar mimicry 2611 struct KeepTest 2612 { 2613 static bool isRule(string s) 2614 { 2615 if (s == "A" || s == "KA") 2616 return true; 2617 else 2618 return false; 2619 } 2620 2621 mixin decimateTree; 2622 2623 // Equivalent to A <- 'a' 'b' 2624 static ParseTree A(string s) 2625 { 2626 return decimateTree(named!(and!(literal!"a", literal!"b"), "A")(s)); 2627 } 2628 2629 // Here we use keep to protect 'b' 2630 // Equivalent to KA <- 'a' ^'b' 2631 static ParseTree KA(string s) 2632 { 2633 return decimateTree(named!(and!(literal!"a", keep!(literal!"b")), "KA")(s)); 2634 } 2635 } 2636 2637 ParseTree reference = KeepTest.A("abc"); 2638 ParseTree result = KeepTest.KA("abc"); 2639 2640 assert(result.successful == reference.successful); 2641 assert(result.matches == reference.matches); 2642 assert(result.matches == ["a","b"]); 2643 assert(result.begin == reference.begin); 2644 assert(result.end == reference.end); 2645 assert(reference.children.length == 0); 2646 assert(result.children.length == 1); 2647 assert(result.children == [literal!("b")(literal!("a")("abc"))], "'b' node was kept."); 2648 } 2649 2650 /* ****************** predefined rules ******************** */ 2651 2652 alias named!(or!(literal!("\r\n"), literal!("\n"), literal!("\r")), "endOfLine") endOfLine; /// predefined end-of-line parser 2653 alias endOfLine eol; /// helper alias. 2654 2655 alias or!(literal!(" "), literal!("\t"), literal!("\v")) space; /// predefined space-recognizing parser (space or tabulation). 2656 alias named!(literal!"\t", "tab") tab; /// A parser recognizing \t (tabulation) 2657 alias named!(fuse!(discardChildren!(oneOrMore!space)), 2658 "spaces") spaces; /// aka '~space+' 2659 alias or!(space, endOfLine) blank; /// Any blank char (spaces or end of line). 2660 alias named!(discard!(zeroOrMore!blank), 2661 "spacing") spacing; /// The basic space-management parser: discard zero or more blank spaces. 2662 2663 alias charRange!('0', '9') digit; /// Decimal digit: [0-9] 2664 alias named!(fuse!(discardChildren!(oneOrMore!digit)), "digits") digits; /// [0-9]+ 2665 2666 alias or!(charRange!('0','9'), charRange!('a','f'), charRange!('A', 'F')) hexDigit; /// Hexadecimal digit: [0-9a-fA-F] 2667 2668 alias charRange!('a', 'z') alpha; /// [a-z] 2669 alias charRange!('A', 'Z') Alpha; /// [A-Z] 2670 2671 alias and!(oneOrMore!(or!(alpha, Alpha, literal!("_"))), zeroOrMore!(or!(digit, alpha, Alpha, literal!("_")))) ident; 2672 alias named!(fuse!(discardChildren!ident), 2673 "identifier") identifier; /// [a-zA-Z_][a-zA-Z_0-9]*, the basic C-family identifier 2674 alias named!(fuse!(discardChildren!(and!(identifier, zeroOrMore!(and!(literal!".", identifier))))), 2675 "qualifiedIdentifier") qualifiedIdentifier; /// qualified identifiers (ids separated by dots: abd.def.g). 2676 2677 alias named!(literal!"/", "slash") slash; /// A parser recognizing '/' 2678 alias named!(literal!"\\", "backslash") backslash; /// A parser recognizing '\' 2679 alias named!(literal!"'", "quote") quote; /// A parser recognizing ' (single quote) 2680 alias named!(literal!"\"", "doublequote") doublequote; /// A parser recognizing " (double quote) 2681 alias named!(literal!"`", "backquote") backquote; /// A parser recognizing ` (backquote) 2682 2683 /// A list of elem's separated by sep's. One element minimum. 2684 template list(alias elem, alias sep) 2685 { 2686 alias named!(spaceAnd!(oneOrMore!blank, and!(elem, zeroOrMore!(spaceAnd!(discardMatches!(sep), elem)))), "list") list; 2687 } 2688 2689 /// A list of elem's separated by sep's. The empty list (no elem, no sep) is OK. 2690 template list0(alias elem, alias sep) 2691 { 2692 alias named!(spaceAnd!(oneOrMore!blank, option!(and!(elem, zeroOrMore!(spaceAnd!(discardMatches!(sep), elem))))), "list0") list0; 2693 } 2694 2695 template AddSpace(alias sp) 2696 { 2697 template AddSpace(alias r) 2698 { 2699 alias TypeTuple!(r, discard!sp) AddSpace; 2700 } 2701 } 2702 2703 /** 2704 The engine formerly behind the '< ' Pegged rule: all sequence subelements of a rule are interspersed 2705 with a space-consuming rule, given as the first template parameter. It's not used by Pegged anymore 2706 but can be useful for low-level code. It might become deprecated, but it's not there yet. 2707 2708 ---- 2709 alias and!(literal!"abc", literal!"def") rule1; // "abc" "def", equivalent to "abcdef" 2710 alias spaceAnd!(oneOrMore!blank, literal!"abc", literal!"def") rule2; // "abc" "def", but with spaces in-between. 2711 2712 string input1 = "abcdef"; 2713 string input2 = " abc 2714 2715 def "; // with spaces and end of line markers. 2716 2717 assert(rule1(input1).successful); // OK 2718 assert(!rule1(input2).successful); // NOK, couldn't find "def" after "abc" 2719 2720 assert(rule2(input1).successful); // OK 2721 assert(rule2(input2).successful); // Still OK 2722 assert(rule2(input2).matches == ["abc","def"]);// rule2 finds the literals among the spaces 2723 ---- 2724 2725 As you can see on the previous line, spaceAnd discards the matched spaces 2726 and returns matches only for the 'real' subrules. 2727 2728 Note: by using a non-space rule as the first template argument, 2729 you can use spaceAnd as a generic 'find these patterns, possibly separated by this pattern' rule. 2730 2731 For example, using digits as separators: 2732 ---- 2733 alias spaceAnd!(digit, literal!"abc", litera!"def") rule3; 2734 2735 ParseTree result = rule3("123abc45def67890"); 2736 assert(rule3.successful); 2737 assert(rule3.matches == ["abc", "def"]); 2738 assert(rule3.children.length == 2); 2739 2740 assert(rule3.begin == 0;) 2741 assert(rule3.end == "123abc45def67890".length); 2742 ---- 2743 */ 2744 template spaceAnd(alias sp, rules...) 2745 { 2746 alias and!(discard!(zeroOrMore!sp), staticMap!(AddSpace!(zeroOrMore!sp), rules)) spaceAnd; 2747 } 2748 2749 unittest // 'spaceAnd' unit test 2750 { 2751 alias literal!"a" a; 2752 alias literal!"b" b; 2753 2754 //Basic use 2755 alias and!(a,b) ab; 2756 alias spaceAnd!(oneOrMore!blank, a, b) a_b; 2757 2758 ParseTree reference = ab("ab"); 2759 ParseTree result = a_b("ab"); 2760 2761 assert(reference.successful); 2762 assert(result.successful); 2763 assert(result.matches == reference.matches); 2764 assert(result.begin == reference.begin); 2765 assert(result.end == reference.end); 2766 assert(result.children == reference.children); 2767 2768 reference = ab(" a b "); 2769 result = a_b( " a b "); 2770 2771 assert(!reference.successful); 2772 assert(result.successful); 2773 assert(result.matches == ["a","b"]); 2774 assert(result.begin == 0); 2775 assert(result.end == " a b ".length); 2776 assert(result.children.length == 2); 2777 2778 reference = ab("ac"); 2779 result = a_b("ac"); 2780 2781 assert(!reference.successful); 2782 assert(!result.successful); 2783 2784 reference = ab(" a c "); 2785 result = a_b(" a c "); 2786 2787 assert(!reference.successful); 2788 assert(!result.successful); 2789 2790 // With another separator than spaces 2791 alias spaceAnd!(digit, a, b) a0b; 2792 2793 assert(a0b("ab").successful); 2794 assert(!a0b(" a b ").successful); 2795 2796 result = a0b("012a34b567"); 2797 2798 assert(result.successful); 2799 assert(result.matches == ["a", "b"]); 2800 assert(result.begin == 0); 2801 assert(result.end == "012a34b567".length); 2802 assert(result.children.length == 2); 2803 } 2804 2805 /// Mixin to simplify a parse tree inside a grammar 2806 mixin template decimateTree() 2807 { 2808 static ParseTree decimateTree(ParseTree p) 2809 { 2810 if(p.children.length == 0) return p; 2811 2812 ParseTree[] filterChildren(ParseTree pt) 2813 { 2814 ParseTree[] result; 2815 foreach(child; pt.children) 2816 { 2817 if ( (isRule(child.name) && child.matches.length != 0) 2818 || !child.successful && child.children.length == 0) 2819 { 2820 child.children = filterChildren(child); 2821 result ~= child; 2822 } 2823 else if (child.name.startsWith("keep!(")) // 'keep' node are never discarded. 2824 // They have only one child, the node to keep 2825 { 2826 result ~= child.children[0]; 2827 } 2828 else // discard this node, but see if its children contain nodes to keep 2829 { 2830 result ~= filterChildren(child); 2831 } 2832 } 2833 return result; 2834 } 2835 p.children = filterChildren(p); 2836 return p; 2837 } 2838 } 2839 2840 /** 2841 Discard one-child nodes and replace them with their children. 2842 Most grammar tend to produce long one-child/one-child branches, 2843 simplifyTree compacts these. 2844 */ 2845 ParseTree simplifyTree(ParseTree p) 2846 { 2847 foreach(ref child; p.children) 2848 child = simplifyTree(child); 2849 2850 if (p.children.length != 1) 2851 return p; 2852 else // linear tree 2853 return p.children[0]; 2854 } 2855 2856 /** 2857 Returns: the number of nodes in a parse tree. 2858 */ 2859 size_t size(ParseTree p) 2860 { 2861 size_t result = 1; 2862 foreach(child; p.children) 2863 result += size(child); 2864 return result; 2865 } 2866 2867 /** 2868 Generic ParseTree modifier: 2869 predicate must be callable with a ParseTree and return a boolean. 2870 modifier must be callable with a ParseTree and return a ParseTree. 2871 2872 If predicate is true on input, modify calls modifier on input and return the result. 2873 If not, it continues with the children. 2874 */ 2875 ParseTree modify(alias predicate, alias modifier)(ParseTree input) 2876 { 2877 foreach(ref child; input.children) 2878 child = modify!(predicate, modifier)(child); 2879 2880 if (predicate(input)) 2881 input = modifier(input); 2882 2883 return input; 2884 }