1 /**
2 Dynamic grammars:
3     - no CT parsing (this one will stay)
4     - no memoization (could be added)
5     - slightly less handy parameterized rules (could be better)
6     - no semantic actions (drat, this one is the worse)
7     - no calling from other grammars?
8 
9 Advantages:
10     - fully runtime configurable: change rules, add rules, delete rules.
11 */
12 module pegged.dynamic.grammar;
13 
14 import std.algorithm : startsWith;
15 import std.array;
16 import std.stdio;
17 
18 import pegged.peg;
19 import pegged.parser;
20 import pegged.dynamic.peg;
21 
22 @safe:
23 
24 struct ParameterizedRule
25 {
26     size_t numArgs;
27     Dynamic delegate(Dynamic[]) @safe code;
28 
29     Dynamic opCall(D...)(D rules)
30     {
31         Dynamic[] args;
32         foreach(i,rule; rules)
33         {
34             static if (is(typeof(rule) == Dynamic))
35                 args ~= rule;
36             else
37                 args ~= rule(); // Dynamic delegate()
38         }
39 
40         if(rules.length == numArgs)
41         {
42             switch(numArgs)
43             {
44                 case 1:
45                     return code([args[0]]);
46                 case 2:
47                     return code([args[0], args[1]]);
48                 case 3:
49                     return code([args[0], args[1], args[2]]);
50                 case 4:
51                     return code([args[0], args[1], args[2], args[3]]);
52                 case 5:
53                     return code([args[0], args[1], args[2], args[3], args[4]]);
54                 case 6:
55                     return code([args[0], args[1], args[2], args[3], args[4], args[5]]);
56                 default:
57                     throw new Exception("Unimplemented parameterized rule with more than 6 arguments.");
58             }
59         }
60         else
61             throw new Exception( "ParameterizedRule called with "
62                                ~ to!string(rules.length)
63                                ~ " args. Waited for "
64                                ~ to!string(numArgs) ~ ".");
65         return code(args);
66     }
67 }
68 
69 ParameterizedRule parameterizedRule(size_t n, Dynamic delegate(Dynamic[] d) @safe code)
70 {
71     ParameterizedRule pr;
72     pr.numArgs = n;
73     pr.code = code;
74     return pr;
75 }
76 
77 struct DynamicGrammar
78 {
79     string grammarName;
80     string startingRule;
81     Dynamic[string] rules;
82     ParameterizedRule[string] paramRules;
83 
84     ParseTree decimateTree(ParseTree p)
85     {
86         if(p.children.length == 0) return p;
87 
88         ParseTree[] filterChildren(ParseTree pt)
89         {
90             ParseTree[] result;
91             foreach(child; pt.children)
92             {
93                 if (  (child.name.startsWith(grammarName) && child.matches.length != 0)
94                    || !child.successful && child.children.length == 0)
95                 {
96                     child.children = filterChildren(child);
97                     result ~= child;
98                 }
99                 else if (child.name.startsWith("keep!(")) // 'keep' node are never discarded.
100                                                // They have only one child, the node to keep
101                 {
102                     result ~= child.children[0];
103                 }
104                 else // discard this node, but see if its children contain nodes to keep
105                 {
106                     result ~= filterChildren(child);
107                 }
108             }
109             return result;
110         }
111         p.children = filterChildren(p);
112         return p;
113     }
114 
115     ParseTree opCall(ParseTree p)
116     {
117         return decimateTree(rules[startingRule](p));
118         /+
119         result.children = [result];
120         result.name = grammarName;
121         return result;
122         +/
123     }
124 
125     ParseTree opCall(string input)
126     {
127         return decimateTree(rules[startingRule](ParseTree(``, false, [], input, 0, 0)));
128         /+
129         result.children = [result];
130         result.name = grammarName;
131         return result;
132         +/
133     }
134 
135     void opIndexAssign(D)(D code, string s)
136     {
137         rules[s]= named(code, grammarName ~ "." ~ s);
138     }
139 
140     Dynamic opIndex(string s)
141     {
142         return rules[s];
143     }
144 }
145 
146 // Helper to insert 'Spacing' before and after Primaries
147 ParseTree spaceArrow(ParseTree input)
148 {
149     ParseTree wrapInSpaces(ParseTree p)
150     {
151         ParseTree spacer =
152         ParseTree("Pegged.Prefix", true, null, null, 0,0, [
153             ParseTree("Pegged.Suffix", true, null, null, 0, 0, [
154                 ParseTree("Pegged.Primary", true, null, null, 0, 0, [
155                     ParseTree("Pegged.RhsName", true, null, null, 0,0, [
156                         ParseTree("Pegged.Identifier", true, ["Spacing"])
157                     ])
158                 ])
159             ])
160         ]);
161         ParseTree result = ParseTree("Pegged.WrapAround", true, p.matches, p.input, p.begin, p.end, p.children);
162         result.children = spacer ~ result.children ~ spacer;
163         return result;
164     }
165     return modify!( p => p.name == "Pegged.Primary",
166                     wrapInSpaces)(input);
167 }
168 
169 Dynamic makeRule(string def, Dynamic[string] context)
170 {
171     ParseTree p = Pegged.decimateTree(Pegged.Definition(def));
172     return makeRule(p, context);
173 }
174 
175 Dynamic makeRule(ParseTree def, Dynamic[string] context)
176 {
177     Dynamic code;
178 
179     Dynamic getDyn(string name)
180     {
181         if (name in context)
182             return context[name];
183         else
184             throw new Exception("Unknown name: " ~ name);
185     }
186 
187     Dynamic ruleFromTree(ParseTree p) @safe
188     {
189         //writeln("rfT: ", p.name, " ", p.matches);
190         //Dynamic result;
191         switch(p.name)
192         {
193             case "Pegged.Expression":
194                 //if (p.children.length > 1) // OR expression
195                 //{
196                     Dynamic[] children;
197                     foreach(seq; p.children)
198                         children ~= ruleFromTree(seq);
199                     return distribute!(or)(children);
200                 //}
201                 //else // One child -> just a sequence, no need for a or!( , )
202                 //{
203                 //    return ruleFromTree(p.children[0]);
204                 //}
205                 //break;
206             case "Pegged.Sequence":
207                 //if (p.children.length > 1) // real sequence
208                 //{
209                     Dynamic[] children;
210                     foreach(seq; p.children)
211                         children ~= ruleFromTree(seq);
212                     return distribute!(pegged.dynamic.peg.and)(children);
213                 /+}
214                 else // One child -> just a Suffix, no need for a and!( , )
215                 {
216                     return ruleFromTree(p.children[0]);
217                 }
218                 +/
219             case "Pegged.Prefix":
220                 ParseTree temp;
221                 foreach_reverse(i,child; p.children[0..$-1]) // transforming a list into a linear tree
222                 {
223                     temp  = p.children[$-1];
224                     p.children[$-1] = child;
225                     p.children[$-1].children = [temp];
226                 }
227                 return ruleFromTree(p.children[$-1]);
228             case "Pegged.Suffix":
229                 foreach(child; p.children[1..$])
230                 {
231                     ParseTree temp = p.children[0];
232                     p.children[0] = child;
233                     p.children[0].children = [temp];
234                 }
235                 return ruleFromTree(p.children[0]);
236             case "Pegged.Primary":
237                 return ruleFromTree(p.children[0]);
238             case "Pegged.RhsName":
239                 return ruleFromTree(p.children[0]);
240             case "Pegged.Identifier":
241                 //writeln("Identifier: ", p.matches[0], " ");//, getDyn(p.matches[0])()(ParseTree()).name);
242                 return getDyn(p.matches[0]);
243             case "Pegged.Literal":
244                 //writeln("Literal: ", p.matches);
245                 if(p.matches.length == 3) // standard case
246                     return literal(p.matches[1]);
247                 else // only two children -> empty literal
248                     return eps();
249             case "Pegged.CharClass":
250                 if (p.children.length > 1)
251                 {
252                     Dynamic[] children;
253                     foreach(seq; p.children)
254                         children ~= ruleFromTree(seq);
255                     return distribute!(pegged.dynamic.peg.or)(children);
256                 }
257                 else // One child -> just a sequence, no need for a or!( , )
258                 {
259                     return ruleFromTree(p.children[0]);
260                 }
261                 //break;
262             case "Pegged.CharRange":
263                 /// Make the generation at the Char level: directly what is needed, be it `` or "" or whatever
264                 if (p.children.length > 1) // a-b range
265                 {
266                     return charRange(p.matches[0].front, p.matches[2].front);
267                 }
268                 else // lone char
269                 {
270                     //result = "pegged.peg.literal!(";
271                     string ch = p.matches[0];
272                     switch (ch)
273                     {
274                         case "\\[":
275                         case "\\]":
276                         case "\\-":
277                             return literal(ch[0..1]);
278                         case "\\\'":
279                             return literal("\'");
280                         case "\\`":
281                             return literal("`");
282                         case "\\":
283                         case "\\\\":
284                             return literal("\\");
285                         case "\"":
286                         case "\\\"":
287                             return literal("\"");
288                         case "\n":
289                         case "\r":
290                         case "\t":
291                             return literal(ch);
292                         default:
293                             return literal(ch);
294                     }
295                 }
296             case "Pegged.Char":
297                 string ch = p.matches[0];
298                 switch (ch)
299                 {
300                     case "\\[":
301                     case "\\]":
302                     case "\\-":
303 
304                     case "\\\'":
305                     case "\\\"":
306                     case "\\`":
307                     case "\\\\":
308                         return literal(ch[1..$]);
309                     case "\n":
310                     case "\r":
311                     case "\t":
312                         return literal(ch);
313                     default:
314                         return literal(ch);
315                 }
316                 //break;
317             case "Pegged.POS":
318                 return posLookahead(ruleFromTree(p.children[0]));
319             case "Pegged.NEG":
320                 return negLookahead(ruleFromTree(p.children[0]));
321             case "Pegged.FUSE":
322                 return fuse(ruleFromTree(p.children[0]));
323             case "Pegged.DISCARD":
324                 return discard(ruleFromTree(p.children[0]));
325             case "Pegged.KEEP":
326                 return keep(ruleFromTree(p.children[0]));
327             case "Pegged.DROP":
328                 return drop(ruleFromTree(p.children[0]));
329             case "Pegged.PROPAGATE":
330                 return propagate(ruleFromTree(p.children[0]));
331             case "Pegged.OPTION":
332                 return option(ruleFromTree(p.children[0]));
333             case "Pegged.ZEROORMORE":
334                 return zeroOrMore(ruleFromTree(p.children[0]));
335             case "Pegged.ONEORMORE":
336                 return oneOrMore(ruleFromTree(p.children[0]));
337             case "Pegged.Action":
338                 Dynamic result = ruleFromTree(p.children[0]);
339                 foreach(act; p.matches[1..$])
340                     result = action(result, getDyn(act));
341                 return result;
342             case "Pegged.ANY":
343                 return any();
344             case "Pegged.WrapAround":
345                 return wrapAround( ruleFromTree(p.children[0])
346                                         , ruleFromTree(p.children[1])
347                                         , ruleFromTree(p.children[2]));
348             default:
349                 throw new Exception("Bad tree: " ~ p.toString());
350         }
351     }
352 
353     switch(def.children[1].children[0].name)
354     {
355         case "Pegged.LEFTARROW":
356             code = ruleFromTree(def.children[2]);
357             break;
358         case "Pegged.FUSEARROW":
359             code = fuse(ruleFromTree(def.children[2]));
360             break;
361         case "Pegged.DISCARDARROW":
362             code = discard(ruleFromTree(def.children[2]));
363             break;
364         case "Pegged.KEEPARROW":
365             code = keep(ruleFromTree(def.children[2]));
366             break;
367         case "Pegged.DROPARROW":
368             code = drop(ruleFromTree(def.children[2]));
369             break;
370         case "Pegged.PROPAGATEARROW":
371             code = propagate(ruleFromTree(def.children[2]));
372             break;
373         case "Pegged.SPACEARROW":
374             ParseTree modified = spaceArrow(def.children[2]);
375             code = ruleFromTree(modified);
376             break;
377         case "Pegged.ACTIONARROW":
378             Dynamic actionResult = ruleFromTree(def.children[2]);
379             foreach(act; def.children[1].matches[1..$])
380                 actionResult = action(actionResult, getDyn(act));
381             code = actionResult;
382             break;
383         default:
384             throw new Exception("Unknow arrow: " ~ def.children[1].children[0].name);
385             //break;
386     }
387 
388     return named(code, def.matches[0]);
389 }
390 
391 DynamicGrammar grammar(string definition, Dynamic[string] context = null)
392 {
393     //writeln("Entering dyn gram");
394     ParseTree defAsParseTree = Pegged(definition);
395     //writeln(defAsParseTree);
396     if (!defAsParseTree.successful)
397     {
398         // To work around a strange bug with ParseTree printing at compile time
399         throw new Exception("Bad grammar input: " ~ defAsParseTree.toString(""));
400     }
401 
402     DynamicGrammar gram;
403     foreach(name, rule; context)
404     {
405         gram.rules[name] = rule;
406     }
407 
408     ParseTree p = defAsParseTree.children[0];
409 
410     string grammarName = p.children[0].matches[0];
411     string shortGrammarName = p.children[0].matches[0];
412     gram.grammarName = shortGrammarName;
413 
414     // Predefined spacing
415     gram.rules["Spacing"] = discard(zeroOrMore(or(literal(" "), literal("\t"), literal("\n"), literal("\r"))));
416 
417     ParseTree[] definitions = p.children[1 .. $];
418 
419     foreach(i,def; definitions)
420     {
421         gram[def.matches[0]] = fail();
422     }
423 
424     foreach(i,def; definitions)
425     {
426         string shortName = def.matches[0];
427 
428         if (i == 0)
429             gram.startingRule = shortName;
430         // prepending the global grammar name, to get a qualified-name rule 'Gram.Rule'
431         def.matches[0] = shortGrammarName ~ "." ~ def.matches[0];
432         gram.rules[shortName] = makeRule(def, gram.rules);
433     }
434 
435     return gram;
436 }
437 
438 Dynamic distribute(alias fun)(Dynamic[] args)
439 {
440     //mixin(makeSwitch(40));
441     switch(args.length)
442     {
443         case 1:
444             return fun(args[0]);
445         case 2:
446             return fun(args[0], args[1]);
447         case 3:
448             return fun(args[0], args[1], args[2]);
449         case 4:
450             return fun(args[0], args[1], args[2], args[3]);
451         case 5:
452             return fun(args[0], args[1], args[2], args[3], args[4]);
453         case 6:
454             return fun(args[0], args[1], args[2], args[3], args[4], args[5]);
455         default:
456             return fun(fun(args[0], args[1], args[2], args[3], args[4], args[5]), distribute!fun(args[6..$]));
457     }
458 }