Updated Branches: refs/heads/master 9229caa45 -> b4e471ae1
DRILL-4 - First version of plan parser Project: http://git-wip-us.apache.org/repos/asf/incubator-drill/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-drill/commit/b4e471ae Tree: http://git-wip-us.apache.org/repos/asf/incubator-drill/tree/b4e471ae Diff: http://git-wip-us.apache.org/repos/asf/incubator-drill/diff/b4e471ae Branch: refs/heads/master Commit: b4e471ae1115d0625b7db94d3154da3b61f91a6f Parents: 9229caa Author: tdunning <[email protected]> Authored: Sun Oct 14 15:14:00 2012 -0700 Committer: tdunning <[email protected]> Committed: Sun Oct 14 22:03:51 2012 -0700 ---------------------------------------------------------------------- sandbox/plan-parser/pom.xml | 52 +++++++ .../main/antlr3/org/apache/drill/plan/ast/Plan.g | 59 ++++++++ .../main/java/org/apache/drill/plan/ParsePlan.java | 106 ++++++++++++++ .../main/java/org/apache/drill/plan/ast/Arg.java | 64 +++++++++ .../drill/plan/ast/LogicalPlanParseException.java | 11 ++ .../main/java/org/apache/drill/plan/ast/Op.java | 35 +++++ .../main/java/org/apache/drill/plan/ast/Plan.java | 29 ++++ .../java/org/apache/drill/plan/ParsePlanTest.java | 107 +++++++++++++++ .../plan-parser/src/test/resources/plan1.drillx | 3 + .../plan-parser/src/test/resources/plan2.drillx | 9 ++ .../plan-parser/src/test/resources/plan3.drillx | 11 ++ .../plan-parser/src/test/resources/plan4.drillx | 13 ++ 12 files changed, 499 insertions(+), 0 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/b4e471ae/sandbox/plan-parser/pom.xml ---------------------------------------------------------------------- diff --git a/sandbox/plan-parser/pom.xml b/sandbox/plan-parser/pom.xml new file mode 100644 index 0000000..f5ccb00 --- /dev/null +++ b/sandbox/plan-parser/pom.xml @@ -0,0 +1,52 @@ +<?xml version="1.0" encoding="UTF-8"?> +<project xmlns="http://maven.apache.org/POM/4.0.0" + xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" + 
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> + <modelVersion>4.0.0</modelVersion> + + <groupId>plan-parser</groupId> + <artifactId>plan-parser</artifactId> + <version>0.1</version> + <dependencies> + <dependency> + <groupId>com.google.guava</groupId> + <artifactId>guava</artifactId> + <version>10.0.1</version> + </dependency> + <dependency> + <groupId>org.antlr</groupId> + <artifactId>antlr</artifactId> + <version>3.4</version> + </dependency> + <dependency> + <groupId>junit</groupId> + <artifactId>junit</artifactId> + <version>4.8.2</version> + </dependency> + + </dependencies> + + <build> + <plugins> + <plugin> + <groupId>org.antlr</groupId> + <artifactId>antlr3-maven-plugin</artifactId> + <version>3.4</version> + + <executions> + <execution> + <id>antlr-generate</id> + <!-- this is used for inheritance merges --> + <phase>generate-sources</phase> + <!-- bind to the packaging phase --> + <goals> + <goal>antlr</goal> + </goals> + </execution> + </executions> + + </plugin> + </plugins> + </build> + +</project> \ No newline at end of file http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/b4e471ae/sandbox/plan-parser/src/main/antlr3/org/apache/drill/plan/ast/Plan.g ---------------------------------------------------------------------- diff --git a/sandbox/plan-parser/src/main/antlr3/org/apache/drill/plan/ast/Plan.g b/sandbox/plan-parser/src/main/antlr3/org/apache/drill/plan/ast/Plan.g new file mode 100644 index 0000000..69a9ffb --- /dev/null +++ b/sandbox/plan-parser/src/main/antlr3/org/apache/drill/plan/ast/Plan.g @@ -0,0 +1,59 @@ +grammar Plan; + +options { + output = AST; +} + +@header { +package org.apache.drill.plan.ast; +import com.google.common.collect.Lists; +} + +@lexer::header { +package org.apache.drill.plan.ast; +} + +@members { + public void reportError(RecognitionException e) { + throw new LogicalPlanParseException("Syntax error in schema: ", e); + } +} + +plan returns [Plan r]: 
s=statements EOF {$r = $s.r;}; + +statements returns [Plan r]: + s1 = statement {$r = Plan.create($s1.r);} + ( s2 = statement {$r = $r.add($s2.r);} )* + ; + +statement returns [Op r]: + to=targets GETS o=OP from=args LINE_ENDING {$r = Op.create($o.text, $from.r, $to.r);} + | LINE_ENDING {$r = null; }; + +targets returns [List<Arg> r]: + a = symbol {$r = Lists.newArrayList($a.r);} ( COMMA b = symbol {$r.add($b.r);} )* ; + +symbol returns [Arg r]: + s = SYMBOL { $r = Arg.createSymbol($s.text); }; + +args returns [List<Arg> r]: a = arg {$r = Lists.newArrayList($a.r);} ( COMMA b = arg {$r.add($b.r);} )* ; + +arg returns [Arg r]: + s = STRING {$r = Arg.createString($s.text);} + | n = NUMBER {$r = Arg.createNumber($n.text);} + | b = BOOLEAN {$r = Arg.createBoolean($b.text);} + | s = SYMBOL {$r = Arg.createSymbol($s.text);} + ; + + +STRING: ('"'|'“') ( ~('"' | '\\') | '\\' .)* ('"'|'”') ; +GETS: ':=' ; +BOOLEAN: 'true'|'false'; +SYMBOL: '%' ('0'..'9')+; +OP: ('a'..'z'|'A'..'Z') ('a'..'z'|'A'..'Z'|'-')* + | '>' | '<' | '>=' | '<=' | '+' | '-' | '*' | '/'; +COMMA: ',' ; +NUMBER: ('0'..'9')+ ; +LINE_ENDING: '\r'? '\n'; +COMMENT: '#' (~'\n')* {$channel=HIDDEN;} ; +WHITESPACE : ( '\t' | ' ' )+ { $channel = HIDDEN; } ; http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/b4e471ae/sandbox/plan-parser/src/main/java/org/apache/drill/plan/ParsePlan.java ---------------------------------------------------------------------- diff --git a/sandbox/plan-parser/src/main/java/org/apache/drill/plan/ParsePlan.java b/sandbox/plan-parser/src/main/java/org/apache/drill/plan/ParsePlan.java new file mode 100644 index 0000000..781bc62 --- /dev/null +++ b/sandbox/plan-parser/src/main/java/org/apache/drill/plan/ParsePlan.java @@ -0,0 +1,106 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.drill.plan; + +import com.google.common.base.Charsets; +import com.google.common.collect.HashMultiset; +import com.google.common.collect.Multiset; +import com.google.common.io.Files; +import com.google.common.io.InputSupplier; +import com.google.common.io.Resources; +import org.antlr.runtime.ANTLRReaderStream; +import org.antlr.runtime.CommonTokenStream; +import org.antlr.runtime.RecognitionException; +import org.apache.drill.plan.ast.*; + +import java.io.File; +import java.io.IOException; +import java.io.InputStreamReader; +import java.util.Formatter; + +/** + * Parses a plan from a resource or file. + * + * The result is validated to ensure that symbols mentioned on the left-hand side of assignments are only mentioned + * once and all referenced symbols on the right hand side are defined somewhere. 
+ */ +public class ParsePlan { + public static Plan parseResource(File file) throws IOException, RecognitionException, ValidationException { + return ParsePlan.parse(Files.newReaderSupplier(file, Charsets.UTF_8)); + } + + public static Plan parseResource(String resourceName) throws IOException, RecognitionException, ValidationException { + return ParsePlan.parse(Resources.newReaderSupplier(Resources.getResource(resourceName), Charsets.UTF_8)); + } + + public static Plan parse(InputSupplier<InputStreamReader> in) throws IOException, RecognitionException, ValidationException { + InputStreamReader inStream = in.getInput(); + PlanLexer lex = new PlanLexer(new ANTLRReaderStream(inStream)); + PlanParser r = new PlanParser(new CommonTokenStream(lex)); + inStream.close(); + + Plan plan = r.plan().r; + validate(plan); + return plan; + } + + private static void validate(Plan r) throws ValidationException { + int errors = 0; + Formatter errorMessages = new Formatter(); + + // make sure that each output is assigned only once + Multiset<Integer> counts = HashMultiset.create(); + int line = 1; + for (Op op : r.getStatements()) { + for (Arg assignment : op.getOutputs()) { + int slot = ((Arg.Symbol) assignment).getSlot(); + counts.add(slot); + if (counts.count(slot) != 1) { + errorMessages.format("Output symbol %%%d used more than once in statement %d\n", slot, line); + errors++; + } + } + line++; + } + + // make sure that each input is defined at least once + line = 1; + for (Op op : r.getStatements()) { + for (Arg reference : op.getInputs()) { + if (reference instanceof Arg.Symbol) { + int slot = ((Arg.Symbol) reference).getSlot(); + if (counts.count(slot) <= 0) { + errorMessages.format("Undefined reference to %%%d in statement %d\n", slot, line); + errors++; + } + } + } + line++; + } + + if (errors > 0) { + throw new ValidationException(errorMessages.toString()); + } + } + + public static class ValidationException extends Exception { + public ValidationException(String s) { + 
super(s); + } + } +} http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/b4e471ae/sandbox/plan-parser/src/main/java/org/apache/drill/plan/ast/Arg.java ---------------------------------------------------------------------- diff --git a/sandbox/plan-parser/src/main/java/org/apache/drill/plan/ast/Arg.java b/sandbox/plan-parser/src/main/java/org/apache/drill/plan/ast/Arg.java new file mode 100644 index 0000000..1890bfd --- /dev/null +++ b/sandbox/plan-parser/src/main/java/org/apache/drill/plan/ast/Arg.java @@ -0,0 +1,64 @@ +package org.apache.drill.plan.ast; + +import com.google.common.base.CharMatcher; + +/** + * Created with IntelliJ IDEA. User: tdunning Date: 10/12/12 Time: 11:40 PM To change this template + * use File | Settings | File Templates. + */ +public class Arg { + private static final CharMatcher quotes = CharMatcher.is('"'); + private static final CharMatcher percent = CharMatcher.is('%'); + + public static Arg createString(String s) { + return new QuotedString(s); + } + + public static Arg createNumber(String n) { + return new Number(Double.parseDouble(n)); + } + + public static Arg createSymbol(String s) { + return new Symbol(Integer.parseInt(percent.trimLeadingFrom(s))); + } + + public static Arg createBoolean(String b) { + return new BooleanConstant(Boolean.parseBoolean(b)); + } + + public static class QuotedString extends Arg { + private String s; + + public QuotedString(String s) { + this.s = quotes.trimFrom(quotes.trimFrom(s)); + } + } + + public static class BooleanConstant extends Arg { + private boolean v; + + public BooleanConstant(boolean b) { + v = b; + } + } + + public static class Number extends Arg { + private double value; + + public Number(double v) { + value = v; + } + } + + public static class Symbol extends Arg { + private int slot; + + public Symbol(int slot) { + this.slot = slot; + } + + public int getSlot() { + return slot; + } + } +} 
http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/b4e471ae/sandbox/plan-parser/src/main/java/org/apache/drill/plan/ast/LogicalPlanParseException.java ---------------------------------------------------------------------- diff --git a/sandbox/plan-parser/src/main/java/org/apache/drill/plan/ast/LogicalPlanParseException.java b/sandbox/plan-parser/src/main/java/org/apache/drill/plan/ast/LogicalPlanParseException.java new file mode 100644 index 0000000..5e8a547 --- /dev/null +++ b/sandbox/plan-parser/src/main/java/org/apache/drill/plan/ast/LogicalPlanParseException.java @@ -0,0 +1,11 @@ +package org.apache.drill.plan.ast; + +/** + * Created with IntelliJ IDEA. User: tdunning Date: 10/12/12 Time: 11:25 PM To change this template + * use File | Settings | File Templates. + */ +public class LogicalPlanParseException extends RuntimeException { + public LogicalPlanParseException(String msg, Throwable cause) { + super(msg, cause); + } +} http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/b4e471ae/sandbox/plan-parser/src/main/java/org/apache/drill/plan/ast/Op.java ---------------------------------------------------------------------- diff --git a/sandbox/plan-parser/src/main/java/org/apache/drill/plan/ast/Op.java b/sandbox/plan-parser/src/main/java/org/apache/drill/plan/ast/Op.java new file mode 100644 index 0000000..e64414f --- /dev/null +++ b/sandbox/plan-parser/src/main/java/org/apache/drill/plan/ast/Op.java @@ -0,0 +1,35 @@ +package org.apache.drill.plan.ast; + +import java.util.List; + +/** +* Created with IntelliJ IDEA. User: tdunning Date: 10/12/12 Time: 11:23 PM To change this template +* use File | Settings | File Templates. 
+*/ +public class Op { + private String op; + + private List<Arg> inputs; + private List<Arg> outputs; + + + public static Op create(String op, List<Arg> inputs, List<Arg> outputs) { + Op r = new Op(); + r.op = op; + r.inputs = inputs; + r.outputs = outputs; + return r; + } + + public List<Arg> getInputs() { + return inputs; + } + + public String getOp() { + return op; + } + + public List<Arg> getOutputs() { + return outputs; + } +} http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/b4e471ae/sandbox/plan-parser/src/main/java/org/apache/drill/plan/ast/Plan.java ---------------------------------------------------------------------- diff --git a/sandbox/plan-parser/src/main/java/org/apache/drill/plan/ast/Plan.java b/sandbox/plan-parser/src/main/java/org/apache/drill/plan/ast/Plan.java new file mode 100644 index 0000000..a0425a9 --- /dev/null +++ b/sandbox/plan-parser/src/main/java/org/apache/drill/plan/ast/Plan.java @@ -0,0 +1,29 @@ +package org.apache.drill.plan.ast; + +import com.google.common.collect.Lists; + +import java.util.List; + +/** + * Created with IntelliJ IDEA. User: tdunning Date: 10/12/12 Time: 7:41 PM To change this template + * use File | Settings | File Templates. 
+ */ +public class Plan { + private List<Op> statements = Lists.newArrayList(); + + public static Plan create(Op first) { + Plan r = new Plan(); + return r.add(first); + } + + public Plan add(Op next) { + if (next != null) { + statements.add(next); + } + return this; + } + + public List<Op> getStatements() { + return statements; + } +} http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/b4e471ae/sandbox/plan-parser/src/test/java/org/apache/drill/plan/ParsePlanTest.java ---------------------------------------------------------------------- diff --git a/sandbox/plan-parser/src/test/java/org/apache/drill/plan/ParsePlanTest.java b/sandbox/plan-parser/src/test/java/org/apache/drill/plan/ParsePlanTest.java new file mode 100644 index 0000000..e90e8e4 --- /dev/null +++ b/sandbox/plan-parser/src/test/java/org/apache/drill/plan/ParsePlanTest.java @@ -0,0 +1,107 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.drill.plan; + +import com.google.common.base.Charsets; +import com.google.common.collect.Lists; +import com.google.common.io.Resources; +import org.antlr.runtime.ANTLRReaderStream; +import org.antlr.runtime.RecognitionException; +import org.antlr.runtime.Token; +import org.apache.drill.plan.ast.Plan; +import org.apache.drill.plan.ast.PlanLexer; +import org.junit.Test; + +import java.io.IOException; +import java.io.InputStreamReader; +import java.util.Iterator; +import java.util.List; + +import static junit.framework.Assert.assertEquals; + +public class ParsePlanTest { + @Test + public void testParse1() throws IOException, RecognitionException, ParsePlan.ValidationException { + Plan r = ParsePlan.parseResource("plan1.drillx"); + assertEquals("Lines", 3, r.getStatements().size()); + } + + @Test + public void testParse2() throws IOException, RecognitionException, ParsePlan.ValidationException { + Plan r = ParsePlan.parseResource("plan2.drillx"); + assertEquals("Lines", 6, r.getStatements().size()); + } + + @Test + public void testParse3() throws IOException, RecognitionException, ParsePlan.ValidationException { + Plan r = ParsePlan.parseResource("plan3.drillx"); + assertEquals("Lines", 8, r.getStatements().size()); + } + + @Test + public void testLexer() throws IOException { + List<String> ref = Lists.newArrayList( + "%3", ",", "%4", ":=", "explode", "\"data\"", ",", "\"var-to-explode\"", "\n", + "%5", ":=", "modify", "%4", "\n", + "%6", ",", "%7", ":=", "flatten", "%3", ",", "%5", "\n"); + + InputStreamReader inStream = Resources.newReaderSupplier(Resources.getResource("plan1.drillx"), Charsets.UTF_8).getInput(); + PlanLexer lex = new PlanLexer(new ANTLRReaderStream(inStream)); + Token t = lex.nextToken(); + Iterator<String> i = ref.iterator(); + while (t != null && t.getType() != -1) { + if (t.getChannel() != 99) { + assertEquals(i.next(), t.getText()); + } + t = lex.nextToken(); + } + inStream.close(); + + } + + @Test + public void 
testLexer2() throws IOException { + List<String> ref = Lists.newArrayList( + "%1", ":=", "scan-json", "\"table-1\"", "EOL", + "EOL", + "%2", ":=", "bind", "\"x\"", ",", "%1", "EOL", + "EOL", + "EOL", + "%3", ":=", "bind", "\"y\"", ",", "%2", "EOL", + "%4", ":=", ">", "%2", ",", "3", "EOL", + "%5", ":=", "filter", "%4", ",", "%1", "EOL", + "%6", ":=", "project", "%5", ",", "%2", ",", "%3", "EOL"); + + InputStreamReader inStream = Resources.newReaderSupplier(Resources.getResource("plan2.drillx"), Charsets.UTF_8).getInput(); + PlanLexer lex = new PlanLexer(new ANTLRReaderStream(inStream)); + Token t = lex.nextToken(); + Iterator<String> i = ref.iterator(); + while (t != null && t.getType() != -1) { + if (t.getChannel() != 99) { + String tokenText = t.getText(); + if (t.getText().equals("\n")) { + tokenText = "EOL"; + } + assertEquals(i.next(), tokenText); + } + t = lex.nextToken(); + } + inStream.close(); + + } +} http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/b4e471ae/sandbox/plan-parser/src/test/resources/plan1.drillx ---------------------------------------------------------------------- diff --git a/sandbox/plan-parser/src/test/resources/plan1.drillx b/sandbox/plan-parser/src/test/resources/plan1.drillx new file mode 100644 index 0000000..2b35353 --- /dev/null +++ b/sandbox/plan-parser/src/test/resources/plan1.drillx @@ -0,0 +1,3 @@ +%3, %4 := explode "data", "var-to-explode" +%5 := modify %4 +%6, %7 := flatten %3, %5 http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/b4e471ae/sandbox/plan-parser/src/test/resources/plan2.drillx ---------------------------------------------------------------------- diff --git a/sandbox/plan-parser/src/test/resources/plan2.drillx b/sandbox/plan-parser/src/test/resources/plan2.drillx new file mode 100644 index 0000000..49e7b5a --- /dev/null +++ b/sandbox/plan-parser/src/test/resources/plan2.drillx @@ -0,0 +1,9 @@ +%1 := scan-json "table-1" # from table-1 + +%2 := bind "x", %1 # variable x + +# full-line comment 
+%3 := bind "y", %2 # and y +%4 := > %2, 3 # where x > 3 +%5 := filter %4, %1 # apply the filter +%6 := project %5, %2, %3 # select columns for final output http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/b4e471ae/sandbox/plan-parser/src/test/resources/plan3.drillx ---------------------------------------------------------------------- diff --git a/sandbox/plan-parser/src/test/resources/plan3.drillx b/sandbox/plan-parser/src/test/resources/plan3.drillx new file mode 100644 index 0000000..7f3ae43 --- /dev/null +++ b/sandbox/plan-parser/src/test/resources/plan3.drillx @@ -0,0 +1,11 @@ +#comment taking up a full line + +# and then a blank line +%1 := scan-json "table-1" # from table-1 +%2 := bind "a.x", %1 # we will sum a.x +%3 := > %2, 3 # where x > 3 +%4 := filter %3, %1 # apply the filter +%5 := aggregate "sum", %3, %2, %4 +%6 := as %5, "total", %4 # rename total +%7 := bind "total", %6 # set up final select +%9 := project %4, %6 http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/b4e471ae/sandbox/plan-parser/src/test/resources/plan4.drillx ---------------------------------------------------------------------- diff --git a/sandbox/plan-parser/src/test/resources/plan4.drillx b/sandbox/plan-parser/src/test/resources/plan4.drillx new file mode 100644 index 0000000..b78e2fc --- /dev/null +++ b/sandbox/plan-parser/src/test/resources/plan4.drillx @@ -0,0 +1,13 @@ +%1 := scan-json “table-1” +%2 := bind “y”, %1 +%3 := bind “z”, %1 +%4,%5 := group %1, %2, %3 # group by y, z, group ref in %5 +%6 := explode %4, %5 # aggregate on group value +%7 := bind “x”, %6 # sum(x), note where ref is bound +%8 := aggregate “sum”, %6, %7 +%9,%10:= flatten %4, %8 # splice result back into data +%11 := as %10, “s”, %9 # name the aggregated result +%12 := bind “s”, %11 +%13 := bind “y”, %11 +%14 := bind “z”, %11 +%14 := project %11, %12, %13, %14 # select s,y,z
