Author: szita
Date: Fri Jul 7 13:38:22 2017
New Revision: 1801161

URL: http://svn.apache.org/viewvc?rev=1801161&view=rev
Log:
PIG-5237: Fix DOT file parsing to enable DOT-based physical plan testing (YaShock via szita)

Modified: pig/trunk/CHANGES.txt pig/trunk/test/org/apache/pig/spark/TestSparkCompiler.java pig/trunk/test/org/apache/pig/test/utils/dotGraph/DOTParser.jjt pig/trunk/test/org/apache/pig/test/utils/dotGraph/DotEdge.java pig/trunk/test/org/apache/pig/test/utils/dotGraph/DotGraph.java pig/trunk/test/org/apache/pig/test/utils/dotGraph/DotGraphReader.java pig/trunk/test/org/apache/pig/test/utils/dotGraph/DotNode.java Modified: pig/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/pig/trunk/CHANGES.txt?rev=1801161&r1=1801160&r2=1801161&view=diff ============================================================================== --- pig/trunk/CHANGES.txt (original) +++ pig/trunk/CHANGES.txt Fri Jul 7 13:38:22 2017 @@ -24,6 +24,8 @@ INCOMPATIBLE CHANGES IMPROVEMENTS +PIG-5237: Fix DOT file parsing to enable DOT-based physical plan testing (YaShock via szita) + PIG-5269: MapReduceLauncher and MRJobStats imports org.python.google.common.collect.Lists instead of org.google.common.collect.Lists (nkollar via szita) PIG-4700: Enable progress reporting for Tasks in Tez (satishsaley via rohini) Modified: pig/trunk/test/org/apache/pig/spark/TestSparkCompiler.java URL: http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/spark/TestSparkCompiler.java?rev=1801161&r1=1801160&r2=1801161&view=diff ============================================================================== --- pig/trunk/test/org/apache/pig/spark/TestSparkCompiler.java (original) +++ pig/trunk/test/org/apache/pig/spark/TestSparkCompiler.java Fri Jul 7 13:38:22 2017 @@ -24,7 +24,6 @@ import java.io.FileOutputStream; import java.io.PrintStream; import java.util.Properties; import java.util.Random; - import javax.xml.parsers.ParserConfigurationException; import javax.xml.transform.TransformerException; @@ -44,13 +43,15 @@ import org.apache.pig.impl.plan.NodeIdGe import org.apache.pig.impl.plan.VisitorException; import org.apache.pig.test.Util; import org.apache.pig.test.utils.TestHelper; +import 
org.apache.pig.test.utils.dotGraph.DotGraph; +import org.apache.pig.test.utils.dotGraph.DotGraphReader; import org.junit.AfterClass; import org.junit.Before; import org.junit.BeforeClass; import org.junit.Test; -import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; /** * Test cases to test the SparkCompiler. VERY IMPORTANT NOTE: The tests here @@ -74,9 +75,8 @@ public class TestSparkCompiler { public void doPrint(PrintStream ps, SparkOperPlan plan) throws VisitorException, ParserConfigurationException, TransformerException { switch (this) { case DOT: - throw new RuntimeException("Testing in DOT format not supported yet"); - //(new DotSparkPrinter(plan, ps)).dump(); - //break; + (new DotSparkPrinter(plan, ps)).dump(); + break; case XML: XMLSparkPrinter printer = new XMLSparkPrinter(ps, plan); printer.visit(); @@ -88,6 +88,19 @@ public class TestSparkCompiler { break; } } + + public boolean compare(String goldenPlan, String compiledPlan) { + switch (this) { + case DOT: + DotGraph a = DotGraphReader.load(goldenPlan); + DotGraph b = DotGraphReader.load(compiledPlan); + return a.isomorphic(b); + case XML: + case TEXT: + default: + return TestHelper.sortUDFs(Util.removeSignature(goldenPlan)).equals(TestHelper.sortUDFs(Util.removeSignature(compiledPlan))); + } + } } // If for some reason, the golden files need to be regenerated, set this to @@ -135,8 +148,7 @@ public class TestSparkCompiler { run(query, "test/org/apache/pig/test/data/GoldenFiles/spark/SPARKC-LoadStore-1-text.gld", PlanPrinter.TEXT); run(query, "test/org/apache/pig/test/data/GoldenFiles/spark/SPARKC-LoadStore-1-xml.gld", PlanPrinter.XML); - //TODO: enable this when DOT file comparison is supported - //run(query, "test/org/apache/pig/test/data/GoldenFiles/spark/SPARKC-LoadStore-1-dot.gld", PlanPrinter.DOT); + run(query, "test/org/apache/pig/test/data/GoldenFiles/spark/SPARKC-LoadStore-1-dot.gld", PlanPrinter.DOT); } private void run(String query, String expectedFile, 
PlanPrinter planPrinter) throws Exception { @@ -174,8 +186,8 @@ public class TestSparkCompiler { String goldenPlanClean = Util.standardizeNewline(goldenPlan).trim(); String compiledPlanClean = Util.standardizeNewline(compiledPlan).trim(); - assertEquals(TestHelper.sortUDFs(Util.removeSignature(goldenPlanClean)), - TestHelper.sortUDFs(Util.removeSignature(compiledPlanClean))); + + assertTrue(planPrinter.compare(goldenPlanClean, compiledPlanClean)); } } Modified: pig/trunk/test/org/apache/pig/test/utils/dotGraph/DOTParser.jjt URL: http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/test/utils/dotGraph/DOTParser.jjt?rev=1801161&r1=1801160&r2=1801161&view=diff ============================================================================== --- pig/trunk/test/org/apache/pig/test/utils/dotGraph/DOTParser.jjt (original) +++ pig/trunk/test/org/apache/pig/test/utils/dotGraph/DOTParser.jjt Fri Jul 7 13:38:22 2017 @@ -25,23 +25,31 @@ options { PARSER_BEGIN(DOTParser) -package org.apache.pig.test.utils.dotGraph.parser ; +package org.apache.pig.test.utils.dotGraph.parser; -import java.util.*; -import java.io.*; -import org.apache.pig.test.utils.dotGraph.* ; +import java.util.Map; +import java.util.HashMap; + +import org.apache.pig.test.utils.dotGraph.DotGraph; +import org.apache.pig.test.utils.dotGraph.DotNode; +import org.apache.pig.test.utils.dotGraph.DotEdge; +import org.apache.pig.test.utils.dotGraph.DotGraphReader; public class DOTParser { + public static Map<String, DotNode> nodeMap = new HashMap<String, DotNode>(); static String unquote(String s) { return s.substring(1, s.length()-1); } - - static class DotState { - public Map<String,String> nodeAttributes = new HashMap<String,String>() ; - public Map<String,String> edgeAttributes = new HashMap<String,String>() ; - } + public static DotNode getNodeByName(String name) { + DotNode node = nodeMap.get(name); + if (node == null) { + node = new DotNode(name); + nodeMap.put(name, node); + } + return node; + } } 
PARSER_END(DOTParser) @@ -96,95 +104,132 @@ TOKEN: | <NODE: "node"> | <GRAPH: "graph"> | <DIGRAPH : "digraph"> + | <SUBGRAPH : "subgraph"> | <#LETTER : ["a"-"z", "A"-"Z"] > | <#DIGIT : ["0"-"9"] > | <#SPECIAL_CHAR : "_" | "$" > | <NAME : <LETTER> ( <LETTER> | <DIGIT> | <SPECIAL_CHAR> )* > + | <NUMBER : ( <DIGIT> )+ > | <QUOTEDSTRING : "\"" (~["\""])* "\""> } - DotGraph Parse() : { DotGraph dotGraph = null ; - DotState dotState = new DotState() ; Token graphName ; } { ( <DIGRAPH> - graphName = <NAME> { dotGraph = new DotGraph(graphName.image) ; } + (graphName = <NAME> { dotGraph = new DotGraph(graphName.image); dotGraph.topLevel = true; })? <LPAREN> - - ( LOOKAHEAD(2) - EdgeStatement(dotGraph, dotState) - | NodeStatement(dotGraph, dotState) - | AttributeStatement(dotGraph, dotState) - )+ - + StatementList(dotGraph) <RPAREN> ) { return dotGraph ; } } -void AttributeStatement(DotGraph dotGraph, DotState dotState) : +void StatementList(DotGraph dotGraph) : +{ + ; +} +{ + ( + Statement(dotGraph) + )+ +} + +void Statement(DotGraph dotGraph) : +{ + String[] attr; + DotGraph subGraph; +} +{ + ( + LOOKAHEAD(2) EdgeStatement(dotGraph) + | AttributeStatement(dotGraph) + | LOOKAHEAD(2) ( attr = Attribute() ) { dotGraph.attributes.put(attr[0], attr[1]); } + | LOOKAHEAD(2) NodeStatement(dotGraph) + | subGraph = SubGraph() { dotGraph.nodes.add(subGraph); } + ) + ( <SEMICOLON> )? 
+} + +DotGraph SubGraph() : +{ + DotGraph dotGraph = null ; + Token graphName ; +} +{ + <SUBGRAPH> + graphName = <NAME> { dotGraph = new DotGraph(graphName.image); } + <LPAREN> + StatementList(dotGraph) + <RPAREN> + { return dotGraph; } +} + +void AttributeStatement(DotGraph dotGraph) : { Map<String,String> attributes ; } { ( - ( <EDGE> attributes = AttributeList() { dotState.edgeAttributes = attributes ; } ) - | ( <NODE> attributes = AttributeList() { dotState.nodeAttributes = attributes ; } ) + ( <EDGE> attributes = AttributeList() { dotGraph.edgeAttributes = attributes; } ) + | ( <NODE> attributes = AttributeList() { dotGraph.nodeAttributes = attributes; } ) | ( <GRAPH> attributes = AttributeList() { dotGraph.attributes = attributes ; } ) ) - <SEMICOLON> } -void NodeStatement(DotGraph dotGraph, DotState dotState) : +void NodeStatement(DotGraph dotGraph) : { - Token nodeName ; - DotNode node = new DotNode() ; + String nodeName ; + DotNode node ; Map<String,String> attributes ; } { - nodeName = <NAME> { node.name = nodeName.image ; } + ( nodeName = NodeName() ) { node = getNodeByName(nodeName); } ( attributes = AttributeList() { node.attributes = new HashMap<String,String>() ; - if (dotState != null) { - node.attributes.putAll(dotState.nodeAttributes) ; - } node.attributes.putAll(attributes) ; } )? 
- <SEMICOLON> { dotGraph.nodes.add(node) ; } } -void EdgeStatement(DotGraph dotGraph, DotState dotState) : +void EdgeStatement(DotGraph dotGraph) : { - Token nodeName1 ; - Token nodeName2 ; - String startingNode ; - DotNode node = new DotNode() ; + String nodeName1 ; + String nodeName2 ; + DotNode startingNode ; Map<String,String> attributes ; + DotEdge edge = new DotEdge() ; } { - nodeName1 = <NAME> { startingNode = nodeName1.image ; } + nodeName1 = NodeName() { startingNode = getNodeByName(nodeName1) ; } ( <DIRECTED_EDGE> - nodeName2 = <NAME> + nodeName2 = NodeName() { - DotEdge edge = new DotEdge() ; edge.fromNode = startingNode ; - edge.toNode = nodeName2.image ; + DotNode node2 = getNodeByName(nodeName2) ; + edge.toNode = node2 ; + dotGraph.nodes.add(startingNode); + dotGraph.nodes.add(node2); dotGraph.edges.add(edge) ; - startingNode = nodeName2.image ; + if (startingNode != node2) + startingNode.edgeTo.add(node2); + + startingNode = node2 ; } )+ - <SEMICOLON> - + ( attributes = AttributeList() { + edge.attributes = new HashMap<String,String>() ; + edge.attributes.putAll(attributes) ; + } + )? 
} Map<String,String> AttributeList() : @@ -208,14 +253,39 @@ Map<String,String> AttributeList() : String[] Attribute() : { Token attName ; - Token value ; + String value ; String[] keyValuePair = new String[2] ; } { ( attName = <NAME> { keyValuePair[0] = attName.image ; } <EQUAL> - value = <QUOTEDSTRING> { keyValuePair[1] = unquote(value.image) ; } + value = Value() { keyValuePair[1] = value; } ) { return keyValuePair ; } } + +String Value() : +{ + Token value; +} +{ + ( + value = <QUOTEDSTRING> + | value = <NAME> + | value = <NUMBER> + ) + { return value.image; } +} + +String NodeName() : +{ + Token name ; +} +{ + ( + name = <NAME> + | name = <NUMBER> + ) + { return name.image; } +} Modified: pig/trunk/test/org/apache/pig/test/utils/dotGraph/DotEdge.java URL: http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/test/utils/dotGraph/DotEdge.java?rev=1801161&r1=1801160&r2=1801161&view=diff ============================================================================== --- pig/trunk/test/org/apache/pig/test/utils/dotGraph/DotEdge.java (original) +++ pig/trunk/test/org/apache/pig/test/utils/dotGraph/DotEdge.java Fri Jul 7 13:38:22 2017 @@ -18,11 +18,46 @@ package org.apache.pig.test.utils.dotGraph; +import java.util.HashMap; +import java.util.Map; + /** * This represents an edge in DOT format. 
* An edge in DOT can have attributes but we're not interested */ public class DotEdge { - public String fromNode ; - public String toNode ; -} + public DotNode fromNode; + public DotNode toNode; + public Map<String, String> attributes = new HashMap<String, String>(); + + @Override + public String toString() { + StringBuilder sb = new StringBuilder(fromNode.name + " -> " + toNode.name); + if (attributes.size() > 0) { + int index = 0; + sb.append(" ["); + for (Map.Entry<String, String> attr : attributes.entrySet()) { + sb.append(attr.getKey() + "=" + attr.getValue()); + if (index < attributes.size() - 1) + sb.append(", "); + index++; + } + sb.append("]"); + } + return sb.toString(); + } + + @Override + public boolean equals(Object other) { + if (other instanceof DotEdge) { + DotEdge edge = (DotEdge) other; + return fromNode.equals(edge.fromNode) && toNode.equals(edge.toNode); + } + return false; + } + + @Override + public int hashCode() { + return fromNode.hashCode() * toNode.hashCode(); + } +} \ No newline at end of file Modified: pig/trunk/test/org/apache/pig/test/utils/dotGraph/DotGraph.java URL: http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/test/utils/dotGraph/DotGraph.java?rev=1801161&r1=1801160&r2=1801161&view=diff ============================================================================== --- pig/trunk/test/org/apache/pig/test/utils/dotGraph/DotGraph.java (original) +++ pig/trunk/test/org/apache/pig/test/utils/dotGraph/DotGraph.java Fri Jul 7 13:38:22 2017 @@ -18,24 +18,137 @@ package org.apache.pig.test.utils.dotGraph; -import java.util.List; import java.util.ArrayList; -import java.util.Map; +import java.util.Collections; import java.util.HashMap; +import java.util.HashSet; +import java.util.Map; +import java.util.Set; /*** * This represents graph structure in DOT format */ -public class DotGraph { +public class DotGraph extends DotNode { + public boolean topLevel = false; + public Set<DotEdge> edges = new HashSet<DotEdge>(); + public 
Set<DotNode> nodes = new HashSet<DotNode>(); + public Map<String, String> edgeAttributes = new HashMap<String, String>(); + public Map<String, String> nodeAttributes = new HashMap<String, String>(); - public String name; - public List<DotEdge> edges = new ArrayList<DotEdge>() ; - public List<DotNode> nodes = new ArrayList<DotNode>() ; - public Map<String, String> attributes = new HashMap<String,String>() ; + public DotGraph(String name) { + super(name); + } + @Override + public String toString() { + String graphType = topLevel ? "digraph " : "subgraph "; + StringBuilder sb = new StringBuilder(graphType); + sb.append(name); + sb.append("{\n"); + + for (Map.Entry<String, String> attr : attributes.entrySet()) + sb.append(attr.getKey() + "=" + attr.getValue() + ";\n"); + + if (nodeAttributes.size() > 0) { + int index = 0; + sb.append("node ["); + for (Map.Entry<String, String> attr : nodeAttributes.entrySet()) { + sb.append(attr.getKey() + "=" + attr.getValue()); + if (index < nodeAttributes.size() - 1) + sb.append(", "); + index++; + } + sb.append("];\n"); + } + + if (edgeAttributes.size() > 0) { + int index = 0; + sb.append("edge ["); + for (Map.Entry<String, String> attr : edgeAttributes.entrySet()) { + sb.append(attr.getKey() + "=" + attr.getValue()); + if (index < edgeAttributes.size() - 1) + sb.append(", "); + index++; + } + sb.append("];\n"); + } - public DotGraph(String name) { - this.name = name ; + for (DotNode node : nodes) + sb.append(node.toString() + ";\n"); + + for (DotEdge edge : edges) + sb.append(edge.toString() + ";\n"); + + sb.append("}"); + return sb.toString(); + } + + @Override + public boolean equals(Object other) { + if (other != null && other instanceof DotGraph) { + DotGraph graph = (DotGraph) other; + return graph.getLabel().equals(getLabel()) && edges.equals(graph.edges) && nodes.equals(graph.nodes); + } + return false; + } + + @Override + public int hashCode() { + return getLabel().hashCode() * edgeAttributes.hashCode() * 
nodeAttributes.hashCode(); + } + + private Set<DotNode> getRootNodes() { + Set<DotNode> roots = new HashSet<DotNode>(nodes); + + for (DotEdge edge : edges) + roots.remove(edge.toNode); + + return roots; } -} + public boolean isomorphic(DotNode other) { + if (other instanceof DotGraph) { + DotGraph graph = (DotGraph) other; + return graph.getLabel().equals(getLabel()) && graph.getCanonicalName().equals(getCanonicalName()); + } + return false; + } + + @Override + public String getCanonicalName() { + StringBuilder sb = new StringBuilder(""); + ArrayList<String> children = new ArrayList<>(); + + Set<DotNode> roots = getRootNodes(); + + for (DotNode root : roots) + children.add(root.getCanonicalName()); + + Collections.sort(children); + for (String nodeName : children) + sb.append(nodeName); + + sb.insert(0, '#'); + sb.insert(sb.length(), '#'); + return sb.toString(); + } + + @Override + public String getLabel() { + StringBuilder label = new StringBuilder(super.getLabel()); + ArrayList<String> children = new ArrayList<>(); + + Set<DotNode> roots = getRootNodes(); + + for (DotNode root : roots) + children.add(root.getLabel()); + + Collections.sort(children); + for (String nodeLabel : children) + label.append(nodeLabel); + + label.insert(0, '\"'); + label.insert(label.length(), '\"'); + return label.toString(); + } +} \ No newline at end of file Modified: pig/trunk/test/org/apache/pig/test/utils/dotGraph/DotGraphReader.java URL: http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/test/utils/dotGraph/DotGraphReader.java?rev=1801161&r1=1801160&r2=1801161&view=diff ============================================================================== --- pig/trunk/test/org/apache/pig/test/utils/dotGraph/DotGraphReader.java (original) +++ pig/trunk/test/org/apache/pig/test/utils/dotGraph/DotGraphReader.java Fri Jul 7 13:38:22 2017 @@ -18,11 +18,15 @@ package org.apache.pig.test.utils.dotGraph; +import java.io.BufferedReader; +import java.io.ByteArrayInputStream; +import 
java.io.FileNotFoundException; +import java.io.FileReader; +import java.io.IOException; + import org.apache.pig.test.utils.dotGraph.parser.DOTParser; import org.apache.pig.test.utils.dotGraph.parser.ParseException; -import java.io.*; - /*** * This class is responsible for loading textual Dot graph * into object representation. @@ -36,19 +40,18 @@ public class DotGraphReader { * @return graph */ - public DotGraph load(String dotContent) { + static public DotGraph load(String dotContent) { ByteArrayInputStream stream - = new ByteArrayInputStream(dotContent.getBytes()) ; - DOTParser dotParser = new DOTParser(stream) ; - DotGraph graph = null ; + = new ByteArrayInputStream(dotContent.getBytes()); + DOTParser dotParser = new DOTParser(stream); + DotGraph graph = null; try { - graph = dotParser.Parse() ; - } - catch (ParseException pe) { - System.out.println(pe.getMessage()) ; - throw new RuntimeException("Bad Dot file") ; + graph = dotParser.Parse(); + } catch (ParseException pe) { + System.out.println(pe.getMessage()); + throw new RuntimeException("Bad Dot file"); } - return graph ; + return graph; } /*** @@ -57,24 +60,22 @@ public class DotGraphReader { * @return graph */ - public DotGraph loadFromFile(String file) { - StringBuilder sb = new StringBuilder() ; - BufferedReader br = null ; + static public DotGraph loadFromFile(String file) { + StringBuilder sb = new StringBuilder(); + BufferedReader br = null; try { - br = new BufferedReader(new FileReader(file)) ; - String str ; - while((str=br.readLine())!=null) { - sb.append(str) ; - sb.append("\n") ; + br = new BufferedReader(new FileReader(file)); + String str; + while ((str = br.readLine()) != null) { + sb.append(str); + sb.append("\n"); } - } - catch (FileNotFoundException fnfe) { - throw new RuntimeException("file:" + file + " not found!") ; - } - catch (IOException ioe) { - throw new RuntimeException("Error while reading from:" + file) ; + } catch (FileNotFoundException fnfe) { + throw new 
RuntimeException("file:" + file + " not found!"); + } catch (IOException ioe) { + throw new RuntimeException("Error while reading from:" + file); } - return load(sb.toString()) ; + return load(sb.toString()); } } Modified: pig/trunk/test/org/apache/pig/test/utils/dotGraph/DotNode.java URL: http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/test/utils/dotGraph/DotNode.java?rev=1801161&r1=1801160&r2=1801161&view=diff ============================================================================== --- pig/trunk/test/org/apache/pig/test/utils/dotGraph/DotNode.java (original) +++ pig/trunk/test/org/apache/pig/test/utils/dotGraph/DotNode.java Fri Jul 7 13:38:22 2017 @@ -18,15 +18,89 @@ package org.apache.pig.test.utils.dotGraph; -import java.util.Map; +import java.util.ArrayList; +import java.util.Collections; import java.util.HashMap; +import java.util.HashSet; +import java.util.Map; +import java.util.Set; /*** * This represents a node in DOT format */ public class DotNode { + public String name; + public Map<String, String> attributes = new HashMap<String, String>(); + public Set<DotNode> edgeTo = new HashSet<DotNode>(); + + public DotNode(String name) { + this.name = name; + } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder(name); + if (attributes.size() > 0) { + int index = 0; + sb.append(" ["); + for (Map.Entry<String, String> attr : attributes.entrySet()) { + sb.append(attr.getKey() + "=" + attr.getValue()); + if (index < attributes.size() - 1) + sb.append(", "); + index++; + } + sb.append("]"); + } + return sb.toString(); + } + + public String getLabel() { + String label = ""; + for (Map.Entry<String, String> attr : attributes.entrySet()) { + if (attr.getKey().equals("label")) { + label = attr.getValue(); + break; + } + } + return label; + } + + public boolean isInvisStyle() { + for (Map.Entry<String, String> attr : attributes.entrySet()) + if (attr.getKey().equals("style") && attr.getValue().equals("invis")) + return true; 
+ + return false; + } + + @Override + public boolean equals(Object other) { + if (other instanceof DotNode && !(other instanceof DotGraph)) { + DotNode node = (DotNode) other; + return name.equals(node.name); + } + return false; + } + + @Override + public int hashCode() { + return name.hashCode(); + } + + public String getCanonicalName() { + StringBuilder sb = new StringBuilder(""); + ArrayList<String> canonicalNames = new ArrayList<>(); + + for (DotNode node : edgeTo) + if (!node.isInvisStyle()) + canonicalNames.add(node.getCanonicalName()); - public String name ; - public Map<String, String> attributes = new HashMap<String,String>() ; + Collections.sort(canonicalNames); + for (String nodeName : canonicalNames) + sb.append(nodeName); -} + sb.insert(0, '0'); + sb.insert(sb.length(), '1'); + return sb.toString(); + } +} \ No newline at end of file