Author: hashutosh
Date: Sun Feb 7 18:45:36 2010
New Revision: 907463

URL: http://svn.apache.org/viewvc?rev=907463&view=rev
Log:
PIG-1046: join algorithm specification is within double quotes
Modified: hadoop/pig/trunk/CHANGES.txt hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/parser/QueryParser.jjt hadoop/pig/trunk/test/org/apache/pig/test/TestCollectedGroup.java hadoop/pig/trunk/test/org/apache/pig/test/TestJoin.java Modified: hadoop/pig/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/CHANGES.txt?rev=907463&r1=907462&r2=907463&view=diff ============================================================================== --- hadoop/pig/trunk/CHANGES.txt (original) +++ hadoop/pig/trunk/CHANGES.txt Sun Feb 7 18:45:36 2010 @@ -24,6 +24,8 @@ IMPROVEMENTS +PIG-1046: join algorithm specification is within double quotes (ashutoshc) + PIG-1209: Port POJoinPackage to proactively spill (ashutoshc) PIG-1190: Handling of quoted strings in pig-latin/grunt commands (ashutoshc) Modified: hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/parser/QueryParser.jjt URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/parser/QueryParser.jjt?rev=907463&r1=907462&r2=907463&view=diff ============================================================================== --- hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/parser/QueryParser.jjt (original) +++ hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/parser/QueryParser.jjt Sun Feb 7 18:45:36 2010 @@ -320,6 +320,31 @@ return cogroup; } + private LogicalOperator parseUsingForGroupBy(String modifier, ArrayList<CogroupInput> gis, LogicalPlan lp) throws ParseException, PlanException{ + + if(modifier.equalsIgnoreCase("collected")){ + if (gis.size() != 1) { + throw new ParseException("Collected group is only supported for single input"); + } + if (!isColumnProjectionsOrStar(gis.get(0))) { + throw new ParseException("Collected group is only supported for columns or star projection"); + } + LogicalOperator cogroup = parseCogroup(gis, lp, LOCogroup.GROUPTYPE.COLLECTED); + cogroup.pinOption(LOCogroup.OPTION_GROUPTYPE); + return cogroup; + } + + else if 
(modifier.equalsIgnoreCase("regular")){ + LogicalOperator cogroup = parseCogroup(gis, lp, LOCogroup.GROUPTYPE.REGULAR); + cogroup.pinOption(LOCogroup.OPTION_GROUPTYPE); + return cogroup; + } + + else{ + throw new ParseException("Only COLLECTED or REGULAR are valid GROUP modifiers."); + } + } + /** * Join parser. */ @@ -435,7 +460,42 @@ return foreach; } - void assertAtomic(LogicalOperator spec, boolean desiredAtomic) throws ParseException{ + private LogicalOperator parseUsingForJoin(String modifier, ArrayList<CogroupInput> gis, + LogicalPlan lp, boolean isFullOuter, boolean isRightOuter, boolean isOuter) throws + ParseException, PlanException{ + + if (modifier.equalsIgnoreCase("repl") || modifier.equalsIgnoreCase("replicated")) { + if(isFullOuter || isRightOuter) { + throw new ParseException("Replicated join does not support (right|full) outer joins"); + } + LogicalOperator joinOp = parseJoin(gis, lp, LOJoin.JOINTYPE.REPLICATED); + joinOp.pinOption(LOJoin.OPTION_JOIN); + return joinOp; + } + else if (modifier.equalsIgnoreCase("hash") || modifier.equalsIgnoreCase("default")) { + LogicalOperator joinOp = parseJoin(gis, lp, LOJoin.JOINTYPE.HASH); + joinOp.pinOption(LOJoin.OPTION_JOIN); + return joinOp; + } + else if (modifier.equalsIgnoreCase("skewed")) { + LogicalOperator joinOp = parseJoin(gis, lp, LOJoin.JOINTYPE.SKEWED); + joinOp.pinOption(LOJoin.OPTION_JOIN); + return joinOp; + } + else if (modifier.equalsIgnoreCase("merge")) { + if(isOuter) { + throw new ParseException("Merge join does not support (left|right|full) outer joins"); + } + LogicalOperator joinOp = parseJoin(gis, lp, LOJoin.JOINTYPE.MERGE); + joinOp.pinOption(LOJoin.OPTION_JOIN); + return joinOp; + } + else{ + throw new ParseException("Only REPL, REPLICATED, HASH, SKEWED and MERGE are vaild JOIN modifiers."); + } + } + + void assertAtomic(LogicalOperator spec, boolean desiredAtomic) throws ParseException{ Boolean isAtomic = null; if ( spec instanceof LOConst || (spec instanceof LOUserFunc && @@ 
-1713,29 +1773,22 @@ ArrayList<CogroupInput> gis = new ArrayList<CogroupInput>(); LogicalOperator cogroup = null; log.trace("Entering CoGroupClause"); + Token t; } { - (gi = GroupItem(lp) { gis.add(gi); } ("," gi = GroupItem(lp) { gis.add(gi); })* - ( - [<USING> ("\"collected\"" { - if (gis.size() != 1) { - throw new ParseException("Collected group is only supported for single input"); - } - if (!isColumnProjectionsOrStar(gis.get(0))) { - throw new ParseException("Collected group is only supported for columns or star projection"); - } - cogroup = parseCogroup(gis, lp, LOCogroup.GROUPTYPE.COLLECTED); - cogroup.pinOption(LOCogroup.OPTION_GROUPTYPE); - } - |"\"regular\"" { - cogroup = parseCogroup(gis, lp, LOCogroup.GROUPTYPE.REGULAR); - cogroup.pinOption(LOCogroup.OPTION_GROUPTYPE); - } - ) - ] - ) + ([ <USING> ( + (t = < QUOTEDSTRING> { cogroup = parseUsingForGroupBy(unquote (t.image), gis, lp); }) + |("\"collected\"") { + log.info("[WARN] Use of double-quoted string to specify hint is deprecated. Please specify hint in single quotes."); + cogroup = parseUsingForGroupBy("collected", gis, lp); + } + |("\"regular\"") { + log.info("[WARN] Use of double-quoted string to specify hint is deprecated. Please specify hint in single quotes."); + cogroup = parseUsingForGroupBy("regular", gis, lp); + } + )]) ) { @@ -2033,6 +2086,7 @@ boolean isRightOuter = false; boolean isFullOuter = false; boolean isOuter = false; + Token t; } { (gi = JoinItem(lp) { gis.add(gi); } @@ -2081,43 +2135,25 @@ } // For all types of join we create LOJoin and mark what type of join it is. 
- ( - [<USING> ("\"replicated\"" { - if(isFullOuter || isRightOuter) { - throw new ParseException("Replicated join does not support (right|full) outer joins"); - } - joinOp = parseJoin(gis, lp, LOJoin.JOINTYPE.REPLICATED); - joinOp.pinOption(LOJoin.OPTION_JOIN); - } - | "\"repl\"" { - if(isFullOuter || isRightOuter) { - throw new ParseException("Replicated join does not support (right|full) outer joins"); - } - joinOp = parseJoin(gis, lp, LOJoin.JOINTYPE.REPLICATED); - joinOp.pinOption(LOJoin.OPTION_JOIN); - } - |"\"skewed\"" { - joinOp = parseJoin(gis, lp, LOJoin.JOINTYPE.SKEWED); - joinOp.pinOption(LOJoin.OPTION_JOIN); + ([<USING> ( + (t = <QUOTEDSTRING> { joinOp = parseUsingForJoin(unquote(t.image), gis, lp, isFullOuter, isRightOuter, isOuter);}) + | ("\"repl\"" | "\"replicated\"") { + log.info("[WARN] Use of double-quotes for specifying join algorithm is deprecated. Please use single quotes."); + joinOp = parseUsingForJoin("replicated", gis, lp, isFullOuter, isRightOuter, isOuter); + } + | ("\"skewed\"") { + log.info("[WARN] Use of double-quotes for specifying join algorithm is deprecated. Please use single quotes."); + joinOp = parseUsingForJoin("skewed", gis, lp, isFullOuter, isRightOuter, isOuter); } - |"\"merge\"" { - if(isOuter) { - throw new ParseException("Merge join does not support (left|right|full) outer joins"); - } - joinOp = parseJoin(gis, lp, LOJoin.JOINTYPE.MERGE); - joinOp.pinOption(LOJoin.OPTION_JOIN); - } - |"\"hash\"" { - joinOp = parseJoin(gis, lp, LOJoin.JOINTYPE.HASH); - joinOp.pinOption(LOJoin.OPTION_JOIN); + | ("\"merge\"") { + log.info("[WARN] Use of double-quotes for specifying join algorithm is deprecated. Please use single quotes."); + joinOp = parseUsingForJoin("merge", gis, lp, isFullOuter, isRightOuter, isOuter); + } + | ("\"hash\"" | "\"default\"") { + log.info("[WARN] Use of double-quotes for specifying join algorithm is deprecated. 
Please use single quotes."); + joinOp = parseUsingForJoin("hash", gis, lp, isFullOuter, isRightOuter, isOuter); } - |"\"default\"" { - joinOp = parseJoin(gis, lp, LOJoin.JOINTYPE.HASH); - joinOp.pinOption(LOJoin.OPTION_JOIN); - }) - ] - ) - ) + )])) {log.trace("Exiting JoinClause"); if (joinOp!=null) { Modified: hadoop/pig/trunk/test/org/apache/pig/test/TestCollectedGroup.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/test/org/apache/pig/test/TestCollectedGroup.java?rev=907463&r1=907462&r2=907463&view=diff ============================================================================== --- hadoop/pig/trunk/test/org/apache/pig/test/TestCollectedGroup.java (original) +++ hadoop/pig/trunk/test/org/apache/pig/test/TestCollectedGroup.java Sun Feb 7 18:45:36 2010 @@ -32,9 +32,12 @@ import org.apache.pig.data.BagFactory; import org.apache.pig.data.DataBag; import org.apache.pig.data.Tuple; +import org.apache.pig.test.utils.LogicalPlanTester; import org.apache.pig.test.utils.TestHelper; import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POCollectedGroup; import org.apache.pig.backend.hadoop.executionengine.physicalLayer.plans.PhysicalPlan; +import org.apache.pig.impl.logicalLayer.LOCogroup; +import org.apache.pig.impl.logicalLayer.LogicalPlan; import org.apache.pig.impl.plan.OperatorKey; import org.junit.After; import org.junit.Before; @@ -76,6 +79,22 @@ Util.deleteFile(cluster, INPUT_FILE); } + public void testCollectedGrpSpecifiedInSingleQuotes1(){ + + LogicalPlanTester lpt = new LogicalPlanTester(); + lpt.buildPlan("A = LOAD '" + INPUT_FILE + "' as (id, name, grade);"); + LogicalPlan lp = lpt.buildPlan("B = group A by id using 'collected';"); + assertEquals(LOCogroup.GROUPTYPE.COLLECTED, ((LOCogroup)lp.getLeaves().get(0)).getGroupType()); + } + + public void testCollectedGrpSpecifiedInSingleQuotes2(){ + + LogicalPlanTester lpt = new LogicalPlanTester(); + lpt.buildPlan("A = LOAD '" + INPUT_FILE + "' as (id, name, grade);"); + 
LogicalPlan lp = lpt.buildPlan("B = group A all using 'regular';"); + assertEquals(LOCogroup.GROUPTYPE.REGULAR, ((LOCogroup)lp.getLeaves().get(0)).getGroupType()); + } + public void testPOMapsideGroupNoNullPlans() throws IOException { POCollectedGroup pmg = new POCollectedGroup(new OperatorKey()); List<PhysicalPlan> plans = pmg.getPlans(); Modified: hadoop/pig/trunk/test/org/apache/pig/test/TestJoin.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/test/org/apache/pig/test/TestJoin.java?rev=907463&r1=907462&r2=907463&view=diff ============================================================================== --- hadoop/pig/trunk/test/org/apache/pig/test/TestJoin.java (original) +++ hadoop/pig/trunk/test/org/apache/pig/test/TestJoin.java Sun Feb 7 18:45:36 2010 @@ -32,10 +32,12 @@ import org.apache.pig.data.Tuple; import org.apache.pig.data.TupleFactory; import org.apache.pig.impl.io.FileLocalizer; +import org.apache.pig.impl.logicalLayer.LOJoin; +import org.apache.pig.impl.logicalLayer.LogicalPlan; +import org.apache.pig.impl.logicalLayer.LOJoin.JOINTYPE; import org.apache.pig.impl.logicalLayer.parser.ParseException; import org.apache.pig.impl.logicalLayer.schema.Schema; import org.apache.pig.impl.util.LogUtils; -import org.apache.pig.test.utils.Identity; import org.apache.pig.test.utils.LogicalPlanTester; import org.junit.Before; import org.junit.Test; @@ -95,7 +97,6 @@ } } - @Test public void testJoinUnkownSchema() throws Exception { // If any of the input schema is unknown, the resulting schema should be unknown as well @@ -109,7 +110,7 @@ assertTrue(schema == null); } } - + @Test public void testDefaultJoin() throws IOException, ParseException { for (ExecType execType : execTypes) { @@ -553,5 +554,54 @@ deleteInputFile(execType, secondInput); } } - + + @Test + public void testLiteralsForJoinAlgoSpecification1() { + + LogicalPlanTester lpt = new LogicalPlanTester(); + lpt.buildPlan("a = load 'A'; "); + lpt.buildPlan("b = load 'B'; "); + LogicalPlan lp = 
lpt.buildPlan("c = Join a by $0, b by $0 using 'merge'; "); + assertEquals(JOINTYPE.MERGE, ((LOJoin)lp.getLeaves().get(0)).getJoinType()); + } + + @Test + public void testLiteralsForJoinAlgoSpecification2() { + + LogicalPlanTester lpt = new LogicalPlanTester(); + lpt.buildPlan("a = load 'A'; "); + lpt.buildPlan("b = load 'B'; "); + LogicalPlan lp = lpt.buildPlan("c = Join a by $0, b by $0 using 'hash'; "); + assertEquals(JOINTYPE.HASH, ((LOJoin)lp.getLeaves().get(0)).getJoinType()); + } + + @Test + public void testLiteralsForJoinAlgoSpecification5() { + + LogicalPlanTester lpt = new LogicalPlanTester(); + lpt.buildPlan("a = load 'A'; "); + lpt.buildPlan("b = load 'B'; "); + LogicalPlan lp = lpt.buildPlan("c = Join a by $0, b by $0 using 'default'; "); + assertEquals(JOINTYPE.HASH, ((LOJoin)lp.getLeaves().get(0)).getJoinType()); + } + + @Test + public void testLiteralsForJoinAlgoSpecification3() { + + LogicalPlanTester lpt = new LogicalPlanTester(); + lpt.buildPlan("a = load 'A'; "); + lpt.buildPlan("b = load 'B'; "); + LogicalPlan lp = lpt.buildPlan("c = Join a by $0, b by $0 using 'repl'; "); + assertEquals(JOINTYPE.REPLICATED, ((LOJoin)lp.getLeaves().get(0)).getJoinType()); + } + + @Test + public void testLiteralsForJoinAlgoSpecification4() { + + LogicalPlanTester lpt = new LogicalPlanTester(); + lpt.buildPlan("a = load 'A'; "); + lpt.buildPlan("b = load 'B'; "); + LogicalPlan lp = lpt.buildPlan("c = Join a by $0, b by $0 using 'replicated'; "); + assertEquals(JOINTYPE.REPLICATED, ((LOJoin)lp.getLeaves().get(0)).getJoinType()); + } }