Author: hashutosh Date: Sat Feb 13 21:46:49 2010 New Revision: 909921 URL: http://svn.apache.org/viewvc?rev=909921&view=rev Log: PIG-1131: Pig simple join does not work when it contains empty lines
Modified: hadoop/pig/trunk/CHANGES.txt hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POLocalRearrange.java hadoop/pig/trunk/test/org/apache/pig/test/TestJoin.java Modified: hadoop/pig/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/CHANGES.txt?rev=909921&r1=909920&r2=909921&view=diff ============================================================================== --- hadoop/pig/trunk/CHANGES.txt (original) +++ hadoop/pig/trunk/CHANGES.txt Sat Feb 13 21:46:49 2010 @@ -93,6 +93,8 @@ BUG FIXES +PIG-1131: Pig simple join does not work when it contains empty lines (ashutoshc) + PIG-834: incorrect plan when algebraic functions are nested (ashutoshc) PIG-1217: Fix argToFuncMapping in Piggybank Top function (dvryaboy via gates) Modified: hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POLocalRearrange.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POLocalRearrange.java?rev=909921&r1=909920&r2=909921&view=diff ============================================================================== --- hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POLocalRearrange.java (original) +++ hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POLocalRearrange.java Sat Feb 13 21:46:49 2010 @@ -118,9 +118,6 @@ private int mProjectedColsMapSize = 0; private int mSecondaryProjectedColsMapSize = 0; - private ArrayList<Integer> minValuePositions; - private int minValuePositionsSize = 0; - private Tuple lrOutput; private boolean useSecondaryKey = false; @@ -459,27 +456,14 @@ Tuple minimalValue = null; if(!mProjectStar) { - if(minValuePositions == null) { - // the very first time, we will have to build - // the "value" tuple piecemeal but we can - // do better next time round - minValuePositions = new ArrayList<Integer>(); - minimalValue = mTupleFactory.newTuple(); - // look for individual columns that we are - // projecting - for (int i = 0; i < value.size(); i++) { - if(mProjectedColsMap.get(i) == null) { - // this column was not found in the "key" - // so send it in the "value" - minimalValue.append(value.get(i)); - minValuePositions.add(i); - } - } - minValuePositionsSize = minValuePositions.size(); - } else { - minimalValue = mTupleFactory.newTuple(minValuePositionsSize); - for(int i = 0; i < minValuePositionsSize; i++) { - minimalValue.set(i, value.get(minValuePositions.get(i))); + minimalValue = mTupleFactory.newTuple(); + // look for individual columns that we are + // projecting + for (int i = 0; i < value.size(); i++) { + if(mProjectedColsMap.get(i) == null) { + // this column was not found in the "key" + // so send it in the "value" + minimalValue.append(value.get(i)); } } } else { @@ -487,7 +471,7 @@ // we would send out an empty tuple as // the "value" since all elements are in the // "key" - minimalValue = mTupleFactory.newTuple(); + minimalValue = mTupleFactory.newTuple(0); } lrOutput.set(2, minimalValue); Modified: hadoop/pig/trunk/test/org/apache/pig/test/TestJoin.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/test/org/apache/pig/test/TestJoin.java?rev=909921&r1=909920&r2=909921&view=diff ============================================================================== --- hadoop/pig/trunk/test/org/apache/pig/test/TestJoin.java (original) +++ hadoop/pig/trunk/test/org/apache/pig/test/TestJoin.java Sat Feb 13 21:46:49 2010 @@ -98,6 +98,40 @@ } @Test + public void testJoinWithMissingFieldsInTuples() throws IOException{ + + setUp(ExecType.MAPREDUCE); + String[] input1 = { + "ff ff ff", + "", + "", + "", + "", + "ff ff ff", + "", + "" + }; + String[] input2 = { + "", + "", + "", + "", + "" + }; + + String firstInput = createInputFile(ExecType.MAPREDUCE, "a.txt", input1); + String secondInput = createInputFile(ExecType.MAPREDUCE, "b.txt", input2); + String script = "a = load 'a.txt' using PigStorage(' ');" + + "b = load 'b.txt' using PigStorage('\u0001');" + + "c = join a by $0, b by $0;"; + Util.registerMultiLineQuery(pigServer, script); + Iterator<Tuple> it = pigServer.openIterator("c"); + assertFalse(it.hasNext()); + deleteInputFile(ExecType.MAPREDUCE, firstInput); + deleteInputFile(ExecType.MAPREDUCE, secondInput); + } + + @Test public void testJoinUnkownSchema() throws Exception { // If any of the input schema is unknown, the resulting schema should be unknown as well for (ExecType execType : execTypes) {