Author: daijy
Date: Thu Mar 11 18:37:11 2010
New Revision: 921975

URL: http://svn.apache.org/viewvc?rev=921975&view=rev
Log:
PIG-1275: empty bag in PigStorage read as null

Modified: hadoop/pig/trunk/CHANGES.txt hadoop/pig/trunk/src/org/apache/pig/builtin/Utf8StorageConverter.java hadoop/pig/trunk/test/org/apache/pig/test/TestTextDataParser.java Modified: hadoop/pig/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/CHANGES.txt?rev=921975&r1=921974&r2=921975&view=diff ============================================================================== --- hadoop/pig/trunk/CHANGES.txt (original) +++ hadoop/pig/trunk/CHANGES.txt Thu Mar 11 18:37:11 2010 @@ -147,6 +147,8 @@ OPTIMIZATIONS BUG FIXES +PIG-1275: empty bag in PigStorage read as null (daijy) + PIG-1252: Diamond splitter does not generate correct results when using Multi-query optimization (rding) Modified: hadoop/pig/trunk/src/org/apache/pig/builtin/Utf8StorageConverter.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/builtin/Utf8StorageConverter.java?rev=921975&r1=921974&r2=921975&view=diff ============================================================================== --- hadoop/pig/trunk/src/org/apache/pig/builtin/Utf8StorageConverter.java (original) +++ hadoop/pig/trunk/src/org/apache/pig/builtin/Utf8StorageConverter.java Thu Mar 11 18:37:11 2010 @@ -20,6 +20,7 @@ package org.apache.pig.builtin; import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; import java.io.IOException; +import java.io.PushbackInputStream; import java.util.Arrays; import java.util.HashMap; import java.util.Map; @@ -67,7 +68,7 @@ public class Utf8StorageConverter implem } } - private DataBag consumeBag(ByteArrayInputStream in, ResourceFieldSchema fieldSchema) throws IOException { + private DataBag consumeBag(PushbackInputStream in, ResourceFieldSchema fieldSchema) throws IOException { if (fieldSchema==null) { throw new IOException("Schema is null"); } @@ -85,7 +86,8 @@ public class Utf8StorageConverter implem DataBag db = DefaultBagFactory.getInstance().newDefaultBag(); while (true) { t = consumeTuple(in, fs); - db.add(t); + if (t!=null) + 
db.add(t); while ((buf=in.read())!='}'&&buf!=',') { if (buf==-1) { throw new IOException("Unexpect end of bag"); @@ -97,17 +99,21 @@ public class Utf8StorageConverter implem return db; } - private Tuple consumeTuple(ByteArrayInputStream in, ResourceFieldSchema fieldSchema) throws IOException { + private Tuple consumeTuple(PushbackInputStream in, ResourceFieldSchema fieldSchema) throws IOException { if (fieldSchema==null) { throw new IOException("Schema is null"); } int buf; ByteArrayOutputStream mOut; - while ((buf=in.read())!='(') { + while ((buf=in.read())!='('||buf=='}') { if (buf==-1) { throw new IOException("Unexpect end of tuple"); } + if (buf=='}') { + in.unread(buf); + return null; + } } Tuple t = DefaultTupleFactory.getInstance().newTuple(); if (fieldSchema.getSchema()!=null && fieldSchema.getSchema().getFields().length!=0) { @@ -172,7 +178,7 @@ public class Utf8StorageConverter implem return t; } - private Map<String, Object> consumeMap(ByteArrayInputStream in, ResourceFieldSchema fieldSchema) throws IOException { + private Map<String, Object> consumeMap(PushbackInputStream in, ResourceFieldSchema fieldSchema) throws IOException { if (fieldSchema==null) { throw new IOException("Schema is null"); } @@ -232,7 +238,7 @@ public class Utf8StorageConverter implem return m; } - private Object consumeComplexType(ByteArrayInputStream in, ResourceFieldSchema complexFieldSchema) throws IOException { + private Object consumeComplexType(PushbackInputStream in, ResourceFieldSchema complexFieldSchema) throws IOException { Object field; switch (complexFieldSchema.getType()) { case DataType.BAG: @@ -285,7 +291,8 @@ public class Utf8StorageConverter implem return null; DataBag db; try { - ByteArrayInputStream in = new ByteArrayInputStream(b); + ByteArrayInputStream bis = new ByteArrayInputStream(b); + PushbackInputStream in = new PushbackInputStream(bis); db = consumeBag(in, schema); } catch (IOException e) { LogUtils.warn(this, "Unable to interpret value " + 
Arrays.toString(b) + " in field being " + @@ -424,7 +431,8 @@ public class Utf8StorageConverter implem ResourceFieldSchema fs = new ResourceFieldSchema(); fs.setType(DataType.MAP); try { - ByteArrayInputStream in = new ByteArrayInputStream(b); + ByteArrayInputStream bis = new ByteArrayInputStream(b); + PushbackInputStream in = new PushbackInputStream(bis); map = consumeMap(in, fs); } catch (IOException e) { @@ -443,7 +451,8 @@ public class Utf8StorageConverter implem Tuple t; try { - ByteArrayInputStream in = new ByteArrayInputStream(b); + ByteArrayInputStream bis = new ByteArrayInputStream(b); + PushbackInputStream in = new PushbackInputStream(bis); t = consumeTuple(in, fieldSchema); } catch (IOException e) { Modified: hadoop/pig/trunk/test/org/apache/pig/test/TestTextDataParser.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/test/org/apache/pig/test/TestTextDataParser.java?rev=921975&r1=921974&r2=921975&view=diff ============================================================================== --- hadoop/pig/trunk/test/org/apache/pig/test/TestTextDataParser.java (original) +++ hadoop/pig/trunk/test/org/apache/pig/test/TestTextDataParser.java Thu Mar 11 18:37:11 2010 @@ -216,5 +216,14 @@ public class TestTextDataParser extends expectedTuple.set(1, "b"); expectedBag.add(expectedTuple); assertTrue(b.equals(expectedBag)); + } + + @Test + public void testEmptyBag() throws Exception{ + String myBag = "{}"; + Object o = ps.getLoadCaster().bytesToBag(myBag.getBytes(), getBagFieldSchema()); + assertTrue(o instanceof DataBag); + DataBag b = (DataBag)o; + assertTrue(b.size()==0); } }