Author: daijy
Date: Fri Aug 7 02:27:15 2009
New Revision: 801865
URL: http://svn.apache.org/viewvc?rev=801865&view=rev
Log:
PIG-905: TOKENIZE throws exception on null data
Modified:
hadoop/pig/trunk/CHANGES.txt
hadoop/pig/trunk/src/org/apache/pig/builtin/TOKENIZE.java
hadoop/pig/trunk/test/org/apache/pig/test/TestBuiltin.java
hadoop/pig/trunk/test/org/apache/pig/test/TestPigContext.java
Modified: hadoop/pig/trunk/CHANGES.txt
URL:
http://svn.apache.org/viewvc/hadoop/pig/trunk/CHANGES.txt?rev=801865&r1=801864&r2=801865&view=diff
==
--- hadoop/pig/trunk/CHANGES.txt (original)
+++ hadoop/pig/trunk/CHANGES.txt Fri Aug 7 02:27:15 2009
@@ -46,6 +46,8 @@
BUG FIXES
+PIG-905: TOKENIZE throws exception on null data (daijy)
+
PIG-901: InputSplit (SliceWrapper) created by Pig is big in size due to
serialized PigContext (pradeepkth)
Modified: hadoop/pig/trunk/src/org/apache/pig/builtin/TOKENIZE.java
URL:
http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/builtin/TOKENIZE.java?rev=801865&r1=801864&r2=801865&view=diff
==
--- hadoop/pig/trunk/src/org/apache/pig/builtin/TOKENIZE.java (original)
+++ hadoop/pig/trunk/src/org/apache/pig/builtin/TOKENIZE.java Fri Aug 7
02:27:15 2009
@@ -18,6 +18,8 @@
package org.apache.pig.builtin;
import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
import java.util.StringTokenizer;
import org.apache.pig.EvalFunc;
@@ -30,7 +32,7 @@
import org.apache.pig.data.TupleFactory;
import org.apache.pig.impl.logicalLayer.FrontendException;
import org.apache.pig.impl.logicalLayer.schema.Schema;
-
+import org.apache.pig.FuncSpec;
public class TOKENIZE extends EvalFunc<DataBag> {
TupleFactory mTupleFactory = TupleFactory.getInstance();
@@ -39,8 +41,14 @@
@Override
public DataBag exec(Tuple input) throws IOException {
try {
-DataBag output = mBagFactory.newDefaultBag();
+if (input==null)
+return null;
+if (input.size()==0)
+return null;
Object o = input.get(0);
+if (o==null)
+return null;
+DataBag output = mBagFactory.newDefaultBag();
if (!(o instanceof String)) {
int errCode = 2114;
String msg = "Expected input to be chararray, but" +
@@ -86,5 +94,11 @@
}
}
-
+public List<FuncSpec> getArgToFuncMapping() throws FrontendException {
+List<FuncSpec> funcList = new ArrayList<FuncSpec>();
+Schema s = new Schema();
+s.add(new Schema.FieldSchema(null, DataType.CHARARRAY));
+funcList.add(new FuncSpec(this.getClass().getName(), s));
+return funcList;
+}
};
Modified: hadoop/pig/trunk/test/org/apache/pig/test/TestBuiltin.java
URL:
http://svn.apache.org/viewvc/hadoop/pig/trunk/test/org/apache/pig/test/TestBuiltin.java?rev=801865&r1=801864&r2=801865&view=diff
==
--- hadoop/pig/trunk/test/org/apache/pig/test/TestBuiltin.java (original)
+++ hadoop/pig/trunk/test/org/apache/pig/test/TestBuiltin.java Fri Aug 7
02:27:15 2009
@@ -1355,6 +1355,33 @@
assertTrue(f1.equals(f2));
}
+
+@Test
+public void testTOKENIZE() throws Exception {
+TupleFactory tf = TupleFactory.getInstance();
+Tuple t1 = tf.newTuple(1);
+t1.set(0, "123 456\"789");
+Tuple t2 = tf.newTuple(1);
+t2.set(0, null);
+Tuple t3 = tf.newTuple(0);
+
+TOKENIZE f = new TOKENIZE();
+DataBag b = f.exec(t1);
+assertTrue(b.size()==3);
+Iterator<Tuple> i = b.iterator();
+Tuple rt = i.next();
+assertTrue(rt.get(0).equals("123"));
+rt = i.next();
+assertTrue(rt.get(0).equals("456"));
+rt = i.next();
+assertTrue(rt.get(0).equals("789"));
+
+b = f.exec(t2);
+assertTrue(b==null);
+
+b = f.exec(t3);
+assertTrue(b==null);
+}
@Test
public void testDIFF() throws Exception {
Modified: hadoop/pig/trunk/test/org/apache/pig/test/TestPigContext.java
URL:
http://svn.apache.org/viewvc/hadoop/pig/trunk/test/org/apache/pig/test/TestPigContext.java?rev=801865&r1=801864&r2=801865&view=diff
==
--- hadoop/pig/trunk/test/org/apache/pig/test/TestPigContext.java (original)
+++ hadoop/pig/trunk/test/org/apache/pig/test/TestPigContext.java Fri Aug 7
02:27:15 2009
@@ -213,7 +213,7 @@
private List<String> getCommands() {
List<String> commands = new ArrayList<String>();
commands.add("my_input = LOAD '" +
Util.encodeEscape(input.getAbsolutePath()) + "' USING PigStorage();");
-commands.add(words = FOREACH