Author: brock
Date: Mon Oct 14 21:22:12 2013
New Revision: 1532103

URL: http://svn.apache.org/r1532103
Log:
HIVE-5423 - Speed up testing of scalar UDFS (Edward Capriolo via Brock Noland)

Added:
    hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/testutil/
    hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/testutil/BaseScalarUdfTest.java
    hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/testutil/DataBuilder.java
    hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/testutil/OperatorTestUtils.java
    hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/udf/TestUDFRound.java
    hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFConcat.java

Added: hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/testutil/BaseScalarUdfTest.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/testutil/BaseScalarUdfTest.java?rev=1532103&view=auto
==============================================================================
--- hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/testutil/BaseScalarUdfTest.java (added)
+++ hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/testutil/BaseScalarUdfTest.java Mon Oct 14 21:22:12 2013
@@ -0,0 +1,76 @@
+package org.apache.hadoop.hive.ql.testutil;
+
+import java.util.List;
+
+import junit.framework.TestCase;
+
+import org.apache.hadoop.hive.ql.exec.CollectOperator;
+import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.exec.OperatorFactory;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.plan.CollectDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.ql.plan.SelectDesc;
+import org.apache.hadoop.hive.serde2.objectinspector.InspectableObject;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.mapred.JobConf;
+import org.junit.Ignore;
+
+/**
+ *
+ * Provides a base environment for testing scalar UDFs. Users should extend this class
+ * and override the abstract methods. It is highly suggested to test with multiple rows
+ * of input, null values, and boundary conditions, because some UDFs are stateful.
+ *
+ */
+@SuppressWarnings("deprecation")
+@Ignore
+public abstract class BaseScalarUdfTest extends TestCase {
+
+  /**
+   * The data from this method will be fed through the
+   * select operator. It is considered the source data
+   * for the test.
+   * @return The source table that will be fed through the operator tree
+   */
+  public abstract InspectableObject [] getBaseTable();
+
+  /**
+   * The data returned from this method will be compared to the results
+   * of the test. The DataBuilder class can be used to construct
+   * the result.
+   * @return The data that will be compared to the results
+   */
+  public abstract InspectableObject [] getExpectedResult();
+
+  /**
+   * Implementors of this method create an expression list. This list
+   * transforms the source data into the final output. Helpers such as
+   * OperatorTestUtils.getStringColumn() and
+   * TypeCheckProcFactory.DefaultExprProcessor.getFuncExprNodeDesc() can be
+   * used to construct the expressions.
+   * @return A list of expressions
+   * @throws UDFArgumentException if the UDF has been formulated incorrectly
+   */
+  public abstract List<ExprNodeDesc> getExpressionList() throws UDFArgumentException;
+
+  /**
+   * This method drives the test. It takes the data from getBaseTable() and
+   * feeds it through a SELECT operator followed by a COLLECT operator. Each
+   * row produced by the COLLECT operator is compared to getExpectedResult(),
+   * and if every row matches the expected result the method completes without an assertion failure.
+   * @throws HiveException
+   */
+  public final void testUdf() throws HiveException {
+    InspectableObject [] data = getBaseTable();
+    List<ExprNodeDesc> expressionList = getExpressionList();
+    SelectDesc selectCtx = new SelectDesc(expressionList,
+        OperatorTestUtils.createOutputColumnNames(expressionList));
+    Operator<SelectDesc> op = OperatorFactory.get(SelectDesc.class);
+    op.setConf(selectCtx);
+    CollectDesc cd = new CollectDesc(Integer.valueOf(10));
+    CollectOperator cdop = (CollectOperator) OperatorFactory.getAndMakeChild(cd, op);
+    op.initialize(new JobConf(OperatorTestUtils.class), new ObjectInspector[] {data[0].oi});
+    OperatorTestUtils.assertResults(op, cdop, data, getExpectedResult());
+  }
+
+}
\ No newline at end of file
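
The intended usage pattern is sketched below with a hypothetical TestUDFUpper. The class name, the choice of the "upper" UDF, and the writable object inspector used for the expected column are illustrative assumptions, not part of this commit; TestUDFRound and TestGenericUDFConcat further down are the tests actually added.

    package org.apache.hadoop.hive.ql.udf;

    import java.util.ArrayList;
    import java.util.List;

    import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
    import org.apache.hadoop.hive.ql.parse.TypeCheckProcFactory;
    import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
    import org.apache.hadoop.hive.ql.testutil.BaseScalarUdfTest;
    import org.apache.hadoop.hive.ql.testutil.DataBuilder;
    import org.apache.hadoop.hive.ql.testutil.OperatorTestUtils;
    import org.apache.hadoop.hive.serde2.objectinspector.InspectableObject;
    import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
    import org.apache.hadoop.io.Text;

    // Hypothetical test, not part of this commit: exercises the "upper" UDF
    // through the same SELECT/COLLECT operator pipeline as TestUDFRound.
    public class TestUDFUpper extends BaseScalarUdfTest {

      @Override
      public InspectableObject[] getBaseTable() {
        // Source rows fed into the SELECT operator, including a null row.
        DataBuilder db = new DataBuilder();
        db.setColumnNames("a");
        db.setColumnTypes(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
        db.addRow("one");
        db.addRow((Object) null);
        return db.createRows();
      }

      @Override
      public InspectableObject[] getExpectedResult() {
        // Expected output rows. The writable string inspector and Text values
        // are an assumption based on what a bridged string UDF typically returns.
        DataBuilder db = new DataBuilder();
        db.setColumnNames("_col0");
        db.setColumnTypes(PrimitiveObjectInspectorFactory.writableStringObjectInspector);
        db.addRow(new Text("ONE"));
        db.addRow((Object) null);
        return db.createRows();
      }

      @Override
      public List<ExprNodeDesc> getExpressionList() throws UDFArgumentException {
        // upper(a) applied to the single source column.
        ExprNodeDesc a = OperatorTestUtils.getStringColumn("a");
        ExprNodeDesc upper =
            TypeCheckProcFactory.DefaultExprProcessor.getFuncExprNodeDesc("upper", a);
        List<ExprNodeDesc> exprs = new ArrayList<ExprNodeDesc>();
        exprs.add(upper);
        return exprs;
      }
    }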

Added: hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/testutil/DataBuilder.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/testutil/DataBuilder.java?rev=1532103&view=auto
==============================================================================
--- hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/testutil/DataBuilder.java (added)
+++ hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/testutil/DataBuilder.java Mon Oct 14 21:22:12 2013
@@ -0,0 +1,62 @@
+package org.apache.hadoop.hive.ql.testutil;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+import org.apache.hadoop.hive.serde2.objectinspector.InspectableObject;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
+
+/**
+ *
+ * DataBuilder is used to build InspectableObject arrays that are used
+ * as part of testing.
+ *
+ */
+public class DataBuilder {
+
+  private final List<String> columnNames;
+  private final List<ObjectInspector> columnTypes;
+  private final List<List<Object>> rows;
+
+  public DataBuilder(){
+    columnNames = new ArrayList<String>();
+    columnTypes = new ArrayList<ObjectInspector>();
+    rows = new ArrayList<List<Object>>();
+  }
+
+  public void setColumnNames(String ... names){
+    for (String name: names){
+      columnNames.add(name);
+    }
+  }
+
+  public void setColumnTypes(ObjectInspector ... types){
+    for (ObjectInspector type: types){
+      columnTypes.add(type);
+    }
+  }
+
+  public void addRow(Object ... columns){
+    List<Object> objects = Arrays.asList(columns);
+    rows.add(objects);
+  }
+
+  /**
+   * Returns the InspectableObject array that the builder methods
+   * helped to assemble.
+   * @return InspectableObject array (objects that have data coupled with
+   * an object inspector)
+   */
+  public InspectableObject[] createRows(){
+    InspectableObject[] toReturn = new InspectableObject[this.rows.size()];
+    for (int i=0; i<toReturn.length;i++){
+      toReturn[i] = new InspectableObject();
+      toReturn[i].o = rows.get(i);
+      toReturn[i].oi = ObjectInspectorFactory.getStandardStructObjectInspector(
+          this.columnNames, this.columnTypes);
+    }
+    return toReturn;
+  }
+}
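
As a rough illustration of how the builder is meant to be driven (the class, column names, types, and values below are arbitrary and not part of this commit):

    package org.apache.hadoop.hive.ql.testutil;

    import org.apache.hadoop.hive.serde2.objectinspector.InspectableObject;
    import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;

    // Hypothetical snippet, not part of this commit.
    public class DataBuilderExample {
      public static InspectableObject[] twoRows() {
        // Two rows of (string, int) data; each InspectableObject pairs a row
        // (a List<Object>) with a standard struct object inspector.
        DataBuilder db = new DataBuilder();
        db.setColumnNames("name", "count");
        db.setColumnTypes(
            PrimitiveObjectInspectorFactory.javaStringObjectInspector,
            PrimitiveObjectInspectorFactory.javaIntObjectInspector);
        db.addRow("a", 1);
        db.addRow("b", 2);
        return db.createRows();
      }
    }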

Added: hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/testutil/OperatorTestUtils.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/testutil/OperatorTestUtils.java?rev=1532103&view=auto
==============================================================================
--- hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/testutil/OperatorTestUtils.java (added)
+++ hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/testutil/OperatorTestUtils.java Mon Oct 14 21:22:12 2013
@@ -0,0 +1,79 @@
+package org.apache.hadoop.hive.ql.testutil;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import junit.framework.Assert;
+
+import org.apache.hadoop.hive.ql.exec.CollectOperator;
+import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.ql.plan.SelectDesc;
+import org.apache.hadoop.hive.serde2.objectinspector.InspectableObject;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
+import org.apache.hadoop.hive.serde2.objectinspector.StructField;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
+
+public class OperatorTestUtils {
+
+  public static ExprNodeColumnDesc getStringColumn(String columnName) {
+    return new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, columnName, "", false);
+  }
+
+  /**
+   *
+   * @param expressionList the expressions the output columns are generated for
+   * @return A list of column names of the form _colN, where N is the column index
+   */
+  public static List<String> createOutputColumnNames(List<ExprNodeDesc> expressionList){
+    List<String> outputCols = new ArrayList<String>();
+    for (int i = 0; i < expressionList.size(); i++) {
+      outputCols.add("_col" + i);
+    }
+    return outputCols;
+  }
+
+  /**
+   * Given a select operator and a collect operator, feeds the sourceData into the operator
+   * tree and asserts that each row matches the expectedResult.
+   * @param selectOp
+   * @param collectOp
+   * @param sourceData
+   * @param expected
+   * @throws HiveException
+   */
+  public static void assertResults(Operator<SelectDesc> selectOp, CollectOperator collectOp,
+      InspectableObject [] sourceData, InspectableObject [] expected) throws HiveException {
+    InspectableObject resultRef = new InspectableObject();
+    for (int i = 0; i < sourceData.length; i++) {
+      selectOp.process(sourceData[i].o, 0);
+      collectOp.retrieve(resultRef);
+      StructObjectInspector expectedOi = (StructObjectInspector) expected[i].oi;
+      List<? extends StructField> expectedFields = expectedOi.getAllStructFieldRefs();
+      StructObjectInspector destinationOi = (StructObjectInspector) resultRef.oi;
+      List<? extends StructField> destinationFields = destinationOi.getAllStructFieldRefs();
+      Assert.assertEquals("Source and destination have differing numbers of fields ", expectedFields.size(), destinationFields.size());
+      for (StructField field : expectedFields){
+        StructField dest = expectedOi.getStructFieldRef(field.getFieldName());
+        Assert.assertNotNull("Could not find column named "+field.getFieldName(), dest);
+        Assert.assertEquals(field.getFieldObjectInspector(), dest.getFieldObjectInspector());
+        Assert.assertEquals("comparing " +
+            expectedOi.getStructFieldData(expected[i].o, field)+" "+
+            field.getFieldObjectInspector().getClass().getSimpleName()+" to "+
+            destinationOi.getStructFieldData(resultRef.o, dest) + " " +
+            dest.getFieldObjectInspector().getClass().getSimpleName(), 0,
+            ObjectInspectorUtils.compare(
+            expectedOi.getStructFieldData(expected[i].o, field), field.getFieldObjectInspector(),
+            destinationOi.getStructFieldData(resultRef.o, dest), dest.getFieldObjectInspector()
+            )
+        );
+            )
+        );
+      }
+
+    }
+    selectOp.close(false);
+  }
+
+}
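
A small hypothetical sketch of the two helpers above working together; the class and column names are illustrative, and BaseScalarUdfTest.testUdf() above is the real caller of these helpers:

    package org.apache.hadoop.hive.ql.testutil;

    import java.util.ArrayList;
    import java.util.List;

    import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;

    // Hypothetical snippet, not part of this commit.
    public class OutputColumnNamesExample {
      public static void main(String[] args) {
        // Build two column expressions and derive the generated output names.
        List<ExprNodeDesc> exprs = new ArrayList<ExprNodeDesc>();
        exprs.add(OperatorTestUtils.getStringColumn("a"));
        exprs.add(OperatorTestUtils.getStringColumn("b"));
        // Prints [_col0, _col1]: one generated name per expression, used as
        // the SelectDesc output column names in BaseScalarUdfTest.testUdf().
        System.out.println(OperatorTestUtils.createOutputColumnNames(exprs));
      }
    }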

Added: hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/udf/TestUDFRound.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/udf/TestUDFRound.java?rev=1532103&view=auto
==============================================================================
--- hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/udf/TestUDFRound.java (added)
+++ hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/udf/TestUDFRound.java Mon Oct 14 21:22:12 2013
@@ -0,0 +1,61 @@
+package org.apache.hadoop.hive.ql.udf;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
+import org.apache.hadoop.hive.ql.parse.TypeCheckProcFactory;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.ql.testutil.DataBuilder;
+import org.apache.hadoop.hive.ql.testutil.OperatorTestUtils;
+import org.apache.hadoop.hive.ql.testutil.BaseScalarUdfTest;
+import org.apache.hadoop.hive.serde2.io.DoubleWritable;
+import org.apache.hadoop.hive.serde2.objectinspector.InspectableObject;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.apache.hadoop.io.IntWritable;
+
+public class TestUDFRound extends BaseScalarUdfTest {
+
+  @Override
+  public InspectableObject[] getBaseTable() {
+    DataBuilder db = new DataBuilder();
+    db.setColumnNames("a", "b", "c");
+    db.setColumnTypes(
+        PrimitiveObjectInspectorFactory.javaStringObjectInspector,
+        PrimitiveObjectInspectorFactory.javaIntObjectInspector,
+        PrimitiveObjectInspectorFactory.javaDoubleObjectInspector);
+    db.addRow("one", 1, new Double("1.1"));
+    db.addRow( null, null, null);
+    db.addRow("two", 2,  new Double("2.1"));
+    return db.createRows();
+  }
+
+  @Override
+  public InspectableObject[] getExpectedResult() {
+    DataBuilder db = new DataBuilder();
+    db.setColumnNames("_col1", "_col2", "_col3");
+    db.setColumnTypes(PrimitiveObjectInspectorFactory.javaStringObjectInspector,
+        PrimitiveObjectInspectorFactory.writableIntObjectInspector,
+        PrimitiveObjectInspectorFactory.writableDoubleObjectInspector);
+    db.addRow(null, new IntWritable(1), new DoubleWritable(1.0));
+    db.addRow(null, null, null);
+    db.addRow(null, new IntWritable(2), new DoubleWritable(2.0));
+    return db.createRows();
+  }
+
+  @Override
+  public List<ExprNodeDesc> getExpressionList() throws UDFArgumentException {
+    ExprNodeDesc expr1 = OperatorTestUtils.getStringColumn("a");
+    ExprNodeDesc expr2 = OperatorTestUtils.getStringColumn("b");
+    ExprNodeDesc expr3 = OperatorTestUtils.getStringColumn("c");
+    ExprNodeDesc r1 = TypeCheckProcFactory.DefaultExprProcessor.getFuncExprNodeDesc("round", expr1);
+    ExprNodeDesc r2 = TypeCheckProcFactory.DefaultExprProcessor.getFuncExprNodeDesc("round", expr2);
+    ExprNodeDesc r3 = TypeCheckProcFactory.DefaultExprProcessor.getFuncExprNodeDesc("round", expr3);
+    List<ExprNodeDesc> earr = new ArrayList<ExprNodeDesc>();
+    earr.add(r1);
+    earr.add(r2);
+    earr.add(r3);
+    return earr;
+  }
+
+}

Added: hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFConcat.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFConcat.java?rev=1532103&view=auto
==============================================================================
--- hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFConcat.java (added)
+++ hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFConcat.java Mon Oct 14 21:22:12 2013
@@ -0,0 +1,54 @@
+package org.apache.hadoop.hive.ql.udf.generic;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
+import org.apache.hadoop.hive.ql.parse.TypeCheckProcFactory;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.ql.testutil.DataBuilder;
+import org.apache.hadoop.hive.ql.testutil.OperatorTestUtils;
+import org.apache.hadoop.hive.ql.testutil.BaseScalarUdfTest;
+import org.apache.hadoop.hive.serde2.objectinspector.InspectableObject;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+
+public class TestGenericUDFConcat extends BaseScalarUdfTest {
+
+  @Override
+  public InspectableObject[] getBaseTable() {
+    DataBuilder db = new DataBuilder();
+    db.setColumnNames("a", "b", "c");
+    db.setColumnTypes(
+        PrimitiveObjectInspectorFactory.javaStringObjectInspector,
+        PrimitiveObjectInspectorFactory.javaStringObjectInspector,
+        PrimitiveObjectInspectorFactory.javaStringObjectInspector);
+    db.addRow("one", "two", "three");
+    db.addRow("four","two", "three");
+    db.addRow( null, "two", "three");
+    return db.createRows();
+  }
+
+  @Override
+  public InspectableObject[] getExpectedResult() {
+    DataBuilder db = new DataBuilder();
+    db.setColumnNames("_col1", "_col2");
+    db.setColumnTypes(PrimitiveObjectInspectorFactory.javaStringObjectInspector,
+        PrimitiveObjectInspectorFactory.javaStringObjectInspector);
+    db.addRow("one", "onetwo");
+    db.addRow("four", "fourtwo");
+    db.addRow(null, null);
+    return db.createRows();
+  }
+
+  @Override
+  public List<ExprNodeDesc> getExpressionList() throws UDFArgumentException {
+    ExprNodeDesc expr1 = OperatorTestUtils.getStringColumn("a");
+    ExprNodeDesc expr2 = OperatorTestUtils.getStringColumn("b");
+    ExprNodeDesc exprDesc2 = TypeCheckProcFactory.DefaultExprProcessor.getFuncExprNodeDesc("concat", expr1, expr2);
+    List<ExprNodeDesc> earr = new ArrayList<ExprNodeDesc>();
+    earr.add(expr1);
+    earr.add(exprDesc2);
+    return earr;
+  }
+
+}

