Author: ehans Date: Fri Jan 3 18:27:45 2014 New Revision: 1555189 URL: http://svn.apache.org/r1555189 Log: HIVE-5757: Implement vectorized support for CASE (Eric Hanson)
Added: hive/trunk/ql/src/test/queries/clientpositive/vectorized_case.q hive/trunk/ql/src/test/results/clientpositive/vectorized_case.q.out Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java?rev=1555189&r1=1555188&r2=1555189&view=diff ============================================================================== --- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java (original) +++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java Fri Jan 3 18:27:45 2014 @@ -293,7 +293,20 @@ public class VectorizationContext { || arg0Type(expr).equals("float"))) { return true; } - } else if (gudf instanceof GenericUDFTimestamp && arg0Type(expr).equals("string")) { + } else if ((gudf instanceof GenericUDFTimestamp && arg0Type(expr).equals("string")) + + /* GenericUDFCase and GenericUDFWhen are implemented with the UDF Adaptor because + * of their complexity and generality. In the future, variations of these + * can be optimized to run faster for the vectorized code path. For example, + * CASE col WHEN 1 then "one" WHEN 2 THEN "two" ELSE "other" END + * is an example of a GenericUDFCase that has all constant arguments + * except for the first argument. This is probably a common case and a + * good candidate for a fast, special-purpose VectorExpression. Then + * the UDF Adaptor code path could be used as a catch-all for + * non-optimized general cases. + */ + || gudf instanceof GenericUDFCase + || gudf instanceof GenericUDFWhen) { return true; } return false; Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java?rev=1555189&r1=1555188&r2=1555189&view=diff ============================================================================== --- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java (original) +++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java Fri Jan 3 18:27:45 2014 @@ -123,6 +123,7 @@ import org.apache.hadoop.hive.ql.udf.gen import org.apache.hadoop.hive.ql.udf.generic.GenericUDFAbs; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBetween; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFCase; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFCeil; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFConcat; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFFloor; @@ -153,6 +154,7 @@ import org.apache.hadoop.hive.ql.udf.gen import org.apache.hadoop.hive.ql.udf.generic.GenericUDFTimestamp; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFToUnixTimeStamp; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFUpper; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFWhen; public class Vectorizer implements PhysicalPlanResolver { @@ -250,6 +252,8 @@ public class Vectorizer implements Physi supportedGenericUDFs.add(GenericUDFAbs.class); supportedGenericUDFs.add(GenericUDFBetween.class); supportedGenericUDFs.add(GenericUDFIn.class); + supportedGenericUDFs.add(GenericUDFCase.class); + supportedGenericUDFs.add(GenericUDFWhen.class); // For type casts supportedGenericUDFs.add(UDFToLong.class); Added: hive/trunk/ql/src/test/queries/clientpositive/vectorized_case.q URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/vectorized_case.q?rev=1555189&view=auto ============================================================================== --- hive/trunk/ql/src/test/queries/clientpositive/vectorized_case.q (added) +++ hive/trunk/ql/src/test/queries/clientpositive/vectorized_case.q Fri Jan 3 18:27:45 2014 @@ -0,0 +1,37 @@ +set hive.vectorized.execution.enabled = true +; +explain +select + csmallint, + case + when csmallint = 418 then "a" + when csmallint = 12205 then "b" + else "c" + end, + case csmallint + when 418 then "a" + when 12205 then "b" + else "c" + end +from alltypesorc +where csmallint = 418 +or csmallint = 12205 +or csmallint = 10583 +; +select + csmallint, + case + when csmallint = 418 then "a" + when csmallint = 12205 then "b" + else "c" + end, + case csmallint + when 418 then "a" + when 12205 then "b" + else "c" + end +from alltypesorc +where csmallint = 418 +or csmallint = 12205 +or csmallint = 10583 +; Added: hive/trunk/ql/src/test/results/clientpositive/vectorized_case.q.out URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/vectorized_case.q.out?rev=1555189&view=auto ============================================================================== --- hive/trunk/ql/src/test/results/clientpositive/vectorized_case.q.out (added) +++ hive/trunk/ql/src/test/results/clientpositive/vectorized_case.q.out Fri Jan 3 18:27:45 2014 @@ -0,0 +1,120 @@ +PREHOOK: query: explain +select + csmallint, + case + when csmallint = 418 then "a" + when csmallint = 12205 then "b" + else "c" + end, + case csmallint + when 418 then "a" + when 12205 then "b" + else "c" + end +from alltypesorc +where csmallint = 418 +or csmallint = 12205 +or csmallint = 10583 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select + csmallint, + case + when csmallint = 418 then "a" + when csmallint = 12205 then "b" + else "c" + end, + case csmallint + when 418 then "a" + when 12205 then "b" + else "c" + end +from alltypesorc +where csmallint = 418 +or csmallint = 12205 +or csmallint = 10583 +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypesorc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL csmallint)) (TOK_SELEXPR (TOK_FUNCTION when (= (TOK_TABLE_OR_COL csmallint) 418) "a" (= (TOK_TABLE_OR_COL csmallint) 12205) "b" "c")) (TOK_SELEXPR (TOK_FUNCTION case (TOK_TABLE_OR_COL csmallint) 418 "a" 12205 "b" "c"))) (TOK_WHERE (or (or (= (TOK_TABLE_OR_COL csmallint) 418) (= (TOK_TABLE_OR_COL csmallint) 12205)) (= (TOK_TABLE_OR_COL csmallint) 10583))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + alltypesorc + TableScan + alias: alltypesorc + Filter Operator + predicate: + expr: (((csmallint = 418) or (csmallint = 12205)) or (csmallint = 10583)) + type: boolean + Vectorized execution: true + Select Operator + expressions: + expr: csmallint + type: smallint + expr: CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN ('b') ELSE ('c') END + type: string + expr: CASE (csmallint) WHEN (418) THEN ('a') WHEN (12205) THEN ('b') ELSE ('c') END + type: string + outputColumnNames: _col0, _col1, _col2 + Vectorized execution: true + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Vectorized execution: true + + Stage: Stage-0 + Fetch Operator + limit: -1 + +PREHOOK: query: select + csmallint, + case + when csmallint = 418 then "a" + when csmallint = 12205 then "b" + else "c" + end, + case csmallint + when 418 then "a" + when 12205 then "b" + else "c" + end +from alltypesorc +where csmallint = 418 +or csmallint = 12205 +or csmallint = 10583 +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select + csmallint, + case + when csmallint = 418 then "a" + when csmallint = 12205 then "b" + else "c" + end, + case csmallint + when 418 then "a" + when 12205 then "b" + else "c" + end +from alltypesorc +where csmallint = 418 +or csmallint = 12205 +or csmallint = 10583 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +12205 b b +10583 c c +418 a a +12205 b b