Author: ehans
Date: Fri Jan  3 18:27:45 2014
New Revision: 1555189

URL: http://svn.apache.org/r1555189
Log:
HIVE-5757: Implement vectorized support for CASE (Eric Hanson)

Added:
    hive/trunk/ql/src/test/queries/clientpositive/vectorized_case.q
    hive/trunk/ql/src/test/results/clientpositive/vectorized_case.q.out
Modified:
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java?rev=1555189&r1=1555188&r2=1555189&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java Fri Jan  3 18:27:45 2014
@@ -293,7 +293,20 @@ public class VectorizationContext {
                    || arg0Type(expr).equals("float"))) {
         return true;
       }
-    } else if (gudf instanceof GenericUDFTimestamp && arg0Type(expr).equals("string")) {
+    } else if ((gudf instanceof GenericUDFTimestamp && arg0Type(expr).equals("string"))
+
+            /* GenericUDFCase and GenericUDFWhen are implemented with the UDF Adaptor because
+             * of their complexity and generality. In the future, variations of these
+             * can be optimized to run faster for the vectorized code path. For example,
+             * CASE col WHEN 1 then "one" WHEN 2 THEN "two" ELSE "other" END
+             * is an example of a GenericUDFCase that has all constant arguments
+             * except for the first argument. This is probably a common case and a
+             * good candidate for a fast, special-purpose VectorExpression. Then
+             * the UDF Adaptor code path could be used as a catch-all for
+             * non-optimized general cases.
+             */
+            || gudf instanceof GenericUDFCase
+            || gudf instanceof GenericUDFWhen) {
       return true;
     }
     return false;

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java?rev=1555189&r1=1555188&r2=1555189&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java Fri Jan  3 18:27:45 2014
@@ -123,6 +123,7 @@ import org.apache.hadoop.hive.ql.udf.gen
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFAbs;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBetween;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFCase;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFCeil;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFConcat;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFFloor;
@@ -153,6 +154,7 @@ import org.apache.hadoop.hive.ql.udf.gen
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFTimestamp;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFToUnixTimeStamp;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFUpper;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFWhen;
 
 public class Vectorizer implements PhysicalPlanResolver {
 
@@ -250,6 +252,8 @@ public class Vectorizer implements Physi
     supportedGenericUDFs.add(GenericUDFAbs.class);
     supportedGenericUDFs.add(GenericUDFBetween.class);
     supportedGenericUDFs.add(GenericUDFIn.class);
+    supportedGenericUDFs.add(GenericUDFCase.class);
+    supportedGenericUDFs.add(GenericUDFWhen.class);
 
     // For type casts
     supportedGenericUDFs.add(UDFToLong.class);

Added: hive/trunk/ql/src/test/queries/clientpositive/vectorized_case.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/vectorized_case.q?rev=1555189&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/vectorized_case.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/vectorized_case.q Fri Jan  3 18:27:45 2014
@@ -0,0 +1,37 @@
+set hive.vectorized.execution.enabled = true
+;
+explain
+select 
+  csmallint,
+  case 
+    when csmallint = 418 then "a"
+    when csmallint = 12205 then "b"
+    else "c"
+  end,
+  case csmallint
+    when 418 then "a"
+    when 12205 then "b"
+    else "c"
+  end
+from alltypesorc
+where csmallint = 418
+or csmallint = 12205
+or csmallint = 10583
+;
+select 
+  csmallint,
+  case 
+    when csmallint = 418 then "a"
+    when csmallint = 12205 then "b"
+    else "c"
+  end,
+  case csmallint
+    when 418 then "a"
+    when 12205 then "b"
+    else "c"
+  end
+from alltypesorc
+where csmallint = 418
+or csmallint = 12205
+or csmallint = 10583
+;

Added: hive/trunk/ql/src/test/results/clientpositive/vectorized_case.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/vectorized_case.q.out?rev=1555189&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/vectorized_case.q.out (added)
+++ hive/trunk/ql/src/test/results/clientpositive/vectorized_case.q.out Fri Jan  3 18:27:45 2014
@@ -0,0 +1,120 @@
+PREHOOK: query: explain
+select 
+  csmallint,
+  case 
+    when csmallint = 418 then "a"
+    when csmallint = 12205 then "b"
+    else "c"
+  end,
+  case csmallint
+    when 418 then "a"
+    when 12205 then "b"
+    else "c"
+  end
+from alltypesorc
+where csmallint = 418
+or csmallint = 12205
+or csmallint = 10583
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select 
+  csmallint,
+  case 
+    when csmallint = 418 then "a"
+    when csmallint = 12205 then "b"
+    else "c"
+  end,
+  case csmallint
+    when 418 then "a"
+    when 12205 then "b"
+    else "c"
+  end
+from alltypesorc
+where csmallint = 418
+or csmallint = 12205
+or csmallint = 10583
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypesorc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL csmallint)) (TOK_SELEXPR (TOK_FUNCTION when (= (TOK_TABLE_OR_COL csmallint) 418) "a" (= (TOK_TABLE_OR_COL csmallint) 12205) "b" "c")) (TOK_SELEXPR (TOK_FUNCTION case (TOK_TABLE_OR_COL csmallint) 418 "a" 12205 "b" "c"))) (TOK_WHERE (or (or (= (TOK_TABLE_OR_COL csmallint) 418) (= (TOK_TABLE_OR_COL csmallint) 12205)) (= (TOK_TABLE_OR_COL csmallint) 10583)))))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        alltypesorc 
+          TableScan
+            alias: alltypesorc
+            Filter Operator
+              predicate:
+                  expr: (((csmallint = 418) or (csmallint = 12205)) or (csmallint = 10583))
+                  type: boolean
+              Vectorized execution: true
+              Select Operator
+                expressions:
+                      expr: csmallint
+                      type: smallint
+                      expr: CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN ('b') ELSE ('c') END
+                      type: string
+                      expr: CASE (csmallint) WHEN (418) THEN ('a') WHEN (12205) THEN ('b') ELSE ('c') END
+                      type: string
+                outputColumnNames: _col0, _col1, _col2
+                Vectorized execution: true
+                File Output Operator
+                  compressed: false
+                  GlobalTableId: 0
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                  Vectorized execution: true
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+
+PREHOOK: query: select 
+  csmallint,
+  case 
+    when csmallint = 418 then "a"
+    when csmallint = 12205 then "b"
+    else "c"
+  end,
+  case csmallint
+    when 418 then "a"
+    when 12205 then "b"
+    else "c"
+  end
+from alltypesorc
+where csmallint = 418
+or csmallint = 12205
+or csmallint = 10583
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+POSTHOOK: query: select 
+  csmallint,
+  case 
+    when csmallint = 418 then "a"
+    when csmallint = 12205 then "b"
+    else "c"
+  end,
+  case csmallint
+    when 418 then "a"
+    when 12205 then "b"
+    else "c"
+  end
+from alltypesorc
+where csmallint = 418
+or csmallint = 12205
+or csmallint = 10583
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+12205  b       b
+10583  c       c
+418    a       a
+12205  b       b


Reply via email to