[4/7] hive git commit: HIVE-14016 : Vectorization: Add support for Grouping Sets (Matt McCline, reviewed by Gopal Vijayaraghavan, Jesus Camacho Rodriguez)

mmccline Sat, 18 Mar 2017 23:54:39 -0700

http://git-wip-us.apache.org/repos/asf/hive/blob/47ef02e4/ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets1.q.out
----------------------------------------------------------------------
diff --git 
a/ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets1.q.out 
b/ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets1.q.out
new file mode 100644
index 0000000..5d34347
--- /dev/null
+++ 
b/ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets1.q.out
@@ -0,0 +1,668 @@
+PREHOOK: query: CREATE TABLE T1_text(a STRING, b STRING, c STRING) ROW FORMAT 
DELIMITED FIELDS TERMINATED BY ' ' STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@T1_text
+POSTHOOK: query: CREATE TABLE T1_text(a STRING, b STRING, c STRING) ROW FORMAT 
DELIMITED FIELDS TERMINATED BY ' ' STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@T1_text
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/grouping_sets.txt' 
INTO TABLE T1_text
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@t1_text
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/grouping_sets.txt' 
INTO TABLE T1_text
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@t1_text
+PREHOOK: query: CREATE TABLE T1 STORED AS ORC AS SELECT * FROM T1_text
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@t1_text
+PREHOOK: Output: database:default
+PREHOOK: Output: default@T1
+POSTHOOK: query: CREATE TABLE T1 STORED AS ORC AS SELECT * FROM T1_text
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@t1_text
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@T1
+POSTHOOK: Lineage: t1.a SIMPLE [(t1_text)t1_text.FieldSchema(name:a, 
type:string, comment:null), ]
+POSTHOOK: Lineage: t1.b SIMPLE [(t1_text)t1_text.FieldSchema(name:b, 
type:string, comment:null), ]
+POSTHOOK: Lineage: t1.c SIMPLE [(t1_text)t1_text.FieldSchema(name:c, 
type:string, comment:null), ]
+t1_text.a      t1_text.b       t1_text.c
+PREHOOK: query: SELECT * FROM T1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM T1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+t1.a   t1.b    t1.c
+1      1       3
+2      2       4
+2      3       5
+3      2       8
+5      2       2
+8      1       1
+PREHOOK: query: EXPLAIN
+SELECT a, b, count(*) from T1 group by a, b with cube
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+SELECT a, b, count(*) from T1 group by a, b with cube
+POSTHOOK: type: QUERY
+Explain
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: t1
+                  Statistics: Num rows: 6 Data size: 1530 Basic stats: 
COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: a (type: string), b (type: string)
+                    outputColumnNames: a, b
+                    Statistics: Num rows: 6 Data size: 1530 Basic stats: 
COMPLETE Column stats: NONE
+                    Group By Operator
+                      aggregations: count()
+                      keys: a (type: string), b (type: string), 0 (type: int)
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2, _col3
+                      Statistics: Num rows: 24 Data size: 6120 Basic stats: 
COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string), _col1 (type: 
string), _col2 (type: int)
+                        sort order: +++
+                        Map-reduce partition columns: _col0 (type: string), 
_col1 (type: string), _col2 (type: int)
+                        Statistics: Num rows: 24 Data size: 6120 Basic stats: 
COMPLETE Column stats: NONE
+                        value expressions: _col3 (type: bigint)
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+        Reducer 2 
+            Execution mode: vectorized, llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0)
+                keys: KEY._col0 (type: string), KEY._col1 (type: string), 
KEY._col2 (type: int)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col3
+                Statistics: Num rows: 12 Data size: 3060 Basic stats: COMPLETE 
Column stats: NONE
+                pruneGroupingSetId: true
+                Select Operator
+                  expressions: _col0 (type: string), _col1 (type: string), 
_col3 (type: bigint)
+                  outputColumnNames: _col0, _col1, _col2
+                  Statistics: Num rows: 12 Data size: 3060 Basic stats: 
COMPLETE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 12 Data size: 3060 Basic stats: 
COMPLETE Column stats: NONE
+                    table:
+                        input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: SELECT a, b, count(*) from T1 group by a, b with cube
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT a, b, count(*) from T1 group by a, b with cube
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+a      b       _c2
+1      1       1
+1      NULL    1
+2      2       1
+2      3       1
+2      NULL    2
+3      2       1
+3      NULL    1
+5      2       1
+5      NULL    1
+8      1       1
+8      NULL    1
+NULL   1       2
+NULL   2       3
+NULL   3       1
+NULL   NULL    6
+PREHOOK: query: EXPLAIN
+SELECT a, b, count(*) from T1 group by cube(a, b)
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+SELECT a, b, count(*) from T1 group by cube(a, b)
+POSTHOOK: type: QUERY
+Explain
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: t1
+                  Statistics: Num rows: 6 Data size: 1530 Basic stats: 
COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: a (type: string), b (type: string)
+                    outputColumnNames: a, b
+                    Statistics: Num rows: 6 Data size: 1530 Basic stats: 
COMPLETE Column stats: NONE
+                    Group By Operator
+                      aggregations: count()
+                      keys: a (type: string), b (type: string), 0 (type: int)
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2, _col3
+                      Statistics: Num rows: 24 Data size: 6120 Basic stats: 
COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string), _col1 (type: 
string), _col2 (type: int)
+                        sort order: +++
+                        Map-reduce partition columns: _col0 (type: string), 
_col1 (type: string), _col2 (type: int)
+                        Statistics: Num rows: 24 Data size: 6120 Basic stats: 
COMPLETE Column stats: NONE
+                        value expressions: _col3 (type: bigint)
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+        Reducer 2 
+            Execution mode: vectorized, llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0)
+                keys: KEY._col0 (type: string), KEY._col1 (type: string), 
KEY._col2 (type: int)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col3
+                Statistics: Num rows: 12 Data size: 3060 Basic stats: COMPLETE 
Column stats: NONE
+                pruneGroupingSetId: true
+                Select Operator
+                  expressions: _col0 (type: string), _col1 (type: string), 
_col3 (type: bigint)
+                  outputColumnNames: _col0, _col1, _col2
+                  Statistics: Num rows: 12 Data size: 3060 Basic stats: 
COMPLETE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 12 Data size: 3060 Basic stats: 
COMPLETE Column stats: NONE
+                    table:
+                        input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: SELECT a, b, count(*) from T1 group by cube(a, b)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT a, b, count(*) from T1 group by cube(a, b)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+a      b       _c2
+1      1       1
+1      NULL    1
+2      2       1
+2      3       1
+2      NULL    2
+3      2       1
+3      NULL    1
+5      2       1
+5      NULL    1
+8      1       1
+8      NULL    1
+NULL   1       2
+NULL   2       3
+NULL   3       1
+NULL   NULL    6
+PREHOOK: query: EXPLAIN
+SELECT a, b, count(*) FROM T1 GROUP BY a, b  GROUPING SETS (a, (a, b), b, ())
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+SELECT a, b, count(*) FROM T1 GROUP BY a, b  GROUPING SETS (a, (a, b), b, ())
+POSTHOOK: type: QUERY
+Explain
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: t1
+                  Statistics: Num rows: 6 Data size: 1530 Basic stats: 
COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: a (type: string), b (type: string)
+                    outputColumnNames: a, b
+                    Statistics: Num rows: 6 Data size: 1530 Basic stats: 
COMPLETE Column stats: NONE
+                    Group By Operator
+                      aggregations: count()
+                      keys: a (type: string), b (type: string), 0 (type: int)
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2, _col3
+                      Statistics: Num rows: 24 Data size: 6120 Basic stats: 
COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string), _col1 (type: 
string), _col2 (type: int)
+                        sort order: +++
+                        Map-reduce partition columns: _col0 (type: string), 
_col1 (type: string), _col2 (type: int)
+                        Statistics: Num rows: 24 Data size: 6120 Basic stats: 
COMPLETE Column stats: NONE
+                        value expressions: _col3 (type: bigint)
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+        Reducer 2 
+            Execution mode: vectorized, llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0)
+                keys: KEY._col0 (type: string), KEY._col1 (type: string), 
KEY._col2 (type: int)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col3
+                Statistics: Num rows: 12 Data size: 3060 Basic stats: COMPLETE 
Column stats: NONE
+                pruneGroupingSetId: true
+                Select Operator
+                  expressions: _col0 (type: string), _col1 (type: string), 
_col3 (type: bigint)
+                  outputColumnNames: _col0, _col1, _col2
+                  Statistics: Num rows: 12 Data size: 3060 Basic stats: 
COMPLETE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 12 Data size: 3060 Basic stats: 
COMPLETE Column stats: NONE
+                    table:
+                        input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: SELECT a, b, count(*) FROM T1 GROUP BY a, b  GROUPING SETS (a, 
(a, b), b, ())
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT a, b, count(*) FROM T1 GROUP BY a, b  GROUPING SETS 
(a, (a, b), b, ())
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+a      b       _c2
+1      1       1
+1      NULL    1
+2      2       1
+2      3       1
+2      NULL    2
+3      2       1
+3      NULL    1
+5      2       1
+5      NULL    1
+8      1       1
+8      NULL    1
+NULL   1       2
+NULL   2       3
+NULL   3       1
+NULL   NULL    6
+PREHOOK: query: EXPLAIN
+SELECT a, b, count(*) FROM T1 GROUP BY a, b GROUPING SETS (a, (a, b))
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+SELECT a, b, count(*) FROM T1 GROUP BY a, b GROUPING SETS (a, (a, b))
+POSTHOOK: type: QUERY
+Explain
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: t1
+                  Statistics: Num rows: 6 Data size: 1530 Basic stats: 
COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: a (type: string), b (type: string)
+                    outputColumnNames: a, b
+                    Statistics: Num rows: 6 Data size: 1530 Basic stats: 
COMPLETE Column stats: NONE
+                    Group By Operator
+                      aggregations: count()
+                      keys: a (type: string), b (type: string), 0 (type: int)
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2, _col3
+                      Statistics: Num rows: 12 Data size: 3060 Basic stats: 
COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string), _col1 (type: 
string), _col2 (type: int)
+                        sort order: +++
+                        Map-reduce partition columns: _col0 (type: string), 
_col1 (type: string), _col2 (type: int)
+                        Statistics: Num rows: 12 Data size: 3060 Basic stats: 
COMPLETE Column stats: NONE
+                        value expressions: _col3 (type: bigint)
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+        Reducer 2 
+            Execution mode: vectorized, llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0)
+                keys: KEY._col0 (type: string), KEY._col1 (type: string), 
KEY._col2 (type: int)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col3
+                Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE 
Column stats: NONE
+                pruneGroupingSetId: true
+                Select Operator
+                  expressions: _col0 (type: string), _col1 (type: string), 
_col3 (type: bigint)
+                  outputColumnNames: _col0, _col1, _col2
+                  Statistics: Num rows: 6 Data size: 1530 Basic stats: 
COMPLETE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 6 Data size: 1530 Basic stats: 
COMPLETE Column stats: NONE
+                    table:
+                        input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: SELECT a, b, count(*) FROM T1 GROUP BY a, b GROUPING SETS (a, 
(a, b))
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT a, b, count(*) FROM T1 GROUP BY a, b GROUPING SETS (a, 
(a, b))
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+a      b       _c2
+1      1       1
+1      NULL    1
+2      2       1
+2      3       1
+2      NULL    2
+3      2       1
+3      NULL    1
+5      2       1
+5      NULL    1
+8      1       1
+8      NULL    1
+PREHOOK: query: EXPLAIN
+SELECT a FROM T1 GROUP BY a, b, c GROUPING SETS (a, b, c)
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+SELECT a FROM T1 GROUP BY a, b, c GROUPING SETS (a, b, c)
+POSTHOOK: type: QUERY
+Explain
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: t1
+                  Statistics: Num rows: 6 Data size: 1530 Basic stats: 
COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: a (type: string), b (type: string), c (type: 
string)
+                    outputColumnNames: a, b, c
+                    Statistics: Num rows: 6 Data size: 1530 Basic stats: 
COMPLETE Column stats: NONE
+                    Group By Operator
+                      keys: a (type: string), b (type: string), c (type: 
string), 0 (type: int)
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2, _col3
+                      Statistics: Num rows: 18 Data size: 4590 Basic stats: 
COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string), _col1 (type: 
string), _col2 (type: string), _col3 (type: int)
+                        sort order: ++++
+                        Map-reduce partition columns: _col0 (type: string), 
_col1 (type: string), _col2 (type: string), _col3 (type: int)
+                        Statistics: Num rows: 18 Data size: 4590 Basic stats: 
COMPLETE Column stats: NONE
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+        Reducer 2 
+            Execution mode: vectorized, llap
+            Reduce Operator Tree:
+              Group By Operator
+                keys: KEY._col0 (type: string), KEY._col1 (type: string), 
KEY._col2 (type: string), KEY._col3 (type: int)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 9 Data size: 2295 Basic stats: COMPLETE 
Column stats: NONE
+                pruneGroupingSetId: true
+                Select Operator
+                  expressions: _col0 (type: string)
+                  outputColumnNames: _col0
+                  Statistics: Num rows: 9 Data size: 2295 Basic stats: 
COMPLETE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 9 Data size: 2295 Basic stats: 
COMPLETE Column stats: NONE
+                    table:
+                        input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: SELECT a FROM T1 GROUP BY a, b, c GROUPING SETS (a, b, c)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT a FROM T1 GROUP BY a, b, c GROUPING SETS (a, b, c)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+a
+1
+2
+3
+5
+8
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+PREHOOK: query: EXPLAIN
+SELECT a FROM T1 GROUP BY a GROUPING SETS ((a), (a))
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+SELECT a FROM T1 GROUP BY a GROUPING SETS ((a), (a))
+POSTHOOK: type: QUERY
+Explain
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: t1
+                  Statistics: Num rows: 6 Data size: 1530 Basic stats: 
COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: a (type: string)
+                    outputColumnNames: a
+                    Statistics: Num rows: 6 Data size: 1530 Basic stats: 
COMPLETE Column stats: NONE
+                    Group By Operator
+                      keys: a (type: string), 0 (type: int)
+                      mode: hash
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 6 Data size: 1530 Basic stats: 
COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string), _col1 (type: 
int)
+                        sort order: ++
+                        Map-reduce partition columns: _col0 (type: string), 
_col1 (type: int)
+                        Statistics: Num rows: 6 Data size: 1530 Basic stats: 
COMPLETE Column stats: NONE
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+        Reducer 2 
+            Execution mode: vectorized, llap
+            Reduce Operator Tree:
+              Group By Operator
+                keys: KEY._col0 (type: string), KEY._col1 (type: int)
+                mode: mergepartial
+                outputColumnNames: _col0
+                Statistics: Num rows: 3 Data size: 765 Basic stats: COMPLETE 
Column stats: NONE
+                pruneGroupingSetId: true
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 3 Data size: 765 Basic stats: COMPLETE 
Column stats: NONE
+                  table:
+                      input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: SELECT a FROM T1 GROUP BY a GROUPING SETS ((a), (a))
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT a FROM T1 GROUP BY a GROUPING SETS ((a), (a))
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+a
+1
+2
+3
+5
+8
+PREHOOK: query: EXPLAIN
+SELECT a + b, count(*) FROM T1 GROUP BY a + b GROUPING SETS (a+b)
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+SELECT a + b, count(*) FROM T1 GROUP BY a + b GROUPING SETS (a+b)
+POSTHOOK: type: QUERY
+Explain
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: t1
+                  Statistics: Num rows: 6 Data size: 1530 Basic stats: 
COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: (UDFToDouble(a) + UDFToDouble(b)) (type: 
double)
+                    outputColumnNames: _col0
+                    Statistics: Num rows: 6 Data size: 1530 Basic stats: 
COMPLETE Column stats: NONE
+                    Group By Operator
+                      aggregations: count()
+                      keys: _col0 (type: double), 0 (type: int)
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2
+                      Statistics: Num rows: 6 Data size: 1530 Basic stats: 
COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: double), _col1 (type: 
int)
+                        sort order: ++
+                        Map-reduce partition columns: _col0 (type: double), 
_col1 (type: int)
+                        Statistics: Num rows: 6 Data size: 1530 Basic stats: 
COMPLETE Column stats: NONE
+                        value expressions: _col2 (type: bigint)
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+        Reducer 2 
+            Execution mode: vectorized, llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0)
+                keys: KEY._col0 (type: double), KEY._col1 (type: int)
+                mode: mergepartial
+                outputColumnNames: _col0, _col2
+                Statistics: Num rows: 3 Data size: 765 Basic stats: COMPLETE 
Column stats: NONE
+                pruneGroupingSetId: true
+                Select Operator
+                  expressions: _col0 (type: double), _col2 (type: bigint)
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 3 Data size: 765 Basic stats: COMPLETE 
Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 3 Data size: 765 Basic stats: 
COMPLETE Column stats: NONE
+                    table:
+                        input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: SELECT a + b, count(*) FROM T1 GROUP BY a + b GROUPING SETS 
(a+b)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT a + b, count(*) FROM T1 GROUP BY a + b GROUPING SETS 
(a+b)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+_c0    _c1
+2.0    1
+4.0    1
+5.0    2
+7.0    1
+9.0    1


http://git-wip-us.apache.org/repos/asf/hive/blob/47ef02e4/ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets2.q.out
----------------------------------------------------------------------
diff --git 
a/ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets2.q.out 
b/ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets2.q.out
new file mode 100644
index 0000000..4deef94
--- /dev/null
+++ 
b/ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets2.q.out
@@ -0,0 +1,469 @@
+PREHOOK: query: CREATE TABLE T1_text(a STRING, b STRING, c STRING) ROW FORMAT 
DELIMITED FIELDS TERMINATED BY ' ' STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@T1_text
+POSTHOOK: query: CREATE TABLE T1_text(a STRING, b STRING, c STRING) ROW FORMAT 
DELIMITED FIELDS TERMINATED BY ' ' STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@T1_text
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/grouping_sets.txt' 
INTO TABLE T1_text
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@t1_text
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/grouping_sets.txt' 
INTO TABLE T1_text
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@t1_text
+PREHOOK: query: CREATE TABLE T1 STORED AS ORC AS SELECT * FROM T1_text
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@t1_text
+PREHOOK: Output: database:default
+PREHOOK: Output: default@T1
+POSTHOOK: query: CREATE TABLE T1 STORED AS ORC AS SELECT * FROM T1_text
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@t1_text
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@T1
+POSTHOOK: Lineage: t1.a SIMPLE [(t1_text)t1_text.FieldSchema(name:a, 
type:string, comment:null), ]
+POSTHOOK: Lineage: t1.b SIMPLE [(t1_text)t1_text.FieldSchema(name:b, 
type:string, comment:null), ]
+POSTHOOK: Lineage: t1.c SIMPLE [(t1_text)t1_text.FieldSchema(name:c, 
type:string, comment:null), ]
+t1_text.a      t1_text.b       t1_text.c
+PREHOOK: query: EXPLAIN
+SELECT a, b, count(*) from T1 group by a, b with cube
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+SELECT a, b, count(*) from T1 group by a, b with cube
+POSTHOOK: type: QUERY
+Explain
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+        Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: t1
+                  Statistics: Num rows: 6 Data size: 1530 Basic stats: 
COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: a (type: string), b (type: string)
+                    outputColumnNames: a, b
+                    Statistics: Num rows: 6 Data size: 1530 Basic stats: 
COMPLETE Column stats: NONE
+                    Group By Operator
+                      aggregations: count()
+                      keys: a (type: string), b (type: string)
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2
+                      Statistics: Num rows: 6 Data size: 1530 Basic stats: 
COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string), _col1 (type: 
string)
+                        sort order: ++
+                        Map-reduce partition columns: _col0 (type: string), 
_col1 (type: string)
+                        Statistics: Num rows: 6 Data size: 1530 Basic stats: 
COMPLETE Column stats: NONE
+                        value expressions: _col2 (type: bigint)
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+        Reducer 2 
+            Execution mode: vectorized, llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0)
+                keys: KEY._col0 (type: string), KEY._col1 (type: string), 0 
(type: int)
+                mode: partials
+                outputColumnNames: _col0, _col1, _col2, _col3
+                Statistics: Num rows: 24 Data size: 6120 Basic stats: COMPLETE 
Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string), _col1 (type: string), 
_col2 (type: int)
+                  sort order: +++
+                  Map-reduce partition columns: _col0 (type: string), _col1 
(type: string), _col2 (type: int)
+                  Statistics: Num rows: 24 Data size: 6120 Basic stats: 
COMPLETE Column stats: NONE
+                  value expressions: _col3 (type: bigint)
+        Reducer 3 
+            Execution mode: vectorized, llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0)
+                keys: KEY._col0 (type: string), KEY._col1 (type: string), 
KEY._col2 (type: int)
+                mode: final
+                outputColumnNames: _col0, _col1, _col3
+                Statistics: Num rows: 12 Data size: 3060 Basic stats: COMPLETE 
Column stats: NONE
+                pruneGroupingSetId: true
+                Select Operator
+                  expressions: _col0 (type: string), _col1 (type: string), 
_col3 (type: bigint)
+                  outputColumnNames: _col0, _col1, _col2
+                  Statistics: Num rows: 12 Data size: 3060 Basic stats: 
COMPLETE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 12 Data size: 3060 Basic stats: 
COMPLETE Column stats: NONE
+                    table:
+                        input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: EXPLAIN
+SELECT a, b, count(*) from T1 group by cube(a, b)
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+SELECT a, b, count(*) from T1 group by cube(a, b)
+POSTHOOK: type: QUERY
+Explain
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+        Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: t1
+                  Statistics: Num rows: 6 Data size: 1530 Basic stats: 
COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: a (type: string), b (type: string)
+                    outputColumnNames: a, b
+                    Statistics: Num rows: 6 Data size: 1530 Basic stats: 
COMPLETE Column stats: NONE
+                    Group By Operator
+                      aggregations: count()
+                      keys: a (type: string), b (type: string)
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2
+                      Statistics: Num rows: 6 Data size: 1530 Basic stats: 
COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string), _col1 (type: 
string)
+                        sort order: ++
+                        Map-reduce partition columns: _col0 (type: string), 
_col1 (type: string)
+                        Statistics: Num rows: 6 Data size: 1530 Basic stats: 
COMPLETE Column stats: NONE
+                        value expressions: _col2 (type: bigint)
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+        Reducer 2 
+            Execution mode: vectorized, llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0)
+                keys: KEY._col0 (type: string), KEY._col1 (type: string), 0 
(type: int)
+                mode: partials
+                outputColumnNames: _col0, _col1, _col2, _col3
+                Statistics: Num rows: 24 Data size: 6120 Basic stats: COMPLETE 
Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string), _col1 (type: string), 
_col2 (type: int)
+                  sort order: +++
+                  Map-reduce partition columns: _col0 (type: string), _col1 
(type: string), _col2 (type: int)
+                  Statistics: Num rows: 24 Data size: 6120 Basic stats: 
COMPLETE Column stats: NONE
+                  value expressions: _col3 (type: bigint)
+        Reducer 3 
+            Execution mode: vectorized, llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0)
+                keys: KEY._col0 (type: string), KEY._col1 (type: string), 
KEY._col2 (type: int)
+                mode: final
+                outputColumnNames: _col0, _col1, _col3
+                Statistics: Num rows: 12 Data size: 3060 Basic stats: COMPLETE 
Column stats: NONE
+                pruneGroupingSetId: true
+                Select Operator
+                  expressions: _col0 (type: string), _col1 (type: string), 
_col3 (type: bigint)
+                  outputColumnNames: _col0, _col1, _col2
+                  Statistics: Num rows: 12 Data size: 3060 Basic stats: 
COMPLETE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 12 Data size: 3060 Basic stats: 
COMPLETE Column stats: NONE
+                    table:
+                        input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: SELECT a, b, count(*) from T1 group by a, b with cube
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT a, b, count(*) from T1 group by a, b with cube
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+a      b       _c2
+1      1       1
+1      NULL    1
+2      2       1
+2      3       1
+2      NULL    2
+3      2       1
+3      NULL    1
+5      2       1
+5      NULL    1
+8      1       1
+8      NULL    1
+NULL   1       2
+NULL   2       3
+NULL   3       1
+NULL   NULL    6
+PREHOOK: query: EXPLAIN
+SELECT a, b, sum(c) from T1 group by a, b with cube
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+SELECT a, b, sum(c) from T1 group by a, b with cube
+POSTHOOK: type: QUERY
+Explain
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+        Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: t1
+                  Statistics: Num rows: 6 Data size: 1530 Basic stats: 
COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: a (type: string), b (type: string), c (type: 
string)
+                    outputColumnNames: a, b, c
+                    Statistics: Num rows: 6 Data size: 1530 Basic stats: 
COMPLETE Column stats: NONE
+                    Group By Operator
+                      aggregations: sum(c)
+                      keys: a (type: string), b (type: string)
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2
+                      Statistics: Num rows: 6 Data size: 1530 Basic stats: 
COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string), _col1 (type: 
string)
+                        sort order: ++
+                        Map-reduce partition columns: _col0 (type: string), 
_col1 (type: string)
+                        Statistics: Num rows: 6 Data size: 1530 Basic stats: 
COMPLETE Column stats: NONE
+                        value expressions: _col2 (type: double)
+            Execution mode: llap
+            LLAP IO: all inputs
+        Reducer 2 
+            Execution mode: vectorized, llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: sum(VALUE._col0)
+                keys: KEY._col0 (type: string), KEY._col1 (type: string), 0 
(type: int)
+                mode: partials
+                outputColumnNames: _col0, _col1, _col2, _col3
+                Statistics: Num rows: 24 Data size: 6120 Basic stats: COMPLETE 
Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string), _col1 (type: string), 
_col2 (type: int)
+                  sort order: +++
+                  Map-reduce partition columns: _col0 (type: string), _col1 
(type: string), _col2 (type: int)
+                  Statistics: Num rows: 24 Data size: 6120 Basic stats: 
COMPLETE Column stats: NONE
+                  value expressions: _col3 (type: double)
+        Reducer 3 
+            Execution mode: vectorized, llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: sum(VALUE._col0)
+                keys: KEY._col0 (type: string), KEY._col1 (type: string), 
KEY._col2 (type: int)
+                mode: final
+                outputColumnNames: _col0, _col1, _col3
+                Statistics: Num rows: 12 Data size: 3060 Basic stats: COMPLETE 
Column stats: NONE
+                pruneGroupingSetId: true
+                Select Operator
+                  expressions: _col0 (type: string), _col1 (type: string), 
_col3 (type: double)
+                  outputColumnNames: _col0, _col1, _col2
+                  Statistics: Num rows: 12 Data size: 3060 Basic stats: 
COMPLETE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 12 Data size: 3060 Basic stats: 
COMPLETE Column stats: NONE
+                    table:
+                        input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: SELECT a, b, sum(c) from T1 group by a, b with cube
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT a, b, sum(c) from T1 group by a, b with cube
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+a      b       _c2
+1      1       3.0
+1      NULL    3.0
+2      2       4.0
+2      3       5.0
+2      NULL    9.0
+3      2       8.0
+3      NULL    8.0
+5      2       2.0
+5      NULL    2.0
+8      1       1.0
+8      NULL    1.0
+NULL   1       4.0
+NULL   2       14.0
+NULL   3       5.0
+NULL   NULL    23.0
+PREHOOK: query: CREATE TABLE T2(a STRING, b STRING, c int, d int) STORED AS ORC
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@T2
+POSTHOOK: query: CREATE TABLE T2(a STRING, b STRING, c int, d int) STORED AS 
ORC
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@T2
+PREHOOK: query: INSERT OVERWRITE TABLE T2
+SELECT a, b, c, c from T1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+PREHOOK: Output: default@t2
+POSTHOOK: query: INSERT OVERWRITE TABLE T2
+SELECT a, b, c, c from T1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+POSTHOOK: Output: default@t2
+POSTHOOK: Lineage: t2.a SIMPLE [(t1)t1.FieldSchema(name:a, type:string, 
comment:null), ]
+POSTHOOK: Lineage: t2.b SIMPLE [(t1)t1.FieldSchema(name:b, type:string, 
comment:null), ]
+POSTHOOK: Lineage: t2.c EXPRESSION [(t1)t1.FieldSchema(name:c, type:string, 
comment:null), ]
+POSTHOOK: Lineage: t2.d EXPRESSION [(t1)t1.FieldSchema(name:c, type:string, 
comment:null), ]
+_col0  _col1   _col2   _col3
+PREHOOK: query: EXPLAIN
+SELECT a, b, sum(c+d) from T2 group by a, b with cube
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+SELECT a, b, sum(c+d) from T2 group by a, b with cube
+POSTHOOK: type: QUERY
+Explain
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+        Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: t2
+                  Statistics: Num rows: 6 Data size: 1068 Basic stats: 
COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: a (type: string), b (type: string), (c + d) 
(type: int)
+                    outputColumnNames: _col0, _col1, _col2
+                    Statistics: Num rows: 6 Data size: 1068 Basic stats: 
COMPLETE Column stats: NONE
+                    Group By Operator
+                      aggregations: sum(_col2)
+                      keys: _col0 (type: string), _col1 (type: string)
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2
+                      Statistics: Num rows: 6 Data size: 1068 Basic stats: 
COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string), _col1 (type: 
string)
+                        sort order: ++
+                        Map-reduce partition columns: _col0 (type: string), 
_col1 (type: string)
+                        Statistics: Num rows: 6 Data size: 1068 Basic stats: 
COMPLETE Column stats: NONE
+                        value expressions: _col2 (type: bigint)
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+        Reducer 2 
+            Execution mode: vectorized, llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: sum(VALUE._col0)
+                keys: KEY._col0 (type: string), KEY._col1 (type: string), 0 
(type: int)
+                mode: partials
+                outputColumnNames: _col0, _col1, _col2, _col3
+                Statistics: Num rows: 24 Data size: 4272 Basic stats: COMPLETE 
Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string), _col1 (type: string), 
_col2 (type: int)
+                  sort order: +++
+                  Map-reduce partition columns: _col0 (type: string), _col1 
(type: string), _col2 (type: int)
+                  Statistics: Num rows: 24 Data size: 4272 Basic stats: 
COMPLETE Column stats: NONE
+                  value expressions: _col3 (type: bigint)
+        Reducer 3 
+            Execution mode: vectorized, llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: sum(VALUE._col0)
+                keys: KEY._col0 (type: string), KEY._col1 (type: string), 
KEY._col2 (type: int)
+                mode: final
+                outputColumnNames: _col0, _col1, _col3
+                Statistics: Num rows: 12 Data size: 2136 Basic stats: COMPLETE 
Column stats: NONE
+                pruneGroupingSetId: true
+                Select Operator
+                  expressions: _col0 (type: string), _col1 (type: string), 
_col3 (type: bigint)
+                  outputColumnNames: _col0, _col1, _col2
+                  Statistics: Num rows: 12 Data size: 2136 Basic stats: 
COMPLETE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 12 Data size: 2136 Basic stats: 
COMPLETE Column stats: NONE
+                    table:
+                        input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: SELECT a, b, sum(c+d) from T2 group by a, b with cube
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t2
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT a, b, sum(c+d) from T2 group by a, b with cube
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t2
+#### A masked pattern was here ####
+a      b       _c2
+1      1       6
+1      NULL    6
+2      2       8
+2      3       10
+2      NULL    18
+3      2       16
+3      NULL    16
+5      2       4
+5      NULL    4
+8      1       2
+8      NULL    2
+NULL   1       8
+NULL   2       28
+NULL   3       10
+NULL   NULL    46

http://git-wip-us.apache.org/repos/asf/hive/blob/47ef02e4/ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets3.q.out
----------------------------------------------------------------------
diff --git 
a/ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets3.q.out 
b/ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets3.q.out
new file mode 100644
index 0000000..b30aabd
--- /dev/null
+++ 
b/ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets3.q.out
@@ -0,0 +1,314 @@
+PREHOOK: query: CREATE TABLE T1_text(a STRING, b STRING, c STRING) ROW FORMAT 
DELIMITED FIELDS TERMINATED BY ' ' STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@T1_text
+POSTHOOK: query: CREATE TABLE T1_text(a STRING, b STRING, c STRING) ROW FORMAT 
DELIMITED FIELDS TERMINATED BY ' ' STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@T1_text
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/grouping_sets1.txt' 
INTO TABLE T1_text
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@t1_text
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/grouping_sets1.txt' 
INTO TABLE T1_text
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@t1_text
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/grouping_sets2.txt' 
INTO TABLE T1_text
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@t1_text
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/grouping_sets2.txt' 
INTO TABLE T1_text
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@t1_text
+PREHOOK: query: CREATE TABLE T1 STORED AS ORC AS SELECT * FROM T1_text
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@t1_text
+PREHOOK: Output: database:default
+PREHOOK: Output: default@T1
+POSTHOOK: query: CREATE TABLE T1 STORED AS ORC AS SELECT * FROM T1_text
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@t1_text
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@T1
+POSTHOOK: Lineage: t1.a SIMPLE [(t1_text)t1_text.FieldSchema(name:a, 
type:string, comment:null), ]
+POSTHOOK: Lineage: t1.b SIMPLE [(t1_text)t1_text.FieldSchema(name:b, 
type:string, comment:null), ]
+POSTHOOK: Lineage: t1.c SIMPLE [(t1_text)t1_text.FieldSchema(name:c, 
type:string, comment:null), ]
+t1_text.a      t1_text.b       t1_text.c
+PREHOOK: query: EXPLAIN
+SELECT a, b, avg(c), count(*) from T1 group by a, b with cube
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+SELECT a, b, avg(c), count(*) from T1 group by a, b with cube
+POSTHOOK: type: QUERY
+Explain
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: t1
+                  Statistics: Num rows: 12 Data size: 3060 Basic stats: 
COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: a (type: string), b (type: string), c (type: 
string)
+                    outputColumnNames: a, b, c
+                    Statistics: Num rows: 12 Data size: 3060 Basic stats: 
COMPLETE Column stats: NONE
+                    Group By Operator
+                      aggregations: avg(c), count()
+                      keys: a (type: string), b (type: string), 0 (type: int)
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2, _col3, _col4
+                      Statistics: Num rows: 48 Data size: 12240 Basic stats: 
COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string), _col1 (type: 
string), _col2 (type: int)
+                        sort order: +++
+                        Map-reduce partition columns: _col0 (type: string), 
_col1 (type: string), _col2 (type: int)
+                        Statistics: Num rows: 48 Data size: 12240 Basic stats: 
COMPLETE Column stats: NONE
+                        value expressions: _col3 (type: 
struct<count:bigint,sum:double,input:string>), _col4 (type: bigint)
+            Execution mode: llap
+            LLAP IO: all inputs
+        Reducer 2 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: avg(VALUE._col0), count(VALUE._col1)
+                keys: KEY._col0 (type: string), KEY._col1 (type: string), 
KEY._col2 (type: int)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col3, _col4
+                Statistics: Num rows: 24 Data size: 6120 Basic stats: COMPLETE 
Column stats: NONE
+                pruneGroupingSetId: true
+                Select Operator
+                  expressions: _col0 (type: string), _col1 (type: string), 
_col3 (type: double), _col4 (type: bigint)
+                  outputColumnNames: _col0, _col1, _col2, _col3
+                  Statistics: Num rows: 24 Data size: 6120 Basic stats: 
COMPLETE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 24 Data size: 6120 Basic stats: 
COMPLETE Column stats: NONE
+                    table:
+                        input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: EXPLAIN
+SELECT a, b, avg(c), count(*) from T1 group by cube(a, b)
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+SELECT a, b, avg(c), count(*) from T1 group by cube(a, b)
+POSTHOOK: type: QUERY
+Explain
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: t1
+                  Statistics: Num rows: 12 Data size: 3060 Basic stats: 
COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: a (type: string), b (type: string), c (type: 
string)
+                    outputColumnNames: a, b, c
+                    Statistics: Num rows: 12 Data size: 3060 Basic stats: 
COMPLETE Column stats: NONE
+                    Group By Operator
+                      aggregations: avg(c), count()
+                      keys: a (type: string), b (type: string), 0 (type: int)
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2, _col3, _col4
+                      Statistics: Num rows: 48 Data size: 12240 Basic stats: 
COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string), _col1 (type: 
string), _col2 (type: int)
+                        sort order: +++
+                        Map-reduce partition columns: _col0 (type: string), 
_col1 (type: string), _col2 (type: int)
+                        Statistics: Num rows: 48 Data size: 12240 Basic stats: 
COMPLETE Column stats: NONE
+                        value expressions: _col3 (type: 
struct<count:bigint,sum:double,input:string>), _col4 (type: bigint)
+            Execution mode: llap
+            LLAP IO: all inputs
+        Reducer 2 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: avg(VALUE._col0), count(VALUE._col1)
+                keys: KEY._col0 (type: string), KEY._col1 (type: string), 
KEY._col2 (type: int)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col3, _col4
+                Statistics: Num rows: 24 Data size: 6120 Basic stats: COMPLETE 
Column stats: NONE
+                pruneGroupingSetId: true
+                Select Operator
+                  expressions: _col0 (type: string), _col1 (type: string), 
_col3 (type: double), _col4 (type: bigint)
+                  outputColumnNames: _col0, _col1, _col2, _col3
+                  Statistics: Num rows: 24 Data size: 6120 Basic stats: 
COMPLETE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 24 Data size: 6120 Basic stats: 
COMPLETE Column stats: NONE
+                    table:
+                        input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: SELECT a, b, avg(c), count(*) from T1 group by a, b with cube
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT a, b, avg(c), count(*) from T1 group by a, b with cube
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+a      b       _c2     _c3
+1      1       3.0     2
+1      2       2.0     1
+1      NULL    2.6666666666666665      3
+2      2       5.333333333333333       3
+2      3       5.0     2
+2      NULL    5.2     5
+3      2       8.0     1
+3      NULL    8.0     1
+5      1       2.0     1
+5      NULL    2.0     1
+8      1       1.0     2
+8      NULL    1.0     2
+NULL   1       2.0     5
+NULL   2       5.2     5
+NULL   3       5.0     2
+NULL   NULL    3.8333333333333335      12
+PREHOOK: query: EXPLAIN
+SELECT a, b, avg(c), count(*) from T1 group by a, b with cube
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+SELECT a, b, avg(c), count(*) from T1 group by a, b with cube
+POSTHOOK: type: QUERY
+Explain
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+        Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: t1
+                  Statistics: Num rows: 12 Data size: 3060 Basic stats: 
COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: a (type: string), b (type: string), c (type: 
string)
+                    outputColumnNames: a, b, c
+                    Statistics: Num rows: 12 Data size: 3060 Basic stats: 
COMPLETE Column stats: NONE
+                    Group By Operator
+                      aggregations: avg(c), count()
+                      keys: a (type: string), b (type: string)
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2, _col3
+                      Statistics: Num rows: 12 Data size: 3060 Basic stats: 
COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string), _col1 (type: 
string)
+                        sort order: ++
+                        Map-reduce partition columns: _col0 (type: string), 
_col1 (type: string)
+                        Statistics: Num rows: 12 Data size: 3060 Basic stats: 
COMPLETE Column stats: NONE
+                        value expressions: _col2 (type: 
struct<count:bigint,sum:double,input:string>), _col3 (type: bigint)
+            Execution mode: llap
+            LLAP IO: all inputs
+        Reducer 2 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: avg(VALUE._col0), count(VALUE._col1)
+                keys: KEY._col0 (type: string), KEY._col1 (type: string), 0 
(type: int)
+                mode: partials
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4
+                Statistics: Num rows: 48 Data size: 12240 Basic stats: 
COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string), _col1 (type: string), 
_col2 (type: int)
+                  sort order: +++
+                  Map-reduce partition columns: _col0 (type: string), _col1 
(type: string), _col2 (type: int)
+                  Statistics: Num rows: 48 Data size: 12240 Basic stats: 
COMPLETE Column stats: NONE
+                  value expressions: _col3 (type: 
struct<count:bigint,sum:double,input:string>), _col4 (type: bigint)
+        Reducer 3 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: avg(VALUE._col0), count(VALUE._col1)
+                keys: KEY._col0 (type: string), KEY._col1 (type: string), 
KEY._col2 (type: int)
+                mode: final
+                outputColumnNames: _col0, _col1, _col3, _col4
+                Statistics: Num rows: 24 Data size: 6120 Basic stats: COMPLETE 
Column stats: NONE
+                pruneGroupingSetId: true
+                Select Operator
+                  expressions: _col0 (type: string), _col1 (type: string), 
_col3 (type: double), _col4 (type: bigint)
+                  outputColumnNames: _col0, _col1, _col2, _col3
+                  Statistics: Num rows: 24 Data size: 6120 Basic stats: 
COMPLETE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 24 Data size: 6120 Basic stats: 
COMPLETE Column stats: NONE
+                    table:
+                        input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: SELECT a, b, avg(c), count(*) from T1 group by a, b with cube
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT a, b, avg(c), count(*) from T1 group by a, b with cube
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+a      b       _c2     _c3
+1      1       3.0     2
+1      2       2.0     1
+1      NULL    2.6666666666666665      3
+2      2       5.333333333333333       3
+2      3       5.0     2
+2      NULL    5.2     5
+3      2       8.0     1
+3      NULL    8.0     1
+5      1       2.0     1
+5      NULL    2.0     1
+8      1       1.0     2
+8      NULL    1.0     2
+NULL   1       2.0     5
+NULL   2       5.2     5
+NULL   3       5.0     2
+NULL   NULL    3.8333333333333335      12

[4/7] hive git commit: HIVE-14016 : Vectorization: Add support for Grouping Sets (Matt McCline, reviewed by Gopal Vijayaraghavan, Jesus Camacho Rodriguez)

Reply via email to