[2/3] hive git commit: HIVE-17043: Remove non unique columns from group by keys if not referenced later (Vineet Garg, reviewed by Jesus Camacho Rodriguez)

vgarg Sun, 14 Oct 2018 20:34:28 -0700

http://git-wip-us.apache.org/repos/asf/hive/blob/1db3debc/ql/src/test/results/clientpositive/llap/constraints_optimization.q.out
----------------------------------------------------------------------
diff --git 
a/ql/src/test/results/clientpositive/llap/constraints_optimization.q.out 
b/ql/src/test/results/clientpositive/llap/constraints_optimization.q.out
new file mode 100644
index 0000000..b45b7c4
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/constraints_optimization.q.out
@@ -0,0 +1,2010 @@
+PREHOOK: query: CREATE TABLE `customer_removal_n0`(
+  `c_custkey` bigint,
+  `c_name` string,
+  `c_address` string,
+  `c_city` string,
+  `c_nation` string,
+  `c_region` string,
+  `c_phone` string,
+  `c_mktsegment` string,
+  primary key (`c_custkey`) disable rely)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@customer_removal_n0
+POSTHOOK: query: CREATE TABLE `customer_removal_n0`(
+  `c_custkey` bigint,
+  `c_name` string,
+  `c_address` string,
+  `c_city` string,
+  `c_nation` string,
+  `c_region` string,
+  `c_phone` string,
+  `c_mktsegment` string,
+  primary key (`c_custkey`) disable rely)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@customer_removal_n0
+PREHOOK: query: CREATE TABLE `dates_removal_n0`(
+  `d_datekey` bigint,
+  `d_id` bigint,
+  `d_date` string,
+  `d_dayofweek` string,
+  `d_month` string,
+  `d_year` int,
+  `d_yearmonthnum` int,
+  `d_yearmonth` string,
+  `d_daynuminweek` int,
+  `d_daynuminmonth` int,
+  `d_daynuminyear` int,
+  `d_monthnuminyear` int,
+  `d_weeknuminyear` int,
+  `d_sellingseason` string,
+  `d_lastdayinweekfl` int,
+  `d_lastdayinmonthfl` int,
+  `d_holidayfl` int ,
+  `d_weekdayfl`int,
+  primary key (`d_datekey`, `d_id`) disable rely)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@dates_removal_n0
+POSTHOOK: query: CREATE TABLE `dates_removal_n0`(
+  `d_datekey` bigint,
+  `d_id` bigint,
+  `d_date` string,
+  `d_dayofweek` string,
+  `d_month` string,
+  `d_year` int,
+  `d_yearmonthnum` int,
+  `d_yearmonth` string,
+  `d_daynuminweek` int,
+  `d_daynuminmonth` int,
+  `d_daynuminyear` int,
+  `d_monthnuminyear` int,
+  `d_weeknuminyear` int,
+  `d_sellingseason` string,
+  `d_lastdayinweekfl` int,
+  `d_lastdayinmonthfl` int,
+  `d_holidayfl` int ,
+  `d_weekdayfl`int,
+  primary key (`d_datekey`, `d_id`) disable rely)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@dates_removal_n0
+PREHOOK: query: -- group by key has single primary key
+  EXPLAIN SELECT c_custkey from customer_removal_n0 where c_nation IN ('USA', 
'INDIA') group by c_custkey
+PREHOOK: type: QUERY
+PREHOOK: Input: default@customer_removal_n0
+#### A masked pattern was here ####
+POSTHOOK: query: -- group by key has single primary key
+  EXPLAIN SELECT c_custkey from customer_removal_n0 where c_nation IN ('USA', 
'INDIA') group by c_custkey
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@customer_removal_n0
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        TableScan
+          alias: customer_removal_n0
+          filterExpr: (c_nation) IN ('USA', 'INDIA') (type: boolean)
+          Filter Operator
+            predicate: (c_nation) IN ('USA', 'INDIA') (type: boolean)
+            Select Operator
+              expressions: c_custkey (type: bigint)
+              outputColumnNames: _col0
+              ListSink
+
+PREHOOK: query: -- mix of primary + non-primary keys
+  EXPLAIN SELECT c_custkey from customer_removal_n0 where c_nation IN ('USA', 
'INDIA') group by c_custkey, c_nation
+PREHOOK: type: QUERY
+PREHOOK: Input: default@customer_removal_n0
+#### A masked pattern was here ####
+POSTHOOK: query: -- mix of primary + non-primary keys
+  EXPLAIN SELECT c_custkey from customer_removal_n0 where c_nation IN ('USA', 
'INDIA') group by c_custkey, c_nation
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@customer_removal_n0
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        TableScan
+          alias: customer_removal_n0
+          filterExpr: (c_nation) IN ('USA', 'INDIA') (type: boolean)
+          Filter Operator
+            predicate: (c_nation) IN ('USA', 'INDIA') (type: boolean)
+            Select Operator
+              expressions: c_custkey (type: bigint)
+              outputColumnNames: _col0
+              ListSink
+
+PREHOOK: query: -- multiple keys
+  EXPLAIN SELECT d_datekey from dates_removal_n0 where d_year IN (1985, 2004) 
group by d_datekey, d_id
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dates_removal_n0
+#### A masked pattern was here ####
+POSTHOOK: query: -- multiple keys
+  EXPLAIN SELECT d_datekey from dates_removal_n0 where d_year IN (1985, 2004) 
group by d_datekey, d_id
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dates_removal_n0
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        TableScan
+          alias: dates_removal_n0
+          filterExpr: (d_year) IN (1985, 2004) (type: boolean)
+          Filter Operator
+            predicate: (d_year) IN (1985, 2004) (type: boolean)
+            Select Operator
+              expressions: d_datekey (type: bigint)
+              outputColumnNames: _col0
+              ListSink
+
+PREHOOK: query: -- multiple keys + non-keys + different order
+  EXPLAIN SELECT d_datekey from dates_removal_n0 where d_year IN (1985, 2004) 
group by d_id, d_datekey, d_sellingseason
+    order by d_datekey limit 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dates_removal_n0
+#### A masked pattern was here ####
+POSTHOOK: query: -- multiple keys + non-keys + different order
+  EXPLAIN SELECT d_datekey from dates_removal_n0 where d_year IN (1985, 2004) 
group by d_id, d_datekey, d_sellingseason
+    order by d_datekey limit 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dates_removal_n0
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: dates_removal_n0
+                  filterExpr: (d_year) IN (1985, 2004) (type: boolean)
+                  Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE 
Column stats: NONE
+                  Filter Operator
+                    predicate: (d_year) IN (1985, 2004) (type: boolean)
+                    Statistics: Num rows: 1 Data size: 12 Basic stats: 
COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: d_datekey (type: bigint)
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 1 Data size: 12 Basic stats: 
COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: bigint)
+                        sort order: +
+                        Statistics: Num rows: 1 Data size: 12 Basic stats: 
COMPLETE Column stats: NONE
+                        TopN Hash Memory Usage: 0.1
+            Execution mode: vectorized, llap
+            LLAP IO: no inputs
+        Reducer 2 
+            Execution mode: vectorized, llap
+            Reduce Operator Tree:
+              Select Operator
+                expressions: KEY.reducesinkkey0 (type: bigint)
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE 
Column stats: NONE
+                Limit
+                  Number of rows: 10
+                  Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE 
Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 1 Data size: 12 Basic stats: 
COMPLETE Column stats: NONE
+                    table:
+                        input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 10
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: -- multiple keys in different order and mixed with non-keys
+  EXPLAIN SELECT d_datekey from dates_removal_n0 where d_year IN (1985, 2004) 
group by d_id, d_daynuminmonth, d_datekey,
+  d_sellingseason order by d_datekey limit 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dates_removal_n0
+#### A masked pattern was here ####
+POSTHOOK: query: -- multiple keys in different order and mixed with non-keys
+  EXPLAIN SELECT d_datekey from dates_removal_n0 where d_year IN (1985, 2004) 
group by d_id, d_daynuminmonth, d_datekey,
+  d_sellingseason order by d_datekey limit 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dates_removal_n0
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: dates_removal_n0
+                  filterExpr: (d_year) IN (1985, 2004) (type: boolean)
+                  Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE 
Column stats: NONE
+                  Filter Operator
+                    predicate: (d_year) IN (1985, 2004) (type: boolean)
+                    Statistics: Num rows: 1 Data size: 12 Basic stats: 
COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: d_datekey (type: bigint)
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 1 Data size: 12 Basic stats: 
COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: bigint)
+                        sort order: +
+                        Statistics: Num rows: 1 Data size: 12 Basic stats: 
COMPLETE Column stats: NONE
+                        TopN Hash Memory Usage: 0.1
+            Execution mode: vectorized, llap
+            LLAP IO: no inputs
+        Reducer 2 
+            Execution mode: vectorized, llap
+            Reduce Operator Tree:
+              Select Operator
+                expressions: KEY.reducesinkkey0 (type: bigint)
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE 
Column stats: NONE
+                Limit
+                  Number of rows: 10
+                  Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE 
Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 1 Data size: 12 Basic stats: 
COMPLETE Column stats: NONE
+                    table:
+                        input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 10
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: -- same as above but with aggregate
+  EXPLAIN SELECT count(d_datekey) from dates_removal_n0 where d_year IN (1985, 
2004) group by d_id, d_daynuminmonth, d_datekey,
+  d_sellingseason order by d_datekey limit 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dates_removal_n0
+#### A masked pattern was here ####
+POSTHOOK: query: -- same as above but with aggregate
+  EXPLAIN SELECT count(d_datekey) from dates_removal_n0 where d_year IN (1985, 
2004) group by d_id, d_daynuminmonth, d_datekey,
+  d_sellingseason order by d_datekey limit 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dates_removal_n0
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+        Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: dates_removal_n0
+                  filterExpr: (d_year) IN (1985, 2004) (type: boolean)
+                  Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE 
Column stats: NONE
+                  Filter Operator
+                    predicate: (d_year) IN (1985, 2004) (type: boolean)
+                    Statistics: Num rows: 1 Data size: 20 Basic stats: 
COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: d_datekey (type: bigint), d_id (type: 
bigint)
+                      outputColumnNames: d_datekey, d_id
+                      Statistics: Num rows: 1 Data size: 20 Basic stats: 
COMPLETE Column stats: NONE
+                      Top N Key Operator
+                        sort order: ++
+                        keys: d_datekey (type: bigint), d_id (type: bigint)
+                        Statistics: Num rows: 1 Data size: 20 Basic stats: 
COMPLETE Column stats: NONE
+                        top n: 10
+                        Group By Operator
+                          aggregations: count()
+                          keys: d_datekey (type: bigint), d_id (type: bigint)
+                          mode: hash
+                          outputColumnNames: _col0, _col1, _col2
+                          Statistics: Num rows: 1 Data size: 20 Basic stats: 
COMPLETE Column stats: NONE
+                          Reduce Output Operator
+                            key expressions: _col0 (type: bigint), _col1 
(type: bigint)
+                            sort order: ++
+                            Map-reduce partition columns: _col0 (type: 
bigint), _col1 (type: bigint)
+                            Statistics: Num rows: 1 Data size: 20 Basic stats: 
COMPLETE Column stats: NONE
+                            TopN Hash Memory Usage: 0.1
+                            value expressions: _col2 (type: bigint)
+            Execution mode: vectorized, llap
+            LLAP IO: no inputs
+        Reducer 2 
+            Execution mode: vectorized, llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0)
+                keys: KEY._col0 (type: bigint), KEY._col1 (type: bigint)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE 
Column stats: NONE
+                Select Operator
+                  expressions: _col2 (type: bigint), _col0 (type: bigint)
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE 
Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: _col1 (type: bigint)
+                    sort order: +
+                    Statistics: Num rows: 1 Data size: 20 Basic stats: 
COMPLETE Column stats: NONE
+                    TopN Hash Memory Usage: 0.1
+                    value expressions: _col0 (type: bigint)
+        Reducer 3 
+            Execution mode: vectorized, llap
+            Reduce Operator Tree:
+              Select Operator
+                expressions: VALUE._col0 (type: bigint)
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE 
Column stats: NONE
+                Limit
+                  Number of rows: 10
+                  Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE 
Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 1 Data size: 20 Basic stats: 
COMPLETE Column stats: NONE
+                    table:
+                        input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: -- join
+  insert into dates_removal_n0(d_datekey, d_id)  values(3, 0)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@dates_removal_n0
+POSTHOOK: query: -- join
+  insert into dates_removal_n0(d_datekey, d_id)  values(3, 0)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@dates_removal_n0
+POSTHOOK: Lineage: dates_removal_n0.d_date SIMPLE []
+POSTHOOK: Lineage: dates_removal_n0.d_datekey SCRIPT []
+POSTHOOK: Lineage: dates_removal_n0.d_daynuminmonth SIMPLE []
+POSTHOOK: Lineage: dates_removal_n0.d_daynuminweek SIMPLE []
+POSTHOOK: Lineage: dates_removal_n0.d_daynuminyear SIMPLE []
+POSTHOOK: Lineage: dates_removal_n0.d_dayofweek SIMPLE []
+POSTHOOK: Lineage: dates_removal_n0.d_holidayfl SIMPLE []
+POSTHOOK: Lineage: dates_removal_n0.d_id SCRIPT []
+POSTHOOK: Lineage: dates_removal_n0.d_lastdayinmonthfl SIMPLE []
+POSTHOOK: Lineage: dates_removal_n0.d_lastdayinweekfl SIMPLE []
+POSTHOOK: Lineage: dates_removal_n0.d_month SIMPLE []
+POSTHOOK: Lineage: dates_removal_n0.d_monthnuminyear SIMPLE []
+POSTHOOK: Lineage: dates_removal_n0.d_sellingseason SIMPLE []
+POSTHOOK: Lineage: dates_removal_n0.d_weekdayfl SIMPLE []
+POSTHOOK: Lineage: dates_removal_n0.d_weeknuminyear SIMPLE []
+POSTHOOK: Lineage: dates_removal_n0.d_year SIMPLE []
+POSTHOOK: Lineage: dates_removal_n0.d_yearmonth SIMPLE []
+POSTHOOK: Lineage: dates_removal_n0.d_yearmonthnum SIMPLE []
+PREHOOK: query: insert into dates_removal_n0(d_datekey, d_id)  values(3, 1)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@dates_removal_n0
+POSTHOOK: query: insert into dates_removal_n0(d_datekey, d_id)  values(3, 1)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@dates_removal_n0
+POSTHOOK: Lineage: dates_removal_n0.d_date SIMPLE []
+POSTHOOK: Lineage: dates_removal_n0.d_datekey SCRIPT []
+POSTHOOK: Lineage: dates_removal_n0.d_daynuminmonth SIMPLE []
+POSTHOOK: Lineage: dates_removal_n0.d_daynuminweek SIMPLE []
+POSTHOOK: Lineage: dates_removal_n0.d_daynuminyear SIMPLE []
+POSTHOOK: Lineage: dates_removal_n0.d_dayofweek SIMPLE []
+POSTHOOK: Lineage: dates_removal_n0.d_holidayfl SIMPLE []
+POSTHOOK: Lineage: dates_removal_n0.d_id SCRIPT []
+POSTHOOK: Lineage: dates_removal_n0.d_lastdayinmonthfl SIMPLE []
+POSTHOOK: Lineage: dates_removal_n0.d_lastdayinweekfl SIMPLE []
+POSTHOOK: Lineage: dates_removal_n0.d_month SIMPLE []
+POSTHOOK: Lineage: dates_removal_n0.d_monthnuminyear SIMPLE []
+POSTHOOK: Lineage: dates_removal_n0.d_sellingseason SIMPLE []
+POSTHOOK: Lineage: dates_removal_n0.d_weekdayfl SIMPLE []
+POSTHOOK: Lineage: dates_removal_n0.d_weeknuminyear SIMPLE []
+POSTHOOK: Lineage: dates_removal_n0.d_year SIMPLE []
+POSTHOOK: Lineage: dates_removal_n0.d_yearmonth SIMPLE []
+POSTHOOK: Lineage: dates_removal_n0.d_yearmonthnum SIMPLE []
+PREHOOK: query: insert into customer_removal_n0 (c_custkey) values(3)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@customer_removal_n0
+POSTHOOK: query: insert into customer_removal_n0 (c_custkey) values(3)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@customer_removal_n0
+POSTHOOK: Lineage: customer_removal_n0.c_address SIMPLE []
+POSTHOOK: Lineage: customer_removal_n0.c_city SIMPLE []
+POSTHOOK: Lineage: customer_removal_n0.c_custkey SCRIPT []
+POSTHOOK: Lineage: customer_removal_n0.c_mktsegment SIMPLE []
+POSTHOOK: Lineage: customer_removal_n0.c_name SIMPLE []
+POSTHOOK: Lineage: customer_removal_n0.c_nation SIMPLE []
+POSTHOOK: Lineage: customer_removal_n0.c_phone SIMPLE []
+POSTHOOK: Lineage: customer_removal_n0.c_region SIMPLE []
+PREHOOK: query: EXPLAIN SELECT d_datekey from dates_removal_n0 join 
customer_removal_n0 on d_datekey = c_custkey group by d_datekey, d_id
+PREHOOK: type: QUERY
+PREHOOK: Input: default@customer_removal_n0
+PREHOOK: Input: default@dates_removal_n0
+#### A masked pattern was here ####
+POSTHOOK: query: EXPLAIN SELECT d_datekey from dates_removal_n0 join 
customer_removal_n0 on d_datekey = c_custkey group by d_datekey, d_id
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@customer_removal_n0
+POSTHOOK: Input: default@dates_removal_n0
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: dates_removal_n0
+                  Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE 
Column stats: COMPLETE
+                  Select Operator
+                    expressions: d_datekey (type: bigint)
+                    outputColumnNames: _col0
+                    Statistics: Num rows: 2 Data size: 16 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    Reduce Output Operator
+                      key expressions: _col0 (type: bigint)
+                      sort order: +
+                      Map-reduce partition columns: _col0 (type: bigint)
+                      Statistics: Num rows: 2 Data size: 16 Basic stats: 
COMPLETE Column stats: COMPLETE
+            Execution mode: vectorized, llap
+            LLAP IO: no inputs
+        Map 3 
+            Map Operator Tree:
+                TableScan
+                  alias: customer_removal_n0
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: COMPLETE
+                  Select Operator
+                    expressions: c_custkey (type: bigint)
+                    outputColumnNames: _col0
+                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: COMPLETE
+                    Reduce Output Operator
+                      key expressions: _col0 (type: bigint)
+                      sort order: +
+                      Map-reduce partition columns: _col0 (type: bigint)
+                      Statistics: Num rows: 1 Data size: 8 Basic stats: 
COMPLETE Column stats: COMPLETE
+            Execution mode: vectorized, llap
+            LLAP IO: no inputs
+        Reducer 2 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Merge Join Operator
+                condition map:
+                     Inner Join 0 to 1
+                keys:
+                  0 _col0 (type: bigint)
+                  1 _col0 (type: bigint)
+                outputColumnNames: _col0
+                Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE 
Column stats: COMPLETE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE 
Column stats: COMPLETE
+                  table:
+                      input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: SELECT d_datekey from dates_removal_n0 join 
customer_removal_n0 on d_datekey = c_custkey group by d_datekey, d_id
+PREHOOK: type: QUERY
+PREHOOK: Input: default@customer_removal_n0
+PREHOOK: Input: default@dates_removal_n0
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT d_datekey from dates_removal_n0 join 
customer_removal_n0 on d_datekey = c_custkey group by d_datekey, d_id
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@customer_removal_n0
+POSTHOOK: Input: default@dates_removal_n0
+#### A masked pattern was here ####
+3
+3
+PREHOOK: query: -- group by keys are not primary keys
+  EXPLAIN SELECT d_datekey from dates_removal_n0 where d_year IN (1985, 2004) 
group by d_datekey, d_sellingseason
+    order by d_datekey limit 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dates_removal_n0
+#### A masked pattern was here ####
+POSTHOOK: query: -- group by keys are not primary keys
+  EXPLAIN SELECT d_datekey from dates_removal_n0 where d_year IN (1985, 2004) 
group by d_datekey, d_sellingseason
+    order by d_datekey limit 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dates_removal_n0
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+        Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: dates_removal_n0
+                  filterExpr: (d_year) IN (1985, 2004) (type: boolean)
+                  Statistics: Num rows: 2 Data size: 104 Basic stats: COMPLETE 
Column stats: COMPLETE
+                  Filter Operator
+                    predicate: (d_year) IN (1985, 2004) (type: boolean)
+                    Statistics: Num rows: 1 Data size: 96 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    Select Operator
+                      expressions: d_datekey (type: bigint), d_sellingseason 
(type: string)
+                      outputColumnNames: d_datekey, d_sellingseason
+                      Statistics: Num rows: 1 Data size: 96 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      Top N Key Operator
+                        sort order: ++
+                        keys: d_datekey (type: bigint), d_sellingseason (type: 
string)
+                        Statistics: Num rows: 1 Data size: 96 Basic stats: 
COMPLETE Column stats: COMPLETE
+                        top n: 10
+                        Group By Operator
+                          keys: d_datekey (type: bigint), d_sellingseason 
(type: string)
+                          mode: hash
+                          outputColumnNames: _col0, _col1
+                          Statistics: Num rows: 1 Data size: 8 Basic stats: 
COMPLETE Column stats: COMPLETE
+                          Reduce Output Operator
+                            key expressions: _col0 (type: bigint), _col1 
(type: string)
+                            sort order: ++
+                            Map-reduce partition columns: _col0 (type: 
bigint), _col1 (type: string)
+                            Statistics: Num rows: 1 Data size: 8 Basic stats: 
COMPLETE Column stats: COMPLETE
+                            TopN Hash Memory Usage: 0.1
+            Execution mode: vectorized, llap
+            LLAP IO: no inputs
+        Reducer 2 
+            Execution mode: vectorized, llap
+            Reduce Operator Tree:
+              Group By Operator
+                keys: KEY._col0 (type: bigint), KEY._col1 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: COMPLETE
+                Select Operator
+                  expressions: _col0 (type: bigint)
+                  outputColumnNames: _col0
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: COMPLETE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: bigint)
+                    sort order: +
+                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: COMPLETE
+                    TopN Hash Memory Usage: 0.1
+        Reducer 3 
+            Execution mode: vectorized, llap
+            Reduce Operator Tree:
+              Select Operator
+                expressions: KEY.reducesinkkey0 (type: bigint)
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: COMPLETE
+                Limit
+                  Number of rows: 10
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: COMPLETE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: COMPLETE
+                    table:
+                        input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 10
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: -- negative
+  -- with aggregate function
+  EXPLAIN SELECT count(c_custkey) from customer_removal_n0 where c_nation IN 
('USA', 'INDIA')
+    group by c_custkey, c_nation
+PREHOOK: type: QUERY
+PREHOOK: Input: default@customer_removal_n0
+#### A masked pattern was here ####
+POSTHOOK: query: -- negative
+  -- with aggregate function
+  EXPLAIN SELECT count(c_custkey) from customer_removal_n0 where c_nation IN 
('USA', 'INDIA')
+    group by c_custkey, c_nation
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@customer_removal_n0
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: customer_removal_n0
+                  filterExpr: (c_nation) IN ('USA', 'INDIA') (type: boolean)
+                  Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE 
Column stats: COMPLETE
+                  Filter Operator
+                    predicate: (c_nation) IN ('USA', 'INDIA') (type: boolean)
+                    Statistics: Num rows: 1 Data size: 92 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    Select Operator
+                      expressions: c_custkey (type: bigint)
+                      outputColumnNames: c_custkey
+                      Statistics: Num rows: 1 Data size: 92 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      Group By Operator
+                        aggregations: count()
+                        keys: c_custkey (type: bigint)
+                        mode: hash
+                        outputColumnNames: _col0, _col1
+                        Statistics: Num rows: 1 Data size: 16 Basic stats: 
COMPLETE Column stats: COMPLETE
+                        Reduce Output Operator
+                          key expressions: _col0 (type: bigint)
+                          sort order: +
+                          Map-reduce partition columns: _col0 (type: bigint)
+                          Statistics: Num rows: 1 Data size: 16 Basic stats: 
COMPLETE Column stats: COMPLETE
+                          value expressions: _col1 (type: bigint)
+            Execution mode: vectorized, llap
+            LLAP IO: no inputs
+        Reducer 2 
+            Execution mode: vectorized, llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0)
+                keys: KEY._col0 (type: bigint)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE 
Column stats: COMPLETE
+                Select Operator
+                  expressions: _col1 (type: bigint)
+                  outputColumnNames: _col0
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: COMPLETE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: COMPLETE
+                    table:
+                        input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: DROP TABLE customer_removal_n0
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@customer_removal_n0
+PREHOOK: Output: default@customer_removal_n0
+POSTHOOK: query: DROP TABLE customer_removal_n0
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@customer_removal_n0
+POSTHOOK: Output: default@customer_removal_n0
+PREHOOK: query: DROP TABLE dates_removal_n0
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@dates_removal_n0
+PREHOOK: Output: default@dates_removal_n0
+POSTHOOK: query: DROP TABLE dates_removal_n0
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@dates_removal_n0
+POSTHOOK: Output: default@dates_removal_n0
+PREHOOK: query: -- group by reduction optimization
+  create table dest_g21 (key1 int, value1 double, primary key(key1) disable 
rely)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@dest_g21
+POSTHOOK: query: -- group by reduction optimization
+  create table dest_g21 (key1 int, value1 double, primary key(key1) disable 
rely)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@dest_g21
+PREHOOK: query: insert into dest_g21 values(1, 2), (2,2), (3, 1), (4,4), (5, 
null), (6, null)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@dest_g21
+POSTHOOK: query: insert into dest_g21 values(1, 2), (2,2), (3, 1), (4,4), (5, 
null), (6, null)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@dest_g21
+POSTHOOK: Lineage: dest_g21.key1 SCRIPT []
+POSTHOOK: Lineage: dest_g21.value1 SCRIPT []
+PREHOOK: query: -- value1 will be removed because it is unused, then the whole group 
by will be removed because key1 is unique
+  explain select key1 from dest_g21 group by key1, value1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest_g21
+#### A masked pattern was here ####
+POSTHOOK: query: -- value1 will be removed because it is unused, then the whole group 
by will be removed because key1 is unique
+  explain select key1 from dest_g21 group by key1, value1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dest_g21
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        TableScan
+          alias: dest_g21
+          Select Operator
+            expressions: key1 (type: int)
+            outputColumnNames: _col0
+            ListSink
+
+PREHOOK: query: select key1 from dest_g21 group by key1, value1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest_g21
+#### A masked pattern was here ####
+POSTHOOK: query: select key1 from dest_g21 group by key1, value1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dest_g21
+#### A masked pattern was here ####
+1
+2
+3
+4
+5
+6
+PREHOOK: query: -- same query but with filter
+  explain select key1 from dest_g21 where value1 > 1 group by key1, value1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest_g21
+#### A masked pattern was here ####
+POSTHOOK: query: -- same query but with filter
+  explain select key1 from dest_g21 where value1 > 1 group by key1, value1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dest_g21
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        TableScan
+          alias: dest_g21
+          filterExpr: (value1 > 1.0D) (type: boolean)
+          Filter Operator
+            predicate: (value1 > 1.0D) (type: boolean)
+            Select Operator
+              expressions: key1 (type: int)
+              outputColumnNames: _col0
+              ListSink
+
+PREHOOK: query: select key1 from dest_g21 where value1 > 1 group by key1, 
value1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest_g21
+#### A masked pattern was here ####
+POSTHOOK: query: select key1 from dest_g21 where value1 > 1 group by key1, 
value1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dest_g21
+#### A masked pattern was here ####
+1
+2
+4
+PREHOOK: query: explain select key1 from dest_g21 where key1 > 1 group by 
key1, value1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest_g21
+#### A masked pattern was here ####
+POSTHOOK: query: explain select key1 from dest_g21 where key1 > 1 group by 
key1, value1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dest_g21
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        TableScan
+          alias: dest_g21
+          filterExpr: (key1 > 1) (type: boolean)
+          Filter Operator
+            predicate: (key1 > 1) (type: boolean)
+            Select Operator
+              expressions: key1 (type: int)
+              outputColumnNames: _col0
+              ListSink
+
+PREHOOK: query: select key1 from dest_g21 where key1 > 1 group by key1, value1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest_g21
+#### A masked pattern was here ####
+POSTHOOK: query: select key1 from dest_g21 where key1 > 1 group by key1, value1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dest_g21
+#### A masked pattern was here ####
+2
+3
+4
+5
+6
+PREHOOK: query: -- only value1 will be removed because there is an aggregate call
+  explain select count(key1) from dest_g21 group by key1, value1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest_g21
+#### A masked pattern was here ####
+POSTHOOK: query: -- only value1 will be removed because there is an aggregate call
+  explain select count(key1) from dest_g21 group by key1, value1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dest_g21
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: dest_g21
+                  Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE 
Column stats: COMPLETE
+                  Select Operator
+                    expressions: key1 (type: int)
+                    outputColumnNames: key1
+                    Statistics: Num rows: 6 Data size: 24 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    Group By Operator
+                      aggregations: count()
+                      keys: key1 (type: int)
+                      mode: hash
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 3 Data size: 36 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: int)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: int)
+                        Statistics: Num rows: 3 Data size: 36 Basic stats: 
COMPLETE Column stats: COMPLETE
+                        value expressions: _col1 (type: bigint)
+            Execution mode: vectorized, llap
+            LLAP IO: no inputs
+        Reducer 2 
+            Execution mode: vectorized, llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0)
+                keys: KEY._col0 (type: int)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE 
Column stats: COMPLETE
+                Select Operator
+                  expressions: _col1 (type: bigint)
+                  outputColumnNames: _col0
+                  Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE 
Column stats: COMPLETE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 3 Data size: 24 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    table:
+                        input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select count(key1) from dest_g21 group by key1, value1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest_g21
+#### A masked pattern was here ####
+POSTHOOK: query: select count(key1) from dest_g21 group by key1, value1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dest_g21
+#### A masked pattern was here ####
+1
+1
+1
+1
+1
+1
+PREHOOK: query: explain select count(key1) from dest_g21 where value1 > 1 
group by key1, value1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest_g21
+#### A masked pattern was here ####
+POSTHOOK: query: explain select count(key1) from dest_g21 where value1 > 1 
group by key1, value1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dest_g21
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: dest_g21
+                  filterExpr: (value1 > 1.0D) (type: boolean)
+                  Statistics: Num rows: 6 Data size: 64 Basic stats: COMPLETE 
Column stats: COMPLETE
+                  Filter Operator
+                    predicate: (value1 > 1.0D) (type: boolean)
+                    Statistics: Num rows: 6 Data size: 64 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    Select Operator
+                      expressions: key1 (type: int)
+                      outputColumnNames: key1
+                      Statistics: Num rows: 6 Data size: 64 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      Group By Operator
+                        aggregations: count()
+                        keys: key1 (type: int)
+                        mode: hash
+                        outputColumnNames: _col0, _col1
+                        Statistics: Num rows: 3 Data size: 36 Basic stats: 
COMPLETE Column stats: COMPLETE
+                        Reduce Output Operator
+                          key expressions: _col0 (type: int)
+                          sort order: +
+                          Map-reduce partition columns: _col0 (type: int)
+                          Statistics: Num rows: 3 Data size: 36 Basic stats: 
COMPLETE Column stats: COMPLETE
+                          value expressions: _col1 (type: bigint)
+            Execution mode: vectorized, llap
+            LLAP IO: no inputs
+        Reducer 2 
+            Execution mode: vectorized, llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0)
+                keys: KEY._col0 (type: int)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE 
Column stats: COMPLETE
+                Select Operator
+                  expressions: _col1 (type: bigint)
+                  outputColumnNames: _col0
+                  Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE 
Column stats: COMPLETE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 3 Data size: 24 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    table:
+                        input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select count(key1) from dest_g21 where value1 > 1 group by 
key1, value1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest_g21
+#### A masked pattern was here ####
+POSTHOOK: query: select count(key1) from dest_g21 where value1 > 1 group by 
key1, value1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dest_g21
+#### A masked pattern was here ####
+1
+1
+1
+PREHOOK: query: -- t1.key1 is unique even after join therefore group by = group 
by (t1.key1)
+  explain select t1.key1 from dest_g21 t1 join dest_g21 t2 on t1.key1 = 
t2.key1 where t2.value1 > 2 group by t1.key1, t1.value1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest_g21
+#### A masked pattern was here ####
+POSTHOOK: query: -- t1.key1 is unique even after join therefore group by = 
group by (t1.key1)
+  explain select t1.key1 from dest_g21 t1 join dest_g21 t2 on t1.key1 = 
t2.key1 where t2.value1 > 2 group by t1.key1, t1.value1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dest_g21
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: t1
+                  Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE 
Column stats: COMPLETE
+                  Select Operator
+                    expressions: key1 (type: int)
+                    outputColumnNames: _col0
+                    Statistics: Num rows: 6 Data size: 24 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    Reduce Output Operator
+                      key expressions: _col0 (type: int)
+                      sort order: +
+                      Map-reduce partition columns: _col0 (type: int)
+                      Statistics: Num rows: 6 Data size: 24 Basic stats: 
COMPLETE Column stats: COMPLETE
+            Execution mode: vectorized, llap
+            LLAP IO: no inputs
+        Map 3 
+            Map Operator Tree:
+                TableScan
+                  alias: t2
+                  filterExpr: (value1 > 2.0D) (type: boolean)
+                  Statistics: Num rows: 6 Data size: 64 Basic stats: COMPLETE 
Column stats: COMPLETE
+                  Filter Operator
+                    predicate: (value1 > 2.0D) (type: boolean)
+                    Statistics: Num rows: 2 Data size: 24 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    Select Operator
+                      expressions: key1 (type: int)
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 2 Data size: 24 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: int)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: int)
+                        Statistics: Num rows: 2 Data size: 24 Basic stats: 
COMPLETE Column stats: COMPLETE
+            Execution mode: vectorized, llap
+            LLAP IO: no inputs
+        Reducer 2 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Merge Join Operator
+                condition map:
+                     Inner Join 0 to 1
+                keys:
+                  0 _col0 (type: int)
+                  1 _col0 (type: int)
+                outputColumnNames: _col0
+                Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE 
Column stats: COMPLETE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE 
Column stats: COMPLETE
+                  table:
+                      input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select t1.key1 from dest_g21 t1 join dest_g21 t2 on t1.key1 = 
t2.key1 where t2.value1 > 2 group by t1.key1, t1.value1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest_g21
+#### A masked pattern was here ####
+POSTHOOK: query: select t1.key1 from dest_g21 t1 join dest_g21 t2 on t1.key1 = 
t2.key1 where t2.value1 > 2 group by t1.key1, t1.value1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dest_g21
+#### A masked pattern was here ####
+4
+PREHOOK: query: explain select count(t1.key1) from dest_g21 t1 join dest_g21 
t2 on t1.key1 = t2.key1 where t2.value1 > 2 group by t1.key1, t1.value1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest_g21
+#### A masked pattern was here ####
+POSTHOOK: query: explain select count(t1.key1) from dest_g21 t1 join dest_g21 
t2 on t1.key1 = t2.key1 where t2.value1 > 2 group by t1.key1, t1.value1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dest_g21
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE)
+        Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: t1
+                  Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE 
Column stats: COMPLETE
+                  Select Operator
+                    expressions: key1 (type: int)
+                    outputColumnNames: _col0
+                    Statistics: Num rows: 6 Data size: 24 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    Reduce Output Operator
+                      key expressions: _col0 (type: int)
+                      sort order: +
+                      Map-reduce partition columns: _col0 (type: int)
+                      Statistics: Num rows: 6 Data size: 24 Basic stats: 
COMPLETE Column stats: COMPLETE
+            Execution mode: vectorized, llap
+            LLAP IO: no inputs
+        Map 4 
+            Map Operator Tree:
+                TableScan
+                  alias: t2
+                  filterExpr: (value1 > 2.0D) (type: boolean)
+                  Statistics: Num rows: 6 Data size: 64 Basic stats: COMPLETE 
Column stats: COMPLETE
+                  Filter Operator
+                    predicate: (value1 > 2.0D) (type: boolean)
+                    Statistics: Num rows: 2 Data size: 24 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    Select Operator
+                      expressions: key1 (type: int)
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 2 Data size: 24 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: int)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: int)
+                        Statistics: Num rows: 2 Data size: 24 Basic stats: 
COMPLETE Column stats: COMPLETE
+            Execution mode: vectorized, llap
+            LLAP IO: no inputs
+        Reducer 2 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Merge Join Operator
+                condition map:
+                     Inner Join 0 to 1
+                keys:
+                  0 _col0 (type: int)
+                  1 _col0 (type: int)
+                outputColumnNames: _col0
+                Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE 
Column stats: COMPLETE
+                Group By Operator
+                  aggregations: count()
+                  keys: _col0 (type: int)
+                  mode: hash
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE 
Column stats: COMPLETE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: int)
+                    sort order: +
+                    Map-reduce partition columns: _col0 (type: int)
+                    Statistics: Num rows: 1 Data size: 12 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    value expressions: _col1 (type: bigint)
+        Reducer 3 
+            Execution mode: vectorized, llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0)
+                keys: KEY._col0 (type: int)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE 
Column stats: COMPLETE
+                Select Operator
+                  expressions: _col1 (type: bigint)
+                  outputColumnNames: _col0
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: COMPLETE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: COMPLETE
+                    table:
+                        input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select count(t1.key1) from dest_g21 t1 join dest_g21 t2 on 
t1.key1 = t2.key1 where t2.value1 > 2 group by t1.key1, t1.value1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest_g21
+#### A masked pattern was here ####
+POSTHOOK: query: select count(t1.key1) from dest_g21 t1 join dest_g21 t2 on 
t1.key1 = t2.key1 where t2.value1 > 2 group by t1.key1, t1.value1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dest_g21
+#### A masked pattern was here ####
+1
+PREHOOK: query: -- both aggregate and one of the key1 should be removed
+  explain select key1 from (select key1, count(key1) from dest_g21 where 
value1 < 4.5 group by key1, value1) sub
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest_g21
+#### A masked pattern was here ####
+POSTHOOK: query: -- both aggregate and one of the key1 should be removed
+  explain select key1 from (select key1, count(key1) from dest_g21 where 
value1 < 4.5 group by key1, value1) sub
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dest_g21
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        TableScan
+          alias: dest_g21
+          filterExpr: (value1 < 4.5D) (type: boolean)
+          Filter Operator
+            predicate: (value1 < 4.5D) (type: boolean)
+            Select Operator
+              expressions: key1 (type: int)
+              outputColumnNames: _col0
+              ListSink
+
+PREHOOK: query: select key1 from (select key1, count(key1) from dest_g21 where 
value1 < 4.5 group by key1, value1) sub
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest_g21
+#### A masked pattern was here ####
+POSTHOOK: query: select key1 from (select key1, count(key1) from dest_g21 
where value1 < 4.5 group by key1, value1) sub
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dest_g21
+#### A masked pattern was here ####
+1
+2
+3
+4
+PREHOOK: query: -- one of the aggregates will be removed and one of the keys 
will be removed
+  explain select key1, sm from (select key1, count(key1), sum(key1) as sm from 
dest_g21 where value1 < 4.5 group by key1, value1) sub
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest_g21
+#### A masked pattern was here ####
+POSTHOOK: query: -- one of the aggregates will be removed and one of the keys 
will be removed
+  explain select key1, sm from (select key1, count(key1), sum(key1) as sm from 
dest_g21 where value1 < 4.5 group by key1, value1) sub
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dest_g21
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: dest_g21
+                  filterExpr: (value1 < 4.5D) (type: boolean)
+                  Statistics: Num rows: 6 Data size: 64 Basic stats: COMPLETE 
Column stats: COMPLETE
+                  Filter Operator
+                    predicate: (value1 < 4.5D) (type: boolean)
+                    Statistics: Num rows: 6 Data size: 64 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    Select Operator
+                      expressions: key1 (type: int)
+                      outputColumnNames: key1
+                      Statistics: Num rows: 6 Data size: 64 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      Group By Operator
+                        aggregations: sum(key1)
+                        keys: key1 (type: int)
+                        mode: hash
+                        outputColumnNames: _col0, _col1
+                        Statistics: Num rows: 3 Data size: 36 Basic stats: 
COMPLETE Column stats: COMPLETE
+                        Reduce Output Operator
+                          key expressions: _col0 (type: int)
+                          sort order: +
+                          Map-reduce partition columns: _col0 (type: int)
+                          Statistics: Num rows: 3 Data size: 36 Basic stats: 
COMPLETE Column stats: COMPLETE
+                          value expressions: _col1 (type: bigint)
+            Execution mode: vectorized, llap
+            LLAP IO: no inputs
+        Reducer 2 
+            Execution mode: vectorized, llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: sum(VALUE._col0)
+                keys: KEY._col0 (type: int)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE 
Column stats: COMPLETE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE 
Column stats: COMPLETE
+                  table:
+                      input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select key1, sm from (select key1, count(key1), sum(key1) as 
sm from dest_g21 where value1 < 4.5 group by key1, value1) sub
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest_g21
+#### A masked pattern was here ####
+POSTHOOK: query: select key1, sm from (select key1, count(key1), sum(key1) as 
sm from dest_g21 where value1 < 4.5 group by key1, value1) sub
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dest_g21
+#### A masked pattern was here ####
+1      1
+3      3
+4      4
+2      2
+PREHOOK: query: DROP table dest_g21
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@dest_g21
+PREHOOK: Output: default@dest_g21
+POSTHOOK: query: DROP table dest_g21
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@dest_g21
+POSTHOOK: Output: default@dest_g21
+PREHOOK: query: CREATE TABLE tconst(i int NOT NULL disable rely, j INT NOT 
NULL disable norely, d_year string)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@tconst
+POSTHOOK: query: CREATE TABLE tconst(i int NOT NULL disable rely, j INT NOT 
NULL disable norely, d_year string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@tconst
+PREHOOK: query: INSERT INTO tconst values(1, 1, '2001'), (2, null, '2002'), 
(3, 3, '2010')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@tconst
+POSTHOOK: query: INSERT INTO tconst values(1, 1, '2001'), (2, null, '2002'), 
(3, 3, '2010')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@tconst
+POSTHOOK: Lineage: tconst.d_year SCRIPT []
+POSTHOOK: Lineage: tconst.i SCRIPT []
+POSTHOOK: Lineage: tconst.j SCRIPT []
+PREHOOK: query: explain select i, j from tconst where i is not null group by 
i,j, d_year
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tconst
+#### A masked pattern was here ####
+POSTHOOK: query: explain select i, j from tconst where i is not null group by 
i,j, d_year
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tconst
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: tconst
+                  Statistics: Num rows: 3 Data size: 288 Basic stats: COMPLETE 
Column stats: COMPLETE
+                  Select Operator
+                    expressions: i (type: int), j (type: int), d_year (type: 
string)
+                    outputColumnNames: i, j, d_year
+                    Statistics: Num rows: 3 Data size: 288 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    Group By Operator
+                      keys: i (type: int), j (type: int), d_year (type: string)
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2
+                      Statistics: Num rows: 1 Data size: 96 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: int), _col1 (type: int), 
_col2 (type: string)
+                        sort order: +++
+                        Map-reduce partition columns: _col0 (type: int), _col1 
(type: int), _col2 (type: string)
+                        Statistics: Num rows: 1 Data size: 96 Basic stats: 
COMPLETE Column stats: COMPLETE
+            Execution mode: vectorized, llap
+            LLAP IO: no inputs
+        Reducer 2 
+            Execution mode: vectorized, llap
+            Reduce Operator Tree:
+              Group By Operator
+                keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 
(type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE 
Column stats: COMPLETE
+                Select Operator
+                  expressions: _col0 (type: int), _col1 (type: int)
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: COMPLETE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: COMPLETE
+                    table:
+                        input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select i, j from tconst where i is not null group by i,j, 
d_year
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tconst
+#### A masked pattern was here ####
+POSTHOOK: query: select i, j from tconst where i is not null group by i,j, 
d_year
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tconst
+#### A masked pattern was here ####
+3      3
+2      NULL
+1      1
+PREHOOK: query: explain select i, j from tconst where i IS NOT NULL and j IS 
NOT NULL group by i,j, d_year
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tconst
+#### A masked pattern was here ####
+POSTHOOK: query: explain select i, j from tconst where i IS NOT NULL and j IS 
NOT NULL group by i,j, d_year
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tconst
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: tconst
+                  filterExpr: j is not null (type: boolean)
+                  Statistics: Num rows: 3 Data size: 288 Basic stats: COMPLETE 
Column stats: COMPLETE
+                  Filter Operator
+                    predicate: j is not null (type: boolean)
+                    Statistics: Num rows: 2 Data size: 192 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    Group By Operator
+                      keys: i (type: int), j (type: int), d_year (type: string)
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2
+                      Statistics: Num rows: 1 Data size: 96 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: int), _col1 (type: int), 
_col2 (type: string)
+                        sort order: +++
+                        Map-reduce partition columns: _col0 (type: int), _col1 
(type: int), _col2 (type: string)
+                        Statistics: Num rows: 1 Data size: 96 Basic stats: 
COMPLETE Column stats: COMPLETE
+            Execution mode: vectorized, llap
+            LLAP IO: no inputs
+        Reducer 2 
+            Execution mode: vectorized, llap
+            Reduce Operator Tree:
+              Group By Operator
+                keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 
(type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE 
Column stats: COMPLETE
+                Select Operator
+                  expressions: _col0 (type: int), _col1 (type: int)
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: COMPLETE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: COMPLETE
+                    table:
+                        input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select i, j from tconst where i IS NOT NULL and j IS NOT NULL 
group by i,j, d_year
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tconst
+#### A masked pattern was here ####
+POSTHOOK: query: select i, j from tconst where i IS NOT NULL and j IS NOT NULL 
group by i,j, d_year
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tconst
+#### A masked pattern was here ####
+3      3
+1      1
+PREHOOK: query: explain select i,j from tconst where i is not null OR j IS NOT 
NULL group by i, j, d_year
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tconst
+#### A masked pattern was here ####
+POSTHOOK: query: explain select i,j from tconst where i is not null OR j IS 
NOT NULL group by i, j, d_year
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tconst
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: tconst
+                  Statistics: Num rows: 3 Data size: 288 Basic stats: COMPLETE 
Column stats: COMPLETE
+                  Select Operator
+                    expressions: i (type: int), j (type: int), d_year (type: 
string)
+                    outputColumnNames: i, j, d_year
+                    Statistics: Num rows: 3 Data size: 288 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    Group By Operator
+                      keys: i (type: int), j (type: int), d_year (type: string)
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2
+                      Statistics: Num rows: 1 Data size: 96 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: int), _col1 (type: int), 
_col2 (type: string)
+                        sort order: +++
+                        Map-reduce partition columns: _col0 (type: int), _col1 
(type: int), _col2 (type: string)
+                        Statistics: Num rows: 1 Data size: 96 Basic stats: 
COMPLETE Column stats: COMPLETE
+            Execution mode: vectorized, llap
+            LLAP IO: no inputs
+        Reducer 2 
+            Execution mode: vectorized, llap
+            Reduce Operator Tree:
+              Group By Operator
+                keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 
(type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE 
Column stats: COMPLETE
+                Select Operator
+                  expressions: _col0 (type: int), _col1 (type: int)
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: COMPLETE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: COMPLETE
+                    table:
+                        input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select i,j from tconst where i is not null OR j IS NOT NULL 
group by i, j, d_year
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tconst
+#### A masked pattern was here ####
+POSTHOOK: query: select i,j from tconst where i is not null OR j IS NOT NULL 
group by i, j, d_year
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tconst
+#### A masked pattern was here ####
+3      3
+2      NULL
+1      1
+PREHOOK: query: explain select sum(t1.i) from tconst t1 join tconst t2 on 
t1.i=t2.j group by t1.i, t1.d_year
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tconst
+#### A masked pattern was here ####
+POSTHOOK: query: explain select sum(t1.i) from tconst t1 join tconst t2 on 
t1.i=t2.j group by t1.i, t1.d_year
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tconst
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE)
+        Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: t1
+                  Statistics: Num rows: 3 Data size: 276 Basic stats: COMPLETE 
Column stats: COMPLETE
+                  Select Operator
+                    expressions: i (type: int), d_year (type: string)
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 3 Data size: 276 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    Reduce Output Operator
+                      key expressions: _col0 (type: int)
+                      sort order: +
+                      Map-reduce partition columns: _col0 (type: int)
+                      Statistics: Num rows: 3 Data size: 276 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      value expressions: _col1 (type: string)
+            Execution mode: vectorized, llap
+            LLAP IO: no inputs
+        Map 4 
+            Map Operator Tree:
+                TableScan
+                  alias: t2
+                  filterExpr: j is not null (type: boolean)
+                  Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE 
Column stats: COMPLETE
+                  Filter Operator
+                    predicate: j is not null (type: boolean)
+                    Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE 
Column stats: COMPLETE
+                    Select Operator
+                      expressions: j (type: int)
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 2 Data size: 8 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: int)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: int)
+                        Statistics: Num rows: 2 Data size: 8 Basic stats: 
COMPLETE Column stats: COMPLETE
+            Execution mode: vectorized, llap
+            LLAP IO: no inputs
+        Reducer 2 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Merge Join Operator
+                condition map:
+                     Inner Join 0 to 1
+                keys:
+                  0 _col0 (type: int)
+                  1 _col0 (type: int)
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE 
Column stats: COMPLETE
+                Group By Operator
+                  aggregations: sum(_col0)
+                  keys: _col0 (type: int), _col1 (type: string)
+                  mode: hash
+                  outputColumnNames: _col0, _col1, _col2
+                  Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE 
Column stats: COMPLETE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: int), _col1 (type: string)
+                    sort order: ++
+                    Map-reduce partition columns: _col0 (type: int), _col1 
(type: string)
+                    Statistics: Num rows: 1 Data size: 100 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    value expressions: _col2 (type: bigint)
+        Reducer 3 
+            Execution mode: vectorized, llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: sum(VALUE._col0)
+                keys: KEY._col0 (type: int), KEY._col1 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE 
Column stats: COMPLETE
+                Select Operator
+                  expressions: _col2 (type: bigint)
+                  outputColumnNames: _col0
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: COMPLETE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: COMPLETE
+                    table:
+                        input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select sum(t1.i) from tconst t1 join tconst t2 on t1.i=t2.j 
group by t1.i, t1.d_year
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tconst
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(t1.i) from tconst t1 join tconst t2 on t1.i=t2.j 
group by t1.i, t1.d_year
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tconst
+#### A masked pattern was here ####
+1
+3
+PREHOOK: query: explain select sum(t1.i) from tconst t1 join tconst t2 on 
t1.i=t2.i group by t1.i, t1.d_year
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tconst
+#### A masked pattern was here ####
+POSTHOOK: query: explain select sum(t1.i) from tconst t1 join tconst t2 on 
t1.i=t2.i group by t1.i, t1.d_year
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tconst
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE)
+        Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: t1
+                  Statistics: Num rows: 3 Data size: 276 Basic stats: COMPLETE 
Column stats: COMPLETE
+                  Select Operator
+                    expressions: i (type: int), d_year (type: string)
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 3 Data size: 276 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    Reduce Output Operator
+                      key expressions: _col0 (type: int)
+                      sort order: +
+                      Map-reduce partition columns: _col0 (type: int)
+                      Statistics: Num rows: 3 Data size: 276 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      value expressions: _col1 (type: string)
+            Execution mode: vectorized, llap
+            LLAP IO: no inputs
+        Map 4 
+            Map Operator Tree:
+                TableScan
+                  alias: t2
+                  Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE 
Column stats: COMPLETE
+                  Select Operator
+                    expressions: i (type: int)
+                    outputColumnNames: _col0
+                    Statistics: Num rows: 3 Data size: 12 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    Reduce Output Operator
+                      key expressions: _col0 (type: int)
+                      sort order: +
+                      Map-reduce partition columns: _col0 (type: int)
+                      Statistics: Num rows: 3 Data size: 12 Basic stats: 
COMPLETE Column stats: COMPLETE
+            Execution mode: vectorized, llap
+            LLAP IO: no inputs
+        Reducer 2 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Merge Join Operator
+                condition map:
+                     Inner Join 0 to 1
+                keys:
+                  0 _col0 (type: int)
+                  1 _col0 (type: int)
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 3 Data size: 276 Basic stats: COMPLETE 
Column stats: COMPLETE
+                Group By Operator
+                  aggregations: sum(_col0)
+                  keys: _col0 (type: int), _col1 (type: string)
+                  mode: hash
+                  outputColumnNames: _col0, _col1, _col2
+                  Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE 
Column stats: COMPLETE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: int), _col1 (type: string)
+                    sort order: ++
+                    Map-reduce partition columns: _col0 (type: int), _col1 
(type: string)
+                    Statistics: Num rows: 1 Data size: 100 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    value expressions: _col2 (type: bigint)
+        Reducer 3 
+            Execution mode: vectorized, llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: sum(VALUE._col0)
+                keys: KEY._col0 (type: int), KEY._col1 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE 
Column stats: COMPLETE
+                Select Operator
+                  expressions: _col2 (type: bigint)
+                  outputColumnNames: _col0
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: COMPLETE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: COMPLETE
+                    table:
+                        input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select sum(t1.i) from tconst t1 join tconst t2 on t1.i=t2.i 
group by t1.i, t1.d_year
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tconst
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(t1.i) from tconst t1 join tconst t2 on t1.i=t2.i 
group by t1.i, t1.d_year
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tconst
+#### A masked pattern was here ####
+1
+2
+3
+PREHOOK: query: DROP TABLE tconst
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@tconst
+PREHOOK: Output: default@tconst
+POSTHOOK: query: DROP TABLE tconst
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@tconst
+POSTHOOK: Output: default@tconst
+PREHOOK: query: create table dest_g21 (key1 int NOT NULL disable rely, value1 
double, UNIQUE(key1) disable rely)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@dest_g21
+POSTHOOK: query: create table dest_g21 (key1 int NOT NULL disable rely, value1 
double, UNIQUE(key1) disable rely)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@dest_g21
+PREHOOK: query: explain select key1 from dest_g21 group by key1, value1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest_g21
+#### A masked pattern was here ####
+POSTHOOK: query: explain select key1 from dest_g21 group by key1, value1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dest_g21
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        TableScan
+          alias: dest_g21
+          Select Operator
+            expressions: key1 (type: int)
+            outputColumnNames: _col0
+            ListSink
+
+PREHOOK: query: create table dest_g24 (key1 int , value1 double, UNIQUE(key1) 
disable rely)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@dest_g24
+POSTHOOK: query: create table dest_g24 (key1 int , value1 double, UNIQUE(key1) 
disable rely)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@dest_g24
+PREHOOK: query: explain select key1 from dest_g24 group by key1, value1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest_g24
+#### A masked pattern was here ####
+POSTHOOK: query: explain select key1 from dest_g24 group by key1, value1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dest_g24
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: dest_g24
+                  Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE 
Column stats: NONE
+                  Select Operator
+                    expressions: key1 (type: int), value1 (type: double)
+                    outputColumnNames: key1, value1
+                    Statistics: Num rows: 1 Data size: 12 Basic stats: 
COMPLETE Column stats: NONE
+                    Group By Operator
+                      keys: key1 (type: int), value1 (type: double)
+                      mode: hash
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 1 Data size: 12 Basic stats: 
COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: int), _col1 (type: 
double)
+                        sort order: ++
+                        Map-reduce partition columns: _col0 (type: int), _col1 
(type: double)
+                        Statistics: Num rows: 1 Data size: 12 Basic stats: 
COMPLETE Column stats: NONE
+            Execution mode: vectorized, llap
+            LLAP IO: no inputs
+        Reducer 2 
+            Execution mode: vectorized, llap
+            Reduce Operator Tree:
+              Group By Operator
+                keys: KEY._col0 (type: int), KEY._col1 (type: double)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE 
Column stats: NONE
+                Select Operator
+                  expressions: _col0 (type: int)
+                  outputColumnNames: _col0
+                  Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE 
Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 1 Data size: 12 Basic stats: 
COMPLETE Column stats: NONE
+                    table:
+                        input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: DROP TABLE dest_g21
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@dest_g21
+PREHOOK: Output:


<TRUNCATED>

[2/3] hive git commit: HIVE-17043: Remove non unique columns from group by keys if not referenced later (Vineet Garg, reviewed by Jesus Camacho Rodriguez)

Reply via email to