This is an automated email from the ASF dual-hosted git repository.

vgarg pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
     new 734cc2c  HIVE-21760: Sharedwork optimization should be bypassed for SMB joins (Vineet Garg, reviewed by Jesus Camacho Rodriguez)
734cc2c is described below

commit 734cc2c6a80b99b7c692b20b0df7d7d59cbaffd5
Author: Vineet Garg <vg...@apache.org>
AuthorDate: Tue May 21 11:27:19 2019 -0700

    HIVE-21760: Sharedwork optimization should be bypassed for SMB joins (Vineet Garg, reviewed by Jesus Camacho Rodriguez)
---
 .../hive/ql/optimizer/SharedWorkOptimizer.java     |  12 +-
 ql/src/test/queries/clientpositive/sharedwork.q    |  30 ++
 .../results/clientpositive/llap/sharedwork.q.out   | 547 +++++++++++++++++++++
 3 files changed, 583 insertions(+), 6 deletions(-)

diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SharedWorkOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SharedWorkOptimizer.java
index f70a6dc..40a0c4e 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SharedWorkOptimizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SharedWorkOptimizer.java
@@ -1374,21 +1374,21 @@ public class SharedWorkOptimizer extends Transform {
     // 1.1. None of the works that we are merging can contain a Union
     // operator. This is not supported yet as we might end up with cycles in
     // the Tez DAG.
-    // 1.2. There cannot be more than one DummyStore operator in the new resulting
-    // work when the operators are merged. This is due to an assumption in
-    // MergeJoinProc that needs to be further explored.
+    // 1.2. There cannot be any DummyStore operator in the works being merged.
+    // This is due to an assumption in MergeJoinProc that needs to be further explored.
+    // This is also due to an assumption in task generation.
     // If any of these conditions are not met, we cannot merge.
     // TODO: Extend rule so it can be applied for these cases.
     final Set<Operator<?>> workOps1 = findWorkOperators(optimizerCache, op1);
     final Set<Operator<?>> workOps2 = findWorkOperators(optimizerCache, op2);
-    boolean foundDummyStoreOp = false;
     for (Operator<?> op : workOps1) {
       if (op instanceof UnionOperator) {
         // We cannot merge (1.1)
         return false;
       }
       if (op instanceof DummyStoreOperator) {
-        foundDummyStoreOp = true;
+        // We cannot merge (1.2)
+        return false;
       }
     }
     for (Operator<?> op : workOps2) {
@@ -1396,7 +1396,7 @@
         // We cannot merge (1.1)
         return false;
       }
-      if (foundDummyStoreOp && op instanceof DummyStoreOperator) {
+      if (op instanceof DummyStoreOperator) {
         // We cannot merge (1.2)
         return false;
       }
diff --git a/ql/src/test/queries/clientpositive/sharedwork.q b/ql/src/test/queries/clientpositive/sharedwork.q
index 404a29a..ac65c23 100644
--- a/ql/src/test/queries/clientpositive/sharedwork.q
+++ b/ql/src/test/queries/clientpositive/sharedwork.q
@@ -1,3 +1,4 @@
+--! qt:dataset:part
 set hive.optimize.shared.work.extended=false;
 
 create table MY_TABLE_0001 (
@@ -38,3 +39,32 @@ WHERE 1=1
 AND (cast(Table__323.col_7 AS DOUBLE) IS NOT NULL OR Table__323.col_7 IS NULL)
 AND CAST(Table__323.col_3 AS DATE) BETWEEN '2018-07-01' AND '2019-01-23'
 AND Table__323.col_20 IN ('part1','part2','part3');
+
+
+set hive.optimize.shared.work.extended=true;
+explain extended
+SELECT `t`.`p_name`
+FROM (SELECT `p_name`, `p_type`, `p_size` + 1 AS `size`
+FROM `part`) AS `t`
+LEFT JOIN (SELECT `t5`.`size`, `t2`.`c`, `t2`.`ck`
+FROM (SELECT `p_size` + 1 AS `+`, COUNT(*) AS `c`, COUNT(`p_type`) AS `ck`
+FROM `part`
+WHERE `p_size` IS NOT NULL
+GROUP BY `p_size` + 1) AS `t2`
+INNER JOIN (SELECT `p_size` + 1 AS `size`
+FROM `part`
+WHERE `p_size` IS NOT NULL
+GROUP BY `p_size` + 1) AS `t5` ON `t2`.`+` = `t5`.`size`) AS `t6` ON `t`.`size` = `t6`.`size`
+LEFT JOIN (SELECT `t9`.`p_type`, `t12`.`size`, TRUE AS `$f2`
+FROM (SELECT `p_type`, `p_size` + 1 AS `+`
+FROM `part`
+WHERE `p_size` IS NOT NULL AND `p_type` IS NOT NULL
+GROUP BY `p_type`, `p_size` + 1) AS `t9`
+INNER JOIN (SELECT `p_size` + 1 AS `size`
+FROM `part`
+WHERE `p_size` IS NOT NULL
+GROUP BY `p_size` + 1) AS `t12` ON `t9`.`+` = `t12`.`size`) AS `t14` ON `t`.`p_type` = `t14`.`p_type` AND `t`.`size` = `t14`.`size`
+WHERE (`t14`.`$f2` IS NULL OR `t6`.`c` = 0 OR `t6`.`c` IS NULL)
+ AND (`t`.`p_type` IS NOT NULL OR `t6`.`c` = 0 OR `t6`.`c` IS NULL OR `t14`.`$f2` IS NOT NULL)
+ AND (`t6`.`ck` < `t6`.`c` IS NOT TRUE OR `t6`.`c` = 0 OR `t6`.`c` IS NULL OR `t14`.`$f2` IS NOT NULL
+ OR `t`.`p_type` IS NULL);
\ No newline at end of file
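In short, the SharedWorkOptimizer change above amounts to this: a merge of two works is now rejected as soon as either work contains a DummyStore operator, rather than only when both works contain one. The sketch below is a minimal, self-contained illustration of that pre-condition. The Operator, UnionOperator, and DummyStoreOperator types here are simplified stand-ins invented for the example, not Hive's real classes in org.apache.hadoop.hive.ql.exec, and canMerge() condenses only the two checks touched by this commit.

import java.util.List;
import java.util.Set;

// Simplified stand-ins for Hive's operator classes; the real check lives in
// SharedWorkOptimizer and uses org.apache.hadoop.hive.ql.exec.UnionOperator
// and org.apache.hadoop.hive.ql.exec.DummyStoreOperator.
interface Operator {}
class UnionOperator implements Operator {}
class DummyStoreOperator implements Operator {}

public class MergePreCheck {

  // Mirrors the patched pre-condition: reject the merge if EITHER work
  // contains a DummyStore operator (condition 1.2), instead of only when
  // both works contain one, as the code did before this commit.
  static boolean canMerge(Set<Operator> workOps1, Set<Operator> workOps2) {
    for (Set<Operator> workOps : List.of(workOps1, workOps2)) {
      for (Operator op : workOps) {
        if (op instanceof UnionOperator) {
          return false; // cannot merge (1.1): Union may create cycles in the Tez DAG
        }
        if (op instanceof DummyStoreOperator) {
          return false; // cannot merge (1.2): DummyStore marks an SMB join input
        }
      }
    }
    return true;
  }

  public static void main(String[] args) {
    Set<Operator> smbWork = Set.of(new DummyStoreOperator());
    Set<Operator> plainWork = Set.of();
    // Before HIVE-21760 this pair would have been considered mergeable;
    // after the fix it is rejected.
    System.out.println(canMerge(smbWork, plainWork)); // prints: false
  }
}

Since DummyStore operators are what the planner places on the small-table side of a sort-merge-bucket (SMB) join, rejecting any work that contains one is what makes the optimization bypass SMB joins, per the commit title. The test change below exercises this path.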
diff --git a/ql/src/test/results/clientpositive/llap/sharedwork.q.out b/ql/src/test/results/clientpositive/llap/sharedwork.q.out
index f46c132..e3bf830 100644
--- a/ql/src/test/results/clientpositive/llap/sharedwork.q.out
+++ b/ql/src/test/results/clientpositive/llap/sharedwork.q.out
@@ -575,3 +575,550 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
+PREHOOK: query: explain extended
+SELECT `t`.`p_name`
+FROM (SELECT `p_name`, `p_type`, `p_size` + 1 AS `size`
+FROM `part`) AS `t`
+LEFT JOIN (SELECT `t5`.`size`, `t2`.`c`, `t2`.`ck`
+FROM (SELECT `p_size` + 1 AS `+`, COUNT(*) AS `c`, COUNT(`p_type`) AS `ck`
+FROM `part`
+WHERE `p_size` IS NOT NULL
+GROUP BY `p_size` + 1) AS `t2`
+INNER JOIN (SELECT `p_size` + 1 AS `size`
+FROM `part`
+WHERE `p_size` IS NOT NULL
+GROUP BY `p_size` + 1) AS `t5` ON `t2`.`+` = `t5`.`size`) AS `t6` ON `t`.`size` = `t6`.`size`
+LEFT JOIN (SELECT `t9`.`p_type`, `t12`.`size`, TRUE AS `$f2`
+FROM (SELECT `p_type`, `p_size` + 1 AS `+`
+FROM `part`
+WHERE `p_size` IS NOT NULL AND `p_type` IS NOT NULL
+GROUP BY `p_type`, `p_size` + 1) AS `t9`
+INNER JOIN (SELECT `p_size` + 1 AS `size`
+FROM `part`
+WHERE `p_size` IS NOT NULL
+GROUP BY `p_size` + 1) AS `t12` ON `t9`.`+` = `t12`.`size`) AS `t14` ON `t`.`p_type` = `t14`.`p_type` AND `t`.`size` = `t14`.`size`
+WHERE (`t14`.`$f2` IS NULL OR `t6`.`c` = 0 OR `t6`.`c` IS NULL)
+ AND (`t`.`p_type` IS NOT NULL OR `t6`.`c` = 0 OR `t6`.`c` IS NULL OR `t14`.`$f2` IS NOT NULL)
+ AND (`t6`.`ck` < `t6`.`c` IS NOT TRUE OR `t6`.`c` = 0 OR `t6`.`c` IS NULL OR `t14`.`$f2` IS NOT NULL
+ OR `t`.`p_type` IS NULL)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part
+#### A masked pattern was here ####
+POSTHOOK: query: explain extended
+SELECT `t`.`p_name`
+FROM (SELECT `p_name`, `p_type`, `p_size` + 1 AS `size`
+FROM `part`) AS `t`
+LEFT JOIN (SELECT `t5`.`size`, `t2`.`c`, `t2`.`ck`
+FROM (SELECT `p_size` + 1 AS `+`, COUNT(*) AS `c`, COUNT(`p_type`) AS `ck`
+FROM `part`
+WHERE `p_size` IS NOT NULL
+GROUP BY `p_size` + 1) AS `t2`
+INNER JOIN (SELECT `p_size` + 1 AS `size`
+FROM `part`
+WHERE `p_size` IS NOT NULL
+GROUP BY `p_size` + 1) AS `t5` ON `t2`.`+` = `t5`.`size`) AS `t6` ON `t`.`size` = `t6`.`size`
+LEFT JOIN (SELECT `t9`.`p_type`, `t12`.`size`, TRUE AS `$f2`
+FROM (SELECT `p_type`, `p_size` + 1 AS `+`
+FROM `part`
+WHERE `p_size` IS NOT NULL AND `p_type` IS NOT NULL
+GROUP BY `p_type`, `p_size` + 1) AS `t9`
+INNER JOIN (SELECT `p_size` + 1 AS `size`
+FROM `part`
+WHERE `p_size` IS NOT NULL
+GROUP BY `p_size` + 1) AS `t12` ON `t9`.`+` = `t12`.`size`) AS `t14` ON `t`.`p_type` = `t14`.`p_type` AND `t`.`size` = `t14`.`size`
+WHERE (`t14`.`$f2` IS NULL OR `t6`.`c` = 0 OR `t6`.`c` IS NULL)
+ AND (`t`.`p_type` IS NOT NULL OR `t6`.`c` = 0 OR `t6`.`c` IS NULL OR `t14`.`$f2` IS NOT NULL)
+ AND (`t6`.`ck` < `t6`.`c` IS NOT TRUE OR `t6`.`c` = 0 OR `t6`.`c` IS NULL OR `t14`.`$f2` IS NOT NULL
+ OR `t`.`p_type` IS NULL)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part
+#### A masked pattern was here ####
+OPTIMIZED SQL: SELECT `p_name`
+FROM (SELECT `part`.`p_name`, `part`.`p_type`, `part`.`p_size` + 1 AS `size`, `t5`.`size` AS `size0`, `t5`.`c`, `t5`.`ck`, `t12`.`p_type` AS `p_type0`, `t12`.`size` AS `size1`, `t12`.`$f2`
+FROM `default`.`part`
+LEFT JOIN (SELECT `t4`.`$f0` AS `size`, `t1`.`$f1` AS `c`, `t1`.`$f2` AS `ck`
+FROM (SELECT `p_size` + 1 AS `$f0`, COUNT(*) AS `$f1`, COUNT(`p_type`) AS `$f2`
+FROM `default`.`part`
+WHERE `p_size` IS NOT NULL
+GROUP BY `p_size` + 1) AS `t1`
+INNER JOIN (SELECT `p_size` + 1 AS `$f0`
+FROM `default`.`part`
+WHERE `p_size` IS NOT NULL
+GROUP BY `p_size` + 1) AS `t4` ON `t1`.`$f0` = `t4`.`$f0`) AS `t5` ON `part`.`p_size` + 1 = `t5`.`size`
+LEFT JOIN (SELECT `t8`.`$f0` AS `p_type`, `t11`.`$f0` AS `size`, TRUE AS `$f2`
+FROM (SELECT `p_type` AS `$f0`, `p_size` + 1 AS `$f1`
+FROM `default`.`part`
+WHERE `p_size` IS NOT NULL AND `p_type` IS NOT NULL
+GROUP BY `p_type`, `p_size` + 1) AS `t8`
+INNER JOIN (SELECT `p_size` + 1 AS `$f0`
+FROM `default`.`part`
+WHERE `p_size` IS NOT NULL
+GROUP BY `p_size` + 1) AS `t11` ON `t8`.`$f1` = `t11`.`$f0`) AS `t12` ON `part`.`p_type` = `t12`.`p_type` AND `part`.`p_size` + 1 = `t12`.`size`) AS `t13`
+WHERE (`t13`.`$f2` IS NULL OR `t13`.`c` = 0 OR `t13`.`c` IS NULL) AND (`t13`.`p_type` IS NOT NULL OR `t13`.`c` = 0 OR `t13`.`c` IS NULL OR `t13`.`$f2` IS NOT NULL) AND (`t13`.`ck` < (`t13`.`c` IS NOT TRUE) OR `t13`.`c` = 0 OR `t13`.`c` IS NULL OR `t13`.`$f2` IS NOT NULL OR `t13`.`p_type` IS NULL)
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE)
+        Reducer 3 <- Map 10 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE)
+        Reducer 4 <- Reducer 3 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE)
+        Reducer 5 <- Map 1 (SIMPLE_EDGE)
+        Reducer 6 <- Reducer 5 (SIMPLE_EDGE), Reducer 9 (ONE_TO_ONE_EDGE)
+        Reducer 9 <- Map 7 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: part
+                  filterExpr: (p_size is not null or (p_size is not null and p_type is not null)) (type: boolean)
+                  Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE
+                  GatherStats: false
+                  Filter Operator
+                    isSamplingPred: false
+                    predicate: p_size is not null (type: boolean)
+                    Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE
+                    Select Operator
+                      expressions: (p_size + 1) (type: int), p_type (type: string)
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE
+                      Group By Operator
+                        aggregations: count(), count(_col1)
+                        keys: _col0 (type: int)
+                        minReductionHashAggr: 0.0
+                        mode: hash
+                        outputColumnNames: _col0, _col1, _col2
+                        Statistics: Num rows: 13 Data size: 260 Basic stats: COMPLETE Column stats: COMPLETE
+                        Reduce Output Operator
+                          key expressions: _col0 (type: int)
+                          null sort order: a
+                          sort order: +
+                          Map-reduce partition columns: _col0 (type: int)
+                          Statistics: Num rows: 13 Data size: 260 Basic stats: COMPLETE Column stats: COMPLETE
+                          tag: -1
+                          value expressions: _col1 (type: bigint), _col2 (type: bigint)
+                          auto parallelism: true
+                  Filter Operator
+                    isSamplingPred: false
+                    predicate: (p_size is not null and p_type is not null) (type: boolean)
+                    Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE
+                    Select Operator
+                      expressions: p_type (type: string), (p_size + 1) (type: int)
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE
+                      Group By Operator
+                        keys: _col1 (type: int), _col0 (type: string)
+                        minReductionHashAggr: 0.0
+                        mode: hash
+                        outputColumnNames: _col0, _col1
+                        Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE
+                        Reduce Output Operator
+                          key expressions: _col0 (type: int), _col1 (type: string)
+                          null sort order: aa
+                          sort order: ++
+                          Map-reduce partition columns: _col0 (type: int), _col1 (type: string)
+                          Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE
+                          tag: -1
+                          auto parallelism: true
+            Execution mode: vectorized, llap
+            LLAP IO: no inputs
+            Path -> Alias:
+#### A masked pattern was here ####
+            Path -> Partition:
+#### A masked pattern was here ####
+                Partition
+                  base file name: part
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  properties:
+                    COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"p_brand":"true","p_comment":"true","p_container":"true","p_mfgr":"true","p_name":"true","p_partkey":"true","p_retailprice":"true","p_size":"true","p_type":"true"}}
+                    bucket_count -1
+                    bucketing_version 2
+                    column.name.delimiter ,
+                    columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment
+                    columns.comments 
+                    columns.types int:string:string:string:string:int:string:double:string
+#### A masked pattern was here ####
+                    name default.part
+                    numFiles 1
+                    numRows 26
+                    rawDataSize 3147
+                    serialization.ddl struct part { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
+                    serialization.format 1
+                    serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    totalSize 3173
+#### A masked pattern was here ####
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                
+                    input format: org.apache.hadoop.mapred.TextInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                    properties:
+                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"p_brand":"true","p_comment":"true","p_container":"true","p_mfgr":"true","p_name":"true","p_partkey":"true","p_retailprice":"true","p_size":"true","p_type":"true"}}
+                      bucket_count -1
+                      bucketing_version 2
+                      column.name.delimiter ,
+                      columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment
+                      columns.comments 
+                      columns.types int:string:string:string:string:int:string:double:string
+#### A masked pattern was here ####
+                      name default.part
+                      numFiles 1
+                      numRows 26
+                      rawDataSize 3147
+                      serialization.ddl struct part { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
+                      serialization.format 1
+                      serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                      totalSize 3173
+#### A masked pattern was here ####
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    name: default.part
+                  name: default.part
+            Truncated Path -> Alias:
+              /part [part]
+        Map 10 
+            Map Operator Tree:
+                TableScan
+                  alias: part
+                  Statistics: Num rows: 26 Data size: 5954 Basic stats: COMPLETE Column stats: COMPLETE
+                  GatherStats: false
+                  Reduce Output Operator
+                    key expressions: (p_size + 1) (type: int)
+                    null sort order: a
+                    sort order: +
+                    Map-reduce partition columns: (p_size + 1) (type: int)
+                    Statistics: Num rows: 26 Data size: 5954 Basic stats: COMPLETE Column stats: COMPLETE
+                    tag: 0
+                    value expressions: p_name (type: string), p_type (type: string), p_size (type: int)
+                    auto parallelism: true
+            Execution mode: vectorized, llap
+            LLAP IO: no inputs
+            Path -> Alias:
+#### A masked pattern was here ####
+            Path -> Partition:
+#### A masked pattern was here ####
+                Partition
+                  base file name: part
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  properties:
+                    COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"p_brand":"true","p_comment":"true","p_container":"true","p_mfgr":"true","p_name":"true","p_partkey":"true","p_retailprice":"true","p_size":"true","p_type":"true"}}
+                    bucket_count -1
+                    bucketing_version 2
+                    column.name.delimiter ,
+                    columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment
+                    columns.comments 
+                    columns.types int:string:string:string:string:int:string:double:string
+#### A masked pattern was here ####
+                    name default.part
+                    numFiles 1
+                    numRows 26
+                    rawDataSize 3147
+                    serialization.ddl struct part { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
+                    serialization.format 1
+                    serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    totalSize 3173
+#### A masked pattern was here ####
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                
+                    input format: org.apache.hadoop.mapred.TextInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                    properties:
+                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"p_brand":"true","p_comment":"true","p_container":"true","p_mfgr":"true","p_name":"true","p_partkey":"true","p_retailprice":"true","p_size":"true","p_type":"true"}}
+                      bucket_count -1
+                      bucketing_version 2
+                      column.name.delimiter ,
+                      columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment
+                      columns.comments 
+                      columns.types int:string:string:string:string:int:string:double:string
+#### A masked pattern was here ####
+                      name default.part
+                      numFiles 1
+                      numRows 26
+                      rawDataSize 3147
+                      serialization.ddl struct part { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
+                      serialization.format 1
+                      serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                      totalSize 3173
+#### A masked pattern was here ####
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    name: default.part
+                  name: default.part
+            Truncated Path -> Alias:
+              /part [part]
+        Map 7 
+            Map Operator Tree:
+                TableScan
+                  alias: part
+                  filterExpr: p_size is not null (type: boolean)
+                  Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE
+                  GatherStats: false
+                  Filter Operator
+                    isSamplingPred: false
+                    predicate: p_size is not null (type: boolean)
+                    Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE
+                    Select Operator
+                      expressions: (p_size + 1) (type: int)
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE
+                      Group By Operator
+                        keys: _col0 (type: int)
+                        minReductionHashAggr: 0.0
+                        mode: hash
+                        outputColumnNames: _col0
+                        Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE
+                        Reduce Output Operator
+                          key expressions: _col0 (type: int)
+                          null sort order: a
+                          sort order: +
+                          Map-reduce partition columns: _col0 (type: int)
+                          Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE
+                          tag: -1
+                          auto parallelism: true
+                        Reduce Output Operator
+                          key expressions: _col0 (type: int)
+                          null sort order: a
+                          sort order: +
+                          Map-reduce partition columns: _col0 (type: int)
+                          Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE
+                          tag: -1
+                          auto parallelism: false
+            Execution mode: vectorized, llap
+            LLAP IO: no inputs
+            Path -> Alias:
+#### A masked pattern was here ####
+            Path -> Partition:
+#### A masked pattern was here ####
+                Partition
+                  base file name: part
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  properties:
+                    COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"p_brand":"true","p_comment":"true","p_container":"true","p_mfgr":"true","p_name":"true","p_partkey":"true","p_retailprice":"true","p_size":"true","p_type":"true"}}
+                    bucket_count -1
+                    bucketing_version 2
+                    column.name.delimiter ,
+                    columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment
+                    columns.comments 
+                    columns.types int:string:string:string:string:int:string:double:string
+#### A masked pattern was here ####
+                    name default.part
+                    numFiles 1
+                    numRows 26
+                    rawDataSize 3147
+                    serialization.ddl struct part { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
+                    serialization.format 1
+                    serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    totalSize 3173
+#### A masked pattern was here ####
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                
+                    input format: org.apache.hadoop.mapred.TextInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                    properties:
+                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"p_brand":"true","p_comment":"true","p_container":"true","p_mfgr":"true","p_name":"true","p_partkey":"true","p_retailprice":"true","p_size":"true","p_type":"true"}}
+                      bucket_count -1
+                      bucketing_version 2
+                      column.name.delimiter ,
+                      columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment
+                      columns.comments 
+                      columns.types int:string:string:string:string:int:string:double:string
+#### A masked pattern was here ####
+                      name default.part
+                      numFiles 1
+                      numRows 26
+                      rawDataSize 3147
+                      serialization.ddl struct part { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
+                      serialization.format 1
+                      serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                      totalSize 3173
+#### A masked pattern was here ####
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    name: default.part
+                  name: default.part
+            Truncated Path -> Alias:
+              /part [part]
+        Reducer 2 
+            Needs Tagging: false
+            Reduce Operator Tree:
+              Group By Operator
+                keys: KEY._col0 (type: int)
+                mode: mergepartial
+                outputColumnNames: _col0
+                Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE
+            Execution mode: llap
+            Needs Tagging: false
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0), count(VALUE._col1)
+                keys: KEY._col0 (type: int)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 13 Data size: 260 Basic stats: COMPLETE Column stats: COMPLETE
+                Merge Join Operator
+                  condition map:
+                       Inner Join 0 to 1
+                  keys:
+                    0 _col0 (type: int)
+                    1 _col0 (type: int)
+                  outputColumnNames: _col1, _col2, _col3
+                  Position of Big Table: 0
+                  Statistics: Num rows: 13 Data size: 260 Basic stats: COMPLETE Column stats: COMPLETE
+                  Select Operator
+                    expressions: _col3 (type: int), _col1 (type: bigint), _col2 (type: bigint)
+                    outputColumnNames: _col0, _col1, _col2
+                    Statistics: Num rows: 13 Data size: 260 Basic stats: COMPLETE Column stats: COMPLETE
+                    Reduce Output Operator
+                      key expressions: _col0 (type: int)
+                      null sort order: a
+                      sort order: +
+                      Map-reduce partition columns: _col0 (type: int)
+                      Statistics: Num rows: 13 Data size: 260 Basic stats: COMPLETE Column stats: COMPLETE
+                      tag: 1
+                      value expressions: _col1 (type: bigint), _col2 (type: bigint)
+                      auto parallelism: true
+        Reducer 3 
+            Execution mode: llap
+            Needs Tagging: false
+            Reduce Operator Tree:
+              Merge Join Operator
+                condition map:
+                     Left Outer Join 0 to 1
+                keys:
+                  0 (p_size + 1) (type: int)
+                  1 _col0 (type: int)
+                outputColumnNames: _col1, _col4, _col5, _col13, _col14
+                Position of Big Table: 0
+                Statistics: Num rows: 32 Data size: 7600 Basic stats: COMPLETE Column stats: COMPLETE
+                Reduce Output Operator
+                  key expressions: _col4 (type: string), (_col5 + 1) (type: int)
+                  null sort order: aa
+                  sort order: ++
+                  Map-reduce partition columns: _col4 (type: string), (_col5 + 1) (type: int)
+                  Statistics: Num rows: 32 Data size: 7600 Basic stats: COMPLETE Column stats: COMPLETE
+                  tag: 0
+                  value expressions: _col1 (type: string), _col13 (type: bigint), _col14 (type: bigint)
+                  auto parallelism: true
+        Reducer 4 
+            Execution mode: llap
+            Needs Tagging: false
+            Reduce Operator Tree:
+              Merge Join Operator
+                condition map:
+                     Left Outer Join 0 to 1
+                keys:
+                  0 _col4 (type: string), (_col5 + 1) (type: int)
+                  1 _col0 (type: string), _col1 (type: int)
+                outputColumnNames: _col1, _col4, _col13, _col14, _col17
+                Position of Big Table: 0
+                Statistics: Num rows: 39 Data size: 9231 Basic stats: COMPLETE Column stats: COMPLETE
+                Select Operator
+                  expressions: _col1 (type: string), _col4 (type: string), _col13 (type: bigint), _col14 (type: bigint), _col17 (type: boolean)
+                  outputColumnNames: _col0, _col1, _col4, _col5, _col8
+                  Statistics: Num rows: 39 Data size: 9387 Basic stats: COMPLETE Column stats: COMPLETE
+                  Filter Operator
+                    isSamplingPred: false
+                    predicate: (((_col5 < _col4 is not true) or (_col4 = 0L) or _col4 is null or _col8 is not null or _col1 is null) and (_col1 is not null or (_col4 = 0L) or _col4 is null or _col8 is not null) and (_col8 is null or (_col4 = 0L) or _col4 is null)) (type: boolean)
+                    Statistics: Num rows: 39 Data size: 9387 Basic stats: COMPLETE Column stats: COMPLETE
+                    Select Operator
+                      expressions: _col0 (type: string)
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 39 Data size: 4719 Basic stats: COMPLETE Column stats: COMPLETE
+                      File Output Operator
+                        compressed: false
+                        GlobalTableId: 0
+#### A masked pattern was here ####
+                        NumFilesPerFileSink: 1
+                        Statistics: Num rows: 39 Data size: 4719 Basic stats: COMPLETE Column stats: COMPLETE
+#### A masked pattern was here ####
+                        table:
+                            input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                            output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                            properties:
+                              columns _col0
+                              columns.types string
+                              escape.delim \
+                              hive.serialization.extend.additional.nesting.levels true
+                              serialization.escape.crlf true
+                              serialization.format 1
+                              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                        TotalFiles: 1
+                        GatherStats: false
+                        MultiFileSpray: false
+        Reducer 5 
+            Execution mode: vectorized, llap
+            Needs Tagging: false
+            Reduce Operator Tree:
+              Group By Operator
+                keys: KEY._col0 (type: int), KEY._col1 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE
+                Select Operator
+                  expressions: _col1 (type: string), _col0 (type: int)
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE
+                  Reduce Output Operator
+                    key expressions: _col1 (type: int)
+                    null sort order: a
+                    sort order: +
+                    Map-reduce partition columns: _col1 (type: int)
+                    Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE
+                    tag: 0
+                    value expressions: _col0 (type: string)
+                    auto parallelism: false
+        Reducer 6 
+            Execution mode: llap
+            Needs Tagging: false
+            Reduce Operator Tree:
+              Merge Join Operator
+                condition map:
+                     Inner Join 0 to 1
+                keys:
+                  0 _col1 (type: int)
+                  1 _col0 (type: int)
+                outputColumnNames: _col0, _col2
+                Position of Big Table: 0
+                Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE
+                Select Operator
+                  expressions: _col0 (type: string), _col2 (type: int), true (type: boolean)
+                  outputColumnNames: _col0, _col1, _col2
+                  Statistics: Num rows: 13 Data size: 1456 Basic stats: COMPLETE Column stats: COMPLETE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: string), _col1 (type: int)
+                    null sort order: aa
+                    sort order: ++
+                    Map-reduce partition columns: _col0 (type: string), _col1 (type: int)
+                    Statistics: Num rows: 13 Data size: 1456 Basic stats: COMPLETE Column stats: COMPLETE
+                    tag: 1
+                    value expressions: _col2 (type: boolean)
+                    auto parallelism: true
+        Reducer 9 
+            Execution mode: vectorized, llap
+            Needs Tagging: false
+            Reduce Operator Tree:
+              Group By Operator
+                keys: KEY._col0 (type: int)
+                mode: mergepartial
+                outputColumnNames: _col0
+                Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE
+                Reduce Output Operator
+                  key expressions: _col0 (type: int)
+                  null sort order: a
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: int)
+                  Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE
+                  tag: 1
+                  auto parallelism: false
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+