This is an automated email from the ASF dual-hosted git repository.

vgarg pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
     new 734cc2c  HIVE-21760: Sharedwork optimization should be bypassed for SMB joins (Vineet Garg, reviewed by Jesus Camacho Rodriguez)
734cc2c is described below

commit 734cc2c6a80b99b7c692b20b0df7d7d59cbaffd5
Author: Vineet Garg <vg...@apache.org>
AuthorDate: Tue May 21 11:27:19 2019 -0700

    HIVE-21760: Sharedwork optimization should be bypassed for SMB joins (Vineet Garg, reviewed by Jesus Camacho Rodriguez)
---
 .../hive/ql/optimizer/SharedWorkOptimizer.java     |  12 +-
 ql/src/test/queries/clientpositive/sharedwork.q    |  30 ++
 .../results/clientpositive/llap/sharedwork.q.out   | 547 +++++++++++++++++++++
 3 files changed, 583 insertions(+), 6 deletions(-)

diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SharedWorkOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SharedWorkOptimizer.java
index f70a6dc..40a0c4e 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SharedWorkOptimizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SharedWorkOptimizer.java
@@ -1374,21 +1374,21 @@ public class SharedWorkOptimizer extends Transform {
     // 1.1. None of the works that we are merging can contain a Union
     // operator. This is not supported yet as we might end up with cycles in
     // the Tez DAG.
-    // 1.2. There cannot be more than one DummyStore operator in the new resulting
-    // work when the operators are merged. This is due to an assumption in
-    // MergeJoinProc that needs to be further explored.
+    // 1.2. There cannot be any DummyStore operator in the works being merged.
+    // This is due to an assumption in MergeJoinProc that needs to be further explored.
+    // This is also due to an assumption in task generation.
     // If any of these conditions are not met, we cannot merge.
     // TODO: Extend rule so it can be applied for these cases.
     final Set<Operator<?>> workOps1 = findWorkOperators(optimizerCache, op1);
     final Set<Operator<?>> workOps2 = findWorkOperators(optimizerCache, op2);
-    boolean foundDummyStoreOp = false;
     for (Operator<?> op : workOps1) {
       if (op instanceof UnionOperator) {
         // We cannot merge (1.1)
         return false;
       }
       if (op instanceof DummyStoreOperator) {
-        foundDummyStoreOp = true;
+        // We cannot merge (1.2)
+        return false;
       }
     }
     for (Operator<?> op : workOps2) {
@@ -1396,7 +1396,7 @@
         // We cannot merge (1.1)
         return false;
       }
-      if (foundDummyStoreOp && op instanceof DummyStoreOperator) {
+      if (op instanceof DummyStoreOperator) {
         // We cannot merge (1.2)
         return false;
       }
diff --git a/ql/src/test/queries/clientpositive/sharedwork.q b/ql/src/test/queries/clientpositive/sharedwork.q
index 404a29a..ac65c23 100644
--- a/ql/src/test/queries/clientpositive/sharedwork.q
+++ b/ql/src/test/queries/clientpositive/sharedwork.q
@@ -1,3 +1,4 @@
+--! qt:dataset:part
 set hive.optimize.shared.work.extended=false;
 
 create table MY_TABLE_0001 (
@@ -38,3 +39,32 @@ WHERE 1=1
 AND (cast(Table__323.col_7 AS DOUBLE) IS NOT NULL OR Table__323.col_7 IS NULL)
 AND CAST(Table__323.col_3 AS DATE) BETWEEN '2018-07-01' AND '2019-01-23'
 AND Table__323.col_20 IN ('part1','part2','part3');
+
+
+set hive.optimize.shared.work.extended=true;
+explain extended
+SELECT `t`.`p_name`
+FROM (SELECT `p_name`, `p_type`, `p_size` + 1 AS `size`
+FROM `part`) AS `t`
+LEFT JOIN (SELECT `t5`.`size`, `t2`.`c`, `t2`.`ck`
+FROM (SELECT `p_size` + 1 AS `+`, COUNT(*) AS `c`, COUNT(`p_type`) AS `ck`
+FROM `part`
+WHERE `p_size` IS NOT NULL
+GROUP BY `p_size` + 1) AS `t2`
+INNER JOIN (SELECT `p_size` + 1 AS `size`
+FROM `part`
+WHERE `p_size` IS NOT NULL
+GROUP BY `p_size` + 1) AS `t5` ON `t2`.`+` = `t5`.`size`) AS `t6` ON `t`.`size` = `t6`.`size`
+LEFT JOIN (SELECT `t9`.`p_type`, `t12`.`size`, TRUE AS `$f2`
+FROM (SELECT `p_type`, `p_size` + 1 AS `+`
+FROM `part`
+WHERE `p_size` IS NOT NULL AND `p_type` IS NOT NULL
+GROUP BY `p_type`, `p_size` + 1) AS `t9`
+INNER JOIN (SELECT `p_size` + 1 AS `size`
+FROM `part`
+WHERE `p_size` IS NOT NULL
+GROUP BY `p_size` + 1) AS `t12` ON `t9`.`+` = `t12`.`size`) AS `t14` ON `t`.`p_type` = `t14`.`p_type` AND `t`.`size` = `t14`.`size`
+WHERE (`t14`.`$f2` IS NULL OR `t6`.`c` = 0 OR `t6`.`c` IS NULL)
+ AND (`t`.`p_type` IS NOT NULL OR `t6`.`c` = 0 OR `t6`.`c` IS NULL OR `t14`.`$f2` IS NOT NULL)
+ AND (`t6`.`ck` < `t6`.`c` IS NOT TRUE OR `t6`.`c` = 0 OR `t6`.`c` IS NULL OR `t14`.`$f2` IS NOT NULL
+ OR `t`.`p_type` IS NULL);
\ No newline at end of file
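In short, the SharedWorkOptimizer change above amounts to this: a merge of two works is now rejected as soon as either work contains a DummyStore operator, rather than only when both works contain one. The sketch below is a minimal, self-contained illustration of that pre-condition. The Operator, UnionOperator, and DummyStoreOperator types here are simplified stand-ins invented for the example, not Hive's real classes in org.apache.hadoop.hive.ql.exec, and canMerge() condenses only the two checks touched by this commit.

import java.util.List;
import java.util.Set;

// Simplified stand-ins for Hive's operator classes; the real check lives in
// SharedWorkOptimizer and uses org.apache.hadoop.hive.ql.exec.UnionOperator
// and org.apache.hadoop.hive.ql.exec.DummyStoreOperator.
interface Operator {}
class UnionOperator implements Operator {}
class DummyStoreOperator implements Operator {}

public class MergePreCheck {

  // Mirrors the patched pre-condition: reject the merge if EITHER work
  // contains a DummyStore operator (condition 1.2), instead of only when
  // both works contain one, as the code did before this commit.
  static boolean canMerge(Set<Operator> workOps1, Set<Operator> workOps2) {
    for (Set<Operator> workOps : List.of(workOps1, workOps2)) {
      for (Operator op : workOps) {
        if (op instanceof UnionOperator) {
          return false; // cannot merge (1.1): Union may create cycles in the Tez DAG
        }
        if (op instanceof DummyStoreOperator) {
          return false; // cannot merge (1.2): DummyStore marks an SMB join input
        }
      }
    }
    return true;
  }

  public static void main(String[] args) {
    Set<Operator> smbWork = Set.of(new DummyStoreOperator());
    Set<Operator> plainWork = Set.of();
    // Before HIVE-21760 this pair would have been considered mergeable;
    // after the fix it is rejected.
    System.out.println(canMerge(smbWork, plainWork)); // prints: false
  }
}

Since DummyStore operators are what the planner places on the small-table side of a sort-merge-bucket (SMB) join, rejecting any work that contains one is what makes the optimization bypass SMB joins, per the commit title. The test change below exercises this path.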
diff --git a/ql/src/test/results/clientpositive/llap/sharedwork.q.out b/ql/src/test/results/clientpositive/llap/sharedwork.q.out
index f46c132..e3bf830 100644
--- a/ql/src/test/results/clientpositive/llap/sharedwork.q.out
+++ b/ql/src/test/results/clientpositive/llap/sharedwork.q.out
@@ -575,3 +575,550 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
+PREHOOK: query: explain extended
+SELECT `t`.`p_name`
+FROM (SELECT `p_name`, `p_type`, `p_size` + 1 AS `size`
+FROM `part`) AS `t`
+LEFT JOIN (SELECT `t5`.`size`, `t2`.`c`, `t2`.`ck`
+FROM (SELECT `p_size` + 1 AS `+`, COUNT(*) AS `c`, COUNT(`p_type`) AS `ck`
+FROM `part`
+WHERE `p_size` IS NOT NULL
+GROUP BY `p_size` + 1) AS `t2`
+INNER JOIN (SELECT `p_size` + 1 AS `size`
+FROM `part`
+WHERE `p_size` IS NOT NULL
+GROUP BY `p_size` + 1) AS `t5` ON `t2`.`+` = `t5`.`size`) AS `t6` ON `t`.`size` = `t6`.`size`
+LEFT JOIN (SELECT `t9`.`p_type`, `t12`.`size`, TRUE AS `$f2`
+FROM (SELECT `p_type`, `p_size` + 1 AS `+`
+FROM `part`
+WHERE `p_size` IS NOT NULL AND `p_type` IS NOT NULL
+GROUP BY `p_type`, `p_size` + 1) AS `t9`
+INNER JOIN (SELECT `p_size` + 1 AS `size`
+FROM `part`
+WHERE `p_size` IS NOT NULL
+GROUP BY `p_size` + 1) AS `t12` ON `t9`.`+` = `t12`.`size`) AS `t14` ON `t`.`p_type` = `t14`.`p_type` AND `t`.`size` = `t14`.`size`
+WHERE (`t14`.`$f2` IS NULL OR `t6`.`c` = 0 OR `t6`.`c` IS NULL)
+ AND (`t`.`p_type` IS NOT NULL OR `t6`.`c` = 0 OR `t6`.`c` IS NULL OR `t14`.`$f2` IS NOT NULL)
+ AND (`t6`.`ck` < `t6`.`c` IS NOT TRUE OR `t6`.`c` = 0 OR `t6`.`c` IS NULL OR `t14`.`$f2` IS NOT NULL
+ OR `t`.`p_type` IS NULL)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part
+#### A masked pattern was here ####
+POSTHOOK: query: explain extended
+SELECT `t`.`p_name`
+FROM (SELECT `p_name`, `p_type`, `p_size` + 1 AS `size`
+FROM `part`) AS `t`
+LEFT JOIN (SELECT `t5`.`size`, `t2`.`c`, `t2`.`ck`
+FROM (SELECT `p_size` + 1 AS `+`, COUNT(*) AS `c`, COUNT(`p_type`) AS `ck`
+FROM `part`
+WHERE `p_size` IS NOT NULL
+GROUP BY `p_size` + 1) AS `t2`
+INNER JOIN (SELECT `p_size` + 1 AS `size`
+FROM `part`
+WHERE `p_size` IS NOT NULL
+GROUP BY `p_size` + 1) AS `t5` ON `t2`.`+` = `t5`.`size`) AS `t6` ON `t`.`size` = `t6`.`size`
+LEFT JOIN (SELECT `t9`.`p_type`, `t12`.`size`, TRUE AS `$f2`
+FROM (SELECT `p_type`, `p_size` + 1 AS `+`
+FROM `part`
+WHERE `p_size` IS NOT NULL AND `p_type` IS NOT NULL
+GROUP BY `p_type`, `p_size` + 1) AS `t9`
+INNER JOIN (SELECT `p_size` + 1 AS `size`
+FROM `part`
+WHERE `p_size` IS NOT NULL
+GROUP BY `p_size` + 1) AS `t12` ON `t9`.`+` = `t12`.`size`) AS `t14` ON `t`.`p_type` = `t14`.`p_type` AND `t`.`size` = `t14`.`size`
+WHERE (`t14`.`$f2` IS NULL OR `t6`.`c` = 0 OR `t6`.`c` IS NULL)
+ AND (`t`.`p_type` IS NOT NULL OR `t6`.`c` = 0 OR `t6`.`c` IS NULL OR `t14`.`$f2` IS NOT NULL)
+ AND (`t6`.`ck` < `t6`.`c` IS NOT TRUE OR `t6`.`c` = 0 OR `t6`.`c` IS NULL OR `t14`.`$f2` IS NOT NULL
+ OR `t`.`p_type` IS NULL)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part
+#### A masked pattern was here ####
+OPTIMIZED SQL: SELECT `p_name`
+FROM (SELECT `part`.`p_name`, `part`.`p_type`, `part`.`p_size` + 1 AS `size`, `t5`.`size` AS `size0`, `t5`.`c`, `t5`.`ck`, `t12`.`p_type` AS `p_type0`, `t12`.`size` AS `size1`, `t12`.`$f2`
+FROM `default`.`part`
+LEFT JOIN (SELECT `t4`.`$f0` AS `size`, `t1`.`$f1` AS `c`, `t1`.`$f2` AS `ck`
+FROM (SELECT `p_size` + 1 AS `$f0`, COUNT(*) AS `$f1`, COUNT(`p_type`) AS `$f2`
+FROM `default`.`part`
+WHERE `p_size` IS NOT NULL
+GROUP BY `p_size` + 1) AS `t1`
+INNER JOIN (SELECT `p_size` + 1 AS `$f0`
+FROM `default`.`part`
+WHERE `p_size` IS NOT NULL
+GROUP BY `p_size` + 1) AS `t4` ON `t1`.`$f0` = `t4`.`$f0`) AS `t5` ON `part`.`p_size` + 1 = `t5`.`size`
+LEFT JOIN (SELECT `t8`.`$f0` AS `p_type`, `t11`.`$f0` AS `size`, TRUE AS `$f2`
+FROM (SELECT `p_type` AS `$f0`, `p_size` + 1 AS `$f1`
+FROM `default`.`part`
+WHERE `p_size` IS NOT NULL AND `p_type` IS NOT NULL
+GROUP BY `p_type`, `p_size` + 1) AS `t8`
+INNER JOIN (SELECT `p_size` + 1 AS `$f0`
+FROM `default`.`part`
+WHERE `p_size` IS NOT NULL
+GROUP BY `p_size` + 1) AS `t11` ON `t8`.`$f1` = `t11`.`$f0`) AS `t12` ON `part`.`p_type` = `t12`.`p_type` AND `part`.`p_size` + 1 = `t12`.`size`) AS `t13`
+WHERE (`t13`.`$f2` IS NULL OR `t13`.`c` = 0 OR `t13`.`c` IS NULL) AND (`t13`.`p_type` IS NOT NULL OR `t13`.`c` = 0 OR `t13`.`c` IS NULL OR `t13`.`$f2` IS NOT NULL) AND (`t13`.`ck` < (`t13`.`c` IS NOT TRUE) OR `t13`.`c` = 0 OR `t13`.`c` IS NULL OR `t13`.`$f2` IS NOT NULL OR `t13`.`p_type` IS NULL)
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE)
+        Reducer 3 <- Map 10 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE)
+        Reducer 4 <- Reducer 3 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE)
+        Reducer 5 <- Map 1 (SIMPLE_EDGE)
+        Reducer 6 <- Reducer 5 (SIMPLE_EDGE), Reducer 9 (ONE_TO_ONE_EDGE)
+        Reducer 9 <- Map 7 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: part
+                  filterExpr: (p_size is not null or (p_size is not null and p_type is not null)) (type: boolean)
+                  Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE
+                  GatherStats: false
+                  Filter Operator
+                    isSamplingPred: false
+                    predicate: p_size is not null (type: boolean)
+                    Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE
+                    Select Operator
+                      expressions: (p_size + 1) (type: int), p_type (type: string)
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE
+                      Group By Operator
+                        aggregations: count(), count(_col1)
+                        keys: _col0 (type: int)
+                        minReductionHashAggr: 0.0
+                        mode: hash
+                        outputColumnNames: _col0, _col1, _col2
+                        Statistics: Num rows: 13 Data size: 260 Basic stats: COMPLETE Column stats: COMPLETE
+                        Reduce Output Operator
+                          key expressions: _col0 (type: int)
+                          null sort order: a
+                          sort order: +
+                          Map-reduce partition columns: _col0 (type: int)
+                          Statistics: Num rows: 13 Data size: 260 Basic stats: COMPLETE Column stats: COMPLETE
+                          tag: -1
+                          value expressions: _col1 (type: bigint), _col2 (type: bigint)
+                          auto parallelism: true
+                  Filter Operator
+                    isSamplingPred: false
+                    predicate: (p_size is not null and p_type is not null) (type: boolean)
+                    Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE
+                    Select Operator
+                      expressions: p_type (type: string), (p_size + 1) (type: int)
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE
+                      Group By Operator
+                        keys: _col1 (type: int), _col0 (type: string)
+                        minReductionHashAggr: 0.0
+                        mode: hash
+                        outputColumnNames: _col0, _col1
+                        Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE
+                        Reduce Output Operator
+                          key expressions: _col0 (type: int), _col1 (type: string)
+                          null sort order: aa
+                          sort order: ++
+                          Map-reduce partition columns: _col0 (type: int), _col1 (type: string)
+                          Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE
+                          tag: -1
+                          auto parallelism: true
+            Execution mode: vectorized, llap
+            LLAP IO: no inputs
+            Path -> Alias:
+#### A masked pattern was here ####
+            Path -> Partition:
+#### A masked pattern was here ####
+                Partition
+                  base file name: part
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  properties:
+                    COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"p_brand":"true","p_comment":"true","p_container":"true","p_mfgr":"true","p_name":"true","p_partkey":"true","p_retailprice":"true","p_size":"true","p_type":"true"}}
+                    bucket_count -1
+                    bucketing_version 2
+                    column.name.delimiter ,
+                    columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment
+                    columns.comments 
+                    columns.types int:string:string:string:string:int:string:double:string
+#### A masked pattern was here ####
+                    name default.part
+                    numFiles 1
+                    numRows 26
+                    rawDataSize 3147
+                    serialization.ddl struct part { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
+                    serialization.format 1
+                    serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    totalSize 3173
+#### A masked pattern was here ####
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                
+                    input format: org.apache.hadoop.mapred.TextInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                    properties:
+                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"p_brand":"true","p_comment":"true","p_container":"true","p_mfgr":"true","p_name":"true","p_partkey":"true","p_retailprice":"true","p_size":"true","p_type":"true"}}
+                      bucket_count -1
+                      bucketing_version 2
+                      column.name.delimiter ,
+                      columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment
+                      columns.comments 
+                      columns.types int:string:string:string:string:int:string:double:string
+#### A masked pattern was here ####
+                      name default.part
+                      numFiles 1
+                      numRows 26
+                      rawDataSize 3147
+                      serialization.ddl struct part { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
+                      serialization.format 1
+                      serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                      totalSize 3173
+#### A masked pattern was here ####
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    name: default.part
+                  name: default.part
+            Truncated Path -> Alias:
+              /part [part]
+        Map 10 
+            Map Operator Tree:
+                TableScan
+                  alias: part
+                  Statistics: Num rows: 26 Data size: 5954 Basic stats: COMPLETE Column stats: COMPLETE
+                  GatherStats: false
+                  Reduce Output Operator
+                    key expressions: (p_size + 1) (type: int)
+                    null sort order: a
+                    sort order: +
+                    Map-reduce partition columns: (p_size + 1) (type: int)
+                    Statistics: Num rows: 26 Data size: 5954 Basic stats: COMPLETE Column stats: COMPLETE
+                    tag: 0
+                    value expressions: p_name (type: string), p_type (type: string), p_size (type: int)
+                    auto parallelism: true
+            Execution mode: vectorized, llap
+            LLAP IO: no inputs
+            Path -> Alias:
+#### A masked pattern was here ####
+            Path -> Partition:
+#### A masked pattern was here ####
+                Partition
+                  base file name: part
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  properties:
+                    COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"p_brand":"true","p_comment":"true","p_container":"true","p_mfgr":"true","p_name":"true","p_partkey":"true","p_retailprice":"true","p_size":"true","p_type":"true"}}
+                    bucket_count -1
+                    bucketing_version 2
+                    column.name.delimiter ,
+                    columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment
+                    columns.comments 
+                    columns.types int:string:string:string:string:int:string:double:string
+#### A masked pattern was here ####
+                    name default.part
+                    numFiles 1
+                    numRows 26
+                    rawDataSize 3147
+                    serialization.ddl struct part { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
+                    serialization.format 1
+                    serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    totalSize 3173
+#### A masked pattern was here ####
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                
+                    input format: org.apache.hadoop.mapred.TextInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                    properties:
+                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"p_brand":"true","p_comment":"true","p_container":"true","p_mfgr":"true","p_name":"true","p_partkey":"true","p_retailprice":"true","p_size":"true","p_type":"true"}}
+                      bucket_count -1
+                      bucketing_version 2
+                      column.name.delimiter ,
+                      columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment
+                      columns.comments 
+                      columns.types int:string:string:string:string:int:string:double:string
+#### A masked pattern was here ####
+                      name default.part
+                      numFiles 1
+                      numRows 26
+                      rawDataSize 3147
+                      serialization.ddl struct part { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
+                      serialization.format 1
+                      serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                      totalSize 3173
+#### A masked pattern was here ####
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    name: default.part
+                  name: default.part
+            Truncated Path -> Alias:
+              /part [part]
+        Map 7 
+            Map Operator Tree:
+                TableScan
+                  alias: part
+                  filterExpr: p_size is not null (type: boolean)
+                  Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE
+                  GatherStats: false
+                  Filter Operator
+                    isSamplingPred: false
+                    predicate: p_size is not null (type: boolean)
+                    Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE
+                    Select Operator
+                      expressions: (p_size + 1) (type: int)
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE
+                      Group By Operator
+                        keys: _col0 (type: int)
+                        minReductionHashAggr: 0.0
+                        mode: hash
+                        outputColumnNames: _col0
+                        Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE
+                        Reduce Output Operator
+                          key expressions: _col0 (type: int)
+                          null sort order: a
+                          sort order: +
+                          Map-reduce partition columns: _col0 (type: int)
+                          Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE
+                          tag: -1
+                          auto parallelism: true
+                        Reduce Output Operator
+                          key expressions: _col0 (type: int)
+                          null sort order: a
+                          sort order: +
+                          Map-reduce partition columns: _col0 (type: int)
+                          Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE
+                          tag: -1
+                          auto parallelism: false
+            Execution mode: vectorized, llap
+            LLAP IO: no inputs
+            Path -> Alias:
+#### A masked pattern was here ####
+            Path -> Partition:
+#### A masked pattern was here ####
+                Partition
+                  base file name: part
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  properties:
+                    COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"p_brand":"true","p_comment":"true","p_container":"true","p_mfgr":"true","p_name":"true","p_partkey":"true","p_retailprice":"true","p_size":"true","p_type":"true"}}
+                    bucket_count -1
+                    bucketing_version 2
+                    column.name.delimiter ,
+                    columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment
+                    columns.comments 
+                    columns.types int:string:string:string:string:int:string:double:string
+#### A masked pattern was here ####
+                    name default.part
+                    numFiles 1
+                    numRows 26
+                    rawDataSize 3147
+                    serialization.ddl struct part { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
+                    serialization.format 1
+                    serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    totalSize 3173
+#### A masked pattern was here ####
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                
+                    input format: org.apache.hadoop.mapred.TextInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                    properties:
+                      COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"p_brand":"true","p_comment":"true","p_container":"true","p_mfgr":"true","p_name":"true","p_partkey":"true","p_retailprice":"true","p_size":"true","p_type":"true"}}
+                      bucket_count -1
+                      bucketing_version 2
+                      column.name.delimiter ,
+                      columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment
+                      columns.comments 
+                      columns.types int:string:string:string:string:int:string:double:string
+#### A masked pattern was here ####
+                      name default.part
+                      numFiles 1
+                      numRows 26
+                      rawDataSize 3147
+                      serialization.ddl struct part { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
+                      serialization.format 1
+                      serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                      totalSize 3173
+#### A masked pattern was here ####
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    name: default.part
+                  name: default.part
+            Truncated Path -> Alias:
+              /part [part]
+        Reducer 2 
+            Needs Tagging: false
+            Reduce Operator Tree:
+              Group By Operator
+                keys: KEY._col0 (type: int)
+                mode: mergepartial
+                outputColumnNames: _col0
+                Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE
+            Execution mode: llap
+            Needs Tagging: false
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0), count(VALUE._col1)
+                keys: KEY._col0 (type: int)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 13 Data size: 260 Basic stats: COMPLETE Column stats: COMPLETE
+                Merge Join Operator
+                  condition map:
+                       Inner Join 0 to 1
+                  keys:
+                    0 _col0 (type: int)
+                    1 _col0 (type: int)
+                  outputColumnNames: _col1, _col2, _col3
+                  Position of Big Table: 0
+                  Statistics: Num rows: 13 Data size: 260 Basic stats: COMPLETE Column stats: COMPLETE
+                  Select Operator
+                    expressions: _col3 (type: int), _col1 (type: bigint), _col2 (type: bigint)
+                    outputColumnNames: _col0, _col1, _col2
+                    Statistics: Num rows: 13 Data size: 260 Basic stats: COMPLETE Column stats: COMPLETE
+                    Reduce Output Operator
+                      key expressions: _col0 (type: int)
+                      null sort order: a
+                      sort order: +
+                      Map-reduce partition columns: _col0 (type: int)
+                      Statistics: Num rows: 13 Data size: 260 Basic stats: COMPLETE Column stats: COMPLETE
+                      tag: 1
+                      value expressions: _col1 (type: bigint), _col2 (type: bigint)
+                      auto parallelism: true
+        Reducer 3 
+            Execution mode: llap
+            Needs Tagging: false
+            Reduce Operator Tree:
+              Merge Join Operator
+                condition map:
+                     Left Outer Join 0 to 1
+                keys:
+                  0 (p_size + 1) (type: int)
+                  1 _col0 (type: int)
+                outputColumnNames: _col1, _col4, _col5, _col13, _col14
+                Position of Big Table: 0
+                Statistics: Num rows: 32 Data size: 7600 Basic stats: COMPLETE Column stats: COMPLETE
+                Reduce Output Operator
+                  key expressions: _col4 (type: string), (_col5 + 1) (type: int)
+                  null sort order: aa
+                  sort order: ++
+                  Map-reduce partition columns: _col4 (type: string), (_col5 + 1) (type: int)
+                  Statistics: Num rows: 32 Data size: 7600 Basic stats: COMPLETE Column stats: COMPLETE
+                  tag: 0
+                  value expressions: _col1 (type: string), _col13 (type: bigint), _col14 (type: bigint)
+                  auto parallelism: true
+        Reducer 4 
+            Execution mode: llap
+            Needs Tagging: false
+            Reduce Operator Tree:
+              Merge Join Operator
+                condition map:
+                     Left Outer Join 0 to 1
+                keys:
+                  0 _col4 (type: string), (_col5 + 1) (type: int)
+                  1 _col0 (type: string), _col1 (type: int)
+                outputColumnNames: _col1, _col4, _col13, _col14, _col17
+                Position of Big Table: 0
+                Statistics: Num rows: 39 Data size: 9231 Basic stats: COMPLETE Column stats: COMPLETE
+                Select Operator
+                  expressions: _col1 (type: string), _col4 (type: string), _col13 (type: bigint), _col14 (type: bigint), _col17 (type: boolean)
+                  outputColumnNames: _col0, _col1, _col4, _col5, _col8
+                  Statistics: Num rows: 39 Data size: 9387 Basic stats: COMPLETE Column stats: COMPLETE
+                  Filter Operator
+                    isSamplingPred: false
+                    predicate: (((_col5 < _col4 is not true) or (_col4 = 0L) or _col4 is null or _col8 is not null or _col1 is null) and (_col1 is not null or (_col4 = 0L) or _col4 is null or _col8 is not null) and (_col8 is null or (_col4 = 0L) or _col4 is null)) (type: boolean)
+                    Statistics: Num rows: 39 Data size: 9387 Basic stats: COMPLETE Column stats: COMPLETE
+                    Select Operator
+                      expressions: _col0 (type: string)
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 39 Data size: 4719 Basic stats: COMPLETE Column stats: COMPLETE
+                      File Output Operator
+                        compressed: false
+                        GlobalTableId: 0
+#### A masked pattern was here ####
+                        NumFilesPerFileSink: 1
+                        Statistics: Num rows: 39 Data size: 4719 Basic stats: COMPLETE Column stats: COMPLETE
+#### A masked pattern was here ####
+                        table:
+                            input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                            output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                            properties:
+                              columns _col0
+                              columns.types string
+                              escape.delim \
+                              hive.serialization.extend.additional.nesting.levels true
+                              serialization.escape.crlf true
+                              serialization.format 1
+                              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                        TotalFiles: 1
+                        GatherStats: false
+                        MultiFileSpray: false
+        Reducer 5 
+            Execution mode: vectorized, llap
+            Needs Tagging: false
+            Reduce Operator Tree:
+              Group By Operator
+                keys: KEY._col0 (type: int), KEY._col1 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE
+                Select Operator
+                  expressions: _col1 (type: string), _col0 (type: int)
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE
+                  Reduce Output Operator
+                    key expressions: _col1 (type: int)
+                    null sort order: a
+                    sort order: +
+                    Map-reduce partition columns: _col1 (type: int)
+                    Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE
+                    tag: 0
+                    value expressions: _col0 (type: string)
+                    auto parallelism: false
+        Reducer 6 
+            Execution mode: llap
+            Needs Tagging: false
+            Reduce Operator Tree:
+              Merge Join Operator
+                condition map:
+                     Inner Join 0 to 1
+                keys:
+                  0 _col1 (type: int)
+                  1 _col0 (type: int)
+                outputColumnNames: _col0, _col2
+                Position of Big Table: 0
+                Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE
+                Select Operator
+                  expressions: _col0 (type: string), _col2 (type: int), true (type: boolean)
+                  outputColumnNames: _col0, _col1, _col2
+                  Statistics: Num rows: 13 Data size: 1456 Basic stats: COMPLETE Column stats: COMPLETE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: string), _col1 (type: int)
+                    null sort order: aa
+                    sort order: ++
+                    Map-reduce partition columns: _col0 (type: string), _col1 (type: int)
+                    Statistics: Num rows: 13 Data size: 1456 Basic stats: COMPLETE Column stats: COMPLETE
+                    tag: 1
+                    value expressions: _col2 (type: boolean)
+                    auto parallelism: true
+        Reducer 9 
+            Execution mode: vectorized, llap
+            Needs Tagging: false
+            Reduce Operator Tree:
+              Group By Operator
+                keys: KEY._col0 (type: int)
+                mode: mergepartial
+                outputColumnNames: _col0
+                Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE
+                Reduce Output Operator
+                  key expressions: _col0 (type: int)
+                  null sort order: a
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: int)
+                  Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE
+                  tag: 1
+                  auto parallelism: false
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+