http://git-wip-us.apache.org/repos/asf/hive/blob/6f5c1135/ql/src/test/results/clientpositive/correlationoptimizer3.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/correlationoptimizer3.q.out
b/ql/src/test/results/clientpositive/correlationoptimizer3.q.out
deleted file mode 100644
index 06fb699..0000000
--- a/ql/src/test/results/clientpositive/correlationoptimizer3.q.out
+++ /dev/null
@@ -1,1422 +0,0 @@
-PREHOOK: query: -- When Correlation Optimizer is turned off, 5 MR jobs will be
generated.
--- When Correlation Optimizer is turned on, the subquery tmp will be evalauted
--- in a single MR job (including the subquery b, the subquery d, and b join d).
--- At the reduce side of the MR job evaluating tmp, two operation paths
--- (for subquery b and d) have different depths. The path starting from
subquery b
--- is JOIN->GBY->JOIN, which has a depth of 3. While, the path starting from
subquery d
--- is JOIN->JOIN. We should be able to handle this case.
-EXPLAIN
-SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)), SUM(HASH(tmp.value))
-FROM (SELECT b.key AS key, b.cnt AS cnt, d.value AS value
- FROM (SELECT x.key, count(1) AS cnt FROM src1 x JOIN src y ON (x.key =
y.key) group by x.key) b
- JOIN (SELECT x.key, x.value FROM src1 x JOIN src y ON (x.key = y.key)) d
- ON b.key = d.key) tmp
-PREHOOK: type: QUERY
-POSTHOOK: query: -- When Correlation Optimizer is turned off, 5 MR jobs will
be generated.
--- When Correlation Optimizer is turned on, the subquery tmp will be evalauted
--- in a single MR job (including the subquery b, the subquery d, and b join d).
--- At the reduce side of the MR job evaluating tmp, two operation paths
--- (for subquery b and d) have different depths. The path starting from
subquery b
--- is JOIN->GBY->JOIN, which has a depth of 3. While, the path starting from
subquery d
--- is JOIN->JOIN. We should be able to handle this case.
-EXPLAIN
-SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)), SUM(HASH(tmp.value))
-FROM (SELECT b.key AS key, b.cnt AS cnt, d.value AS value
- FROM (SELECT x.key, count(1) AS cnt FROM src1 x JOIN src y ON (x.key =
y.key) group by x.key) b
- JOIN (SELECT x.key, x.value FROM src1 x JOIN src y ON (x.key = y.key)) d
- ON b.key = d.key) tmp
-POSTHOOK: type: QUERY
-STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-2 depends on stages: Stage-1, Stage-6
- Stage-3 depends on stages: Stage-2
- Stage-5 is a root stage
- Stage-6 depends on stages: Stage-5
- Stage-0 depends on stages: Stage-3
-
-STAGE PLANS:
- Stage: Stage-1
- Map Reduce
- Map Operator Tree:
- TableScan
- alias: y
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE
Column stats: NONE
- Filter Operator
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE
Column stats: NONE
- Select Operator
- expressions: key (type: string)
- outputColumnNames: _col0
- Statistics: Num rows: 500 Data size: 5312 Basic stats:
COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 500 Data size: 5312 Basic stats:
COMPLETE Column stats: NONE
- TableScan
- alias: x
- Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE
Column stats: NONE
- Filter Operator
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE
Column stats: NONE
- Select Operator
- expressions: key (type: string), value (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE
Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 25 Data size: 191 Basic stats:
COMPLETE Column stats: NONE
- value expressions: _col1 (type: string)
- Reduce Operator Tree:
- Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 _col0 (type: string)
- 1 _col0 (type: string)
- outputColumnNames: _col1, _col2
- Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE
Column stats: NONE
- Select Operator
- expressions: _col1 (type: string), _col2 (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE
Column stats: NONE
- File Output Operator
- compressed: false
- table:
- input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
- output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde:
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-
- Stage: Stage-2
- Map Reduce
- Map Operator Tree:
- TableScan
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE
Column stats: NONE
- value expressions: _col1 (type: string)
- TableScan
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE
Column stats: NONE
- value expressions: _col1 (type: bigint)
- Reduce Operator Tree:
- Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 _col0 (type: string)
- 1 _col0 (type: string)
- outputColumnNames: _col0, _col1, _col3
- Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE
Column stats: NONE
- Select Operator
- expressions: hash(_col0) (type: int), hash(_col3) (type: int),
hash(_col1) (type: int)
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE
Column stats: NONE
- Group By Operator
- aggregations: sum(_col0), sum(_col1), sum(_col2)
- mode: hash
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE
Column stats: NONE
- File Output Operator
- compressed: false
- table:
- input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
- output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde:
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-
- Stage: Stage-3
- Map Reduce
- Map Operator Tree:
- TableScan
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE
Column stats: NONE
- value expressions: _col0 (type: bigint), _col1 (type: bigint),
_col2 (type: bigint)
- Reduce Operator Tree:
- Group By Operator
- aggregations: sum(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2)
- mode: mergepartial
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column
stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column
stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
- Stage: Stage-5
- Map Reduce
- Map Operator Tree:
- TableScan
- alias: y
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE
Column stats: NONE
- Filter Operator
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE
Column stats: NONE
- Select Operator
- expressions: key (type: string)
- outputColumnNames: _col0
- Statistics: Num rows: 500 Data size: 5312 Basic stats:
COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 500 Data size: 5312 Basic stats:
COMPLETE Column stats: NONE
- TableScan
- alias: x
- Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE
Column stats: NONE
- Filter Operator
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE
Column stats: NONE
- Select Operator
- expressions: key (type: string)
- outputColumnNames: _col0
- Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE
Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 25 Data size: 191 Basic stats:
COMPLETE Column stats: NONE
- Reduce Operator Tree:
- Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 _col0 (type: string)
- 1 _col0 (type: string)
- outputColumnNames: _col1
- Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE
Column stats: NONE
- Select Operator
- expressions: _col1 (type: string)
- outputColumnNames: _col0
- Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE
Column stats: NONE
- Group By Operator
- aggregations: count(1)
- keys: _col0 (type: string)
- mode: hash
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE
Column stats: NONE
- File Output Operator
- compressed: false
- table:
- input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
- output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde:
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-
- Stage: Stage-6
- Map Reduce
- Map Operator Tree:
- TableScan
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE
Column stats: NONE
- value expressions: _col1 (type: bigint)
- Reduce Operator Tree:
- Group By Operator
- aggregations: count(VALUE._col0)
- keys: KEY._col0 (type: string)
- mode: mergepartial
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE
Column stats: NONE
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-
- Stage: Stage-0
- Fetch Operator
- limit: -1
- Processor Tree:
- ListSink
-
-PREHOOK: query: SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)),
SUM(HASH(tmp.value))
-FROM (SELECT b.key AS key, b.cnt AS cnt, d.value AS value
- FROM (SELECT x.key, count(1) AS cnt FROM src1 x JOIN src y ON (x.key =
y.key) group by x.key) b
- JOIN (SELECT x.key, x.value FROM src1 x JOIN src y ON (x.key = y.key)) d
- ON b.key = d.key) tmp
-PREHOOK: type: QUERY
-PREHOOK: Input: default@src
-PREHOOK: Input: default@src1
-#### A masked pattern was here ####
-POSTHOOK: query: SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)),
SUM(HASH(tmp.value))
-FROM (SELECT b.key AS key, b.cnt AS cnt, d.value AS value
- FROM (SELECT x.key, count(1) AS cnt FROM src1 x JOIN src y ON (x.key =
y.key) group by x.key) b
- JOIN (SELECT x.key, x.value FROM src1 x JOIN src y ON (x.key = y.key)) d
- ON b.key = d.key) tmp
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@src
-POSTHOOK: Input: default@src1
-#### A masked pattern was here ####
-1711763 107 3531902962
-PREHOOK: query: EXPLAIN
-SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)), SUM(HASH(tmp.value))
-FROM (SELECT b.key AS key, b.cnt AS cnt, d.value AS value
- FROM (SELECT x.key, count(1) AS cnt FROM src1 x JOIN src y ON (x.key =
y.key) group by x.key) b
- JOIN (SELECT x.key, x.value FROM src1 x JOIN src y ON (x.key = y.key)) d
- ON b.key = d.key) tmp
-PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN
-SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)), SUM(HASH(tmp.value))
-FROM (SELECT b.key AS key, b.cnt AS cnt, d.value AS value
- FROM (SELECT x.key, count(1) AS cnt FROM src1 x JOIN src y ON (x.key =
y.key) group by x.key) b
- JOIN (SELECT x.key, x.value FROM src1 x JOIN src y ON (x.key = y.key)) d
- ON b.key = d.key) tmp
-POSTHOOK: type: QUERY
-STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-2 depends on stages: Stage-1
- Stage-0 depends on stages: Stage-2
-
-STAGE PLANS:
- Stage: Stage-1
- Map Reduce
- Map Operator Tree:
- TableScan
- alias: y
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE
Column stats: NONE
- Filter Operator
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE
Column stats: NONE
- Select Operator
- expressions: key (type: string)
- outputColumnNames: _col0
- Statistics: Num rows: 500 Data size: 5312 Basic stats:
COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 500 Data size: 5312 Basic stats:
COMPLETE Column stats: NONE
- TableScan
- alias: x
- Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE
Column stats: NONE
- Filter Operator
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE
Column stats: NONE
- Select Operator
- expressions: key (type: string), value (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE
Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 25 Data size: 191 Basic stats:
COMPLETE Column stats: NONE
- value expressions: _col1 (type: string)
- TableScan
- alias: y
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE
Column stats: NONE
- Filter Operator
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE
Column stats: NONE
- Select Operator
- expressions: key (type: string)
- outputColumnNames: _col0
- Statistics: Num rows: 500 Data size: 5312 Basic stats:
COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 500 Data size: 5312 Basic stats:
COMPLETE Column stats: NONE
- TableScan
- alias: x
- Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE
Column stats: NONE
- Filter Operator
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE
Column stats: NONE
- Select Operator
- expressions: key (type: string)
- outputColumnNames: _col0
- Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE
Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 25 Data size: 191 Basic stats:
COMPLETE Column stats: NONE
- Reduce Operator Tree:
- Demux Operator
- Statistics: Num rows: 1050 Data size: 11006 Basic stats: COMPLETE
Column stats: NONE
- Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 _col0 (type: string)
- 1 _col0 (type: string)
- outputColumnNames: _col1, _col2
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column
stats: NONE
- Select Operator
- expressions: _col1 (type: string), _col2 (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column
stats: NONE
- Mux Operator
- Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL
Column stats: NONE
- Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 _col0 (type: string)
- 1 _col0 (type: string)
- outputColumnNames: _col0, _col1, _col3
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE
Column stats: NONE
- Select Operator
- expressions: hash(_col0) (type: int), hash(_col3) (type:
int), hash(_col1) (type: int)
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE
Column stats: NONE
- Group By Operator
- aggregations: sum(_col0), sum(_col1), sum(_col2)
- mode: hash
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 1 Data size: 24 Basic stats:
COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- table:
- input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
- output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde:
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
- Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 _col0 (type: string)
- 1 _col0 (type: string)
- outputColumnNames: _col1
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column
stats: NONE
- Select Operator
- expressions: _col1 (type: string)
- outputColumnNames: _col0
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column
stats: NONE
- Mux Operator
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column
stats: NONE
- Group By Operator
- aggregations: count(1)
- keys: _col0 (type: string)
- mode: complete
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL
Column stats: NONE
- Mux Operator
- Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL
Column stats: NONE
- Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 _col0 (type: string)
- 1 _col0 (type: string)
- outputColumnNames: _col0, _col1, _col3
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE
Column stats: NONE
- Select Operator
- expressions: hash(_col0) (type: int), hash(_col3)
(type: int), hash(_col1) (type: int)
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE
Column stats: NONE
- Group By Operator
- aggregations: sum(_col0), sum(_col1), sum(_col2)
- mode: hash
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 1 Data size: 24 Basic stats:
COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- table:
- input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
- output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde:
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-
- Stage: Stage-2
- Map Reduce
- Map Operator Tree:
- TableScan
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE
Column stats: NONE
- value expressions: _col0 (type: bigint), _col1 (type: bigint),
_col2 (type: bigint)
- Reduce Operator Tree:
- Group By Operator
- aggregations: sum(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2)
- mode: mergepartial
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column
stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column
stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
- Stage: Stage-0
- Fetch Operator
- limit: -1
- Processor Tree:
- ListSink
-
-PREHOOK: query: SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)),
SUM(HASH(tmp.value))
-FROM (SELECT b.key AS key, b.cnt AS cnt, d.value AS value
- FROM (SELECT x.key, count(1) AS cnt FROM src1 x JOIN src y ON (x.key =
y.key) group by x.key) b
- JOIN (SELECT x.key, x.value FROM src1 x JOIN src y ON (x.key = y.key)) d
- ON b.key = d.key) tmp
-PREHOOK: type: QUERY
-PREHOOK: Input: default@src
-PREHOOK: Input: default@src1
-#### A masked pattern was here ####
-POSTHOOK: query: SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)),
SUM(HASH(tmp.value))
-FROM (SELECT b.key AS key, b.cnt AS cnt, d.value AS value
- FROM (SELECT x.key, count(1) AS cnt FROM src1 x JOIN src y ON (x.key =
y.key) group by x.key) b
- JOIN (SELECT x.key, x.value FROM src1 x JOIN src y ON (x.key = y.key)) d
- ON b.key = d.key) tmp
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@src
-POSTHOOK: Input: default@src1
-#### A masked pattern was here ####
-1711763 107 3531902962
-PREHOOK: query: -- Enable hive.auto.convert.join.
-EXPLAIN
-SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)), SUM(HASH(tmp.value))
-FROM (SELECT b.key AS key, b.cnt AS cnt, d.value AS value
- FROM (SELECT x.key, count(1) AS cnt FROM src1 x JOIN src y ON (x.key =
y.key) group by x.key) b
- JOIN (SELECT x.key, x.value FROM src1 x JOIN src y ON (x.key = y.key)) d
- ON b.key = d.key) tmp
-PREHOOK: type: QUERY
-POSTHOOK: query: -- Enable hive.auto.convert.join.
-EXPLAIN
-SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)), SUM(HASH(tmp.value))
-FROM (SELECT b.key AS key, b.cnt AS cnt, d.value AS value
- FROM (SELECT x.key, count(1) AS cnt FROM src1 x JOIN src y ON (x.key =
y.key) group by x.key) b
- JOIN (SELECT x.key, x.value FROM src1 x JOIN src y ON (x.key = y.key)) d
- ON b.key = d.key) tmp
-POSTHOOK: type: QUERY
-STAGE DEPENDENCIES:
- Stage-9 is a root stage
- Stage-2 depends on stages: Stage-9
- Stage-3 depends on stages: Stage-2
- Stage-0 depends on stages: Stage-3
-
-STAGE PLANS:
- Stage: Stage-9
- Map Reduce Local Work
- Alias -> Map Local Tables:
- $hdt$_0:$hdt$_0:$hdt$_1:x
- Fetch Operator
- limit: -1
- $hdt$_0:$hdt$_1:$hdt$_1:$hdt$_2:x
- Fetch Operator
- limit: -1
- Alias -> Map Local Operator Tree:
- $hdt$_0:$hdt$_0:$hdt$_1:x
- TableScan
- alias: x
- Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE
Column stats: NONE
- Filter Operator
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE
Column stats: NONE
- Select Operator
- expressions: key (type: string), value (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE
Column stats: NONE
- HashTable Sink Operator
- keys:
- 0 _col0 (type: string)
- 1 _col0 (type: string)
- $hdt$_0:$hdt$_1:$hdt$_1:$hdt$_2:x
- TableScan
- alias: x
- Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE
Column stats: NONE
- Filter Operator
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE
Column stats: NONE
- Select Operator
- expressions: key (type: string)
- outputColumnNames: _col0
- Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE
Column stats: NONE
- HashTable Sink Operator
- keys:
- 0 _col0 (type: string)
- 1 _col0 (type: string)
-
- Stage: Stage-2
- Map Reduce
- Map Operator Tree:
- TableScan
- alias: y
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE
Column stats: NONE
- Filter Operator
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE
Column stats: NONE
- Select Operator
- expressions: key (type: string)
- outputColumnNames: _col0
- Statistics: Num rows: 500 Data size: 5312 Basic stats:
COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 _col0 (type: string)
- 1 _col0 (type: string)
- outputColumnNames: _col1, _col2
- Statistics: Num rows: 550 Data size: 5843 Basic stats:
COMPLETE Column stats: NONE
- Select Operator
- expressions: _col1 (type: string), _col2 (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 550 Data size: 5843 Basic stats:
COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 550 Data size: 5843 Basic stats:
COMPLETE Column stats: NONE
- value expressions: _col1 (type: string)
- TableScan
- alias: y
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE
Column stats: NONE
- Filter Operator
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE
Column stats: NONE
- Select Operator
- expressions: key (type: string)
- outputColumnNames: _col0
- Statistics: Num rows: 500 Data size: 5312 Basic stats:
COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 _col0 (type: string)
- 1 _col0 (type: string)
- outputColumnNames: _col1
- Statistics: Num rows: 550 Data size: 5843 Basic stats:
COMPLETE Column stats: NONE
- Select Operator
- expressions: _col1 (type: string)
- outputColumnNames: _col0
- Statistics: Num rows: 550 Data size: 5843 Basic stats:
COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count(1)
- keys: _col0 (type: string)
- mode: hash
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 550 Data size: 5843 Basic stats:
COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 550 Data size: 5843 Basic stats:
COMPLETE Column stats: NONE
- value expressions: _col1 (type: bigint)
- Local Work:
- Map Reduce Local Work
- Reduce Operator Tree:
- Demux Operator
- Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE
Column stats: NONE
- Mux Operator
- Statistics: Num rows: 1650 Data size: 17529 Basic stats: COMPLETE
Column stats: NONE
- Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 _col0 (type: string)
- 1 _col0 (type: string)
- outputColumnNames: _col0, _col1, _col3
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column
stats: NONE
- Select Operator
- expressions: hash(_col0) (type: int), hash(_col3) (type: int),
hash(_col1) (type: int)
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column
stats: NONE
- Group By Operator
- aggregations: sum(_col0), sum(_col1), sum(_col2)
- mode: hash
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE
Column stats: NONE
- File Output Operator
- compressed: false
- table:
- input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
- output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde:
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
- Group By Operator
- aggregations: count(VALUE._col0)
- keys: KEY._col0 (type: string)
- mode: mergepartial
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE
Column stats: NONE
- Mux Operator
- Statistics: Num rows: 1650 Data size: 17529 Basic stats:
COMPLETE Column stats: NONE
- Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 _col0 (type: string)
- 1 _col0 (type: string)
- outputColumnNames: _col0, _col1, _col3
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column
stats: NONE
- Select Operator
- expressions: hash(_col0) (type: int), hash(_col3) (type:
int), hash(_col1) (type: int)
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE
Column stats: NONE
- Group By Operator
- aggregations: sum(_col0), sum(_col1), sum(_col2)
- mode: hash
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 1 Data size: 24 Basic stats:
COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- table:
- input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
- output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde:
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-
- Stage: Stage-3
- Map Reduce
- Map Operator Tree:
- TableScan
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE
Column stats: NONE
- value expressions: _col0 (type: bigint), _col1 (type: bigint),
_col2 (type: bigint)
- Reduce Operator Tree:
- Group By Operator
- aggregations: sum(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2)
- mode: mergepartial
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column
stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column
stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
- Stage: Stage-0
- Fetch Operator
- limit: -1
- Processor Tree:
- ListSink
-
-PREHOOK: query: SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)),
SUM(HASH(tmp.value))
-FROM (SELECT b.key AS key, b.cnt AS cnt, d.value AS value
- FROM (SELECT x.key, count(1) AS cnt FROM src1 x JOIN src y ON (x.key =
y.key) group by x.key) b
- JOIN (SELECT x.key, x.value FROM src1 x JOIN src y ON (x.key = y.key)) d
- ON b.key = d.key) tmp
-PREHOOK: type: QUERY
-PREHOOK: Input: default@src
-PREHOOK: Input: default@src1
-#### A masked pattern was here ####
-POSTHOOK: query: SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)),
SUM(HASH(tmp.value))
-FROM (SELECT b.key AS key, b.cnt AS cnt, d.value AS value
- FROM (SELECT x.key, count(1) AS cnt FROM src1 x JOIN src y ON (x.key =
y.key) group by x.key) b
- JOIN (SELECT x.key, x.value FROM src1 x JOIN src y ON (x.key = y.key)) d
- ON b.key = d.key) tmp
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@src
-POSTHOOK: Input: default@src1
-#### A masked pattern was here ####
-1711763 107 3531902962
-PREHOOK: query: EXPLAIN
-SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)), SUM(HASH(tmp.value))
-FROM (SELECT d.key AS key, d.cnt AS cnt, b.value as value
- FROM (SELECT x.key, x.value FROM src1 x JOIN src y ON (x.key = y.key)) b
- JOIN (SELECT x.key, count(1) AS cnt FROM src1 x JOIN src y ON (x.key =
y.key) group by x.key) d
- ON b.key = d.key) tmp
-PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN
-SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)), SUM(HASH(tmp.value))
-FROM (SELECT d.key AS key, d.cnt AS cnt, b.value as value
- FROM (SELECT x.key, x.value FROM src1 x JOIN src y ON (x.key = y.key)) b
- JOIN (SELECT x.key, count(1) AS cnt FROM src1 x JOIN src y ON (x.key =
y.key) group by x.key) d
- ON b.key = d.key) tmp
-POSTHOOK: type: QUERY
-STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-2 depends on stages: Stage-1, Stage-6
- Stage-3 depends on stages: Stage-2
- Stage-5 is a root stage
- Stage-6 depends on stages: Stage-5
- Stage-0 depends on stages: Stage-3
-
-STAGE PLANS:
- Stage: Stage-1
- Map Reduce
- Map Operator Tree:
- TableScan
- alias: y
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE
Column stats: NONE
- Filter Operator
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE
Column stats: NONE
- Select Operator
- expressions: key (type: string)
- outputColumnNames: _col0
- Statistics: Num rows: 500 Data size: 5312 Basic stats:
COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 500 Data size: 5312 Basic stats:
COMPLETE Column stats: NONE
- TableScan
- alias: x
- Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE
Column stats: NONE
- Filter Operator
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE
Column stats: NONE
- Select Operator
- expressions: key (type: string), value (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE
Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 25 Data size: 191 Basic stats:
COMPLETE Column stats: NONE
- value expressions: _col1 (type: string)
- Reduce Operator Tree:
- Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 _col0 (type: string)
- 1 _col0 (type: string)
- outputColumnNames: _col1, _col2
- Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE
Column stats: NONE
- Select Operator
- expressions: _col1 (type: string), _col2 (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE
Column stats: NONE
- File Output Operator
- compressed: false
- table:
- input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
- output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde:
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-
- Stage: Stage-2
- Map Reduce
- Map Operator Tree:
- TableScan
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE
Column stats: NONE
- value expressions: _col1 (type: string)
- TableScan
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE
Column stats: NONE
- value expressions: _col1 (type: bigint)
- Reduce Operator Tree:
- Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 _col0 (type: string)
- 1 _col0 (type: string)
- outputColumnNames: _col1, _col2, _col3
- Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE
Column stats: NONE
- Select Operator
- expressions: hash(_col2) (type: int), hash(_col3) (type: int),
hash(_col1) (type: int)
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE
Column stats: NONE
- Group By Operator
- aggregations: sum(_col0), sum(_col1), sum(_col2)
- mode: hash
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE
Column stats: NONE
- File Output Operator
- compressed: false
- table:
- input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
- output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde:
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-
- Stage: Stage-3
- Map Reduce
- Map Operator Tree:
- TableScan
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE
Column stats: NONE
- value expressions: _col0 (type: bigint), _col1 (type: bigint),
_col2 (type: bigint)
- Reduce Operator Tree:
- Group By Operator
- aggregations: sum(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2)
- mode: mergepartial
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column
stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column
stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
- Stage: Stage-5
- Map Reduce
- Map Operator Tree:
- TableScan
- alias: y
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE
Column stats: NONE
- Filter Operator
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE
Column stats: NONE
- Select Operator
- expressions: key (type: string)
- outputColumnNames: _col0
- Statistics: Num rows: 500 Data size: 5312 Basic stats:
COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 500 Data size: 5312 Basic stats:
COMPLETE Column stats: NONE
- TableScan
- alias: x
- Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE
Column stats: NONE
- Filter Operator
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE
Column stats: NONE
- Select Operator
- expressions: key (type: string)
- outputColumnNames: _col0
- Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE
Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 25 Data size: 191 Basic stats:
COMPLETE Column stats: NONE
- Reduce Operator Tree:
- Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 _col0 (type: string)
- 1 _col0 (type: string)
- outputColumnNames: _col1
- Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE
Column stats: NONE
- Select Operator
- expressions: _col1 (type: string)
- outputColumnNames: _col0
- Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE
Column stats: NONE
- Group By Operator
- aggregations: count(1)
- keys: _col0 (type: string)
- mode: hash
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE
Column stats: NONE
- File Output Operator
- compressed: false
- table:
- input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
- output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde:
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-
- Stage: Stage-6
- Map Reduce
- Map Operator Tree:
- TableScan
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE
Column stats: NONE
- value expressions: _col1 (type: bigint)
- Reduce Operator Tree:
- Group By Operator
- aggregations: count(VALUE._col0)
- keys: KEY._col0 (type: string)
- mode: mergepartial
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE
Column stats: NONE
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-
- Stage: Stage-0
- Fetch Operator
- limit: -1
- Processor Tree:
- ListSink
-
-PREHOOK: query: SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)),
SUM(HASH(tmp.value))
-FROM (SELECT d.key AS key, d.cnt AS cnt, b.value as value
- FROM (SELECT x.key, x.value FROM src1 x JOIN src y ON (x.key = y.key)) b
- JOIN (SELECT x.key, count(1) AS cnt FROM src1 x JOIN src y ON (x.key =
y.key) group by x.key) d
- ON b.key = d.key) tmp
-PREHOOK: type: QUERY
-PREHOOK: Input: default@src
-PREHOOK: Input: default@src1
-#### A masked pattern was here ####
-POSTHOOK: query: SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)),
SUM(HASH(tmp.value))
-FROM (SELECT d.key AS key, d.cnt AS cnt, b.value as value
- FROM (SELECT x.key, x.value FROM src1 x JOIN src y ON (x.key = y.key)) b
- JOIN (SELECT x.key, count(1) AS cnt FROM src1 x JOIN src y ON (x.key =
y.key) group by x.key) d
- ON b.key = d.key) tmp
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@src
-POSTHOOK: Input: default@src1
-#### A masked pattern was here ####
-1711763 107 3531902962
-PREHOOK: query: EXPLAIN
-SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)), SUM(HASH(tmp.value))
-FROM (SELECT d.key AS key, d.cnt AS cnt, b.value as value
- FROM (SELECT x.key, x.value FROM src1 x JOIN src y ON (x.key = y.key)) b
- JOIN (SELECT x.key, count(1) AS cnt FROM src1 x JOIN src y ON (x.key =
y.key) group by x.key) d
- ON b.key = d.key) tmp
-PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN
-SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)), SUM(HASH(tmp.value))
-FROM (SELECT d.key AS key, d.cnt AS cnt, b.value as value
- FROM (SELECT x.key, x.value FROM src1 x JOIN src y ON (x.key = y.key)) b
- JOIN (SELECT x.key, count(1) AS cnt FROM src1 x JOIN src y ON (x.key =
y.key) group by x.key) d
- ON b.key = d.key) tmp
-POSTHOOK: type: QUERY
-STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-2 depends on stages: Stage-1
- Stage-0 depends on stages: Stage-2
-
-STAGE PLANS:
- Stage: Stage-1
- Map Reduce
- Map Operator Tree:
- TableScan
- alias: y
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE
Column stats: NONE
- Filter Operator
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE
Column stats: NONE
- Select Operator
- expressions: key (type: string)
- outputColumnNames: _col0
- Statistics: Num rows: 500 Data size: 5312 Basic stats:
COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 500 Data size: 5312 Basic stats:
COMPLETE Column stats: NONE
- TableScan
- alias: x
- Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE
Column stats: NONE
- Filter Operator
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE
Column stats: NONE
- Select Operator
- expressions: key (type: string), value (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE
Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 25 Data size: 191 Basic stats:
COMPLETE Column stats: NONE
- value expressions: _col1 (type: string)
- TableScan
- alias: y
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE
Column stats: NONE
- Filter Operator
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE
Column stats: NONE
- Select Operator
- expressions: key (type: string)
- outputColumnNames: _col0
- Statistics: Num rows: 500 Data size: 5312 Basic stats:
COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 500 Data size: 5312 Basic stats:
COMPLETE Column stats: NONE
- TableScan
- alias: x
- Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE
Column stats: NONE
- Filter Operator
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE
Column stats: NONE
- Select Operator
- expressions: key (type: string)
- outputColumnNames: _col0
- Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE
Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 25 Data size: 191 Basic stats:
COMPLETE Column stats: NONE
- Reduce Operator Tree:
- Demux Operator
- Statistics: Num rows: 1050 Data size: 11006 Basic stats: COMPLETE
Column stats: NONE
- Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 _col0 (type: string)
- 1 _col0 (type: string)
- outputColumnNames: _col1, _col2
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column
stats: NONE
- Select Operator
- expressions: _col1 (type: string), _col2 (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column
stats: NONE
- Mux Operator
- Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL
Column stats: NONE
- Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 _col0 (type: string)
- 1 _col0 (type: string)
- outputColumnNames: _col1, _col2, _col3
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE
Column stats: NONE
- Select Operator
- expressions: hash(_col2) (type: int), hash(_col3) (type:
int), hash(_col1) (type: int)
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE
Column stats: NONE
- Group By Operator
- aggregations: sum(_col0), sum(_col1), sum(_col2)
- mode: hash
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 1 Data size: 24 Basic stats:
COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- table:
- input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
- output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde:
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
- Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 _col0 (type: string)
- 1 _col0 (type: string)
- outputColumnNames: _col1
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column
stats: NONE
- Select Operator
- expressions: _col1 (type: string)
- outputColumnNames: _col0
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column
stats: NONE
- Mux Operator
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column
stats: NONE
- Group By Operator
- aggregations: count(1)
- keys: _col0 (type: string)
- mode: complete
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL
Column stats: NONE
- Mux Operator
- Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL
Column stats: NONE
- Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 _col0 (type: string)
- 1 _col0 (type: string)
- outputColumnNames: _col1, _col2, _col3
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE
Column stats: NONE
- Select Operator
- expressions: hash(_col2) (type: int), hash(_col3)
(type: int), hash(_col1) (type: int)
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE
Column stats: NONE
- Group By Operator
- aggregations: sum(_col0), sum(_col1), sum(_col2)
- mode: hash
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 1 Data size: 24 Basic stats:
COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- table:
- input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
- output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde:
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-
- Stage: Stage-2
- Map Reduce
- Map Operator Tree:
- TableScan
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE
Column stats: NONE
- value expressions: _col0 (type: bigint), _col1 (type: bigint),
_col2 (type: bigint)
- Reduce Operator Tree:
- Group By Operator
- aggregations: sum(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2)
- mode: mergepartial
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column
stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column
stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
- Stage: Stage-0
- Fetch Operator
- limit: -1
- Processor Tree:
- ListSink
-
-PREHOOK: query: SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)),
SUM(HASH(tmp.value))
-FROM (SELECT d.key AS key, d.cnt AS cnt, b.value as value
- FROM (SELECT x.key, x.value FROM src1 x JOIN src y ON (x.key = y.key)) b
- JOIN (SELECT x.key, count(1) AS cnt FROM src1 x JOIN src y ON (x.key =
y.key) group by x.key) d
- ON b.key = d.key) tmp
-PREHOOK: type: QUERY
-PREHOOK: Input: default@src
-PREHOOK: Input: default@src1
-#### A masked pattern was here ####
-POSTHOOK: query: SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)),
SUM(HASH(tmp.value))
-FROM (SELECT d.key AS key, d.cnt AS cnt, b.value as value
- FROM (SELECT x.key, x.value FROM src1 x JOIN src y ON (x.key = y.key)) b
- JOIN (SELECT x.key, count(1) AS cnt FROM src1 x JOIN src y ON (x.key =
y.key) group by x.key) d
- ON b.key = d.key) tmp
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@src
-POSTHOOK: Input: default@src1
-#### A masked pattern was here ####
-1711763 107 3531902962
-PREHOOK: query: -- Enable hive.auto.convert.join.
-EXPLAIN
-SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)), SUM(HASH(tmp.value))
-FROM (SELECT d.key AS key, d.cnt AS cnt, b.value as value
- FROM (SELECT x.key, x.value FROM src1 x JOIN src y ON (x.key = y.key)) b
- JOIN (SELECT x.key, count(1) AS cnt FROM src1 x JOIN src y ON (x.key =
y.key) group by x.key) d
- ON b.key = d.key) tmp
-PREHOOK: type: QUERY
-POSTHOOK: query: -- Enable hive.auto.convert.join.
-EXPLAIN
-SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)), SUM(HASH(tmp.value))
-FROM (SELECT d.key AS key, d.cnt AS cnt, b.value as value
- FROM (SELECT x.key, x.value FROM src1 x JOIN src y ON (x.key = y.key)) b
- JOIN (SELECT x.key, count(1) AS cnt FROM src1 x JOIN src y ON (x.key =
y.key) group by x.key) d
- ON b.key = d.key) tmp
-POSTHOOK: type: QUERY
-STAGE DEPENDENCIES:
- Stage-9 is a root stage
- Stage-2 depends on stages: Stage-9
- Stage-3 depends on stages: Stage-2
- Stage-0 depends on stages: Stage-3
-
-STAGE PLANS:
- Stage: Stage-9
- Map Reduce Local Work
- Alias -> Map Local Tables:
- $hdt$_0:$hdt$_0:$hdt$_1:x
- Fetch Operator
- limit: -1
- $hdt$_0:$hdt$_1:$hdt$_1:$hdt$_2:x
- Fetch Operator
- limit: -1
- Alias -> Map Local Operator Tree:
- $hdt$_0:$hdt$_0:$hdt$_1:x
- TableScan
- alias: x
- Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE
Column stats: NONE
- Filter Operator
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE
Column stats: NONE
- Select Operator
- expressions: key (type: string), value (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE
Column stats: NONE
- HashTable Sink Operator
- keys:
- 0 _col0 (type: string)
- 1 _col0 (type: string)
- $hdt$_0:$hdt$_1:$hdt$_1:$hdt$_2:x
- TableScan
- alias: x
- Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE
Column stats: NONE
- Filter Operator
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE
Column stats: NONE
- Select Operator
- expressions: key (type: string)
- outputColumnNames: _col0
- Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE
Column stats: NONE
- HashTable Sink Operator
- keys:
- 0 _col0 (type: string)
- 1 _col0 (type: string)
-
- Stage: Stage-2
- Map Reduce
- Map Operator Tree:
- TableScan
- alias: y
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE
Column stats: NONE
- Filter Operator
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE
Column stats: NONE
- Select Operator
- expressions: key (type: string)
- outputColumnNames: _col0
- Statistics: Num rows: 500 Data size: 5312 Basic stats:
COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 _col0 (type: string)
- 1 _col0 (type: string)
- outputColumnNames: _col1, _col2
- Statistics: Num rows: 550 Data size: 5843 Basic stats:
COMPLETE Column stats: NONE
- Select Operator
- expressions: _col1 (type: string), _col2 (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 550 Data size: 5843 Basic stats:
COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 550 Data size: 5843 Basic stats:
COMPLETE Column stats: NONE
- value expressions: _col1 (type: string)
- TableScan
- alias: y
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE
Column stats: NONE
- Filter Operator
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE
Column stats: NONE
- Select Operator
- expressions: key (type: string)
- outputColumnNames: _col0
- Statistics: Num rows: 500 Data size: 5312 Basic stats:
COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 _col0 (type: string)
- 1 _col0 (type: string)
- outputColumnNames: _col1
- Statistics: Num rows: 550 Data size: 5843 Basic stats:
COMPLETE Column stats: NONE
- Select Operator
- expressions: _col1 (type: string)
- outputColumnNames: _col0
- Statistics: Num rows: 550 Data size: 5843 Basic stats:
COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count(1)
- keys: _col0 (type: string)
- mode: hash
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 550 Data size: 5843 Basic stats:
COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 550 Data size: 5843 Basic stats:
COMPLETE Column stats: NONE
- value expressions: _col1 (type: bigint)
- Local Work:
- Map Reduce Local Work
- Reduce Operator Tree:
- Demux Operator
- Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE
Column stats: NONE
- Mux Operator
- Statistics: Num rows: 1650 Data size: 17529 Basic stats: COMPLETE
Column stats: NONE
- Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 _col0 (type: string)
- 1 _col0 (type: string)
- outputColumnNames: _col1, _col2, _col3
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column
stats: NONE
- Select Operator
- expressions: hash(_col2) (type: int), hash(_col3) (type: int),
hash(_col1) (type: int)
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column
stats: NONE
- Group By Operator
- aggregations: sum(_col0), sum(_col1), sum(_col2)
- mode: hash
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE
Column stats: NONE
- File Output Operator
- compressed: false
- table:
- input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
- output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde:
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
- Group By Operator
- aggregations: count(VALUE._col0)
- keys: KEY._col0 (type: string)
- mode: mergepartial
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE
Column stats: NONE
- Mux Operator
- Statistics: Num rows: 1650 Data size: 17529 Basic stats:
COMPLETE Column stats: NONE
- Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 _col0 (type: string)
- 1 _col0 (type: string)
- outputColumnNames: _col1, _col2, _col3
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column
stats: NONE
- Select Operator
- expressions: hash(_col2) (type: int), hash(_col3) (type:
int), hash(_col1) (type: int)
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE
Column stats: NONE
- Group By Operator
- aggregations: sum(_col0), sum(_col1), sum(_col2)
- mode: hash
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 1 Data size: 24 Basic stats:
COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- table:
- input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
- output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde:
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-
- Stage: Stage-3
- Map Reduce
- Map Operator Tree:
- TableScan
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE
Column stats: NONE
- value expressions: _col0 (type: bigint), _col1 (type: bigint),
_col2 (type: bigint)
- Reduce Operator Tree:
- Group By Operator
- aggregations: sum(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2)
- mode: mergepartial
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column
stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column
stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
- Stage: Stage-0
- Fetch Operator
- limit: -1
- Processor Tree:
- ListSink
-
-PREHOOK: query: SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)),
SUM(HASH(tmp.value))
-FROM (SELECT d.key AS key, d.cnt AS cnt, b.value as value
- FROM (SELECT x.key, x.value FROM src1 x JOIN src y ON (x.key = y.key)) b
- JOIN (SELECT x.key, count(1) AS cnt FROM src1 x JOIN src y ON (x.key =
y.key) group by x.key) d
- ON b.key = d.key) tmp
-PREHOOK: type: QUERY
-PREHOOK: Input: default@src
-PREHOOK: Input: default@src1
-#### A masked pattern was here ####
-POSTHOOK: query: SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)),
SUM(HASH(tmp.value))
-FROM (SELECT d.key AS key, d.cnt AS cnt, b.value as value
- FROM (SELECT x.key, x.value FROM src1 x JOIN src y ON (x.key = y.key)) b
- JOIN (SELECT x.key, count(1) AS cnt FROM src1 x JOIN src y ON (x.key =
y.key) group by x.key) d
- ON b.key = d.key) tmp
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@src
-POSTHOOK: Input: default@src1
-#### A masked pattern was here ####
-1711763 107 3531902962