http://git-wip-us.apache.org/repos/asf/hive/blob/3bbc24d2/ql/src/test/results/clientpositive/llap/bucketmapjoin2.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/bucketmapjoin2.q.out b/ql/src/test/results/clientpositive/llap/bucketmapjoin2.q.out index 57eead0..14f1efe 100644 --- a/ql/src/test/results/clientpositive/llap/bucketmapjoin2.q.out +++ b/ql/src/test/results/clientpositive/llap/bucketmapjoin2.q.out @@ -111,7 +111,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -192,7 +193,7 @@ STAGE PLANS: name: default.srcbucket_mapjoin_part Truncated Path -> Alias: /srcbucket_mapjoin_part/ds=2008-04-08 [a] - Map 3 + Map 4 Map Operator Tree: TableScan alias: b @@ -319,6 +320,53 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value1, value2 + Statistics: Num rows: 58 Data size: 16921 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value1, 'hll'), compute_stats(value2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col1 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col2 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>) + auto parallelism: false + Reducer 3 + Execution mode: llap + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>:struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>:struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary> + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection @@ -355,6 +403,11 @@ STAGE PLANS: Stats Work Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + Columns: key, value1, value2 + Column Types: string, string, string + Table: default.bucketmapjoin_tmp_result + Is Table Level Stats: true PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result select /*+mapjoin(b)*/ a.key, a.value, b.value @@ -484,7 +537,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -565,7 +619,7 @@ STAGE PLANS: name: default.srcbucket_mapjoin_part Truncated Path -> Alias: /srcbucket_mapjoin_part/ds=2008-04-08 [a] - Map 3 + Map 4 Map Operator Tree: TableScan alias: b @@ -671,7 +725,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}} bucket_count -1 column.name.delimiter , columns key,value1,value2 @@ -692,6 +746,53 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value1, value2 + Statistics: Num rows: 58 Data size: 16921 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value1, 'hll'), compute_stats(value2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col1 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col2 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>) + auto parallelism: false + Reducer 3 + Execution mode: llap + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>:struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>:struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary> + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection @@ -705,7 +806,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}} bucket_count -1 column.name.delimiter , columns key,value1,value2 @@ -728,6 +829,11 @@ STAGE PLANS: Stats Work Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + Columns: key, value1, value2 + Column Types: string, string, string + Table: default.bucketmapjoin_tmp_result + Is Table Level Stats: true PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result select /*+mapjoin(a)*/ a.key, a.value, b.value @@ -874,7 +980,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -955,7 +1062,7 @@ STAGE PLANS: name: default.srcbucket_mapjoin_part Truncated Path -> Alias: /srcbucket_mapjoin_part/ds=2008-04-08 [a] - Map 3 + Map 4 Map Operator Tree: TableScan alias: b @@ -1110,7 +1217,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}} bucket_count -1 column.name.delimiter , columns key,value1,value2 @@ -1131,6 +1238,53 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value1, value2 + Statistics: Num rows: 61 Data size: 17884 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value1, 'hll'), compute_stats(value2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col1 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col2 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>) + auto parallelism: false + Reducer 3 + Execution mode: llap + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>:struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>:struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary> + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection @@ -1144,7 +1298,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}} bucket_count -1 column.name.delimiter , columns key,value1,value2 @@ -1167,6 +1321,11 @@ STAGE PLANS: Stats Work Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + Columns: key, value1, value2 + Column Types: string, string, string + Table: default.bucketmapjoin_tmp_result + Is Table Level Stats: true PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result select /*+mapjoin(b)*/ a.key, a.value, b.value
http://git-wip-us.apache.org/repos/asf/hive/blob/3bbc24d2/ql/src/test/results/clientpositive/llap/bucketmapjoin3.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/bucketmapjoin3.q.out b/ql/src/test/results/clientpositive/llap/bucketmapjoin3.q.out index 184e890..161631a 100644 --- a/ql/src/test/results/clientpositive/llap/bucketmapjoin3.q.out +++ b/ql/src/test/results/clientpositive/llap/bucketmapjoin3.q.out @@ -135,7 +135,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -216,7 +217,7 @@ STAGE PLANS: name: default.srcbucket_mapjoin_part_2 Truncated Path -> Alias: /srcbucket_mapjoin_part_2/ds=2008-04-08 [a] - Map 3 + Map 4 Map Operator Tree: TableScan alias: b @@ -343,6 +344,53 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value1, value2 + Statistics: Num rows: 58 Data size: 16921 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value1, 'hll'), compute_stats(value2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col1 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col2 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>) + auto parallelism: false + Reducer 3 + Execution mode: llap + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>:struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>:struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary> + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection @@ -379,6 +427,11 @@ STAGE PLANS: Stats Work Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + Columns: key, value1, value2 + Column Types: string, string, string + Table: default.bucketmapjoin_tmp_result + Is Table Level Stats: true PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result select /*+mapjoin(b)*/ a.key, a.value, b.value @@ -508,7 +561,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -589,7 +643,7 @@ STAGE PLANS: name: default.srcbucket_mapjoin_part_2 Truncated Path -> Alias: /srcbucket_mapjoin_part_2/ds=2008-04-08 [a] - Map 3 + Map 4 Map Operator Tree: TableScan alias: b @@ -695,7 +749,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}} bucket_count -1 column.name.delimiter , columns key,value1,value2 @@ -716,6 +770,53 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value1, value2 + Statistics: Num rows: 58 Data size: 16921 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value1, 'hll'), compute_stats(value2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col1 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col2 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>) + auto parallelism: false + Reducer 3 + Execution mode: llap + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>:struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>:struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary> + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection @@ -729,7 +830,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}} bucket_count -1 column.name.delimiter , columns key,value1,value2 @@ -752,6 +853,11 @@ STAGE PLANS: Stats Work Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + Columns: key, value1, value2 + Column Types: string, string, string + Table: default.bucketmapjoin_tmp_result + Is Table Level Stats: true PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result select /*+mapjoin(a)*/ a.key, a.value, b.value http://git-wip-us.apache.org/repos/asf/hive/blob/3bbc24d2/ql/src/test/results/clientpositive/llap/bucketmapjoin4.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/bucketmapjoin4.q.out b/ql/src/test/results/clientpositive/llap/bucketmapjoin4.q.out index b353073..c07f722 100644 --- a/ql/src/test/results/clientpositive/llap/bucketmapjoin4.q.out +++ b/ql/src/test/results/clientpositive/llap/bucketmapjoin4.q.out @@ -135,7 +135,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -214,7 +215,7 @@ STAGE PLANS: name: default.srcbucket_mapjoin Truncated Path -> Alias: /srcbucket_mapjoin [a] - Map 3 + Map 4 Map Operator Tree: TableScan alias: b @@ -339,6 +340,53 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value1, value2 + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value1, 'hll'), compute_stats(value2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col1 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col2 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>) + auto parallelism: false + Reducer 3 + Execution mode: llap + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>:struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>:struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary> + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection @@ -375,6 +423,11 @@ STAGE PLANS: Stats Work Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + Columns: key, value1, value2 + Column Types: string, string, string + Table: default.bucketmapjoin_tmp_result + Is Table Level Stats: true PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result select /*+mapjoin(b)*/ a.key, a.value, b.value @@ -492,7 +545,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -571,7 +625,7 @@ STAGE PLANS: name: default.srcbucket_mapjoin Truncated Path -> Alias: /srcbucket_mapjoin [a] - Map 3 + Map 4 Map Operator Tree: TableScan alias: b @@ -675,7 +729,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}} bucket_count -1 column.name.delimiter , columns key,value1,value2 @@ -696,6 +750,53 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value1, value2 + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value1, 'hll'), compute_stats(value2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col1 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col2 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>) + auto parallelism: false + Reducer 3 + Execution mode: llap + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>:struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>:struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary> + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection @@ -709,7 +810,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}} bucket_count -1 column.name.delimiter , columns key,value1,value2 @@ -732,6 +833,11 @@ STAGE PLANS: Stats Work Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + Columns: key, value1, value2 + Column Types: string, string, string + Table: default.bucketmapjoin_tmp_result + Is Table Level Stats: true PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result select /*+mapjoin(a)*/ a.key, a.value, b.value http://git-wip-us.apache.org/repos/asf/hive/blob/3bbc24d2/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_2.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_2.q.out b/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_2.q.out index b907c2d..ce24832 100644 --- a/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_2.q.out +++ b/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_2.q.out @@ -101,25 +101,26 @@ STAGE PLANS: Edges: Map 2 <- Map 1 (BROADCAST_EDGE) Reducer 3 <- Map 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 1950 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 950 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 1950 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 950 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 1950 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1800 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 10 Data size: 1950 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1800 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs @@ -127,14 +128,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 84 Data size: 15964 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 84 Data size: 7896 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 80 Data size: 15203 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 84 Data size: 7896 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 80 Data size: 15203 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 84 Data size: 15036 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -144,16 +145,16 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col4 input vertices: 0 Map 1 - Statistics: Num rows: 88 Data size: 16723 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 84 Data size: 15540 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), concat(_col1, _col4) (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 88 Data size: 16723 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 84 Data size: 15792 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 88 Data size: 16723 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 84 Data size: 15792 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs @@ -163,15 +164,51 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 88 Data size: 16723 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 84 Data size: 15792 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 88 Data size: 16723 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 84 Data size: 15792 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table3 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 84 Data size: 22932 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col2 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>) + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col2 (type: struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -191,6 +228,10 @@ STAGE PLANS: Stage: Stage-3 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table3 PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT a.key, concat(a.value, b.value) @@ -281,25 +322,26 @@ STAGE PLANS: Edges: Map 2 <- Map 1 (BROADCAST_EDGE) Reducer 3 <- Map 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: a - Statistics: Num rows: 20 Data size: 3900 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 1900 Basic stats: COMPLETE Column stats: PARTIAL Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 19 Data size: 3705 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 1900 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 19 Data size: 3705 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 5580 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 19 Data size: 3705 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 5580 Basic stats: COMPLETE Column stats: PARTIAL value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs @@ -307,14 +349,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 84 Data size: 15964 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 84 Data size: 7896 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 80 Data size: 15203 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 84 Data size: 7896 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 80 Data size: 15203 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 84 Data size: 15036 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -324,16 +366,16 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col4 input vertices: 0 Map 1 - Statistics: Num rows: 88 Data size: 16723 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 84 Data size: 15540 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: _col0 (type: int), concat(_col1, _col4) (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 88 Data size: 16723 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 84 Data size: 15792 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 88 Data size: 16723 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 84 Data size: 15792 Basic stats: COMPLETE Column stats: PARTIAL value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs @@ -343,15 +385,51 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 88 Data size: 16723 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 84 Data size: 15792 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false - Statistics: Num rows: 88 Data size: 16723 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 84 Data size: 15792 Basic stats: COMPLETE Column stats: PARTIAL table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table3 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 84 Data size: 22932 Basic stats: COMPLETE Column stats: PARTIAL + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col2 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>) + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: PARTIAL + Select Operator + expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col2 (type: struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -371,6 +449,10 @@ STAGE PLANS: Stage: Stage-3 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table3 PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT a.key, concat(a.value, b.value) @@ -485,25 +567,26 @@ STAGE PLANS: Edges: Map 2 <- Map 1 (BROADCAST_EDGE) Reducer 3 <- Map 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 1950 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 950 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 1950 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 950 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 1950 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1800 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 10 Data size: 1950 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1800 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs @@ -511,14 +594,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 168 Data size: 31740 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 168 Data size: 15792 Basic stats: COMPLETE Column stats: PARTIAL Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 160 Data size: 30228 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 168 Data size: 15792 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 160 Data size: 30228 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 168 Data size: 46704 Basic stats: COMPLETE Column stats: PARTIAL Map Join Operator condition map: Inner Join 0 to 1 @@ -528,16 +611,16 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col4 input vertices: 0 Map 1 - Statistics: Num rows: 176 Data size: 33250 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 168 Data size: 31080 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: _col0 (type: int), concat(_col1, _col4) (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 176 Data size: 33250 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 168 Data size: 31584 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 176 Data size: 33250 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 168 Data size: 31584 Basic stats: COMPLETE Column stats: PARTIAL value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs @@ -547,15 +630,51 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 176 Data size: 33250 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 168 Data size: 31584 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false - Statistics: Num rows: 176 Data size: 33250 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 168 Data size: 31584 Basic stats: COMPLETE Column stats: PARTIAL table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table3 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 168 Data size: 45864 Basic stats: COMPLETE Column stats: PARTIAL + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col2 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>) + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: PARTIAL + Select Operator + expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col2 (type: struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -575,6 +694,10 @@ STAGE PLANS: Stage: Stage-3 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table3 PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT a.key, concat(a.value, b.value) @@ -695,25 +818,26 @@ STAGE PLANS: Edges: Map 2 <- Map 1 (BROADCAST_EDGE) Reducer 3 <- Map 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: test_table1 - Statistics: Num rows: 10 Data size: 1950 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 950 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 1950 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 950 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 1950 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 950 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 10 Data size: 1950 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 950 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs @@ -721,14 +845,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test_table2 - Statistics: Num rows: 84 Data size: 15964 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 84 Data size: 7896 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 80 Data size: 15203 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 84 Data size: 7896 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 80 Data size: 15203 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 84 Data size: 7896 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -738,16 +862,16 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col3 input vertices: 0 Map 1 - Statistics: Num rows: 88 Data size: 16723 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 84 Data size: 15540 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), concat(_col1, _col3) (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 88 Data size: 16723 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 84 Data size: 15792 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 88 Data size: 16723 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 84 Data size: 15792 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs @@ -757,15 +881,51 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 88 Data size: 16723 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 84 Data size: 15792 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 88 Data size: 16723 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 84 Data size: 15792 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table3 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 84 Data size: 22932 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col2 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>) + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col2 (type: struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -785,6 +945,10 @@ STAGE PLANS: Stage: Stage-3 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table3 PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT a.key, concat(a.value, b.value) @@ -887,25 +1051,26 @@ STAGE PLANS: Edges: Map 2 <- Map 1 (BROADCAST_EDGE) Reducer 3 <- Map 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: test_table1 - Statistics: Num rows: 10 Data size: 1950 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 950 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 1950 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 950 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), concat(value, value) (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 1950 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1880 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 10 Data size: 1950 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1880 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs @@ -913,14 +1078,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test_table2 - Statistics: Num rows: 84 Data size: 15964 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 84 Data size: 7896 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 80 Data size: 15203 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 84 Data size: 7896 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), concat(value, value) (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 80 Data size: 15203 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 84 Data size: 15792 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -930,16 +1095,16 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col3 input vertices: 0 Map 1 - Statistics: Num rows: 88 Data size: 16723 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 84 Data size: 31248 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), concat(_col1, _col3) (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 88 Data size: 16723 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 84 Data size: 15792 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 88 Data size: 16723 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 84 Data size: 15792 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs @@ -949,15 +1114,51 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 88 Data size: 16723 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 84 Data size: 15792 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 88 Data size: 16723 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 84 Data size: 15792 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table3 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 84 Data size: 22932 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col2 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>) + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col2 (type: struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -977,6 +1178,10 @@ STAGE PLANS: Stage: Stage-3 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table3 PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT a.key, concat(a.v1, b.v2) @@ -1079,25 +1284,26 @@ STAGE PLANS: Edges: Map 2 <- Map 1 (BROADCAST_EDGE) Reducer 3 <- Map 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: test_table1 - Statistics: Num rows: 10 Data size: 1950 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 950 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 1950 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 950 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 1950 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 950 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 10 Data size: 1950 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 950 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs @@ -1105,14 +1311,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test_table2 - Statistics: Num rows: 84 Data size: 15964 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 84 Data size: 7896 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 80 Data size: 15203 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 84 Data size: 7896 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 80 Data size: 15203 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 84 Data size: 7896 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -1122,16 +1328,16 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col3 input vertices: 0 Map 1 - Statistics: Num rows: 88 Data size: 16723 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 84 Data size: 15540 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: (_col0 + _col0) (type: int), concat(_col1, _col3) (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 88 Data size: 16723 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 84 Data size: 15792 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 88 Data size: 16723 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 84 Data size: 15792 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs @@ -1141,15 +1347,51 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 88 Data size: 16723 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 84 Data size: 15792 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 88 Data size: 16723 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 84 Data size: 15792 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table3 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 84 Data size: 22932 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col2 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>) + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col2 (type: struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -1169,6 +1411,10 @@ STAGE PLANS: Stage: Stage-3 Stats Work Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table3 PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT a.key+a.key, concat(a.value, b.value)