This is an automated email from the ASF dual-hosted git repository.

kgyrtkirk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git
commit 39faad1dae7316b1f29b6c5589a3b8edc54092f7 Author: Zoltan Haindrich <k...@rxd.hu> AuthorDate: Sat May 9 08:50:22 2020 +0000 HIVE-23368: MV rebuild should produce the same view as the one configured at creation time (Zoltan Haindrich reviewed by Jesus Camacho Rodriguez) Signed-off-by: Zoltan Haindrich <k...@rxd.hu> --- .../test/resources/testconfiguration.properties | 1 + .../hadoop/hive/ql/parse/CalcitePlanner.java | 12 +- .../sketches_materialized_view_safety.q | 38 ++ .../llap/sketches_materialized_view_safety.q.out | 519 +++++++++++++++++++++ 4 files changed, 569 insertions(+), 1 deletion(-) diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties index 2036f29..cf3bc5c 100644 --- a/itests/src/test/resources/testconfiguration.properties +++ b/itests/src/test/resources/testconfiguration.properties @@ -830,6 +830,7 @@ minillaplocal.query.files=\ sketches_rewrite.q,\ sketches_materialized_view_rollup.q,\ sketches_materialized_view_rollup2.q,\ + sketches_materialized_view_safety.q,\ table_access_keys_stats.q,\ temp_table_llap_partitioned.q,\ tez_bmj_schema_evolution.q,\ diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java index bf08306..085de48 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java @@ -1970,7 +1970,8 @@ public class CalcitePlanner extends SemanticAnalyzer { HiveExceptRewriteRule.INSTANCE); //1. 
Distinct aggregate rewrite - if (conf.getBoolVar(ConfVars.HIVE_OPTIMIZE_BI_ENABLED)) { + + if (!isMaterializedViewMaintenance() && conf.getBoolVar(ConfVars.HIVE_OPTIMIZE_BI_ENABLED)) { // Rewrite to datasketches if enabled if (conf.getBoolVar(ConfVars.HIVE_OPTIMIZE_BI_REWRITE_COUNTDISTINCT_ENABLED)) { String sketchClass = conf.getVar(ConfVars.HIVE_OPTIMIZE_BI_REWRITE_COUNT_DISTINCT_SKETCH); @@ -2106,6 +2107,15 @@ public class CalcitePlanner extends SemanticAnalyzer { return basePlan; } + /** + * Returns true if MV is being loaded, constructed or being rebuilt. + */ + private boolean isMaterializedViewMaintenance() { + return mvRebuildMode != MaterializationRebuildMode.NONE + || ctx.isLoadingMaterializedView() + || getQB().isMaterializedView(); + } + private RelNode applyMaterializedViewRewriting(RelOptPlanner planner, RelNode basePlan, RelMetadataProvider mdProvider, RexExecutor executorProvider) { final RelOptCluster optCluster = basePlan.getCluster(); diff --git a/ql/src/test/queries/clientpositive/sketches_materialized_view_safety.q b/ql/src/test/queries/clientpositive/sketches_materialized_view_safety.q new file mode 100644 index 0000000..620cbb7 --- /dev/null +++ b/ql/src/test/queries/clientpositive/sketches_materialized_view_safety.q @@ -0,0 +1,38 @@ +--! 
qt:transactional +set hive.fetch.task.conversion=none; +set hive.optimize.bi.enabled=true; + +create table sketch_input (id int, category char(1)) +STORED AS ORC +TBLPROPERTIES ('transactional'='true'); + +insert into table sketch_input values + (1,'a'),(1, 'a'), (2, 'a'), (3, 'a'), (4, 'a'), (5, 'a'), (6, 'a'), (7, 'a'), (8, 'a'), (9, 'a'), (10, 'a'), + (6,'b'),(6, 'b'), (7, 'b'), (8, 'b'), (9, 'b'), (10, 'b'), (11, 'b'), (12, 'b'), (13, 'b'), (14, 'b'), (15, 'b') +; + +explain +create materialized view mv_1 as + select 'no-rewrite-may-happen',category, count(distinct id) from sketch_input group by category; +create materialized view mv_1 as + select 'no-rewrite-may-happen',category, count(distinct id) from sketch_input group by category; + +insert into table sketch_input values + (1,'a'),(1, 'a'), (2, 'a'), (3, 'a'), (4, 'a'), (5, 'a'), (6, 'a'), (7, 'a'), (8, 'a'), (9, 'a'), (10, 'a'), + (6,'b'),(6, 'b'), (7, 'b'), (8, 'b'), (9, 'b'), (10, 'b'), (11, 'b'), (12, 'b'), (13, 'b'), (14, 'b'), (15, 'b') +; + +explain +alter materialized view mv_1 rebuild; +alter materialized view mv_1 rebuild; + +-- see if we use the mv +explain +select 'rewritten;mv not used',category, count(distinct id) from sketch_input group by category; +select 'rewritten;mv not used',category, count(distinct id) from sketch_input group by category; + +set hive.optimize.bi.enabled=false; + +explain +select 'mv used',category, count(distinct id) from sketch_input group by category; +select 'mv used',category, count(distinct id) from sketch_input group by category; diff --git a/ql/src/test/results/clientpositive/llap/sketches_materialized_view_safety.q.out b/ql/src/test/results/clientpositive/llap/sketches_materialized_view_safety.q.out new file mode 100644 index 0000000..959edfc --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/sketches_materialized_view_safety.q.out @@ -0,0 +1,519 @@ +PREHOOK: query: create table sketch_input (id int, category char(1)) +STORED AS ORC +TBLPROPERTIES 
('transactional'='true') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@sketch_input +POSTHOOK: query: create table sketch_input (id int, category char(1)) +STORED AS ORC +TBLPROPERTIES ('transactional'='true') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@sketch_input +PREHOOK: query: insert into table sketch_input values + (1,'a'),(1, 'a'), (2, 'a'), (3, 'a'), (4, 'a'), (5, 'a'), (6, 'a'), (7, 'a'), (8, 'a'), (9, 'a'), (10, 'a'), + (6,'b'),(6, 'b'), (7, 'b'), (8, 'b'), (9, 'b'), (10, 'b'), (11, 'b'), (12, 'b'), (13, 'b'), (14, 'b'), (15, 'b') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@sketch_input +POSTHOOK: query: insert into table sketch_input values + (1,'a'),(1, 'a'), (2, 'a'), (3, 'a'), (4, 'a'), (5, 'a'), (6, 'a'), (7, 'a'), (8, 'a'), (9, 'a'), (10, 'a'), + (6,'b'),(6, 'b'), (7, 'b'), (8, 'b'), (9, 'b'), (10, 'b'), (11, 'b'), (12, 'b'), (13, 'b'), (14, 'b'), (15, 'b') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@sketch_input +POSTHOOK: Lineage: sketch_input.category SCRIPT [] +POSTHOOK: Lineage: sketch_input.id SCRIPT [] +PREHOOK: query: explain +create materialized view mv_1 as + select 'no-rewrite-may-happen',category, count(distinct id) from sketch_input group by category +PREHOOK: type: CREATE_MATERIALIZED_VIEW +PREHOOK: Input: default@sketch_input +PREHOOK: Output: database:default +PREHOOK: Output: default@mv_1 +POSTHOOK: query: explain +create materialized view mv_1 as + select 'no-rewrite-may-happen',category, count(distinct id) from sketch_input group by category +POSTHOOK: type: CREATE_MATERIALIZED_VIEW +POSTHOOK: Input: default@sketch_input +POSTHOOK: Output: database:default +POSTHOOK: Output: default@mv_1 +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-4 depends on stages: Stage-0, Stage-2 + Stage-3 depends on 
stages: Stage-4 + Stage-5 depends on stages: Stage-3 + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: sketch_input + Statistics: Num rows: 22 Data size: 1958 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: id (type: int), category (type: char(1)) + outputColumnNames: id, category + Statistics: Num rows: 22 Data size: 1958 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: category (type: char(1)), id (type: int) + minReductionHashAggr: 0.3181818 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 11 Data size: 979 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: char(1)), _col1 (type: int) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col0 (type: char(1)) + Statistics: Num rows: 11 Data size: 979 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: may be used (ACID table) + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: char(1)), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 11 Data size: 979 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: int), _col0 (type: char(1)) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 11 Data size: 979 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count(_col0) + keys: _col1 (type: char(1)) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 186 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'no-rewrite-may-happen' (type: string), _col0 (type: 
char(1)), _col1 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.mv_1 + Select Operator + expressions: _col0 (type: string), _col1 (type: char(1)), _col2 (type: bigint) + outputColumnNames: col1, col2, col3 + Statistics: Num rows: 2 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(col1, 'hll'), compute_stats(col2, 'hll'), compute_stats(col3, 'hll') + minReductionHashAggr: 0.5 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col1 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col2 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-4 + Create View + columns: _c0 string, category char(1), _c2 bigint + expanded text: select 'no-rewrite-may-happen',`sketch_input`.`category`, count(distinct `sketch_input`.`id`) from `default`.`sketch_input` group by `sketch_input`.`category` + name: default.mv_1 + original text: select 'no-rewrite-may-happen',category, count(distinct id) from sketch_input group by category + rewrite enabled: true + + Stage: Stage-3 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: _c0, category, _c2 + Column Types: string, char(1), bigint + Table: default.mv_1 + + Stage: Stage-5 + Materialized View Update + name: default.mv_1 + retrieve and include: true + + Stage: Stage-0 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + +PREHOOK: query: create materialized view mv_1 as + select 'no-rewrite-may-happen',category, count(distinct id) from sketch_input group by category +PREHOOK: type: CREATE_MATERIALIZED_VIEW +PREHOOK: Input: default@sketch_input +PREHOOK: Output: database:default +PREHOOK: Output: default@mv_1 +POSTHOOK: query: create materialized view mv_1 as + select 'no-rewrite-may-happen',category, count(distinct id) from sketch_input group by category +POSTHOOK: type: CREATE_MATERIALIZED_VIEW +POSTHOOK: Input: default@sketch_input +POSTHOOK: Output: database:default +POSTHOOK: Output: default@mv_1 +PREHOOK: query: insert into table sketch_input values + (1,'a'),(1, 'a'), (2, 'a'), (3, 'a'), (4, 'a'), (5, 'a'), (6, 'a'), (7, 'a'), (8, 'a'), (9, 'a'), (10, 'a'), + (6,'b'),(6, 'b'), (7, 'b'), (8, 'b'), (9, 'b'), (10, 'b'), (11, 'b'), (12, 'b'), (13, 'b'), (14, 'b'), (15, 'b') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@sketch_input 
+POSTHOOK: query: insert into table sketch_input values + (1,'a'),(1, 'a'), (2, 'a'), (3, 'a'), (4, 'a'), (5, 'a'), (6, 'a'), (7, 'a'), (8, 'a'), (9, 'a'), (10, 'a'), + (6,'b'),(6, 'b'), (7, 'b'), (8, 'b'), (9, 'b'), (10, 'b'), (11, 'b'), (12, 'b'), (13, 'b'), (14, 'b'), (15, 'b') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@sketch_input +POSTHOOK: Lineage: sketch_input.category SCRIPT [] +POSTHOOK: Lineage: sketch_input.id SCRIPT [] +PREHOOK: query: explain +alter materialized view mv_1 rebuild +PREHOOK: type: QUERY +PREHOOK: Input: default@sketch_input +PREHOOK: Output: default@mv_1 +POSTHOOK: query: explain +alter materialized view mv_1 rebuild +POSTHOOK: type: QUERY +POSTHOOK: Input: default@sketch_input +POSTHOOK: Output: default@mv_1 +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: sketch_input + Statistics: Num rows: 44 Data size: 3916 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: id (type: int), category (type: char(1)) + outputColumnNames: id, category + Statistics: Num rows: 44 Data size: 3916 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: category (type: char(1)), id (type: int) + minReductionHashAggr: 0.52272725 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 22 Data size: 1958 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: char(1)), _col1 (type: int) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col0 (type: char(1)) + Statistics: Num rows: 
22 Data size: 1958 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: may be used (ACID table) + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: char(1)), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 22 Data size: 1958 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: int), _col0 (type: char(1)) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 22 Data size: 1958 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count(_col0) + keys: _col1 (type: char(1)) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 186 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'no-rewrite-may-happen' (type: string), _col0 (type: char(1)), _col1 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.mv_1 + Select Operator + expressions: _col0 (type: string), _col1 (type: char(1)), _col2 (type: bigint) + outputColumnNames: _c0, category, _c2 + Statistics: Num rows: 2 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(_c0, 'hll'), compute_stats(category, 'hll'), compute_stats(_c2, 'hll') + minReductionHashAggr: 0.5 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: 
Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col1 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col2 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.mv_1 + + Stage: Stage-3 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: _c0, category, _c2 + Column Types: string, char(1), bigint + Table: default.mv_1 + + Stage: Stage-4 + Materialized View Update + name: default.mv_1 + update creation metadata: true + +PREHOOK: query: alter materialized view mv_1 rebuild +PREHOOK: type: QUERY +PREHOOK: Input: default@sketch_input +PREHOOK: Output: default@mv_1 +POSTHOOK: query: alter materialized view mv_1 rebuild +POSTHOOK: type: QUERY +POSTHOOK: Input: default@sketch_input +POSTHOOK: Output: default@mv_1 +POSTHOOK: Lineage: 
mv_1._c0 SIMPLE [] +POSTHOOK: Lineage: mv_1._c2 EXPRESSION [(sketch_input)sketch_input.FieldSchema(name:id, type:int, comment:null), ] +POSTHOOK: Lineage: mv_1.category SIMPLE [(sketch_input)sketch_input.FieldSchema(name:category, type:char(1), comment:null), ] +PREHOOK: query: explain +select 'rewritten;mv not used',category, count(distinct id) from sketch_input group by category +PREHOOK: type: QUERY +PREHOOK: Input: default@sketch_input +#### A masked pattern was here #### +POSTHOOK: query: explain +select 'rewritten;mv not used',category, count(distinct id) from sketch_input group by category +POSTHOOK: type: QUERY +POSTHOOK: Input: default@sketch_input +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: sketch_input + Statistics: Num rows: 44 Data size: 3916 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: id (type: int), category (type: char(1)) + outputColumnNames: id, category + Statistics: Num rows: 44 Data size: 3916 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: ds_hll_sketch(id) + keys: category (type: char(1)) + minReductionHashAggr: 0.95454544 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 946 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: char(1)) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: char(1)) + Statistics: Num rows: 2 Data size: 946 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: struct<lgk:int,type:string,sketch:binary>) + Execution mode: llap + LLAP IO: may be used (ACID table) + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + 
Group By Operator + aggregations: ds_hll_sketch(VALUE._col0) + keys: KEY._col0 (type: char(1)) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 458 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'rewritten;mv not used' (type: string), _col0 (type: char(1)), UDFToLong(ds_hll_estimate(_col1)) (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select 'rewritten;mv not used',category, count(distinct id) from sketch_input group by category +PREHOOK: type: QUERY +PREHOOK: Input: default@sketch_input +#### A masked pattern was here #### +POSTHOOK: query: select 'rewritten;mv not used',category, count(distinct id) from sketch_input group by category +POSTHOOK: type: QUERY +POSTHOOK: Input: default@sketch_input +#### A masked pattern was here #### +rewritten;mv not used a 10 +rewritten;mv not used b 10 +PREHOOK: query: explain +select 'mv used',category, count(distinct id) from sketch_input group by category +PREHOOK: type: QUERY +PREHOOK: Input: default@sketch_input +#### A masked pattern was here #### +POSTHOOK: query: explain +select 'mv used',category, count(distinct id) from sketch_input group by category +POSTHOOK: type: QUERY +POSTHOOK: Input: default@sketch_input +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 
<- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: sketch_input + Statistics: Num rows: 44 Data size: 3916 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: id (type: int), category (type: char(1)) + outputColumnNames: id, category + Statistics: Num rows: 44 Data size: 3916 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: category (type: char(1)), id (type: int) + minReductionHashAggr: 0.52272725 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 22 Data size: 1958 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: char(1)), _col1 (type: int) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col0 (type: char(1)) + Statistics: Num rows: 22 Data size: 1958 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: may be used (ACID table) + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: char(1)), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 22 Data size: 1958 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: int), _col0 (type: char(1)) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 22 Data size: 1958 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count(_col0) + keys: _col1 (type: char(1)) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 186 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'mv used' (type: string), _col0 (type: char(1)), _col1 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 2 
Data size: 368 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select 'mv used',category, count(distinct id) from sketch_input group by category +PREHOOK: type: QUERY +PREHOOK: Input: default@sketch_input +#### A masked pattern was here #### +POSTHOOK: query: select 'mv used',category, count(distinct id) from sketch_input group by category +POSTHOOK: type: QUERY +POSTHOOK: Input: default@sketch_input +#### A masked pattern was here #### +mv used a 10 +mv used b 10