[hive] 02/02: HIVE-23368: MV rebuild should produce the same view as the one configured at creation time (Zoltan Haindrich reviewed by Jesus Camacho Rodriguez)

kgyrtkirk Sat, 09 May 2020 02:31:18 -0700

This is an automated email from the ASF dual-hosted git repository.

kgyrtkirk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


commit 39faad1dae7316b1f29b6c5589a3b8edc54092f7
Author: Zoltan Haindrich <k...@rxd.hu>
AuthorDate: Sat May 9 08:50:22 2020 +0000

    HIVE-23368: MV rebuild should produce the same view as the one configured at creation time (Zoltan Haindrich reviewed by Jesus Camacho Rodriguez)
    
    Signed-off-by: Zoltan Haindrich <k...@rxd.hu>
---
 .../test/resources/testconfiguration.properties    |   1 +
 .../hadoop/hive/ql/parse/CalcitePlanner.java       |  12 +-
 .../sketches_materialized_view_safety.q            |  38 ++
 .../llap/sketches_materialized_view_safety.q.out   | 519 +++++++++++++++++++++
 4 files changed, 569 insertions(+), 1 deletion(-)

diff --git a/itests/src/test/resources/testconfiguration.properties 
b/itests/src/test/resources/testconfiguration.properties
index 2036f29..cf3bc5c 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -830,6 +830,7 @@ minillaplocal.query.files=\
   sketches_rewrite.q,\
   sketches_materialized_view_rollup.q,\
   sketches_materialized_view_rollup2.q,\
+  sketches_materialized_view_safety.q,\
   table_access_keys_stats.q,\
   temp_table_llap_partitioned.q,\
   tez_bmj_schema_evolution.q,\
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java 
b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
index bf08306..085de48 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
@@ -1970,7 +1970,8 @@ public class CalcitePlanner extends SemanticAnalyzer {
           HiveExceptRewriteRule.INSTANCE);
 
       //1. Distinct aggregate rewrite
-      if (conf.getBoolVar(ConfVars.HIVE_OPTIMIZE_BI_ENABLED)) {
+
+      if (!isMaterializedViewMaintenance() && 
conf.getBoolVar(ConfVars.HIVE_OPTIMIZE_BI_ENABLED)) {
         // Rewrite to datasketches if enabled
         if 
(conf.getBoolVar(ConfVars.HIVE_OPTIMIZE_BI_REWRITE_COUNTDISTINCT_ENABLED)) {
           String sketchClass = 
conf.getVar(ConfVars.HIVE_OPTIMIZE_BI_REWRITE_COUNT_DISTINCT_SKETCH);
@@ -2106,6 +2107,15 @@ public class CalcitePlanner extends SemanticAnalyzer {
       return basePlan;
     }
 
+    /**
+     * Returns true if a materialized view (MV) is being loaded, created, or rebuilt.
+     */
+    private boolean isMaterializedViewMaintenance() {
+      return mvRebuildMode != MaterializationRebuildMode.NONE
+          || ctx.isLoadingMaterializedView()
+          || getQB().isMaterializedView();
+    }
+
     private RelNode applyMaterializedViewRewriting(RelOptPlanner planner, 
RelNode basePlan,
         RelMetadataProvider mdProvider, RexExecutor executorProvider) {
       final RelOptCluster optCluster = basePlan.getCluster();
diff --git 
a/ql/src/test/queries/clientpositive/sketches_materialized_view_safety.q 
b/ql/src/test/queries/clientpositive/sketches_materialized_view_safety.q
new file mode 100644
index 0000000..620cbb7
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/sketches_materialized_view_safety.q
@@ -0,0 +1,38 @@
+--! qt:transactional
+set hive.fetch.task.conversion=none;
+set hive.optimize.bi.enabled=true;
+
+create table sketch_input (id int, category char(1))
+STORED AS ORC
+TBLPROPERTIES ('transactional'='true');
+
+insert into table sketch_input values
+  (1,'a'),(1, 'a'), (2, 'a'), (3, 'a'), (4, 'a'), (5, 'a'), (6, 'a'), (7, 
'a'), (8, 'a'), (9, 'a'), (10, 'a'),
+  (6,'b'),(6, 'b'), (7, 'b'), (8, 'b'), (9, 'b'), (10, 'b'), (11, 'b'), (12, 
'b'), (13, 'b'), (14, 'b'), (15, 'b')
+; 
+
+explain
+create  materialized view mv_1 as
+  select 'no-rewrite-may-happen',category, count(distinct id) from 
sketch_input group by category;
+create  materialized view mv_1 as
+  select 'no-rewrite-may-happen',category, count(distinct id) from 
sketch_input group by category;
+
+insert into table sketch_input values
+  (1,'a'),(1, 'a'), (2, 'a'), (3, 'a'), (4, 'a'), (5, 'a'), (6, 'a'), (7, 
'a'), (8, 'a'), (9, 'a'), (10, 'a'),
+  (6,'b'),(6, 'b'), (7, 'b'), (8, 'b'), (9, 'b'), (10, 'b'), (11, 'b'), (12, 
'b'), (13, 'b'), (14, 'b'), (15, 'b')
+;
+
+explain
+alter materialized view mv_1 rebuild;
+alter materialized view mv_1 rebuild;
+
+-- see if we use the mv
+explain
+select 'rewritten;mv not used',category, count(distinct id) from sketch_input 
group by category;
+select 'rewritten;mv not used',category, count(distinct id) from sketch_input 
group by category;
+
+set hive.optimize.bi.enabled=false;
+
+explain
+select 'mv used',category, count(distinct id) from sketch_input group by 
category;
+select 'mv used',category, count(distinct id) from sketch_input group by 
category;
diff --git 
a/ql/src/test/results/clientpositive/llap/sketches_materialized_view_safety.q.out
 
b/ql/src/test/results/clientpositive/llap/sketches_materialized_view_safety.q.out
new file mode 100644
index 0000000..959edfc
--- /dev/null
+++ 
b/ql/src/test/results/clientpositive/llap/sketches_materialized_view_safety.q.out
@@ -0,0 +1,519 @@
+PREHOOK: query: create table sketch_input (id int, category char(1))
+STORED AS ORC
+TBLPROPERTIES ('transactional'='true')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@sketch_input
+POSTHOOK: query: create table sketch_input (id int, category char(1))
+STORED AS ORC
+TBLPROPERTIES ('transactional'='true')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@sketch_input
+PREHOOK: query: insert into table sketch_input values
+  (1,'a'),(1, 'a'), (2, 'a'), (3, 'a'), (4, 'a'), (5, 'a'), (6, 'a'), (7, 
'a'), (8, 'a'), (9, 'a'), (10, 'a'),
+  (6,'b'),(6, 'b'), (7, 'b'), (8, 'b'), (9, 'b'), (10, 'b'), (11, 'b'), (12, 
'b'), (13, 'b'), (14, 'b'), (15, 'b')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@sketch_input
+POSTHOOK: query: insert into table sketch_input values
+  (1,'a'),(1, 'a'), (2, 'a'), (3, 'a'), (4, 'a'), (5, 'a'), (6, 'a'), (7, 
'a'), (8, 'a'), (9, 'a'), (10, 'a'),
+  (6,'b'),(6, 'b'), (7, 'b'), (8, 'b'), (9, 'b'), (10, 'b'), (11, 'b'), (12, 
'b'), (13, 'b'), (14, 'b'), (15, 'b')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@sketch_input
+POSTHOOK: Lineage: sketch_input.category SCRIPT []
+POSTHOOK: Lineage: sketch_input.id SCRIPT []
+PREHOOK: query: explain
+create  materialized view mv_1 as
+  select 'no-rewrite-may-happen',category, count(distinct id) from 
sketch_input group by category
+PREHOOK: type: CREATE_MATERIALIZED_VIEW
+PREHOOK: Input: default@sketch_input
+PREHOOK: Output: database:default
+PREHOOK: Output: default@mv_1
+POSTHOOK: query: explain
+create  materialized view mv_1 as
+  select 'no-rewrite-may-happen',category, count(distinct id) from 
sketch_input group by category
+POSTHOOK: type: CREATE_MATERIALIZED_VIEW
+POSTHOOK: Input: default@sketch_input
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@mv_1
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-4 depends on stages: Stage-0, Stage-2
+  Stage-3 depends on stages: Stage-4
+  Stage-5 depends on stages: Stage-3
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+        Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: sketch_input
+                  Statistics: Num rows: 22 Data size: 1958 Basic stats: 
COMPLETE Column stats: COMPLETE
+                  Select Operator
+                    expressions: id (type: int), category (type: char(1))
+                    outputColumnNames: id, category
+                    Statistics: Num rows: 22 Data size: 1958 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    Group By Operator
+                      keys: category (type: char(1)), id (type: int)
+                      minReductionHashAggr: 0.3181818
+                      mode: hash
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 11 Data size: 979 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: char(1)), _col1 (type: 
int)
+                        null sort order: zz
+                        sort order: ++
+                        Map-reduce partition columns: _col0 (type: char(1))
+                        Statistics: Num rows: 11 Data size: 979 Basic stats: 
COMPLETE Column stats: COMPLETE
+            Execution mode: vectorized, llap
+            LLAP IO: may be used (ACID table)
+        Reducer 2 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                keys: KEY._col0 (type: char(1)), KEY._col1 (type: int)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 11 Data size: 979 Basic stats: COMPLETE 
Column stats: COMPLETE
+                Select Operator
+                  expressions: _col1 (type: int), _col0 (type: char(1))
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 11 Data size: 979 Basic stats: 
COMPLETE Column stats: COMPLETE
+                  Group By Operator
+                    aggregations: count(_col0)
+                    keys: _col1 (type: char(1))
+                    mode: complete
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 2 Data size: 186 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    Select Operator
+                      expressions: 'no-rewrite-may-happen' (type: string), 
_col0 (type: char(1)), _col1 (type: bigint)
+                      outputColumnNames: _col0, _col1, _col2
+                      Statistics: Num rows: 2 Data size: 396 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      File Output Operator
+                        compressed: false
+                        Statistics: Num rows: 2 Data size: 396 Basic stats: 
COMPLETE Column stats: COMPLETE
+                        table:
+                            input format: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                            output format: 
org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+                            serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+                            name: default.mv_1
+                      Select Operator
+                        expressions: _col0 (type: string), _col1 (type: 
char(1)), _col2 (type: bigint)
+                        outputColumnNames: col1, col2, col3
+                        Statistics: Num rows: 2 Data size: 396 Basic stats: 
COMPLETE Column stats: COMPLETE
+                        Group By Operator
+                          aggregations: compute_stats(col1, 'hll'), 
compute_stats(col2, 'hll'), compute_stats(col3, 'hll')
+                          minReductionHashAggr: 0.5
+                          mode: hash
+                          outputColumnNames: _col0, _col1, _col2
+                          Statistics: Num rows: 1 Data size: 1304 Basic stats: 
COMPLETE Column stats: COMPLETE
+                          Reduce Output Operator
+                            null sort order: 
+                            sort order: 
+                            Statistics: Num rows: 1 Data size: 1304 Basic 
stats: COMPLETE Column stats: COMPLETE
+                            value expressions: _col0 (type: 
struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>),
 _col1 (type: 
struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>),
 _col2 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>)
+        Reducer 3 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), 
compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE 
Column stats: COMPLETE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 1320 Basic stats: 
COMPLETE Column stats: COMPLETE
+                  table:
+                      input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-2
+    Dependency Collection
+
+  Stage: Stage-4
+    Create View
+      columns: _c0 string, category char(1), _c2 bigint
+      expanded text: select 'no-rewrite-may-happen',`sketch_input`.`category`, 
count(distinct `sketch_input`.`id`) from `default`.`sketch_input` group by 
`sketch_input`.`category`
+      name: default.mv_1
+      original text: select 'no-rewrite-may-happen',category, count(distinct 
id) from sketch_input group by category
+      rewrite enabled: true
+
+  Stage: Stage-3
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: _c0, category, _c2
+          Column Types: string, char(1), bigint
+          Table: default.mv_1
+
+  Stage: Stage-5
+    Materialized View Update
+      name: default.mv_1
+      retrieve and include: true
+
+  Stage: Stage-0
+    Move Operator
+      files:
+          hdfs directory: true
+#### A masked pattern was here ####
+
+PREHOOK: query: create  materialized view mv_1 as
+  select 'no-rewrite-may-happen',category, count(distinct id) from 
sketch_input group by category
+PREHOOK: type: CREATE_MATERIALIZED_VIEW
+PREHOOK: Input: default@sketch_input
+PREHOOK: Output: database:default
+PREHOOK: Output: default@mv_1
+POSTHOOK: query: create  materialized view mv_1 as
+  select 'no-rewrite-may-happen',category, count(distinct id) from 
sketch_input group by category
+POSTHOOK: type: CREATE_MATERIALIZED_VIEW
+POSTHOOK: Input: default@sketch_input
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@mv_1
+PREHOOK: query: insert into table sketch_input values
+  (1,'a'),(1, 'a'), (2, 'a'), (3, 'a'), (4, 'a'), (5, 'a'), (6, 'a'), (7, 
'a'), (8, 'a'), (9, 'a'), (10, 'a'),
+  (6,'b'),(6, 'b'), (7, 'b'), (8, 'b'), (9, 'b'), (10, 'b'), (11, 'b'), (12, 
'b'), (13, 'b'), (14, 'b'), (15, 'b')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@sketch_input
+POSTHOOK: query: insert into table sketch_input values
+  (1,'a'),(1, 'a'), (2, 'a'), (3, 'a'), (4, 'a'), (5, 'a'), (6, 'a'), (7, 
'a'), (8, 'a'), (9, 'a'), (10, 'a'),
+  (6,'b'),(6, 'b'), (7, 'b'), (8, 'b'), (9, 'b'), (10, 'b'), (11, 'b'), (12, 
'b'), (13, 'b'), (14, 'b'), (15, 'b')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@sketch_input
+POSTHOOK: Lineage: sketch_input.category SCRIPT []
+POSTHOOK: Lineage: sketch_input.id SCRIPT []
+PREHOOK: query: explain
+alter materialized view mv_1 rebuild
+PREHOOK: type: QUERY
+PREHOOK: Input: default@sketch_input
+PREHOOK: Output: default@mv_1
+POSTHOOK: query: explain
+alter materialized view mv_1 rebuild
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@sketch_input
+POSTHOOK: Output: default@mv_1
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
+  Stage-3 depends on stages: Stage-0
+  Stage-4 depends on stages: Stage-3
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+        Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: sketch_input
+                  Statistics: Num rows: 44 Data size: 3916 Basic stats: 
COMPLETE Column stats: COMPLETE
+                  Select Operator
+                    expressions: id (type: int), category (type: char(1))
+                    outputColumnNames: id, category
+                    Statistics: Num rows: 44 Data size: 3916 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    Group By Operator
+                      keys: category (type: char(1)), id (type: int)
+                      minReductionHashAggr: 0.52272725
+                      mode: hash
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 22 Data size: 1958 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: char(1)), _col1 (type: 
int)
+                        null sort order: zz
+                        sort order: ++
+                        Map-reduce partition columns: _col0 (type: char(1))
+                        Statistics: Num rows: 22 Data size: 1958 Basic stats: 
COMPLETE Column stats: COMPLETE
+            Execution mode: vectorized, llap
+            LLAP IO: may be used (ACID table)
+        Reducer 2 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                keys: KEY._col0 (type: char(1)), KEY._col1 (type: int)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 22 Data size: 1958 Basic stats: COMPLETE 
Column stats: COMPLETE
+                Select Operator
+                  expressions: _col1 (type: int), _col0 (type: char(1))
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 22 Data size: 1958 Basic stats: 
COMPLETE Column stats: COMPLETE
+                  Group By Operator
+                    aggregations: count(_col0)
+                    keys: _col1 (type: char(1))
+                    mode: complete
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 2 Data size: 186 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    Select Operator
+                      expressions: 'no-rewrite-may-happen' (type: string), 
_col0 (type: char(1)), _col1 (type: bigint)
+                      outputColumnNames: _col0, _col1, _col2
+                      Statistics: Num rows: 2 Data size: 396 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      File Output Operator
+                        compressed: false
+                        Statistics: Num rows: 2 Data size: 396 Basic stats: 
COMPLETE Column stats: COMPLETE
+                        table:
+                            input format: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                            output format: 
org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+                            serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+                            name: default.mv_1
+                      Select Operator
+                        expressions: _col0 (type: string), _col1 (type: 
char(1)), _col2 (type: bigint)
+                        outputColumnNames: _c0, category, _c2
+                        Statistics: Num rows: 2 Data size: 396 Basic stats: 
COMPLETE Column stats: COMPLETE
+                        Group By Operator
+                          aggregations: compute_stats(_c0, 'hll'), 
compute_stats(category, 'hll'), compute_stats(_c2, 'hll')
+                          minReductionHashAggr: 0.5
+                          mode: hash
+                          outputColumnNames: _col0, _col1, _col2
+                          Statistics: Num rows: 1 Data size: 1304 Basic stats: 
COMPLETE Column stats: COMPLETE
+                          Reduce Output Operator
+                            null sort order: 
+                            sort order: 
+                            Statistics: Num rows: 1 Data size: 1304 Basic 
stats: COMPLETE Column stats: COMPLETE
+                            value expressions: _col0 (type: 
struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>),
 _col1 (type: 
struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>),
 _col2 (type: 
struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>)
+        Reducer 3 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), 
compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE 
Column stats: COMPLETE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 1320 Basic stats: 
COMPLETE Column stats: COMPLETE
+                  table:
+                      input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-2
+    Dependency Collection
+
+  Stage: Stage-0
+    Move Operator
+      tables:
+          replace: true
+          table:
+              input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+              output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+              serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+              name: default.mv_1
+
+  Stage: Stage-3
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: _c0, category, _c2
+          Column Types: string, char(1), bigint
+          Table: default.mv_1
+
+  Stage: Stage-4
+    Materialized View Update
+      name: default.mv_1
+      update creation metadata: true
+
+PREHOOK: query: alter materialized view mv_1 rebuild
+PREHOOK: type: QUERY
+PREHOOK: Input: default@sketch_input
+PREHOOK: Output: default@mv_1
+POSTHOOK: query: alter materialized view mv_1 rebuild
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@sketch_input
+POSTHOOK: Output: default@mv_1
+POSTHOOK: Lineage: mv_1._c0 SIMPLE []
+POSTHOOK: Lineage: mv_1._c2 EXPRESSION 
[(sketch_input)sketch_input.FieldSchema(name:id, type:int, comment:null), ]
+POSTHOOK: Lineage: mv_1.category SIMPLE 
[(sketch_input)sketch_input.FieldSchema(name:category, type:char(1), 
comment:null), ]
+PREHOOK: query: explain
+select 'rewritten;mv not used',category, count(distinct id) from sketch_input 
group by category
+PREHOOK: type: QUERY
+PREHOOK: Input: default@sketch_input
+#### A masked pattern was here ####
+POSTHOOK: query: explain
+select 'rewritten;mv not used',category, count(distinct id) from sketch_input 
group by category
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@sketch_input
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: sketch_input
+                  Statistics: Num rows: 44 Data size: 3916 Basic stats: 
COMPLETE Column stats: COMPLETE
+                  Select Operator
+                    expressions: id (type: int), category (type: char(1))
+                    outputColumnNames: id, category
+                    Statistics: Num rows: 44 Data size: 3916 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    Group By Operator
+                      aggregations: ds_hll_sketch(id)
+                      keys: category (type: char(1))
+                      minReductionHashAggr: 0.95454544
+                      mode: hash
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 2 Data size: 946 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: char(1))
+                        null sort order: z
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: char(1))
+                        Statistics: Num rows: 2 Data size: 946 Basic stats: 
COMPLETE Column stats: COMPLETE
+                        value expressions: _col1 (type: 
struct<lgk:int,type:string,sketch:binary>)
+            Execution mode: llap
+            LLAP IO: may be used (ACID table)
+        Reducer 2 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: ds_hll_sketch(VALUE._col0)
+                keys: KEY._col0 (type: char(1))
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 2 Data size: 458 Basic stats: COMPLETE 
Column stats: COMPLETE
+                Select Operator
+                  expressions: 'rewritten;mv not used' (type: string), _col0 
(type: char(1)), UDFToLong(ds_hll_estimate(_col1)) (type: bigint)
+                  outputColumnNames: _col0, _col1, _col2
+                  Statistics: Num rows: 2 Data size: 396 Basic stats: COMPLETE 
Column stats: COMPLETE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 2 Data size: 396 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    table:
+                        input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select 'rewritten;mv not used',category, count(distinct id) 
from sketch_input group by category
+PREHOOK: type: QUERY
+PREHOOK: Input: default@sketch_input
+#### A masked pattern was here ####
+POSTHOOK: query: select 'rewritten;mv not used',category, count(distinct id) 
from sketch_input group by category
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@sketch_input
+#### A masked pattern was here ####
+rewritten;mv not used  a       10
+rewritten;mv not used  b       10
+PREHOOK: query: explain
+select 'mv used',category, count(distinct id) from sketch_input group by 
category
+PREHOOK: type: QUERY
+PREHOOK: Input: default@sketch_input
+#### A masked pattern was here ####
+POSTHOOK: query: explain
+select 'mv used',category, count(distinct id) from sketch_input group by 
category
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@sketch_input
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: sketch_input
+                  Statistics: Num rows: 44 Data size: 3916 Basic stats: 
COMPLETE Column stats: COMPLETE
+                  Select Operator
+                    expressions: id (type: int), category (type: char(1))
+                    outputColumnNames: id, category
+                    Statistics: Num rows: 44 Data size: 3916 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    Group By Operator
+                      keys: category (type: char(1)), id (type: int)
+                      minReductionHashAggr: 0.52272725
+                      mode: hash
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 22 Data size: 1958 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: char(1)), _col1 (type: 
int)
+                        null sort order: zz
+                        sort order: ++
+                        Map-reduce partition columns: _col0 (type: char(1))
+                        Statistics: Num rows: 22 Data size: 1958 Basic stats: 
COMPLETE Column stats: COMPLETE
+            Execution mode: vectorized, llap
+            LLAP IO: may be used (ACID table)
+        Reducer 2 
+            Execution mode: vectorized, llap
+            Reduce Operator Tree:
+              Group By Operator
+                keys: KEY._col0 (type: char(1)), KEY._col1 (type: int)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 22 Data size: 1958 Basic stats: COMPLETE 
Column stats: COMPLETE
+                Select Operator
+                  expressions: _col1 (type: int), _col0 (type: char(1))
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 22 Data size: 1958 Basic stats: 
COMPLETE Column stats: COMPLETE
+                  Group By Operator
+                    aggregations: count(_col0)
+                    keys: _col1 (type: char(1))
+                    mode: complete
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 2 Data size: 186 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    Select Operator
+                      expressions: 'mv used' (type: string), _col0 (type: 
char(1)), _col1 (type: bigint)
+                      outputColumnNames: _col0, _col1, _col2
+                      Statistics: Num rows: 2 Data size: 368 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      File Output Operator
+                        compressed: false
+                        Statistics: Num rows: 2 Data size: 368 Basic stats: 
COMPLETE Column stats: COMPLETE
+                        table:
+                            input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                            output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                            serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select 'mv used',category, count(distinct id) from 
sketch_input group by category
+PREHOOK: type: QUERY
+PREHOOK: Input: default@sketch_input
+#### A masked pattern was here ####
+POSTHOOK: query: select 'mv used',category, count(distinct id) from 
sketch_input group by category
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@sketch_input
+#### A masked pattern was here ####
+mv used        a       10
+mv used        b       10

[hive] 02/02: HIVE-23368: MV rebuild should produce the same view as the one configured at creation time (Zoltan Haindrich reviewed by Jesus Camacho Rodriguez)

Reply via email to