[hive] branch master updated: HIVE-24606: Multi-stage materialized CTEs can lose intermediate data (okumin, reviewed by Krisztian Kasa)

krisztiankasa Fri, 15 Sep 2023 02:18:37 -0700

This is an automated email from the ASF dual-hosted git repository.

krisztiankasa pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git



The following commit(s) were added to refs/heads/master by this push:
     new 279c2508757 HIVE-24606: Multi-stage materialized CTEs can lose intermediate data (okumin, reviewed by Krisztian Kasa)
279c2508757 is described below

commit 279c25087575ad52645f031dfb5a1da7d712c18c
Author: okumin <g...@okumin.com>
AuthorDate: Fri Sep 15 18:18:23 2023 +0900

    HIVE-24606: Multi-stage materialized CTEs can lose intermediate data (okumin, reviewed by Krisztian Kasa)
---
 .../hadoop/hive/ql/parse/SemanticAnalyzer.java     | 102 +++--
 ql/src/test/queries/clientpositive/cte_mat_10.q    |  42 +++
 ql/src/test/queries/clientpositive/cte_mat_8.q     |  24 ++
 ql/src/test/queries/clientpositive/cte_mat_9.q     |  66 ++++
 .../results/clientpositive/llap/cte_mat_10.q.out   | 359 ++++++++++++++++++
 .../results/clientpositive/llap/cte_mat_8.q.out    | 240 ++++++++++++
 .../results/clientpositive/llap/cte_mat_9.q.out    | 411 +++++++++++++++++++++
 7 files changed, 1210 insertions(+), 34 deletions(-)

diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index 81f1e55f6f8..cdebdc5ff47 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -45,6 +45,7 @@ import java.util.HashMap;
 import java.util.HashSet;
 import java.util.Iterator;
 import java.util.LinkedHashMap;
+import java.util.LinkedHashSet;
 import java.util.LinkedList;
 import java.util.List;
 import java.util.Map;
@@ -1378,7 +1379,9 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
   @Override
   public List<Task<?>> getAllRootTasks() {
     if (!rootTasksResolved) {
-      rootTasks = toRealRootTasks(rootClause.asExecutionOrder());
+      LinkedHashMap<CTEClause, LinkedHashSet<CTEClause>> realDependencies = listRealDependencies();
+      linkRealDependencies(realDependencies);
+      rootTasks = toRealRootTasks(realDependencies);
       rootTasksResolved = true;
     }
     return rootTasks;
@@ -1448,47 +1451,78 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
     }
   }
 
-  private List<Task<?>> toRealRootTasks(List<CTEClause> execution) {
-    List<Task<?>> cteRoots = new ArrayList<>();
-    List<Task<?>> cteLeafs = new ArrayList<>();
-    List<Task<?>> curTopRoots = null;
-    List<Task<?>> curBottomLeafs = null;
-    for (CTEClause current : execution) {
-      if (current.parents.isEmpty() && curTopRoots != null) {
-        cteRoots.addAll(curTopRoots);
-        cteLeafs.addAll(curBottomLeafs);
-        curTopRoots = curBottomLeafs = null;
-      }
-      List<Task<?>> curTasks = current.getTasks();
-      if (curTasks == null) {
-        continue;
-      }
-      if (curTopRoots == null) {
-        curTopRoots = curTasks;
-      }
-      if (curBottomLeafs != null) {
-        for (Task<?> topLeafTask : curBottomLeafs) {
-          for (Task<?> currentRootTask : curTasks) {
-            topLeafTask.addDependentTask(currentRootTask);
-          }
+  private List<Task<?>> getRealTasks(CTEClause cte) {
+    if (cte == rootClause) {
+      return rootTasks;
+    } else {
+      return cte.getTasks();
+    }
+  }
+
+  /**
+   * Links tasks based on dependencies among CTEs which have actual tasks.
+   * For example, when materialized CTE X depends on materialized CTE Y,
+   * the leaf tasks of Y must have the root tasks of X as its child tasks.
+   */
+  private void linkRealDependencies(LinkedHashMap<CTEClause, LinkedHashSet<CTEClause>> realDependencies) {
+    LinkedHashMap<CTEClause, List<Task<?>>> dependentTasks = new LinkedHashMap<>();
+    for (CTEClause child : realDependencies.keySet()) {
+      for (CTEClause parent : realDependencies.get(child)) {
+        if (!dependentTasks.containsKey(parent)) {
+          dependentTasks.put(parent, new ArrayList<>());
         }
+        dependentTasks.get(parent).addAll(getRealTasks(child));
       }
-      curBottomLeafs = Task.findLeafs(curTasks);
     }
-    if (curTopRoots != null) {
-      cteRoots.addAll(curTopRoots);
-      cteLeafs.addAll(curBottomLeafs);
+    // This operation must be performed only once per CTE since it creates new leaves
+    for (CTEClause parent : dependentTasks.keySet()) {
+      List<Task<?>> sources = Task.findLeafs(getRealTasks(parent));
+      linkTasks(sources, dependentTasks.get(parent));
     }
+  }
 
-    if (cteRoots.isEmpty()) {
-      return rootTasks;
+  private static void linkTasks(List<Task<?>> sources, Iterable<Task<?>> sinks) {
+    for (Task<?> source : sources) {
+      for (Task<?> sink : sinks) {
+        source.addDependentTask(sink);
+      }
     }
-    for (Task<?> cteLeafTask : cteLeafs) {
-      for (Task<?> mainRootTask : rootTasks) {
-        cteLeafTask.addDependentTask(mainRootTask);
+  }
+
+  // Returns tasks which have no dependencies and can start without waiting for any tasks
+  private List<Task<?>> toRealRootTasks(LinkedHashMap<CTEClause, LinkedHashSet<CTEClause>> realDependencies) {
+    List<Task<?>> realRootTasks = new ArrayList<>();
+    for (CTEClause cte : realDependencies.keySet()) {
+      if (realDependencies.get(cte).isEmpty()) {
+        realRootTasks.addAll(getRealTasks(cte));
+      }
+    }
+    return realRootTasks;
+  }
+
+  // child with tasks -> list of parents with tasks
+  private LinkedHashMap<CTEClause, LinkedHashSet<CTEClause>> listRealDependencies() {
+    LinkedHashMap<CTEClause, LinkedHashSet<CTEClause>> realDependencies = new LinkedHashMap<>();
+    for (CTEClause child : rootClause.asExecutionOrder()) {
+      if (getRealTasks(child) == null) {
+        // This CTE will be executed as a part of other CTEs or a root statement
+        continue;
+      }
+      LinkedHashSet<CTEClause> parents = new LinkedHashSet<>();
+      collectRealDependencies(child, parents);
+      realDependencies.put(child, parents);
+    }
+    return realDependencies;
+  }
+
+  private void collectRealDependencies(CTEClause cte, LinkedHashSet<CTEClause> realDependencies) {
+    for (CTEClause parent : cte.parents) {
+      if (getRealTasks(parent) == null) {
+        collectRealDependencies(parent, realDependencies);
+      } else {
+        realDependencies.add(parent);
       }
     }
-    return cteRoots;
   }
 
   Table materializeCTE(String cteName, CTEClause cte) throws HiveException {
diff --git a/ql/src/test/queries/clientpositive/cte_mat_10.q b/ql/src/test/queries/clientpositive/cte_mat_10.q
new file mode 100644
index 00000000000..a0a4601422f
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/cte_mat_10.q
@@ -0,0 +1,42 @@
+set hive.optimize.cte.materialize.threshold=2;
+set hive.optimize.cte.materialize.full.aggregate.only=false;
+
+explain with x as ( select 'x' as id ),
+a1 as ( select 'a1' as id),
+a2 as ( select 'a2 <- ' || id as id from a1 ),
+a3 as ( select 'a3 <- ' || id as id from a2 ),
+b1 as ( select 'b1' as id ),
+b2 as (
+  select 'b2 <- ' || id as id from b1
+  union all
+  select 'b2 <- ' || id as id from b1
+)
+select * from a2
+union all
+select * from x
+union all
+select * from a3
+union all
+select * from a3
+union all
+select * from b2;
+
+with x as ( select 'x' as id ),
+a1 as ( select 'a1' as id),
+a2 as ( select 'a2 <- ' || id as id from a1 ),
+a3 as ( select 'a3 <- ' || id as id from a2 ),
+b1 as ( select 'b1' as id ),
+b2 as (
+  select 'b2 <- ' || id as id from b1
+  union all
+  select 'b2 <- ' || id as id from b1
+)
+select * from a2
+union all
+select * from x
+union all
+select * from a3
+union all
+select * from a3
+union all
+select * from b2;
diff --git a/ql/src/test/queries/clientpositive/cte_mat_8.q b/ql/src/test/queries/clientpositive/cte_mat_8.q
new file mode 100644
index 00000000000..68c827b0129
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/cte_mat_8.q
@@ -0,0 +1,24 @@
+set hive.optimize.cte.materialize.threshold=2;
+set hive.optimize.cte.materialize.full.aggregate.only=false;
+
+explain with x as ( select 'x' as id ), -- not materialized
+a1 as ( select 'a1' as id ), -- materialized by a2 and the root
+a2 as ( select 'a2 <- ' || id as id from a1) -- materialized by the root
+select * from a1
+union all
+select * from x
+union all
+select * from a2
+union all
+select * from a2;
+
+with x as ( select 'x' as id ), -- not materialized
+a1 as ( select 'a1' as id ), -- materialized by a2 and the root
+a2 as ( select 'a2 <- ' || id as id from a1) -- materialized by the root
+select * from a1
+union all
+select * from x
+union all
+select * from a2
+union all
+select * from a2;
diff --git a/ql/src/test/queries/clientpositive/cte_mat_9.q b/ql/src/test/queries/clientpositive/cte_mat_9.q
new file mode 100644
index 00000000000..eaccbc1d24a
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/cte_mat_9.q
@@ -0,0 +1,66 @@
+set hive.optimize.cte.materialize.threshold=2;
+set hive.optimize.cte.materialize.full.aggregate.only=false;
+
+drop table if exists cte_mat_9_a;
+create table cte_mat_9_a (id int);
+insert into cte_mat_9_a (id) values (1);
+
+drop table if exists cte_mat_9_b;
+create table cte_mat_9_b (id int);
+insert into cte_mat_9_b (id) values (1);
+
+explain with a0 AS (
+  select id, 'a0' as tag from cte_mat_9_a
+),
+a1 as (
+  select id, 'a1 <- ' || tag as tag from a0
+),
+b0 as (
+  select id, 'b0' as tag from cte_mat_9_b
+),
+b1 as (
+  select id, 'b1 <- ' || tag as tag from b0
+),
+b2 as (
+  select id, 'b2 <- ' || tag as tag  from b1
+),
+b3 as (
+  select id, 'b3 <- ' || tag as tag from b2
+),
+c as (
+  select b2.id, 'c <- (' || b2.tag || ' & ' || b3.tag || ')' as tag
+  from b2
+  full outer join b3 on b2.id = b3.id
+)
+select b1.id, b1.tag, a1.tag, c.tag
+from b1
+full outer join a1 on b1.id = a1.id
+full outer join c on c.id = c.id;
+
+with a0 AS (
+  select id, 'a0' as tag from cte_mat_9_a
+),
+a1 as (
+  select id, 'a1 <- ' || tag as tag from a0
+),
+b0 as (
+  select id, 'b0' as tag from cte_mat_9_b
+),
+b1 as (
+  select id, 'b1 <- ' || tag as tag from b0
+),
+b2 as (
+  select id, 'b2 <- ' || tag as tag  from b1
+),
+b3 as (
+  select id, 'b3 <- ' || tag as tag from b2
+),
+c as (
+  select b2.id, 'c <- (' || b2.tag || ' & ' || b3.tag || ')' as tag
+  from b2
+  full outer join b3 on b2.id = b3.id
+)
+select b1.id, b1.tag, a1.tag, c.tag
+from b1
+full outer join a1 on b1.id = a1.id
+full outer join c on c.id = c.id;
diff --git a/ql/src/test/results/clientpositive/llap/cte_mat_10.q.out b/ql/src/test/results/clientpositive/llap/cte_mat_10.q.out
new file mode 100644
index 00000000000..8262ba7c035
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/cte_mat_10.q.out
@@ -0,0 +1,359 @@
+PREHOOK: query: explain with x as ( select 'x' as id ),
+a1 as ( select 'a1' as id),
+a2 as ( select 'a2 <- ' || id as id from a1 ),
+a3 as ( select 'a3 <- ' || id as id from a2 ),
+b1 as ( select 'b1' as id ),
+b2 as (
+  select 'b2 <- ' || id as id from b1
+  union all
+  select 'b2 <- ' || id as id from b1
+)
+select * from a2
+union all
+select * from x
+union all
+select * from a3
+union all
+select * from a3
+union all
+select * from b2
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Input: default@a2
+PREHOOK: Input: default@a3
+PREHOOK: Input: default@b1
+#### A masked pattern was here ####
+POSTHOOK: query: explain with x as ( select 'x' as id ),
+a1 as ( select 'a1' as id),
+a2 as ( select 'a2 <- ' || id as id from a1 ),
+a3 as ( select 'a3 <- ' || id as id from a2 ),
+b1 as ( select 'b1' as id ),
+b2 as (
+  select 'b2 <- ' || id as id from b1
+  union all
+  select 'b2 <- ' || id as id from b1
+)
+select * from a2
+union all
+select * from x
+union all
+select * from a3
+union all
+select * from a3
+union all
+select * from b2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Input: default@a2
+POSTHOOK: Input: default@a3
+POSTHOOK: Input: default@b1
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-4 depends on stages: Stage-2, Stage-0
+  Stage-5 depends on stages: Stage-4
+  Stage-10 depends on stages: Stage-2, Stage-0, Stage-5, Stage-3, Stage-8, Stage-6
+  Stage-3 depends on stages: Stage-4
+  Stage-0 depends on stages: Stage-1
+  Stage-7 is a root stage
+  Stage-8 depends on stages: Stage-7
+  Stage-6 depends on stages: Stage-7
+  Stage-9 depends on stages: Stage-10
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: _dummy_table
+                  Row Limit Per Split: 1
+                  Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE 
Column stats: COMPLETE
+                  Select Operator
+                    expressions: 'a2 <- a1' (type: string)
+                    outputColumnNames: _col0
+                    Statistics: Num rows: 1 Data size: 92 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    File Output Operator
+                      compressed: false
+                      Statistics: Num rows: 1 Data size: 92 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      table:
+                          input format: 
org.apache.hadoop.mapred.TextInputFormat
+                          output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                          serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                          name: default.a2
+            Execution mode: llap
+            LLAP IO: no inputs
+
+  Stage: Stage-2
+    Dependency Collection
+
+  Stage: Stage-4
+    Tez
+#### A masked pattern was here ####
+      Vertices:
+        Map 2 
+            Map Operator Tree:
+                TableScan
+                  alias: a2
+                  Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE 
Column stats: NONE
+                  Select Operator
+                    expressions: concat('a3 <- ', id) (type: string)
+                    outputColumnNames: _col0
+                    Statistics: Num rows: 1 Data size: 184 Basic stats: 
COMPLETE Column stats: NONE
+                    File Output Operator
+                      compressed: false
+                      Statistics: Num rows: 1 Data size: 184 Basic stats: 
COMPLETE Column stats: NONE
+                      table:
+                          input format: 
org.apache.hadoop.mapred.TextInputFormat
+                          output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                          serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                          name: default.a3
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+
+  Stage: Stage-5
+    Dependency Collection
+
+  Stage: Stage-10
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Map 10 <- Union 5 (CONTAINS)
+        Map 4 <- Union 5 (CONTAINS)
+        Map 6 <- Union 5 (CONTAINS)
+        Map 7 <- Union 5 (CONTAINS)
+        Map 8 <- Union 5 (CONTAINS)
+        Map 9 <- Union 5 (CONTAINS)
+#### A masked pattern was here ####
+      Vertices:
+        Map 10 
+            Map Operator Tree:
+                TableScan
+                  alias: b1
+                  Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE 
Column stats: NONE
+                  Select Operator
+                    expressions: concat('b2 <- ', id) (type: string)
+                    outputColumnNames: _col0
+                    Statistics: Num rows: 1 Data size: 184 Basic stats: 
COMPLETE Column stats: NONE
+                    File Output Operator
+                      compressed: false
+                      Statistics: Num rows: 6 Data size: 1005 Basic stats: 
COMPLETE Column stats: PARTIAL
+                      table:
+                          input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                          output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                          serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+        Map 4 
+            Map Operator Tree:
+                TableScan
+                  alias: a2
+                  Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE 
Column stats: NONE
+                  Select Operator
+                    expressions: id (type: string)
+                    outputColumnNames: _col0
+                    Statistics: Num rows: 1 Data size: 184 Basic stats: 
COMPLETE Column stats: NONE
+                    File Output Operator
+                      compressed: false
+                      Statistics: Num rows: 6 Data size: 1005 Basic stats: 
COMPLETE Column stats: PARTIAL
+                      table:
+                          input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                          output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                          serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+        Map 6 
+            Map Operator Tree:
+                TableScan
+                  alias: _dummy_table
+                  Row Limit Per Split: 1
+                  Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE 
Column stats: COMPLETE
+                  Select Operator
+                    expressions: 'x' (type: string)
+                    outputColumnNames: _col0
+                    Statistics: Num rows: 1 Data size: 85 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    File Output Operator
+                      compressed: false
+                      Statistics: Num rows: 6 Data size: 1005 Basic stats: 
COMPLETE Column stats: PARTIAL
+                      table:
+                          input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                          output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                          serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Execution mode: llap
+            LLAP IO: no inputs
+        Map 7 
+            Map Operator Tree:
+                TableScan
+                  alias: a3
+                  Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE 
Column stats: NONE
+                  Select Operator
+                    expressions: id (type: string)
+                    outputColumnNames: _col0
+                    Statistics: Num rows: 1 Data size: 184 Basic stats: 
COMPLETE Column stats: NONE
+                    File Output Operator
+                      compressed: false
+                      Statistics: Num rows: 6 Data size: 1005 Basic stats: 
COMPLETE Column stats: PARTIAL
+                      table:
+                          input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                          output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                          serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+        Map 8 
+            Map Operator Tree:
+                TableScan
+                  alias: a3
+                  Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE 
Column stats: NONE
+                  Select Operator
+                    expressions: id (type: string)
+                    outputColumnNames: _col0
+                    Statistics: Num rows: 1 Data size: 184 Basic stats: 
COMPLETE Column stats: NONE
+                    File Output Operator
+                      compressed: false
+                      Statistics: Num rows: 6 Data size: 1005 Basic stats: 
COMPLETE Column stats: PARTIAL
+                      table:
+                          input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                          output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                          serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+        Map 9 
+            Map Operator Tree:
+                TableScan
+                  alias: b1
+                  Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE 
Column stats: NONE
+                  Select Operator
+                    expressions: concat('b2 <- ', id) (type: string)
+                    outputColumnNames: _col0
+                    Statistics: Num rows: 1 Data size: 184 Basic stats: 
COMPLETE Column stats: NONE
+                    File Output Operator
+                      compressed: false
+                      Statistics: Num rows: 6 Data size: 1005 Basic stats: 
COMPLETE Column stats: PARTIAL
+                      table:
+                          input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                          output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                          serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+        Union 5 
+            Vertex: Union 5
+
+  Stage: Stage-3
+    Move Operator
+      files:
+          hdfs directory: true
+#### A masked pattern was here ####
+
+  Stage: Stage-0
+    Move Operator
+      files:
+          hdfs directory: true
+#### A masked pattern was here ####
+
+  Stage: Stage-7
+    Tez
+#### A masked pattern was here ####
+      Vertices:
+        Map 3 
+            Map Operator Tree:
+                TableScan
+                  alias: _dummy_table
+                  Row Limit Per Split: 1
+                  Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE 
Column stats: COMPLETE
+                  Select Operator
+                    expressions: 'b1' (type: string)
+                    outputColumnNames: _col0
+                    Statistics: Num rows: 1 Data size: 86 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    File Output Operator
+                      compressed: false
+                      Statistics: Num rows: 1 Data size: 86 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      table:
+                          input format: 
org.apache.hadoop.mapred.TextInputFormat
+                          output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                          serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                          name: default.b1
+            Execution mode: llap
+            LLAP IO: no inputs
+
+  Stage: Stage-8
+    Dependency Collection
+
+  Stage: Stage-6
+    Move Operator
+      files:
+          hdfs directory: true
+#### A masked pattern was here ####
+
+  Stage: Stage-9
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: with x as ( select 'x' as id ),
+a1 as ( select 'a1' as id),
+a2 as ( select 'a2 <- ' || id as id from a1 ),
+a3 as ( select 'a3 <- ' || id as id from a2 ),
+b1 as ( select 'b1' as id ),
+b2 as (
+  select 'b2 <- ' || id as id from b1
+  union all
+  select 'b2 <- ' || id as id from b1
+)
+select * from a2
+union all
+select * from x
+union all
+select * from a3
+union all
+select * from a3
+union all
+select * from b2
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Input: default@a2
+PREHOOK: Input: default@a3
+PREHOOK: Input: default@b1
+PREHOOK: Output: database:default
+PREHOOK: Output: default@a2
+PREHOOK: Output: default@a3
+PREHOOK: Output: default@b1
+#### A masked pattern was here ####
+POSTHOOK: query: with x as ( select 'x' as id ),
+a1 as ( select 'a1' as id),
+a2 as ( select 'a2 <- ' || id as id from a1 ),
+a3 as ( select 'a3 <- ' || id as id from a2 ),
+b1 as ( select 'b1' as id ),
+b2 as (
+  select 'b2 <- ' || id as id from b1
+  union all
+  select 'b2 <- ' || id as id from b1
+)
+select * from a2
+union all
+select * from x
+union all
+select * from a3
+union all
+select * from a3
+union all
+select * from b2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Input: default@a2
+POSTHOOK: Input: default@a3
+POSTHOOK: Input: default@b1
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@a2
+POSTHOOK: Output: default@a3
+POSTHOOK: Output: default@b1
+#### A masked pattern was here ####
+a2 <- a1
+x
+a3 <- a2 <- a1
+a3 <- a2 <- a1
+b2 <- b1
+b2 <- b1
diff --git a/ql/src/test/results/clientpositive/llap/cte_mat_8.q.out b/ql/src/test/results/clientpositive/llap/cte_mat_8.q.out
new file mode 100644
index 00000000000..cb086fdb9e5
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/cte_mat_8.q.out
@@ -0,0 +1,240 @@
+PREHOOK: query: explain with x as ( select 'x' as id ), -- not materialized
+a1 as ( select 'a1' as id ), -- materialized by a2 and the root
+a2 as ( select 'a2 <- ' || id as id from a1) -- materialized by the root
+select * from a1
+union all
+select * from x
+union all
+select * from a2
+union all
+select * from a2
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Input: default@a1
+PREHOOK: Input: default@a2
+#### A masked pattern was here ####
+POSTHOOK: query: explain with x as ( select 'x' as id ), -- not materialized
+a1 as ( select 'a1' as id ), -- materialized by a2 and the root
+a2 as ( select 'a2 <- ' || id as id from a1) -- materialized by the root
+select * from a1
+union all
+select * from x
+union all
+select * from a2
+union all
+select * from a2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Input: default@a1
+POSTHOOK: Input: default@a2
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-4 depends on stages: Stage-2, Stage-0
+  Stage-5 depends on stages: Stage-4
+  Stage-7 depends on stages: Stage-2, Stage-0, Stage-5, Stage-3
+  Stage-3 depends on stages: Stage-4
+  Stage-0 depends on stages: Stage-1
+  Stage-6 depends on stages: Stage-7
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: _dummy_table
+                  Row Limit Per Split: 1
+                  Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE 
Column stats: COMPLETE
+                  Select Operator
+                    expressions: 'a1' (type: string)
+                    outputColumnNames: _col0
+                    Statistics: Num rows: 1 Data size: 86 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    File Output Operator
+                      compressed: false
+                      Statistics: Num rows: 1 Data size: 86 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      table:
+                          input format: 
org.apache.hadoop.mapred.TextInputFormat
+                          output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                          serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                          name: default.a1
+            Execution mode: llap
+            LLAP IO: no inputs
+
+  Stage: Stage-2
+    Dependency Collection
+
+  Stage: Stage-4
+    Tez
+#### A masked pattern was here ####
+      Vertices:
+        Map 2 
+            Map Operator Tree:
+                TableScan
+                  alias: a1
+                  Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE 
Column stats: NONE
+                  Select Operator
+                    expressions: concat('a2 <- ', id) (type: string)
+                    outputColumnNames: _col0
+                    Statistics: Num rows: 1 Data size: 184 Basic stats: 
COMPLETE Column stats: NONE
+                    File Output Operator
+                      compressed: false
+                      Statistics: Num rows: 1 Data size: 184 Basic stats: 
COMPLETE Column stats: NONE
+                      table:
+                          input format: 
org.apache.hadoop.mapred.TextInputFormat
+                          output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                          serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                          name: default.a2
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+
+  Stage: Stage-5
+    Dependency Collection
+
+  Stage: Stage-7
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Map 3 <- Union 4 (CONTAINS)
+        Map 5 <- Union 4 (CONTAINS)
+        Map 6 <- Union 4 (CONTAINS)
+        Map 7 <- Union 4 (CONTAINS)
+#### A masked pattern was here ####
+      Vertices:
+        Map 3 
+            Map Operator Tree:
+                TableScan
+                  alias: a1
+                  Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE 
Column stats: NONE
+                  Select Operator
+                    expressions: id (type: string)
+                    outputColumnNames: _col0
+                    Statistics: Num rows: 1 Data size: 184 Basic stats: 
COMPLETE Column stats: NONE
+                    File Output Operator
+                      compressed: false
+                      Statistics: Num rows: 4 Data size: 637 Basic stats: 
COMPLETE Column stats: PARTIAL
+                      table:
+                          input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                          output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                          serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+        Map 5 
+            Map Operator Tree:
+                TableScan
+                  alias: _dummy_table
+                  Row Limit Per Split: 1
+                  Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE 
Column stats: COMPLETE
+                  Select Operator
+                    expressions: 'x' (type: string)
+                    outputColumnNames: _col0
+                    Statistics: Num rows: 1 Data size: 85 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    File Output Operator
+                      compressed: false
+                      Statistics: Num rows: 4 Data size: 637 Basic stats: 
COMPLETE Column stats: PARTIAL
+                      table:
+                          input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                          output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                          serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Execution mode: llap
+            LLAP IO: no inputs
+        Map 6 
+            Map Operator Tree:
+                TableScan
+                  alias: a2
+                  Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE 
Column stats: NONE
+                  Select Operator
+                    expressions: id (type: string)
+                    outputColumnNames: _col0
+                    Statistics: Num rows: 1 Data size: 184 Basic stats: 
COMPLETE Column stats: NONE
+                    File Output Operator
+                      compressed: false
+                      Statistics: Num rows: 4 Data size: 637 Basic stats: 
COMPLETE Column stats: PARTIAL
+                      table:
+                          input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                          output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                          serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+        Map 7 
+            Map Operator Tree:
+                TableScan
+                  alias: a2
+                  Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE 
Column stats: NONE
+                  Select Operator
+                    expressions: id (type: string)
+                    outputColumnNames: _col0
+                    Statistics: Num rows: 1 Data size: 184 Basic stats: 
COMPLETE Column stats: NONE
+                    File Output Operator
+                      compressed: false
+                      Statistics: Num rows: 4 Data size: 637 Basic stats: 
COMPLETE Column stats: PARTIAL
+                      table:
+                          input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                          output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                          serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+        Union 4 
+            Vertex: Union 4
+
+  Stage: Stage-3
+    Move Operator
+      files:
+          hdfs directory: true
+#### A masked pattern was here ####
+
+  Stage: Stage-0
+    Move Operator
+      files:
+          hdfs directory: true
+#### A masked pattern was here ####
+
+  Stage: Stage-6
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: with x as ( select 'x' as id ), -- not materialized
+a1 as ( select 'a1' as id ), -- materialized by a2 and the root
+a2 as ( select 'a2 <- ' || id as id from a1) -- materialized by the root
+select * from a1
+union all
+select * from x
+union all
+select * from a2
+union all
+select * from a2
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Input: default@a1
+PREHOOK: Input: default@a2
+PREHOOK: Output: database:default
+PREHOOK: Output: default@a1
+PREHOOK: Output: default@a2
+#### A masked pattern was here ####
+POSTHOOK: query: with x as ( select 'x' as id ), -- not materialized
+a1 as ( select 'a1' as id ), -- materialized by a2 and the root
+a2 as ( select 'a2 <- ' || id as id from a1) -- materialized by the root
+select * from a1
+union all
+select * from x
+union all
+select * from a2
+union all
+select * from a2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Input: default@a1
+POSTHOOK: Input: default@a2
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@a1
+POSTHOOK: Output: default@a2
+#### A masked pattern was here ####
+a1
+x
+a2 <- a1
+a2 <- a1
diff --git a/ql/src/test/results/clientpositive/llap/cte_mat_9.q.out b/ql/src/test/results/clientpositive/llap/cte_mat_9.q.out
new file mode 100644
index 00000000000..d9ac24a6f21
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/cte_mat_9.q.out
@@ -0,0 +1,411 @@
+PREHOOK: query: drop table if exists cte_mat_9_a
+PREHOOK: type: DROPTABLE
+PREHOOK: Output: database:default
+POSTHOOK: query: drop table if exists cte_mat_9_a
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Output: database:default
+PREHOOK: query: create table cte_mat_9_a (id int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@cte_mat_9_a
+POSTHOOK: query: create table cte_mat_9_a (id int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@cte_mat_9_a
+PREHOOK: query: insert into cte_mat_9_a (id) values (1)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@cte_mat_9_a
+POSTHOOK: query: insert into cte_mat_9_a (id) values (1)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@cte_mat_9_a
+POSTHOOK: Lineage: cte_mat_9_a.id SCRIPT []
+PREHOOK: query: drop table if exists cte_mat_9_b
+PREHOOK: type: DROPTABLE
+PREHOOK: Output: database:default
+POSTHOOK: query: drop table if exists cte_mat_9_b
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Output: database:default
+PREHOOK: query: create table cte_mat_9_b (id int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@cte_mat_9_b
+POSTHOOK: query: create table cte_mat_9_b (id int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@cte_mat_9_b
+PREHOOK: query: insert into cte_mat_9_b (id) values (1)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@cte_mat_9_b
+POSTHOOK: query: insert into cte_mat_9_b (id) values (1)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@cte_mat_9_b
+POSTHOOK: Lineage: cte_mat_9_b.id SCRIPT []
+Warning: Shuffle Join MERGEJOIN[32][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in 
Stage 'Reducer 5' is a cross product
+PREHOOK: query: explain with a0 AS (
+  select id, 'a0' as tag from cte_mat_9_a
+),
+a1 as (
+  select id, 'a1 <- ' || tag as tag from a0
+),
+b0 as (
+  select id, 'b0' as tag from cte_mat_9_b
+),
+b1 as (
+  select id, 'b1 <- ' || tag as tag from b0
+),
+b2 as (
+  select id, 'b2 <- ' || tag as tag  from b1
+),
+b3 as (
+  select id, 'b3 <- ' || tag as tag from b2
+),
+c as (
+  select b2.id, 'c <- (' || b2.tag || ' & ' || b3.tag || ')' as tag
+  from b2
+  full outer join b3 on b2.id = b3.id
+)
+select b1.id, b1.tag, a1.tag, c.tag
+from b1
+full outer join a1 on b1.id = a1.id
+full outer join c on c.id = c.id
+PREHOOK: type: QUERY
+PREHOOK: Input: default@b1
+PREHOOK: Input: default@b2
+PREHOOK: Input: default@cte_mat_9_a
+#### A masked pattern was here ####
+POSTHOOK: query: explain with a0 AS (
+  select id, 'a0' as tag from cte_mat_9_a
+),
+a1 as (
+  select id, 'a1 <- ' || tag as tag from a0
+),
+b0 as (
+  select id, 'b0' as tag from cte_mat_9_b
+),
+b1 as (
+  select id, 'b1 <- ' || tag as tag from b0
+),
+b2 as (
+  select id, 'b2 <- ' || tag as tag  from b1
+),
+b3 as (
+  select id, 'b3 <- ' || tag as tag from b2
+),
+c as (
+  select b2.id, 'c <- (' || b2.tag || ' & ' || b3.tag || ')' as tag
+  from b2
+  full outer join b3 on b2.id = b3.id
+)
+select b1.id, b1.tag, a1.tag, c.tag
+from b1
+full outer join a1 on b1.id = a1.id
+full outer join c on c.id = c.id
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@b1
+POSTHOOK: Input: default@b2
+POSTHOOK: Input: default@cte_mat_9_a
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-4 depends on stages: Stage-2, Stage-0
+  Stage-5 depends on stages: Stage-4
+  Stage-7 depends on stages: Stage-2, Stage-0, Stage-5, Stage-3
+  Stage-3 depends on stages: Stage-4
+  Stage-0 depends on stages: Stage-1
+  Stage-6 depends on stages: Stage-7
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: cte_mat_9_b
+                  Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE 
Column stats: COMPLETE
+                  Select Operator
+                    expressions: id (type: int), 'b1 <- b0' (type: string)
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 1 Data size: 96 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    File Output Operator
+                      compressed: false
+                      Statistics: Num rows: 1 Data size: 96 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      table:
+                          input format: 
org.apache.hadoop.mapred.TextInputFormat
+                          output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                          serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                          name: default.b1
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+
+  Stage: Stage-2
+    Dependency Collection
+
+  Stage: Stage-4
+    Tez
+#### A masked pattern was here ####
+      Vertices:
+        Map 2 
+            Map Operator Tree:
+                TableScan
+                  alias: b1
+                  Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE 
Column stats: NONE
+                  Select Operator
+                    expressions: id (type: int), concat('b2 <- ', tag) (type: 
string)
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 1 Data size: 188 Basic stats: 
COMPLETE Column stats: NONE
+                    File Output Operator
+                      compressed: false
+                      Statistics: Num rows: 1 Data size: 188 Basic stats: 
COMPLETE Column stats: NONE
+                      table:
+                          input format: 
org.apache.hadoop.mapred.TextInputFormat
+                          output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                          serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                          name: default.b2
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+
+  Stage: Stage-5
+    Dependency Collection
+
+  Stage: Stage-7
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 4 <- Map 3 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE)
+        Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE), Reducer 8 
(CUSTOM_SIMPLE_EDGE)
+        Reducer 8 <- Map 7 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 3 
+            Map Operator Tree:
+                TableScan
+                  alias: b1
+                  Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE 
Column stats: NONE
+                  Select Operator
+                    expressions: id (type: int), tag (type: string)
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 1 Data size: 188 Basic stats: 
COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: _col0 (type: int)
+                      null sort order: z
+                      sort order: +
+                      Map-reduce partition columns: _col0 (type: int)
+                      Statistics: Num rows: 1 Data size: 188 Basic stats: 
COMPLETE Column stats: NONE
+                      value expressions: _col1 (type: string)
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+        Map 6 
+            Map Operator Tree:
+                TableScan
+                  alias: cte_mat_9_a
+                  Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE 
Column stats: COMPLETE
+                  Select Operator
+                    expressions: id (type: int), 'a1 <- a0' (type: string)
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 1 Data size: 96 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    Reduce Output Operator
+                      key expressions: _col0 (type: int)
+                      null sort order: z
+                      sort order: +
+                      Map-reduce partition columns: _col0 (type: int)
+                      Statistics: Num rows: 1 Data size: 96 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      value expressions: _col1 (type: string)
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+        Map 7 
+            Map Operator Tree:
+                TableScan
+                  alias: b2
+                  Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE 
Column stats: NONE
+                  Select Operator
+                    expressions: id (type: int), tag (type: string)
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 1 Data size: 188 Basic stats: 
COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: _col0 (type: int)
+                      null sort order: z
+                      sort order: +
+                      Map-reduce partition columns: _col0 (type: int)
+                      Statistics: Num rows: 1 Data size: 188 Basic stats: 
COMPLETE Column stats: NONE
+                      value expressions: _col1 (type: string)
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+        Map 9 
+            Map Operator Tree:
+                TableScan
+                  alias: b2
+                  Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE 
Column stats: NONE
+                  Select Operator
+                    expressions: id (type: int), concat('b3 <- ', tag) (type: 
string)
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 1 Data size: 188 Basic stats: 
COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: _col0 (type: int)
+                      null sort order: z
+                      sort order: +
+                      Map-reduce partition columns: _col0 (type: int)
+                      Statistics: Num rows: 1 Data size: 188 Basic stats: 
COMPLETE Column stats: NONE
+                      value expressions: _col1 (type: string)
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+        Reducer 4 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Merge Join Operator
+                condition map:
+                     Full Outer Join 0 to 1
+                keys:
+                  0 _col0 (type: int)
+                  1 _col0 (type: int)
+                outputColumnNames: _col0, _col1, _col3
+                Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE 
Column stats: NONE
+                Reduce Output Operator
+                  null sort order: 
+                  sort order: 
+                  Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE 
Column stats: NONE
+                  value expressions: _col0 (type: int), _col1 (type: string), 
_col3 (type: string)
+        Reducer 5 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Merge Join Operator
+                condition map:
+                     Full Outer Join 0 to 1
+                filter predicates:
+                  0 
+                  1 {VALUE._col0 is not null}
+                keys:
+                  0 
+                  1 
+                outputColumnNames: _col0, _col1, _col3, _col5
+                Statistics: Num rows: 1 Data size: 413 Basic stats: COMPLETE 
Column stats: NONE
+                Select Operator
+                  expressions: _col0 (type: int), _col1 (type: string), _col3 
(type: string), _col5 (type: string)
+                  outputColumnNames: _col0, _col1, _col2, _col3
+                  Statistics: Num rows: 1 Data size: 413 Basic stats: COMPLETE 
Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 1 Data size: 413 Basic stats: 
COMPLETE Column stats: NONE
+                    table:
+                        input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 8 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Merge Join Operator
+                condition map:
+                     Full Outer Join 0 to 1
+                keys:
+                  0 _col0 (type: int)
+                  1 _col0 (type: int)
+                outputColumnNames: _col0, _col1, _col3
+                Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE 
Column stats: NONE
+                Select Operator
+                  expressions: _col0 (type: int), 
concat(concat(concat(concat('c <- (', _col1), ' & '), _col3), ')') (type: 
string)
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE 
Column stats: NONE
+                  Reduce Output Operator
+                    null sort order: 
+                    sort order: 
+                    Statistics: Num rows: 1 Data size: 206 Basic stats: 
COMPLETE Column stats: NONE
+                    value expressions: _col0 (type: int), _col1 (type: string)
+
+  Stage: Stage-3
+    Move Operator
+      files:
+          hdfs directory: true
+#### A masked pattern was here ####
+
+  Stage: Stage-0
+    Move Operator
+      files:
+          hdfs directory: true
+#### A masked pattern was here ####
+
+  Stage: Stage-6
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+Warning: Shuffle Join MERGEJOIN[32][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in 
Stage 'Reducer 5' is a cross product
+PREHOOK: query: with a0 AS (
+  select id, 'a0' as tag from cte_mat_9_a
+),
+a1 as (
+  select id, 'a1 <- ' || tag as tag from a0
+),
+b0 as (
+  select id, 'b0' as tag from cte_mat_9_b
+),
+b1 as (
+  select id, 'b1 <- ' || tag as tag from b0
+),
+b2 as (
+  select id, 'b2 <- ' || tag as tag  from b1
+),
+b3 as (
+  select id, 'b3 <- ' || tag as tag from b2
+),
+c as (
+  select b2.id, 'c <- (' || b2.tag || ' & ' || b3.tag || ')' as tag
+  from b2
+  full outer join b3 on b2.id = b3.id
+)
+select b1.id, b1.tag, a1.tag, c.tag
+from b1
+full outer join a1 on b1.id = a1.id
+full outer join c on c.id = c.id
+PREHOOK: type: QUERY
+PREHOOK: Input: default@b1
+PREHOOK: Input: default@b2
+PREHOOK: Input: default@cte_mat_9_a
+PREHOOK: Input: default@cte_mat_9_b
+PREHOOK: Output: database:default
+PREHOOK: Output: default@b1
+PREHOOK: Output: default@b2
+#### A masked pattern was here ####
+POSTHOOK: query: with a0 AS (
+  select id, 'a0' as tag from cte_mat_9_a
+),
+a1 as (
+  select id, 'a1 <- ' || tag as tag from a0
+),
+b0 as (
+  select id, 'b0' as tag from cte_mat_9_b
+),
+b1 as (
+  select id, 'b1 <- ' || tag as tag from b0
+),
+b2 as (
+  select id, 'b2 <- ' || tag as tag  from b1
+),
+b3 as (
+  select id, 'b3 <- ' || tag as tag from b2
+),
+c as (
+  select b2.id, 'c <- (' || b2.tag || ' & ' || b3.tag || ')' as tag
+  from b2
+  full outer join b3 on b2.id = b3.id
+)
+select b1.id, b1.tag, a1.tag, c.tag
+from b1
+full outer join a1 on b1.id = a1.id
+full outer join c on c.id = c.id
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@b1
+POSTHOOK: Input: default@b2
+POSTHOOK: Input: default@cte_mat_9_a
+POSTHOOK: Input: default@cte_mat_9_b
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@b1
+POSTHOOK: Output: default@b2
+#### A masked pattern was here ####
+1      b1 <- b0        a1 <- a0        c <- (b2 <- b1 <- b0 & b3 <- b2 <- b1 
<- b0)

[hive] branch master updated: HIVE-24606: Multi-stage materialized CTEs can lose intermediate data (okumin, reviewed by Krisztian Kasa)

Reply via email to