This is an automated email from the ASF dual-hosted git repository. vgarg pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push: new 21df633 HIVE-24304: Query containing UNION fails with OOM (Vineet Garg, reviewed by Jesus Camacho Rodriguez) 21df633 is described below commit 21df633ab0c71496a17d9415006f139fe3452970 Author: Vineet G <vg...@apache.org> AuthorDate: Fri Oct 23 16:54:39 2020 -0700 HIVE-24304: Query containing UNION fails with OOM (Vineet Garg, reviewed by Jesus Camacho Rodriguez) --- .../test/resources/testconfiguration.properties | 1 + .../calcite/HiveDefaultRelMetadataProvider.java | 3 + .../calcite/HiveTezModelRelMetadataProvider.java | 2 + .../calcite/stats/HiveRelMdExpressionLineage.java | 67 ++ ql/src/test/queries/clientpositive/union_lineage.q | 208 ++++++ .../clientpositive/llap/union_lineage.q.out | 772 +++++++++++++++++++++ .../perf/tez/constraints/cbo_query54.q.out | 74 +- .../perf/tez/constraints/query54.q.out | 272 ++++---- 8 files changed, 1226 insertions(+), 173 deletions(-) diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties index a7d5c91..217a819 100644 --- a/itests/src/test/resources/testconfiguration.properties +++ b/itests/src/test/resources/testconfiguration.properties @@ -167,6 +167,7 @@ minillap.query.files=\ udf_printf.q,\ union23.q,\ unionDistinct_1.q,\ + union_lineage.q,\ union_script.q,\ vector_custom_udf_configure.q,\ vector_offset_limit.q,\ diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveDefaultRelMetadataProvider.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveDefaultRelMetadataProvider.java index 4ef85c5..5b6570a 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveDefaultRelMetadataProvider.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveDefaultRelMetadataProvider.java @@ -32,6 +32,7 @@ import org.apache.hadoop.hive.ql.optimizer.calcite.stats.HiveRelMdCollation; import org.apache.hadoop.hive.ql.optimizer.calcite.stats.HiveRelMdCumulativeCost; import org.apache.hadoop.hive.ql.optimizer.calcite.stats.HiveRelMdDistinctRowCount; import org.apache.hadoop.hive.ql.optimizer.calcite.stats.HiveRelMdDistribution; +import org.apache.hadoop.hive.ql.optimizer.calcite.stats.HiveRelMdExpressionLineage; import org.apache.hadoop.hive.ql.optimizer.calcite.stats.HiveRelMdMemory; import org.apache.hadoop.hive.ql.optimizer.calcite.stats.HiveRelMdParallelism; import org.apache.hadoop.hive.ql.optimizer.calcite.stats.HiveRelMdPredicates; @@ -60,6 +61,7 @@ public class HiveDefaultRelMetadataProvider { HiveRelMdRuntimeRowCount.SOURCE, HiveRelMdUniqueKeys.SOURCE, HiveRelMdColumnUniqueness.SOURCE, + HiveRelMdExpressionLineage.SOURCE, HiveRelMdSize.SOURCE, HiveRelMdMemory.SOURCE, HiveRelMdDistribution.SOURCE, @@ -93,6 +95,7 @@ public class HiveDefaultRelMetadataProvider { HiveRelMdRowCount.SOURCE, HiveRelMdUniqueKeys.SOURCE, HiveRelMdColumnUniqueness.SOURCE, + HiveRelMdExpressionLineage.SOURCE, HiveRelMdSize.SOURCE, HiveRelMdMemory.SOURCE, new HiveRelMdParallelism(maxSplitSize).getMetadataProvider(), diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveTezModelRelMetadataProvider.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveTezModelRelMetadataProvider.java index cc65544..6d153f8 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveTezModelRelMetadataProvider.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveTezModelRelMetadataProvider.java @@ -27,6 +27,7 @@ import org.apache.hadoop.hive.ql.optimizer.calcite.stats.HiveRelMdColumnUniquene import org.apache.hadoop.hive.ql.optimizer.calcite.stats.HiveRelMdCumulativeCost; import org.apache.hadoop.hive.ql.optimizer.calcite.stats.HiveRelMdDistinctRowCount; import org.apache.hadoop.hive.ql.optimizer.calcite.stats.HiveRelMdDistribution; +import org.apache.hadoop.hive.ql.optimizer.calcite.stats.HiveRelMdExpressionLineage; import org.apache.hadoop.hive.ql.optimizer.calcite.stats.HiveRelMdMemory; import org.apache.hadoop.hive.ql.optimizer.calcite.stats.HiveRelMdPredicates; import org.apache.hadoop.hive.ql.optimizer.calcite.stats.HiveRelMdRowCount; @@ -49,6 +50,7 @@ public class HiveTezModelRelMetadataProvider { HiveRelMdRowCount.SOURCE, HiveRelMdUniqueKeys.SOURCE, HiveRelMdColumnUniqueness.SOURCE, + HiveRelMdExpressionLineage.SOURCE, HiveRelMdSize.SOURCE, HiveRelMdMemory.SOURCE, HiveRelMdDistribution.SOURCE, diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdExpressionLineage.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdExpressionLineage.java new file mode 100644 index 0000000..12a34d2 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdExpressionLineage.java @@ -0,0 +1,67 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.optimizer.calcite.stats; + + +import org.apache.calcite.rel.core.Union; +import org.apache.calcite.rel.metadata.BuiltInMetadata; +import org.apache.calcite.rel.metadata.MetadataDef; +import org.apache.calcite.rel.metadata.MetadataHandler; +import org.apache.calcite.rel.metadata.ReflectiveRelMetadataProvider; +import org.apache.calcite.rel.metadata.RelMetadataProvider; +import org.apache.calcite.rel.metadata.RelMetadataQuery; +import org.apache.calcite.rex.RexNode; +import org.apache.calcite.util.BuiltInMethod; +import org.apache.calcite.util.ImmutableBitSet; +import org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveUnion; + +import java.util.Set; + +public final class HiveRelMdExpressionLineage + implements MetadataHandler<BuiltInMetadata.ExpressionLineage> { + public static final RelMetadataProvider SOURCE = + ReflectiveRelMetadataProvider.reflectiveSource( + BuiltInMethod.EXPRESSION_LINEAGE.method, new HiveRelMdExpressionLineage()); + + //~ Constructors ----------------------------------------------------------- + + private HiveRelMdExpressionLineage() {} + + //~ Methods ---------------------------------------------------------------- + + public MetadataDef<BuiltInMetadata.ExpressionLineage> getDef() { + return BuiltInMetadata.ExpressionLineage.DEF; + } + + // Union returns NULL instead of actually determining the lineage because + // Union may return multiple lineage expressions - one corresponding to each branch + // this could cause exponential possible combinations of lineage expressions + // as you go up in the operator tree. + // As the possible number of expressions increases it could lead to OOM. + // To prevent this UNION returns NULL. + // sample query could be found in union_lineage.q + public Set<RexNode> getExpressionLineage(HiveUnion rel, RelMetadataQuery mq, + RexNode outputExpression) { + return null; + } +} + +// End HiveRelMdExpressionLineage + diff --git a/ql/src/test/queries/clientpositive/union_lineage.q b/ql/src/test/queries/clientpositive/union_lineage.q new file mode 100644 index 0000000..027747d --- /dev/null +++ b/ql/src/test/queries/clientpositive/union_lineage.q @@ -0,0 +1,208 @@ +create database db1; +CREATE EXTERNAL TABLE db1.table1 +( + `level1` string, + `level1_desc` string, + `level2` string, + `level2_desc` string, + `level3` string, + `level3_desc` string, + `level4` string, + `level4_desc` string, + `level5` string, + `level5_desc` string, + `level6` string, + `level6_desc` string, + `level7` string, + `level7_desc` string, + `level8` string, + `level8_desc` string, + `level9` string, + `level9_desc` string, + `level10` string, + `level10_desc` string, + `level11` string, + `level11_desc` string, + `level12` string, + `level12_desc` string, + `level13` string, + `level13_desc` string, + `level14` string, + `level14_desc` string, + `level15` string, + `level15_desc` string, + `pack` string, + aeoi string, + aeoo string) + PARTITIONED BY ( + data_date string) stored as parquet; + + +explain create view if not exists db1.view1 as +SELECT +h.level1, +h.level1_desc, +h.level2, +h.level2_desc, +h.level3, +h.level3_desc, +h.level4, +h.level4_desc, +h.level5, +h.level5_desc, +h.level6, +h.level6_desc, +h.level7, +h.level7_desc, +h.level8, +h.level8_desc, +h.level9, +h.level9_desc, +h.level10, +h.level10_desc, +h.level11, +h.level11_desc, +h.level12, +h.level12_desc, +h.level13, +h.level13_desc, +h.level14, +h.level14_desc, +h.pack, +h.aeoi, +h.aeoo, +h.data_date, +CONCAT('#', h.level1, '#', h.level2, '#', h.level3, '#', h.level4, '#', h.level5, '#', h.level6, '#', h.level7, '#', h.level8, '#', h.level9, '#', h.level10, '#', h.level11, '#', h.level12, '#', h.level13, '#', h.level14, '#') key +FROM +(SELECT MAX(data_date) data_as_of_date, level1 FROM db1.table1 WHERE level1 = 'N00000' GROUP BY level1) d +JOIN db1.table1 h +ON (h.data_date = d.data_as_of_date +AND h.level1 = d.level1) +WHERE +'TM1-5460' NOT IN (h.level1, h.level2, h.level3, h.level4, h.level5, h.level6, h.level7, h.level8, h.level9, h.level10, h.level11, h.level12, h.level13, h.level14, h.level15) +UNION ALL SELECT DISTINCT +hhh.level1, +hhh.level1_desc, +hhh.level2, +hhh.level2_desc, +hhh.level3, +hhh.level3_desc, +hhh.level4, +hhh.level4_desc, +hhh.level5, +hhh.level5_desc, +hhh.level6, +hhh.level6_desc, +hhh.level7, +hhh.level7_desc, +hhh.level8, +hhh.level8_desc, +hhh.level9, +hhh.level9_desc, +hhh.level10, +hhh.level10_desc, +hhh.level11, +hhh.level11_desc, +hhh.level12, +hhh.level12_desc, +hhh.level13, +hhh.level13_desc, +hhh.level14, +hhh.level14_desc, +hhh.pack, +hhh.aeoi, +hhh.aeoo, +hhh.data_date, +CONCAT('#', hhh.level1, '#', hhh.level2, '#', hhh.level3, '#', hhh.level4, '#', hhh.level5, '#', hhh.level6, '#', hhh.level7, '#', hhh.level8, '#', hhh.level9, '#', hhh.level10, '#', hhh.level11, '#', hhh.level12, '#', hhh.level13, '#', hhh.level14, '#') key +FROM +(SELECT +hh.level1, +hh.level1_desc, +CASE WHEN hh.leaf_level < 2 THEN hh.leaf_code ELSE hh.level2 END level2, +CASE WHEN hh.leaf_level < 2 THEN hh.leaf_desc ELSE hh.level2_desc END level2_desc, +CASE WHEN hh.leaf_level < 3 THEN hh.leaf_code ELSE hh.level3 END level3, +CASE WHEN hh.leaf_level < 3 THEN hh.leaf_desc ELSE hh.level3_desc END level3_desc, +CASE WHEN hh.leaf_level < 4 THEN hh.leaf_code ELSE hh.level4 END level4, +CASE WHEN hh.leaf_level < 4 THEN hh.leaf_desc ELSE hh.level4_desc END level4_desc, +CASE WHEN hh.leaf_level < 5 THEN hh.leaf_code ELSE hh.level5 END level5, +CASE WHEN hh.leaf_level < 5 THEN hh.leaf_desc ELSE hh.level5_desc END level5_desc, +CASE WHEN hh.leaf_level < 6 THEN hh.leaf_code ELSE hh.level6 END level6, +CASE WHEN hh.leaf_level < 6 THEN hh.leaf_desc ELSE hh.level6_desc END level6_desc, +CASE WHEN hh.leaf_level < 7 THEN hh.leaf_code ELSE hh.level7 END level7, +CASE WHEN hh.leaf_level < 7 THEN hh.leaf_desc ELSE hh.level7_desc END level7_desc, +CASE WHEN hh.leaf_level < 8 THEN hh.leaf_code ELSE hh.level8 END level8, +CASE WHEN hh.leaf_level < 8 THEN hh.leaf_desc ELSE hh.level8_desc END level8_desc, +CASE WHEN hh.leaf_level < 9 THEN hh.leaf_code ELSE hh.level9 END level9, +CASE WHEN hh.leaf_level < 9 THEN hh.leaf_desc ELSE hh.level9_desc END level9_desc, +CASE WHEN hh.leaf_level < 10 THEN hh.leaf_code ELSE hh.level10 END level10, +CASE WHEN hh.leaf_level < 10 THEN hh.leaf_desc ELSE hh.level10_desc END level10_desc, +CASE WHEN hh.leaf_level < 11 THEN hh.leaf_code ELSE hh.level11 END level11, +CASE WHEN hh.leaf_level < 11 THEN hh.leaf_desc ELSE hh.level11_desc END level11_desc, +CASE WHEN hh.leaf_level < 12 THEN hh.leaf_code ELSE hh.level12 END level12, +CASE WHEN hh.leaf_level < 12 THEN hh.leaf_desc ELSE hh.level12_desc END level12_desc, +CASE WHEN hh.leaf_level < 13 THEN hh.leaf_code ELSE hh.level13 END level13, +CASE WHEN hh.leaf_level < 13 THEN hh.leaf_desc ELSE hh.level13_desc END level13_desc, +CASE WHEN hh.leaf_level < 14 THEN hh.leaf_code ELSE hh.level14 END level14, +CASE WHEN hh.leaf_level < 14 THEN hh.leaf_desc ELSE hh.level14_desc END level14_desc, +hh.pack, +hh.aeoi, +hh.aeoo, +hh.data_date +FROM +(SELECT +h.*, +lc.leaf_level, +lc.leaf_code, +CASE lc.leaf_code +WHEN h.level2 THEN h.level2_desc +WHEN h.level3 THEN h.level3_desc +WHEN h.level4 THEN h.level4_desc +WHEN h.level5 THEN h.level5_desc +WHEN h.level6 THEN h.level6_desc +WHEN h.level7 THEN h.level7_desc +WHEN h.level8 THEN h.level8_desc +WHEN h.level9 THEN h.level9_desc +WHEN h.level10 THEN h.level10_desc +WHEN h.level11 THEN h.level11_desc +WHEN h.level12 THEN h.level12_desc +WHEN h.level13 THEN h.level13_desc +ELSE NULL +END leaf_desc +FROM +(SELECT MAX(data_date) data_as_of_date, level1 FROM db1.table1 WHERE level1 = 'N00000' GROUP BY level1) d +JOIN db1.table1 h +ON (h.data_date = d.data_as_of_date +AND h.level1 = d.level1), +(SELECT +l.leaf_level, +'TM1-5460' leaf_code +FROM +(SELECT 3 leaf_level +UNION ALL SELECT 4 leaf_level +UNION ALL SELECT 5 leaf_level +UNION ALL SELECT 6 leaf_level +UNION ALL SELECT 7 leaf_level +UNION ALL SELECT 8 leaf_level +UNION ALL SELECT 9 leaf_level +UNION ALL SELECT 10 leaf_level +UNION ALL SELECT 11 leaf_level +UNION ALL SELECT 12 leaf_level +UNION ALL SELECT 13 leaf_level) l) lc +WHERE +lc.leaf_code = +CASE +WHEN lc.leaf_level = 2 THEN h.level2 +WHEN lc.leaf_level = 3 THEN h.level3 +WHEN lc.leaf_level = 4 THEN h.level4 +WHEN lc.leaf_level = 5 THEN h.level5 +WHEN lc.leaf_level = 6 THEN h.level6 +WHEN lc.leaf_level = 7 THEN h.level7 +WHEN lc.leaf_level = 8 THEN h.level8 +WHEN lc.leaf_level = 9 THEN h.level9 +WHEN lc.leaf_level = 10 THEN h.level10 +WHEN lc.leaf_level = 11 THEN h.level11 +WHEN lc.leaf_level = 12 THEN h.level12 +WHEN lc.leaf_level = 13 THEN h.level13 +END +AND h.level14 <> lc.leaf_code) hh) hhh; + diff --git a/ql/src/test/results/clientpositive/llap/union_lineage.q.out b/ql/src/test/results/clientpositive/llap/union_lineage.q.out new file mode 100644 index 0000000..36b6bc9 --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/union_lineage.q.out @@ -0,0 +1,772 @@ +PREHOOK: query: create database db1 +PREHOOK: type: CREATEDATABASE +PREHOOK: Output: database:db1 +POSTHOOK: query: create database db1 +POSTHOOK: type: CREATEDATABASE +POSTHOOK: Output: database:db1 +PREHOOK: query: CREATE EXTERNAL TABLE db1.table1 +( + `level1` string, + `level1_desc` string, + `level2` string, + `level2_desc` string, + `level3` string, + `level3_desc` string, + `level4` string, + `level4_desc` string, + `level5` string, + `level5_desc` string, + `level6` string, + `level6_desc` string, + `level7` string, + `level7_desc` string, + `level8` string, + `level8_desc` string, + `level9` string, + `level9_desc` string, + `level10` string, + `level10_desc` string, + `level11` string, + `level11_desc` string, + `level12` string, + `level12_desc` string, + `level13` string, + `level13_desc` string, + `level14` string, + `level14_desc` string, + `level15` string, + `level15_desc` string, + `pack` string, + aeoi string, + aeoo string) + PARTITIONED BY ( + data_date string) stored as parquet +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:db1 +PREHOOK: Output: db1@table1 +POSTHOOK: query: CREATE EXTERNAL TABLE db1.table1 +( + `level1` string, + `level1_desc` string, + `level2` string, + `level2_desc` string, + `level3` string, + `level3_desc` string, + `level4` string, + `level4_desc` string, + `level5` string, + `level5_desc` string, + `level6` string, + `level6_desc` string, + `level7` string, + `level7_desc` string, + `level8` string, + `level8_desc` string, + `level9` string, + `level9_desc` string, + `level10` string, + `level10_desc` string, + `level11` string, + `level11_desc` string, + `level12` string, + `level12_desc` string, + `level13` string, + `level13_desc` string, + `level14` string, + `level14_desc` string, + `level15` string, + `level15_desc` string, + `pack` string, + aeoi string, + aeoo string) + PARTITIONED BY ( + data_date string) stored as parquet +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:db1 +POSTHOOK: Output: db1@table1 +PREHOOK: query: explain create view if not exists db1.view1 as +SELECT +h.level1, +h.level1_desc, +h.level2, +h.level2_desc, +h.level3, +h.level3_desc, +h.level4, +h.level4_desc, +h.level5, +h.level5_desc, +h.level6, +h.level6_desc, +h.level7, +h.level7_desc, +h.level8, +h.level8_desc, +h.level9, +h.level9_desc, +h.level10, +h.level10_desc, +h.level11, +h.level11_desc, +h.level12, +h.level12_desc, +h.level13, +h.level13_desc, +h.level14, +h.level14_desc, +h.pack, +h.aeoi, +h.aeoo, +h.data_date, +CONCAT('#', h.level1, '#', h.level2, '#', h.level3, '#', h.level4, '#', h.level5, '#', h.level6, '#', h.level7, '#', h.level8, '#', h.level9, '#', h.level10, '#', h.level11, '#', h.level12, '#', h.level13, '#', h.level14, '#') key +FROM +(SELECT MAX(data_date) data_as_of_date, level1 FROM db1.table1 WHERE level1 = 'N00000' GROUP BY level1) d +JOIN db1.table1 h +ON (h.data_date = d.data_as_of_date +AND h.level1 = d.level1) +WHERE +'TM1-5460' NOT IN (h.level1, h.level2, h.level3, h.level4, h.level5, h.level6, h.level7, h.level8, h.level9, h.level10, h.level11, h.level12, h.level13, h.level14, h.level15) +UNION ALL SELECT DISTINCT +hhh.level1, +hhh.level1_desc, +hhh.level2, +hhh.level2_desc, +hhh.level3, +hhh.level3_desc, +hhh.level4, +hhh.level4_desc, +hhh.level5, +hhh.level5_desc, +hhh.level6, +hhh.level6_desc, +hhh.level7, +hhh.level7_desc, +hhh.level8, +hhh.level8_desc, +hhh.level9, +hhh.level9_desc, +hhh.level10, +hhh.level10_desc, +hhh.level11, +hhh.level11_desc, +hhh.level12, +hhh.level12_desc, +hhh.level13, +hhh.level13_desc, +hhh.level14, +hhh.level14_desc, +hhh.pack, +hhh.aeoi, +hhh.aeoo, +hhh.data_date, +CONCAT('#', hhh.level1, '#', hhh.level2, '#', hhh.level3, '#', hhh.level4, '#', hhh.level5, '#', hhh.level6, '#', hhh.level7, '#', hhh.level8, '#', hhh.level9, '#', hhh.level10, '#', hhh.level11, '#', hhh.level12, '#', hhh.level13, '#', hhh.level14, '#') key +FROM +(SELECT +hh.level1, +hh.level1_desc, +CASE WHEN hh.leaf_level < 2 THEN hh.leaf_code ELSE hh.level2 END level2, +CASE WHEN hh.leaf_level < 2 THEN hh.leaf_desc ELSE hh.level2_desc END level2_desc, +CASE WHEN hh.leaf_level < 3 THEN hh.leaf_code ELSE hh.level3 END level3, +CASE WHEN hh.leaf_level < 3 THEN hh.leaf_desc ELSE hh.level3_desc END level3_desc, +CASE WHEN hh.leaf_level < 4 THEN hh.leaf_code ELSE hh.level4 END level4, +CASE WHEN hh.leaf_level < 4 THEN hh.leaf_desc ELSE hh.level4_desc END level4_desc, +CASE WHEN hh.leaf_level < 5 THEN hh.leaf_code ELSE hh.level5 END level5, +CASE WHEN hh.leaf_level < 5 THEN hh.leaf_desc ELSE hh.level5_desc END level5_desc, +CASE WHEN hh.leaf_level < 6 THEN hh.leaf_code ELSE hh.level6 END level6, +CASE WHEN hh.leaf_level < 6 THEN hh.leaf_desc ELSE hh.level6_desc END level6_desc, +CASE WHEN hh.leaf_level < 7 THEN hh.leaf_code ELSE hh.level7 END level7, +CASE WHEN hh.leaf_level < 7 THEN hh.leaf_desc ELSE hh.level7_desc END level7_desc, +CASE WHEN hh.leaf_level < 8 THEN hh.leaf_code ELSE hh.level8 END level8, +CASE WHEN hh.leaf_level < 8 THEN hh.leaf_desc ELSE hh.level8_desc END level8_desc, +CASE WHEN hh.leaf_level < 9 THEN hh.leaf_code ELSE hh.level9 END level9, +CASE WHEN hh.leaf_level < 9 THEN hh.leaf_desc ELSE hh.level9_desc END level9_desc, +CASE WHEN hh.leaf_level < 10 THEN hh.leaf_code ELSE hh.level10 END level10, +CASE WHEN hh.leaf_level < 10 THEN hh.leaf_desc ELSE hh.level10_desc END level10_desc, +CASE WHEN hh.leaf_level < 11 THEN hh.leaf_code ELSE hh.level11 END level11, +CASE WHEN hh.leaf_level < 11 THEN hh.leaf_desc ELSE hh.level11_desc END level11_desc, +CASE WHEN hh.leaf_level < 12 THEN hh.leaf_code ELSE hh.level12 END level12, +CASE WHEN hh.leaf_level < 12 THEN hh.leaf_desc ELSE hh.level12_desc END level12_desc, +CASE WHEN hh.leaf_level < 13 THEN hh.leaf_code ELSE hh.level13 END level13, +CASE WHEN hh.leaf_level < 13 THEN hh.leaf_desc ELSE hh.level13_desc END level13_desc, +CASE WHEN hh.leaf_level < 14 THEN hh.leaf_code ELSE hh.level14 END level14, +CASE WHEN hh.leaf_level < 14 THEN hh.leaf_desc ELSE hh.level14_desc END level14_desc, +hh.pack, +hh.aeoi, +hh.aeoo, +hh.data_date +FROM +(SELECT +h.*, +lc.leaf_level, +lc.leaf_code, +CASE lc.leaf_code +WHEN h.level2 THEN h.level2_desc +WHEN h.level3 THEN h.level3_desc +WHEN h.level4 THEN h.level4_desc +WHEN h.level5 THEN h.level5_desc +WHEN h.level6 THEN h.level6_desc +WHEN h.level7 THEN h.level7_desc +WHEN h.level8 THEN h.level8_desc +WHEN h.level9 THEN h.level9_desc +WHEN h.level10 THEN h.level10_desc +WHEN h.level11 THEN h.level11_desc +WHEN h.level12 THEN h.level12_desc +WHEN h.level13 THEN h.level13_desc +ELSE NULL +END leaf_desc +FROM +(SELECT MAX(data_date) data_as_of_date, level1 FROM db1.table1 WHERE level1 = 'N00000' GROUP BY level1) d +JOIN db1.table1 h +ON (h.data_date = d.data_as_of_date +AND h.level1 = d.level1), +(SELECT +l.leaf_level, +'TM1-5460' leaf_code +FROM +(SELECT 3 leaf_level +UNION ALL SELECT 4 leaf_level +UNION ALL SELECT 5 leaf_level +UNION ALL SELECT 6 leaf_level +UNION ALL SELECT 7 leaf_level +UNION ALL SELECT 8 leaf_level +UNION ALL SELECT 9 leaf_level +UNION ALL SELECT 10 leaf_level +UNION ALL SELECT 11 leaf_level +UNION ALL SELECT 12 leaf_level +UNION ALL SELECT 13 leaf_level) l) lc +WHERE +lc.leaf_code = +CASE +WHEN lc.leaf_level = 2 THEN h.level2 +WHEN lc.leaf_level = 3 THEN h.level3 +WHEN lc.leaf_level = 4 THEN h.level4 +WHEN lc.leaf_level = 5 THEN h.level5 +WHEN lc.leaf_level = 6 THEN h.level6 +WHEN lc.leaf_level = 7 THEN h.level7 +WHEN lc.leaf_level = 8 THEN h.level8 +WHEN lc.leaf_level = 9 THEN h.level9 +WHEN lc.leaf_level = 10 THEN h.level10 +WHEN lc.leaf_level = 11 THEN h.level11 +WHEN lc.leaf_level = 12 THEN h.level12 +WHEN lc.leaf_level = 13 THEN h.level13 +END +AND h.level14 <> lc.leaf_code) hh) hhh +PREHOOK: type: CREATEVIEW +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Input: db1@table1 +PREHOOK: Output: database:db1 +PREHOOK: Output: db1@view1 +POSTHOOK: query: explain create view if not exists db1.view1 as +SELECT +h.level1, +h.level1_desc, +h.level2, +h.level2_desc, +h.level3, +h.level3_desc, +h.level4, +h.level4_desc, +h.level5, +h.level5_desc, +h.level6, +h.level6_desc, +h.level7, +h.level7_desc, +h.level8, +h.level8_desc, +h.level9, +h.level9_desc, +h.level10, +h.level10_desc, +h.level11, +h.level11_desc, +h.level12, +h.level12_desc, +h.level13, +h.level13_desc, +h.level14, +h.level14_desc, +h.pack, +h.aeoi, +h.aeoo, +h.data_date, +CONCAT('#', h.level1, '#', h.level2, '#', h.level3, '#', h.level4, '#', h.level5, '#', h.level6, '#', h.level7, '#', h.level8, '#', h.level9, '#', h.level10, '#', h.level11, '#', h.level12, '#', h.level13, '#', h.level14, '#') key +FROM +(SELECT MAX(data_date) data_as_of_date, level1 FROM db1.table1 WHERE level1 = 'N00000' GROUP BY level1) d +JOIN db1.table1 h +ON (h.data_date = d.data_as_of_date +AND h.level1 = d.level1) +WHERE +'TM1-5460' NOT IN (h.level1, h.level2, h.level3, h.level4, h.level5, h.level6, h.level7, h.level8, h.level9, h.level10, h.level11, h.level12, h.level13, h.level14, h.level15) +UNION ALL SELECT DISTINCT +hhh.level1, +hhh.level1_desc, +hhh.level2, +hhh.level2_desc, +hhh.level3, +hhh.level3_desc, +hhh.level4, +hhh.level4_desc, +hhh.level5, +hhh.level5_desc, +hhh.level6, +hhh.level6_desc, +hhh.level7, +hhh.level7_desc, +hhh.level8, +hhh.level8_desc, +hhh.level9, +hhh.level9_desc, +hhh.level10, +hhh.level10_desc, +hhh.level11, +hhh.level11_desc, +hhh.level12, +hhh.level12_desc, +hhh.level13, +hhh.level13_desc, +hhh.level14, +hhh.level14_desc, +hhh.pack, +hhh.aeoi, +hhh.aeoo, +hhh.data_date, +CONCAT('#', hhh.level1, '#', hhh.level2, '#', hhh.level3, '#', hhh.level4, '#', hhh.level5, '#', hhh.level6, '#', hhh.level7, '#', hhh.level8, '#', hhh.level9, '#', hhh.level10, '#', hhh.level11, '#', hhh.level12, '#', hhh.level13, '#', hhh.level14, '#') key +FROM +(SELECT +hh.level1, +hh.level1_desc, +CASE WHEN hh.leaf_level < 2 THEN hh.leaf_code ELSE hh.level2 END level2, +CASE WHEN hh.leaf_level < 2 THEN hh.leaf_desc ELSE hh.level2_desc END level2_desc, +CASE WHEN hh.leaf_level < 3 THEN hh.leaf_code ELSE hh.level3 END level3, +CASE WHEN hh.leaf_level < 3 THEN hh.leaf_desc ELSE hh.level3_desc END level3_desc, +CASE WHEN hh.leaf_level < 4 THEN hh.leaf_code ELSE hh.level4 END level4, +CASE WHEN hh.leaf_level < 4 THEN hh.leaf_desc ELSE hh.level4_desc END level4_desc, +CASE WHEN hh.leaf_level < 5 THEN hh.leaf_code ELSE hh.level5 END level5, +CASE WHEN hh.leaf_level < 5 THEN hh.leaf_desc ELSE hh.level5_desc END level5_desc, +CASE WHEN hh.leaf_level < 6 THEN hh.leaf_code ELSE hh.level6 END level6, +CASE WHEN hh.leaf_level < 6 THEN hh.leaf_desc ELSE hh.level6_desc END level6_desc, +CASE WHEN hh.leaf_level < 7 THEN hh.leaf_code ELSE hh.level7 END level7, +CASE WHEN hh.leaf_level < 7 THEN hh.leaf_desc ELSE hh.level7_desc END level7_desc, +CASE WHEN hh.leaf_level < 8 THEN hh.leaf_code ELSE hh.level8 END level8, +CASE WHEN hh.leaf_level < 8 THEN hh.leaf_desc ELSE hh.level8_desc END level8_desc, +CASE WHEN hh.leaf_level < 9 THEN hh.leaf_code ELSE hh.level9 END level9, +CASE WHEN hh.leaf_level < 9 THEN hh.leaf_desc ELSE hh.level9_desc END level9_desc, +CASE WHEN hh.leaf_level < 10 THEN hh.leaf_code ELSE hh.level10 END level10, +CASE WHEN hh.leaf_level < 10 THEN hh.leaf_desc ELSE hh.level10_desc END level10_desc, +CASE WHEN hh.leaf_level < 11 THEN hh.leaf_code ELSE hh.level11 END level11, +CASE WHEN hh.leaf_level < 11 THEN hh.leaf_desc ELSE hh.level11_desc END level11_desc, +CASE WHEN hh.leaf_level < 12 THEN hh.leaf_code ELSE hh.level12 END level12, +CASE WHEN hh.leaf_level < 12 THEN hh.leaf_desc ELSE hh.level12_desc END level12_desc, +CASE WHEN hh.leaf_level < 13 THEN hh.leaf_code ELSE hh.level13 END level13, +CASE WHEN hh.leaf_level < 13 THEN hh.leaf_desc ELSE hh.level13_desc END level13_desc, +CASE WHEN hh.leaf_level < 14 THEN hh.leaf_code ELSE hh.level14 END level14, +CASE WHEN hh.leaf_level < 14 THEN hh.leaf_desc ELSE hh.level14_desc END level14_desc, +hh.pack, +hh.aeoi, +hh.aeoo, +hh.data_date +FROM +(SELECT +h.*, +lc.leaf_level, +lc.leaf_code, +CASE lc.leaf_code +WHEN h.level2 THEN h.level2_desc +WHEN h.level3 THEN h.level3_desc +WHEN h.level4 THEN h.level4_desc +WHEN h.level5 THEN h.level5_desc +WHEN h.level6 THEN h.level6_desc +WHEN h.level7 THEN h.level7_desc +WHEN h.level8 THEN h.level8_desc +WHEN h.level9 THEN h.level9_desc +WHEN h.level10 THEN h.level10_desc +WHEN h.level11 THEN h.level11_desc +WHEN h.level12 THEN h.level12_desc +WHEN h.level13 THEN h.level13_desc +ELSE NULL +END leaf_desc +FROM +(SELECT MAX(data_date) data_as_of_date, level1 FROM db1.table1 WHERE level1 = 'N00000' GROUP BY level1) d +JOIN db1.table1 h +ON (h.data_date = d.data_as_of_date +AND h.level1 = d.level1), +(SELECT +l.leaf_level, +'TM1-5460' leaf_code +FROM +(SELECT 3 leaf_level +UNION ALL SELECT 4 leaf_level +UNION ALL SELECT 5 leaf_level +UNION ALL SELECT 6 leaf_level +UNION ALL SELECT 7 leaf_level +UNION ALL SELECT 8 leaf_level +UNION ALL SELECT 9 leaf_level +UNION ALL SELECT 10 leaf_level +UNION ALL SELECT 11 leaf_level +UNION ALL SELECT 12 leaf_level +UNION ALL SELECT 13 leaf_level) l) lc +WHERE +lc.leaf_code = +CASE +WHEN lc.leaf_level = 2 THEN h.level2 +WHEN lc.leaf_level = 3 THEN h.level3 +WHEN lc.leaf_level = 4 THEN h.level4 +WHEN lc.leaf_level = 5 THEN h.level5 +WHEN lc.leaf_level = 6 THEN h.level6 +WHEN lc.leaf_level = 7 THEN h.level7 +WHEN lc.leaf_level = 8 THEN h.level8 +WHEN lc.leaf_level = 9 THEN h.level9 +WHEN lc.leaf_level = 10 THEN h.level10 +WHEN lc.leaf_level = 11 THEN h.level11 +WHEN lc.leaf_level = 12 THEN h.level12 +WHEN lc.leaf_level = 13 THEN h.level13 +END +AND h.level14 <> lc.leaf_code) hh) hhh +POSTHOOK: type: CREATEVIEW +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Input: db1@table1 +POSTHOOK: Output: database:db1 +POSTHOOK: Output: db1@view1 +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Create View + expanded text: SELECT +`h`.`level1`, +`h`.`level1_desc`, +`h`.`level2`, +`h`.`level2_desc`, +`h`.`level3`, +`h`.`level3_desc`, +`h`.`level4`, +`h`.`level4_desc`, +`h`.`level5`, +`h`.`level5_desc`, +`h`.`level6`, +`h`.`level6_desc`, +`h`.`level7`, +`h`.`level7_desc`, +`h`.`level8`, +`h`.`level8_desc`, +`h`.`level9`, +`h`.`level9_desc`, +`h`.`level10`, +`h`.`level10_desc`, +`h`.`level11`, +`h`.`level11_desc`, +`h`.`level12`, +`h`.`level12_desc`, +`h`.`level13`, +`h`.`level13_desc`, +`h`.`level14`, +`h`.`level14_desc`, +`h`.`pack`, +`h`.`aeoi`, +`h`.`aeoo`, +`h`.`data_date`, +CONCAT('#', `h`.`level1`, '#', `h`.`level2`, '#', `h`.`level3`, '#', `h`.`level4`, '#', `h`.`level5`, '#', `h`.`level6`, '#', `h`.`level7`, '#', `h`.`level8`, '#', `h`.`level9`, '#', `h`.`level10`, '#', `h`.`level11`, '#', `h`.`level12`, '#', `h`.`level13`, '#', `h`.`level14`, '#') `key` +FROM +(SELECT MAX(`table1`.`data_date`) `data_as_of_date`, `table1`.`level1` FROM `db1`.`table1` WHERE `table1`.`level1` = 'N00000' GROUP BY `table1`.`level1`) `d` +JOIN `db1`.`table1` `h` +ON (`h`.`data_date` = `d`.`data_as_of_date` +AND `h`.`level1` = `d`.`level1`) +WHERE +'TM1-5460' NOT IN (`h`.`level1`, `h`.`level2`, `h`.`level3`, `h`.`level4`, `h`.`level5`, `h`.`level6`, `h`.`level7`, `h`.`level8`, `h`.`level9`, `h`.`level10`, `h`.`level11`, `h`.`level12`, `h`.`level13`, `h`.`level14`, `h`.`level15`) +UNION ALL SELECT DISTINCT +`hhh`.`level1`, +`hhh`.`level1_desc`, +`hhh`.`level2`, +`hhh`.`level2_desc`, +`hhh`.`level3`, +`hhh`.`level3_desc`, +`hhh`.`level4`, +`hhh`.`level4_desc`, +`hhh`.`level5`, +`hhh`.`level5_desc`, +`hhh`.`level6`, +`hhh`.`level6_desc`, +`hhh`.`level7`, +`hhh`.`level7_desc`, +`hhh`.`level8`, +`hhh`.`level8_desc`, +`hhh`.`level9`, +`hhh`.`level9_desc`, +`hhh`.`level10`, +`hhh`.`level10_desc`, +`hhh`.`level11`, +`hhh`.`level11_desc`, +`hhh`.`level12`, +`hhh`.`level12_desc`, +`hhh`.`level13`, +`hhh`.`level13_desc`, +`hhh`.`level14`, +`hhh`.`level14_desc`, +`hhh`.`pack`, +`hhh`.`aeoi`, +`hhh`.`aeoo`, +`hhh`.`data_date`, +CONCAT('#', `hhh`.`level1`, '#', `hhh`.`level2`, '#', `hhh`.`level3`, '#', `hhh`.`level4`, '#', `hhh`.`level5`, '#', `hhh`.`level6`, '#', `hhh`.`level7`, '#', `hhh`.`level8`, '#', `hhh`.`level9`, '#', `hhh`.`level10`, '#', `hhh`.`level11`, '#', `hhh`.`level12`, '#', `hhh`.`level13`, '#', `hhh`.`level14`, '#') `key` +FROM +(SELECT +`hh`.`level1`, +`hh`.`level1_desc`, +CASE WHEN `hh`.`leaf_level` < 2 THEN `hh`.`leaf_code` ELSE `hh`.`level2` END `level2`, +CASE WHEN `hh`.`leaf_level` < 2 THEN `hh`.`leaf_desc` ELSE `hh`.`level2_desc` END `level2_desc`, +CASE WHEN `hh`.`leaf_level` < 3 THEN `hh`.`leaf_code` ELSE `hh`.`level3` END `level3`, +CASE WHEN `hh`.`leaf_level` < 3 THEN `hh`.`leaf_desc` ELSE `hh`.`level3_desc` END `level3_desc`, +CASE WHEN `hh`.`leaf_level` < 4 THEN `hh`.`leaf_code` ELSE `hh`.`level4` END `level4`, +CASE WHEN `hh`.`leaf_level` < 4 THEN `hh`.`leaf_desc` ELSE `hh`.`level4_desc` END `level4_desc`, +CASE WHEN `hh`.`leaf_level` < 5 THEN `hh`.`leaf_code` ELSE `hh`.`level5` END `level5`, +CASE WHEN `hh`.`leaf_level` < 5 THEN `hh`.`leaf_desc` ELSE `hh`.`level5_desc` END `level5_desc`, +CASE WHEN `hh`.`leaf_level` < 6 THEN `hh`.`leaf_code` ELSE `hh`.`level6` END `level6`, +CASE WHEN `hh`.`leaf_level` < 6 THEN `hh`.`leaf_desc` ELSE `hh`.`level6_desc` END `level6_desc`, +CASE WHEN `hh`.`leaf_level` < 7 THEN `hh`.`leaf_code` ELSE `hh`.`level7` END `level7`, +CASE WHEN `hh`.`leaf_level` < 7 THEN `hh`.`leaf_desc` ELSE `hh`.`level7_desc` END `level7_desc`, +CASE WHEN `hh`.`leaf_level` < 8 THEN `hh`.`leaf_code` ELSE `hh`.`level8` END `level8`, +CASE WHEN `hh`.`leaf_level` < 8 THEN `hh`.`leaf_desc` ELSE `hh`.`level8_desc` END `level8_desc`, +CASE WHEN `hh`.`leaf_level` < 9 THEN `hh`.`leaf_code` ELSE `hh`.`level9` END `level9`, +CASE WHEN `hh`.`leaf_level` < 9 THEN `hh`.`leaf_desc` ELSE `hh`.`level9_desc` END `level9_desc`, +CASE WHEN `hh`.`leaf_level` < 10 THEN `hh`.`leaf_code` ELSE `hh`.`level10` END `level10`, +CASE WHEN `hh`.`leaf_level` < 10 THEN `hh`.`leaf_desc` ELSE `hh`.`level10_desc` END `level10_desc`, +CASE WHEN `hh`.`leaf_level` < 11 THEN `hh`.`leaf_code` ELSE `hh`.`level11` END `level11`, +CASE WHEN `hh`.`leaf_level` < 11 THEN `hh`.`leaf_desc` ELSE `hh`.`level11_desc` END `level11_desc`, +CASE WHEN `hh`.`leaf_level` < 12 THEN `hh`.`leaf_code` ELSE `hh`.`level12` END `level12`, +CASE WHEN `hh`.`leaf_level` < 12 THEN `hh`.`leaf_desc` ELSE `hh`.`level12_desc` END `level12_desc`, +CASE WHEN `hh`.`leaf_level` < 13 THEN `hh`.`leaf_code` ELSE `hh`.`level13` END `level13`, +CASE WHEN `hh`.`leaf_level` < 13 THEN `hh`.`leaf_desc` ELSE `hh`.`level13_desc` END `level13_desc`, +CASE WHEN `hh`.`leaf_level` < 14 THEN `hh`.`leaf_code` ELSE `hh`.`level14` END `level14`, +CASE WHEN `hh`.`leaf_level` < 14 THEN `hh`.`leaf_desc` ELSE `hh`.`level14_desc` END `level14_desc`, +`hh`.`pack`, +`hh`.`aeoi`, +`hh`.`aeoo`, +`hh`.`data_date` +FROM +(SELECT +`h`.`level1`, `h`.`level1_desc`, `h`.`level2`, `h`.`level2_desc`, `h`.`level3`, `h`.`level3_desc`, `h`.`level4`, `h`.`level4_desc`, `h`.`level5`, `h`.`level5_desc`, `h`.`level6`, `h`.`level6_desc`, `h`.`level7`, `h`.`level7_desc`, `h`.`level8`, `h`.`level8_desc`, `h`.`level9`, `h`.`level9_desc`, `h`.`level10`, `h`.`level10_desc`, `h`.`level11`, `h`.`level11_desc`, `h`.`level12`, `h`.`level12_desc`, `h`.`level13`, `h`.`level13_desc`, `h`.`level14`, `h`.`level14_desc`, `h`.`level15`, `h`.` [...] +`lc`.`leaf_level`, +`lc`.`leaf_code`, +CASE `lc`.`leaf_code` +WHEN `h`.`level2` THEN `h`.`level2_desc` +WHEN `h`.`level3` THEN `h`.`level3_desc` +WHEN `h`.`level4` THEN `h`.`level4_desc` +WHEN `h`.`level5` THEN `h`.`level5_desc` +WHEN `h`.`level6` THEN `h`.`level6_desc` +WHEN `h`.`level7` THEN `h`.`level7_desc` +WHEN `h`.`level8` THEN `h`.`level8_desc` +WHEN `h`.`level9` THEN `h`.`level9_desc` +WHEN `h`.`level10` THEN `h`.`level10_desc` +WHEN `h`.`level11` THEN `h`.`level11_desc` +WHEN `h`.`level12` THEN `h`.`level12_desc` +WHEN `h`.`level13` THEN `h`.`level13_desc` +ELSE NULL +END `leaf_desc` +FROM +(SELECT MAX(`table1`.`data_date`) `data_as_of_date`, `table1`.`level1` FROM `db1`.`table1` WHERE `table1`.`level1` = 'N00000' GROUP BY `table1`.`level1`) `d` +JOIN `db1`.`table1` `h` +ON (`h`.`data_date` = `d`.`data_as_of_date` +AND `h`.`level1` = `d`.`level1`), +(SELECT +`l`.`leaf_level`, +'TM1-5460' `leaf_code` +FROM +(SELECT 3 `leaf_level` +UNION ALL SELECT 4 `leaf_level` +UNION ALL SELECT 5 `leaf_level` +UNION ALL SELECT 6 `leaf_level` +UNION ALL SELECT 7 `leaf_level` +UNION ALL SELECT 8 `leaf_level` +UNION ALL SELECT 9 `leaf_level` +UNION ALL SELECT 10 `leaf_level` +UNION ALL SELECT 11 `leaf_level` +UNION ALL SELECT 12 `leaf_level` +UNION ALL SELECT 13 `leaf_level`) `l`) `lc` +WHERE +`lc`.`leaf_code` = +CASE +WHEN `lc`.`leaf_level` = 2 THEN `h`.`level2` +WHEN `lc`.`leaf_level` = 3 THEN `h`.`level3` +WHEN `lc`.`leaf_level` = 4 THEN `h`.`level4` +WHEN `lc`.`leaf_level` = 5 THEN `h`.`level5` +WHEN `lc`.`leaf_level` = 6 THEN `h`.`level6` +WHEN `lc`.`leaf_level` = 7 THEN `h`.`level7` +WHEN `lc`.`leaf_level` = 8 THEN `h`.`level8` +WHEN `lc`.`leaf_level` = 9 THEN `h`.`level9` +WHEN `lc`.`leaf_level` = 10 THEN `h`.`level10` +WHEN `lc`.`leaf_level` = 11 THEN `h`.`level11` +WHEN `lc`.`leaf_level` = 12 THEN `h`.`level12` +WHEN `lc`.`leaf_level` = 13 THEN `h`.`level13` +END +AND `h`.`level14` <> `lc`.`leaf_code`) `hh`) `hhh` + if not exists: true + original text: SELECT +h.level1, +h.level1_desc, +h.level2, +h.level2_desc, +h.level3, +h.level3_desc, +h.level4, +h.level4_desc, +h.level5, +h.level5_desc, +h.level6, +h.level6_desc, +h.level7, +h.level7_desc, +h.level8, +h.level8_desc, +h.level9, +h.level9_desc, +h.level10, +h.level10_desc, +h.level11, +h.level11_desc, +h.level12, +h.level12_desc, +h.level13, +h.level13_desc, +h.level14, +h.level14_desc, +h.pack, +h.aeoi, +h.aeoo, +h.data_date, +CONCAT('#', h.level1, '#', h.level2, '#', h.level3, '#', h.level4, '#', h.level5, '#', h.level6, '#', h.level7, '#', h.level8, '#', h.level9, '#', h.level10, '#', h.level11, '#', h.level12, '#', h.level13, '#', h.level14, '#') key +FROM +(SELECT MAX(data_date) data_as_of_date, level1 FROM db1.table1 WHERE level1 = 'N00000' GROUP BY level1) d +JOIN db1.table1 h +ON (h.data_date = d.data_as_of_date +AND h.level1 = d.level1) +WHERE +'TM1-5460' NOT IN (h.level1, h.level2, h.level3, h.level4, h.level5, h.level6, h.level7, h.level8, h.level9, h.level10, h.level11, h.level12, h.level13, h.level14, h.level15) +UNION ALL SELECT DISTINCT +hhh.level1, +hhh.level1_desc, +hhh.level2, +hhh.level2_desc, +hhh.level3, +hhh.level3_desc, +hhh.level4, +hhh.level4_desc, +hhh.level5, +hhh.level5_desc, +hhh.level6, +hhh.level6_desc, +hhh.level7, +hhh.level7_desc, +hhh.level8, +hhh.level8_desc, +hhh.level9, +hhh.level9_desc, +hhh.level10, +hhh.level10_desc, +hhh.level11, +hhh.level11_desc, +hhh.level12, +hhh.level12_desc, +hhh.level13, +hhh.level13_desc, +hhh.level14, +hhh.level14_desc, +hhh.pack, +hhh.aeoi, +hhh.aeoo, +hhh.data_date, +CONCAT('#', hhh.level1, '#', hhh.level2, '#', hhh.level3, '#', hhh.level4, '#', hhh.level5, '#', hhh.level6, '#', hhh.level7, '#', hhh.level8, '#', hhh.level9, '#', hhh.level10, '#', hhh.level11, '#', hhh.level12, '#', hhh.level13, '#', hhh.level14, '#') key +FROM +(SELECT +hh.level1, +hh.level1_desc, +CASE WHEN hh.leaf_level < 2 THEN hh.leaf_code ELSE hh.level2 END level2, +CASE WHEN hh.leaf_level < 2 THEN hh.leaf_desc ELSE hh.level2_desc END level2_desc, +CASE WHEN hh.leaf_level < 3 THEN hh.leaf_code ELSE hh.level3 END level3, +CASE WHEN hh.leaf_level < 3 THEN hh.leaf_desc ELSE hh.level3_desc END level3_desc, +CASE WHEN hh.leaf_level < 4 THEN hh.leaf_code ELSE hh.level4 END level4, +CASE WHEN hh.leaf_level < 4 THEN hh.leaf_desc ELSE hh.level4_desc END level4_desc, +CASE WHEN hh.leaf_level < 5 THEN hh.leaf_code ELSE hh.level5 END level5, +CASE WHEN hh.leaf_level < 5 THEN hh.leaf_desc ELSE hh.level5_desc END level5_desc, +CASE WHEN hh.leaf_level < 6 THEN hh.leaf_code ELSE hh.level6 END level6, +CASE WHEN hh.leaf_level < 6 THEN hh.leaf_desc ELSE hh.level6_desc END level6_desc, +CASE WHEN hh.leaf_level < 7 THEN hh.leaf_code ELSE hh.level7 END level7, +CASE WHEN hh.leaf_level < 7 THEN hh.leaf_desc ELSE hh.level7_desc END level7_desc, +CASE WHEN hh.leaf_level < 8 THEN hh.leaf_code ELSE hh.level8 END level8, +CASE WHEN hh.leaf_level < 8 THEN hh.leaf_desc ELSE hh.level8_desc END level8_desc, +CASE WHEN hh.leaf_level < 9 THEN hh.leaf_code ELSE hh.level9 END level9, +CASE WHEN hh.leaf_level < 9 THEN hh.leaf_desc ELSE hh.level9_desc END level9_desc, +CASE WHEN hh.leaf_level < 10 THEN hh.leaf_code ELSE hh.level10 END level10, +CASE WHEN hh.leaf_level < 10 THEN hh.leaf_desc ELSE hh.level10_desc END level10_desc, +CASE WHEN hh.leaf_level < 11 THEN hh.leaf_code ELSE hh.level11 END level11, +CASE WHEN hh.leaf_level < 11 THEN hh.leaf_desc ELSE hh.level11_desc END level11_desc, +CASE WHEN hh.leaf_level < 12 THEN hh.leaf_code ELSE hh.level12 END level12, +CASE WHEN hh.leaf_level < 12 THEN hh.leaf_desc ELSE hh.level12_desc END level12_desc, +CASE WHEN hh.leaf_level < 13 THEN hh.leaf_code ELSE hh.level13 END level13, +CASE WHEN hh.leaf_level < 13 THEN hh.leaf_desc ELSE hh.level13_desc END level13_desc, +CASE WHEN hh.leaf_level < 14 THEN hh.leaf_code ELSE hh.level14 END level14, +CASE WHEN hh.leaf_level < 14 THEN hh.leaf_desc ELSE hh.level14_desc END level14_desc, +hh.pack, +hh.aeoi, +hh.aeoo, +hh.data_date +FROM +(SELECT +h.*, +lc.leaf_level, +lc.leaf_code, +CASE lc.leaf_code +WHEN h.level2 THEN h.level2_desc +WHEN h.level3 THEN h.level3_desc +WHEN h.level4 THEN h.level4_desc +WHEN h.level5 THEN h.level5_desc +WHEN h.level6 THEN h.level6_desc +WHEN h.level7 THEN h.level7_desc +WHEN h.level8 THEN h.level8_desc +WHEN h.level9 THEN h.level9_desc +WHEN h.level10 THEN h.level10_desc +WHEN h.level11 THEN h.level11_desc +WHEN h.level12 THEN h.level12_desc +WHEN h.level13 THEN h.level13_desc +ELSE NULL +END leaf_desc +FROM +(SELECT MAX(data_date) data_as_of_date, level1 FROM db1.table1 WHERE level1 = 'N00000' GROUP BY level1) d +JOIN db1.table1 h +ON (h.data_date = d.data_as_of_date +AND h.level1 = d.level1), +(SELECT +l.leaf_level, +'TM1-5460' leaf_code +FROM +(SELECT 3 leaf_level +UNION ALL SELECT 4 leaf_level +UNION ALL SELECT 5 leaf_level +UNION ALL SELECT 6 leaf_level +UNION ALL SELECT 7 leaf_level +UNION ALL SELECT 8 leaf_level +UNION ALL SELECT 9 leaf_level +UNION ALL SELECT 10 leaf_level +UNION ALL SELECT 11 leaf_level +UNION ALL SELECT 12 leaf_level +UNION ALL SELECT 13 leaf_level) l) lc +WHERE +lc.leaf_code = +CASE +WHEN lc.leaf_level = 2 THEN h.level2 +WHEN lc.leaf_level = 3 THEN h.level3 +WHEN lc.leaf_level = 4 THEN h.level4 +WHEN lc.leaf_level = 5 THEN h.level5 +WHEN lc.leaf_level = 6 THEN h.level6 +WHEN lc.leaf_level = 7 THEN h.level7 +WHEN lc.leaf_level = 8 THEN h.level8 +WHEN lc.leaf_level = 9 THEN h.level9 +WHEN lc.leaf_level = 10 THEN h.level10 +WHEN lc.leaf_level = 11 THEN h.level11 +WHEN lc.leaf_level = 12 THEN h.level12 +WHEN lc.leaf_level = 13 THEN h.level13 +END +AND h.level14 <> lc.leaf_code) hh) hhh + columns: level1 string, level1_desc string, level2 string, level2_desc string, level3 string, level3_desc string, level4 string, level4_desc string, level5 string, level5_desc string, level6 string, level6_desc string, level7 string, level7_desc string, level8 string, level8_desc string, level9 string, level9_desc string, level10 string, level10_desc string, level11 string, level11_desc string, level12 string, level12_desc string, level13 string, level13_desc string, level14 string [...] + name: db1.view1 + diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query54.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query54.q.out index 11076f0..7481f88 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query54.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query54.q.out @@ -1,7 +1,7 @@ -Warning: Shuffle Join MERGEJOIN[286][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product -Warning: Shuffle Join MERGEJOIN[292][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product -Warning: Shuffle Join MERGEJOIN[291][tables = [$hdt$_4, $hdt$_5]] in Stage 'Reducer 15' is a cross product -Warning: Shuffle Join MERGEJOIN[294][tables = [$hdt$_4, $hdt$_5, $hdt$_6]] in Stage 'Reducer 16' is a cross product +Warning: Shuffle Join MERGEJOIN[288][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[294][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[293][tables = [$hdt$_4, $hdt$_5]] in Stage 'Reducer 15' is a cross product +Warning: Shuffle Join MERGEJOIN[296][tables = [$hdt$_4, $hdt$_5, $hdt$_6]] in Stage 'Reducer 16' is a cross product PREHOOK: query: explain cbo with my_customers as ( select distinct c_customer_sk @@ -135,9 +135,9 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveProject(segment=[$0], num_customers=[$1], segment_base=[*($0, 50)]) HiveAggregate(group=[{0}], agg#0=[count()]) HiveProject(segment=[CAST(/($1, 50:DECIMAL(10, 0))):INTEGER]) - HiveAggregate(group=[{5}], agg#0=[sum($2)]) + HiveAggregate(group=[{10}], agg#0=[sum($2)]) HiveJoin(condition=[=($0, $12)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($5, $1)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($10, $1)], joinType=[inner], algorithm=[none], cost=[not available]) HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[$3], ss_ext_sales_price=[$15]) @@ -161,37 +161,37 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveProject($f0=[+($3, 1)]) HiveFilter(condition=[AND(=($6, 1999), =($8, 3))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject($f0=[$0], $f1=[$1], ca_address_sk=[$2], ca_county=[$3], ca_state=[$4], s_county=[$5], s_state=[$6]) - HiveJoin(condition=[=($1, $2)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveSemiJoin(condition=[=($0, $2)], joinType=[semi]) - HiveProject(c_customer_sk=[$0], c_current_addr_sk=[$4]) - HiveFilter(condition=[IS NOT NULL($4)]) - HiveTableScan(table=[[default, customer]], table:alias=[customer]) - HiveProject(cs_bill_customer_sk=[$1]) - HiveJoin(condition=[=($2, $4)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(cs_sold_date_sk=[$0], cs_bill_customer_sk=[$1], cs_item_sk=[$2]) - HiveUnion(all=[true]) - HiveProject(cs_sold_date_sk=[$0], cs_bill_customer_sk=[$3], cs_item_sk=[$15]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($3))]) - HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) - HiveProject(sold_date_sk=[$0], customer_sk=[$4], item_sk=[$3]) - HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($0))]) - HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(=($6, 1999), =($8, 3))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(i_item_sk=[$0]) - HiveFilter(condition=[AND(=($10, _UTF-16LE'consignment'), =($12, _UTF-16LE'Jewelry'))]) - HiveTableScan(table=[[default, item]], table:alias=[item]) - HiveProject(ca_address_sk=[$0], ca_county=[$1], ca_state=[$2], s_county=[$3], s_state=[$4]) - HiveJoin(condition=[AND(=($1, $3), =($2, $4))], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ca_address_sk=[$0], ca_county=[$7], ca_state=[$8]) - HiveFilter(condition=[AND(IS NOT NULL($7), IS NOT NULL($8))]) - HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) - HiveProject(s_county=[$23], s_state=[$24]) - HiveFilter(condition=[AND(IS NOT NULL($23), IS NOT NULL($24))]) - HiveTableScan(table=[[default, store]], table:alias=[store]) + HiveProject(ca_address_sk=[$0], ca_county=[$1], ca_state=[$2], s_county=[$3], s_state=[$4], $f0=[$5], $f1=[$6]) + HiveJoin(condition=[=($6, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[AND(=($1, $3), =($2, $4))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ca_address_sk=[$0], ca_county=[$7], ca_state=[$8]) + HiveFilter(condition=[AND(IS NOT NULL($7), IS NOT NULL($8))]) + HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) + HiveProject(s_county=[$23], s_state=[$24]) + HiveFilter(condition=[AND(IS NOT NULL($23), IS NOT NULL($24))]) + HiveTableScan(table=[[default, store]], table:alias=[store]) + HiveProject(c_customer_sk=[$0], c_current_addr_sk=[$1]) + HiveSemiJoin(condition=[=($0, $2)], joinType=[semi]) + HiveProject(c_customer_sk=[$0], c_current_addr_sk=[$4]) + HiveFilter(condition=[IS NOT NULL($4)]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveProject(cs_bill_customer_sk=[$1]) + HiveJoin(condition=[=($2, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_bill_customer_sk=[$1], cs_item_sk=[$2]) + HiveUnion(all=[true]) + HiveProject(cs_sold_date_sk=[$0], cs_bill_customer_sk=[$3], cs_item_sk=[$15]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($3))]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(sold_date_sk=[$0], customer_sk=[$4], item_sk=[$3]) + HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($0))]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($6, 1999), =($8, 3))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(i_item_sk=[$0]) + HiveFilter(condition=[AND(=($10, _UTF-16LE'consignment'), =($12, _UTF-16LE'Jewelry'))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) HiveProject(d_date_sk=[$0], d_month_seq=[$1], $f0=[$2], $f00=[$3]) HiveJoin(condition=[<=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) HiveJoin(condition=[<=($2, $1)], joinType=[inner], algorithm=[none], cost=[not available]) diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query54.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query54.q.out index 1e2b7af..fd34cf0 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/query54.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query54.q.out @@ -1,7 +1,7 @@ -Warning: Shuffle Join MERGEJOIN[286][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product -Warning: Shuffle Join MERGEJOIN[292][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product -Warning: Shuffle Join MERGEJOIN[291][tables = [$hdt$_4, $hdt$_5]] in Stage 'Reducer 15' is a cross product -Warning: Shuffle Join MERGEJOIN[294][tables = [$hdt$_4, $hdt$_5, $hdt$_6]] in Stage 'Reducer 16' is a cross product +Warning: Shuffle Join MERGEJOIN[288][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[294][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[293][tables = [$hdt$_4, $hdt$_5]] in Stage 'Reducer 15' is a cross product +Warning: Shuffle Join MERGEJOIN[296][tables = [$hdt$_4, $hdt$_5, $hdt$_6]] in Stage 'Reducer 16' is a cross product PREHOOK: query: explain with my_customers as ( select distinct c_customer_sk @@ -133,8 +133,8 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 24 <- Reducer 22 (BROADCAST_EDGE), Union 25 (CONTAINS) -Map 26 <- Reducer 22 (BROADCAST_EDGE), Union 25 (CONTAINS) +Map 27 <- Reducer 22 (BROADCAST_EDGE), Union 28 (CONTAINS) +Map 29 <- Reducer 22 (BROADCAST_EDGE), Union 28 (CONTAINS) Reducer 10 <- Map 9 (SIMPLE_EDGE) Reducer 11 <- Reducer 10 (CUSTOM_SIMPLE_EDGE) Reducer 12 <- Map 9 (SIMPLE_EDGE) @@ -143,13 +143,13 @@ Reducer 14 <- Map 9 (SIMPLE_EDGE) Reducer 15 <- Map 9 (CUSTOM_SIMPLE_EDGE), Reducer 14 (CUSTOM_SIMPLE_EDGE) Reducer 16 <- Reducer 15 (CUSTOM_SIMPLE_EDGE), Reducer 17 (CUSTOM_SIMPLE_EDGE) Reducer 17 <- Map 9 (SIMPLE_EDGE) -Reducer 18 <- Map 9 (SIMPLE_EDGE), Union 25 (SIMPLE_EDGE) -Reducer 19 <- Map 27 (SIMPLE_EDGE), Reducer 18 (SIMPLE_EDGE) +Reducer 18 <- Map 9 (SIMPLE_EDGE), Union 28 (SIMPLE_EDGE) +Reducer 19 <- Map 30 (SIMPLE_EDGE), Reducer 18 (SIMPLE_EDGE) Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Reducer 11 (CUSTOM_SIMPLE_EDGE) -Reducer 20 <- Map 23 (SIMPLE_EDGE), Reducer 19 (SIMPLE_EDGE) -Reducer 21 <- Reducer 20 (SIMPLE_EDGE), Reducer 29 (SIMPLE_EDGE) +Reducer 20 <- Map 26 (SIMPLE_EDGE), Reducer 19 (SIMPLE_EDGE) +Reducer 21 <- Reducer 20 (SIMPLE_EDGE), Reducer 24 (SIMPLE_EDGE) Reducer 22 <- Map 9 (CUSTOM_SIMPLE_EDGE) -Reducer 29 <- Map 28 (SIMPLE_EDGE), Map 30 (SIMPLE_EDGE) +Reducer 24 <- Map 23 (SIMPLE_EDGE), Map 25 (SIMPLE_EDGE) Reducer 3 <- Reducer 13 (CUSTOM_SIMPLE_EDGE), Reducer 2 (CUSTOM_SIMPLE_EDGE) Reducer 4 <- Reducer 21 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) Reducer 5 <- Reducer 16 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) @@ -162,37 +162,37 @@ Stage-0 limit:100 Stage-1 Reducer 8 vectorized - File Output Operator [FS_377] - Limit [LIM_376] (rows=1 width=16) + File Output Operator [FS_379] + Limit [LIM_378] (rows=1 width=16) Number of rows:100 - Select Operator [SEL_375] (rows=1 width=16) + Select Operator [SEL_377] (rows=1 width=16) Output:["_col0","_col1","_col2"] <-Reducer 7 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_374] - Select Operator [SEL_373] (rows=1 width=16) + SHUFFLE [RS_376] + Select Operator [SEL_375] (rows=1 width=16) Output:["_col0","_col1","_col2"] - Top N Key Operator [TNK_372] (rows=1 width=12) + Top N Key Operator [TNK_374] (rows=1 width=12) keys:_col0, _col1,top n:100 - Group By Operator [GBY_371] (rows=1 width=12) + Group By Operator [GBY_373] (rows=1 width=12) Output:["_col0","_col1"],aggregations:["count(VALUE._col0)"],keys:KEY._col0 <-Reducer 6 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_370] + SHUFFLE [RS_372] PartitionCols:_col0 - Group By Operator [GBY_369] (rows=1 width=12) + Group By Operator [GBY_371] (rows=1 width=12) Output:["_col0","_col1"],aggregations:["count()"],keys:_col0 - Select Operator [SEL_368] (rows=1 width=116) + Select Operator [SEL_370] (rows=1 width=116) Output:["_col0"] - Top N Key Operator [TNK_367] (rows=1 width=116) + Top N Key Operator [TNK_369] (rows=1 width=116) keys:UDFToInteger((_col1 / 50)),top n:100 - Group By Operator [GBY_366] (rows=1 width=116) + Group By Operator [GBY_368] (rows=1 width=116) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 <-Reducer 5 [SIMPLE_EDGE] SHUFFLE [RS_118] PartitionCols:_col0 Group By Operator [GBY_117] (rows=228 width=116) - Output:["_col0","_col1"],aggregations:["sum(_col2)"],keys:_col5 - Merge Join Operator [MERGEJOIN_296] (rows=525327388 width=110) - Conds:RS_113._col0=RS_114._col0(Inner),Output:["_col2","_col5"] + Output:["_col0","_col1"],aggregations:["sum(_col2)"],keys:_col10 + Merge Join Operator [MERGEJOIN_298] (rows=525327388 width=110) + Conds:RS_113._col0=RS_114._col0(Inner),Output:["_col2","_col10"] <-Reducer 16 [SIMPLE_EDGE] SHUFFLE [RS_114] PartitionCols:_col0 @@ -200,222 +200,222 @@ Stage-0 Output:["_col0"] Filter Operator [FIL_102] (rows=5072841 width=12) predicate:(_col1 <= _col3) - Merge Join Operator [MERGEJOIN_294] (rows=15218525 width=12) + Merge Join Operator [MERGEJOIN_296] (rows=15218525 width=12) Conds:(Inner),Output:["_col0","_col1","_col3"] <-Reducer 15 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_99] Filter Operator [FIL_98] (rows=608741 width=12) predicate:(_col2 <= _col1) - Merge Join Operator [MERGEJOIN_291] (rows=1826225 width=12) + Merge Join Operator [MERGEJOIN_293] (rows=1826225 width=12) Conds:(Inner),Output:["_col0","_col1","_col2"] <-Map 9 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_327] - Select Operator [SEL_320] (rows=73049 width=8) + PARTITION_ONLY_SHUFFLE [RS_329] + Select Operator [SEL_322] (rows=73049 width=8) Output:["_col0","_col1"] - Filter Operator [FIL_314] (rows=73049 width=8) + Filter Operator [FIL_316] (rows=73049 width=8) predicate:d_month_seq is not null TableScan [TS_3] (rows=73049 width=12) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_month_seq","d_year","d_moy","d_date_sk"] <-Reducer 14 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_363] - Group By Operator [GBY_362] (rows=25 width=4) + PARTITION_ONLY_SHUFFLE [RS_365] + Group By Operator [GBY_364] (rows=25 width=4) Output:["_col0"],keys:KEY._col0 <-Map 9 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_330] + PARTITION_ONLY_SHUFFLE [RS_332] PartitionCols:_col0 - Group By Operator [GBY_323] (rows=25 width=4) + Group By Operator [GBY_325] (rows=25 width=4) Output:["_col0"],keys:_col0 - Select Operator [SEL_317] (rows=50 width=12) + Select Operator [SEL_319] (rows=50 width=12) Output:["_col0"] - Filter Operator [FIL_311] (rows=50 width=12) + Filter Operator [FIL_313] (rows=50 width=12) predicate:((d_year = 1999) and (d_moy = 3) and d_month_seq is not null) Please refer to the previous TableScan [TS_3] <-Reducer 17 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_365] - Group By Operator [GBY_364] (rows=25 width=4) + PARTITION_ONLY_SHUFFLE [RS_367] + Group By Operator [GBY_366] (rows=25 width=4) Output:["_col0"],keys:KEY._col0 <-Map 9 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_331] + PARTITION_ONLY_SHUFFLE [RS_333] PartitionCols:_col0 - Group By Operator [GBY_324] (rows=25 width=4) + Group By Operator [GBY_326] (rows=25 width=4) Output:["_col0"],keys:_col0 - Select Operator [SEL_318] (rows=50 width=12) + Select Operator [SEL_320] (rows=50 width=12) Output:["_col0"] - Filter Operator [FIL_312] (rows=50 width=12) + Filter Operator [FIL_314] (rows=50 width=12) predicate:((d_year = 1999) and (d_moy = 3) and d_month_seq is not null) Please refer to the previous TableScan [TS_3] <-Reducer 4 [SIMPLE_EDGE] SHUFFLE [RS_113] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_295] (rows=525327388 width=114) - Conds:RS_110._col1=RS_111._col0(Inner),Output:["_col0","_col2","_col5"] + Merge Join Operator [MERGEJOIN_297] (rows=525327388 width=114) + Conds:RS_110._col1=RS_111._col5(Inner),Output:["_col0","_col2","_col10"] <-Reducer 21 [SIMPLE_EDGE] SHUFFLE [RS_111] - PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_293] (rows=365 width=4) - Conds:RS_72._col1=RS_73._col0(Inner),Output:["_col0"] + PartitionCols:_col5 + Merge Join Operator [MERGEJOIN_295] (rows=365 width=4) + Conds:RS_72._col0=RS_73._col1(Inner),Output:["_col5"] <-Reducer 20 [SIMPLE_EDGE] - SHUFFLE [RS_72] + SHUFFLE [RS_73] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_289] (rows=365 width=8) - Conds:RS_355._col0=RS_70._col0(Left Semi),Output:["_col0","_col1"] - <-Map 23 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_355] + Merge Join Operator [MERGEJOIN_292] (rows=365 width=8) + Conds:RS_357._col0=RS_66._col0(Left Semi),Output:["_col0","_col1"] + <-Map 26 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_357] PartitionCols:_col0 - Select Operator [SEL_354] (rows=80000000 width=8) + Select Operator [SEL_356] (rows=80000000 width=8) Output:["_col0","_col1"] - Filter Operator [FIL_353] (rows=80000000 width=8) + Filter Operator [FIL_355] (rows=80000000 width=8) predicate:c_current_addr_sk is not null - TableScan [TS_33] (rows=80000000 width=8) + TableScan [TS_39] (rows=80000000 width=8) default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_current_addr_sk"] <-Reducer 19 [SIMPLE_EDGE] - SHUFFLE [RS_70] + SHUFFLE [RS_66] PartitionCols:_col0 - Group By Operator [GBY_68] (rows=365 width=0) + Group By Operator [GBY_64] (rows=365 width=0) Output:["_col0"],keys:_col0 - Select Operator [SEL_56] (rows=110092 width=0) + Select Operator [SEL_62] (rows=110092 width=0) Output:["_col0"] - Merge Join Operator [MERGEJOIN_288] (rows=110092 width=0) - Conds:RS_53._col2=RS_352._col0(Inner),Output:["_col1"] - <-Map 27 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_352] + Merge Join Operator [MERGEJOIN_291] (rows=110092 width=0) + Conds:RS_59._col2=RS_354._col0(Inner),Output:["_col1"] + <-Map 30 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_354] PartitionCols:_col0 - Select Operator [SEL_351] (rows=453 width=4) + Select Operator [SEL_353] (rows=453 width=4) Output:["_col0"] - Filter Operator [FIL_350] (rows=453 width=186) + Filter Operator [FIL_352] (rows=453 width=186) predicate:((i_class = 'consignment') and (i_category = 'Jewelry')) - TableScan [TS_47] (rows=462000 width=186) + TableScan [TS_53] (rows=462000 width=186) default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_class","i_category"] <-Reducer 18 [SIMPLE_EDGE] - SHUFFLE [RS_53] + SHUFFLE [RS_59] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_287] (rows=11665117 width=7) - Conds:Union 25._col0=RS_325._col0(Inner),Output:["_col1","_col2"] + Merge Join Operator [MERGEJOIN_290] (rows=11665117 width=7) + Conds:Union 28._col0=RS_327._col0(Inner),Output:["_col1","_col2"] <-Map 9 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_325] + PARTITION_ONLY_SHUFFLE [RS_327] PartitionCols:_col0 - Select Operator [SEL_319] (rows=50 width=4) + Select Operator [SEL_321] (rows=50 width=4) Output:["_col0"] - Filter Operator [FIL_313] (rows=50 width=12) + Filter Operator [FIL_315] (rows=50 width=12) predicate:((d_year = 1999) and (d_moy = 3)) Please refer to the previous TableScan [TS_3] - <-Union 25 [SIMPLE_EDGE] - <-Map 24 [CONTAINS] vectorized - Reduce Output Operator [RS_383] + <-Union 28 [SIMPLE_EDGE] + <-Map 27 [CONTAINS] vectorized + Reduce Output Operator [RS_385] PartitionCols:_col0 - Select Operator [SEL_382] (rows=285117831 width=11) + Select Operator [SEL_384] (rows=285117831 width=11) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_381] (rows=285117831 width=11) - predicate:(cs_sold_date_sk is not null and cs_bill_customer_sk is not null and cs_sold_date_sk BETWEEN DynamicValue(RS_51_date_dim_d_date_sk_min) AND DynamicValue(RS_51_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_51_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_297] (rows=287989836 width=11) + Filter Operator [FIL_383] (rows=285117831 width=11) + predicate:(cs_sold_date_sk is not null and cs_bill_customer_sk is not null and cs_sold_date_sk BETWEEN DynamicValue(RS_57_date_dim_d_date_sk_min) AND DynamicValue(RS_57_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_57_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_299] (rows=287989836 width=11) default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_bill_customer_sk","cs_item_sk"] <-Reducer 22 [BROADCAST_EDGE] vectorized - BROADCAST [RS_379] - Group By Operator [GBY_378] (rows=1 width=12) + BROADCAST [RS_381] + Group By Operator [GBY_380] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 9 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_333] - Group By Operator [GBY_332] (rows=1 width=12) + PARTITION_ONLY_SHUFFLE [RS_335] + Group By Operator [GBY_334] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_326] (rows=50 width=4) + Select Operator [SEL_328] (rows=50 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_319] - <-Map 26 [CONTAINS] vectorized - Reduce Output Operator [RS_386] + Please refer to the previous Select Operator [SEL_321] + <-Map 29 [CONTAINS] vectorized + Reduce Output Operator [RS_388] PartitionCols:_col0 - Select Operator [SEL_385] (rows=143930993 width=11) + Select Operator [SEL_387] (rows=143930993 width=11) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_384] (rows=143930993 width=11) - predicate:(ws_bill_customer_sk is not null and ws_sold_date_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_51_date_dim_d_date_sk_min) AND DynamicValue(RS_51_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_51_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_302] (rows=144002668 width=11) + Filter Operator [FIL_386] (rows=143930993 width=11) + predicate:(ws_bill_customer_sk is not null and ws_sold_date_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_57_date_dim_d_date_sk_min) AND DynamicValue(RS_57_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_57_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_304] (rows=144002668 width=11) default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_item_sk","ws_bill_customer_sk"] <-Reducer 22 [BROADCAST_EDGE] vectorized - BROADCAST [RS_380] - Please refer to the previous Group By Operator [GBY_378] - <-Reducer 29 [SIMPLE_EDGE] - SHUFFLE [RS_73] + BROADCAST [RS_382] + Please refer to the previous Group By Operator [GBY_380] + <-Reducer 24 [SIMPLE_EDGE] + SHUFFLE [RS_72] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_290] (rows=39720279 width=4) - Conds:RS_358._col1, _col2=RS_361._col0, _col1(Inner),Output:["_col0"] - <-Map 28 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_358] + Merge Join Operator [MERGEJOIN_289] (rows=39720279 width=4) + Conds:RS_360._col1, _col2=RS_363._col0, _col1(Inner),Output:["_col0"] + <-Map 23 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_360] PartitionCols:_col1, _col2 - Select Operator [SEL_357] (rows=40000000 width=188) + Select Operator [SEL_359] (rows=40000000 width=188) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_356] (rows=40000000 width=188) + Filter Operator [FIL_358] (rows=40000000 width=188) predicate:(ca_county is not null and ca_state is not null) - TableScan [TS_57] (rows=40000000 width=188) + TableScan [TS_33] (rows=40000000 width=188) default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_county","ca_state"] - <-Map 30 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_361] + <-Map 25 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_363] PartitionCols:_col0, _col1 - Select Operator [SEL_360] (rows=1704 width=184) + Select Operator [SEL_362] (rows=1704 width=184) Output:["_col0","_col1"] - Filter Operator [FIL_359] (rows=1704 width=184) + Filter Operator [FIL_361] (rows=1704 width=184) predicate:(s_county is not null and s_state is not null) - TableScan [TS_60] (rows=1704 width=184) + TableScan [TS_36] (rows=1704 width=184) default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_county","s_state"] <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_110] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_292] (rows=525327388 width=114) + Merge Join Operator [MERGEJOIN_294] (rows=525327388 width=114) Conds:(Inner),Output:["_col0","_col1","_col2"] <-Reducer 13 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_349] - Select Operator [SEL_348] (rows=1 width=8) - Filter Operator [FIL_347] (rows=1 width=8) + PARTITION_ONLY_SHUFFLE [RS_351] + Select Operator [SEL_350] (rows=1 width=8) + Filter Operator [FIL_349] (rows=1 width=8) predicate:(sq_count_check(_col0) <= 1) - Group By Operator [GBY_346] (rows=1 width=8) + Group By Operator [GBY_348] (rows=1 width=8) Output:["_col0"],aggregations:["count(VALUE._col0)"] <-Reducer 12 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_345] - Group By Operator [GBY_344] (rows=1 width=8) + PARTITION_ONLY_SHUFFLE [RS_347] + Group By Operator [GBY_346] (rows=1 width=8) Output:["_col0"],aggregations:["count()"] - Select Operator [SEL_343] (rows=25 width=4) - Group By Operator [GBY_342] (rows=25 width=4) + Select Operator [SEL_345] (rows=25 width=4) + Group By Operator [GBY_344] (rows=25 width=4) Output:["_col0"],keys:KEY._col0 <-Map 9 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_329] + PARTITION_ONLY_SHUFFLE [RS_331] PartitionCols:_col0 - Group By Operator [GBY_322] (rows=25 width=4) + Group By Operator [GBY_324] (rows=25 width=4) Output:["_col0"],keys:_col0 - Select Operator [SEL_316] (rows=50 width=12) + Select Operator [SEL_318] (rows=50 width=12) Output:["_col0"] - Filter Operator [FIL_310] (rows=50 width=12) + Filter Operator [FIL_312] (rows=50 width=12) predicate:((d_year = 1999) and (d_moy = 3)) Please refer to the previous TableScan [TS_3] <-Reducer 2 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_107] - Merge Join Operator [MERGEJOIN_286] (rows=525327388 width=114) + Merge Join Operator [MERGEJOIN_288] (rows=525327388 width=114) Conds:(Inner),Output:["_col0","_col1","_col2"] <-Map 1 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_309] - Select Operator [SEL_308] (rows=525327388 width=114) + PARTITION_ONLY_SHUFFLE [RS_311] + Select Operator [SEL_310] (rows=525327388 width=114) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_307] (rows=525327388 width=114) + Filter Operator [FIL_309] (rows=525327388 width=114) predicate:(ss_sold_date_sk is not null and ss_customer_sk is not null) TableScan [TS_0] (rows=575995635 width=114) default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_customer_sk","ss_ext_sales_price"] <-Reducer 11 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_341] - Select Operator [SEL_340] (rows=1 width=8) - Filter Operator [FIL_339] (rows=1 width=8) + PARTITION_ONLY_SHUFFLE [RS_343] + Select Operator [SEL_342] (rows=1 width=8) + Filter Operator [FIL_341] (rows=1 width=8) predicate:(sq_count_check(_col0) <= 1) - Group By Operator [GBY_338] (rows=1 width=8) + Group By Operator [GBY_340] (rows=1 width=8) Output:["_col0"],aggregations:["count(VALUE._col0)"] <-Reducer 10 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_337] - Group By Operator [GBY_336] (rows=1 width=8) + PARTITION_ONLY_SHUFFLE [RS_339] + Group By Operator [GBY_338] (rows=1 width=8) Output:["_col0"],aggregations:["count()"] - Select Operator [SEL_335] (rows=25 width=4) - Group By Operator [GBY_334] (rows=25 width=4) + Select Operator [SEL_337] (rows=25 width=4) + Group By Operator [GBY_336] (rows=25 width=4) Output:["_col0"],keys:KEY._col0 <-Map 9 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_328] + PARTITION_ONLY_SHUFFLE [RS_330] PartitionCols:_col0 - Group By Operator [GBY_321] (rows=25 width=4) + Group By Operator [GBY_323] (rows=25 width=4) Output:["_col0"],keys:_col0 - Select Operator [SEL_315] (rows=50 width=12) + Select Operator [SEL_317] (rows=50 width=12) Output:["_col0"] - Please refer to the previous Filter Operator [FIL_310] + Please refer to the previous Filter Operator [FIL_312]