HIVE-20920: Use SQL constraints to improve join reordering algorithm (II) (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/5553c59e Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/5553c59e Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/5553c59e Branch: refs/heads/master Commit: 5553c59e343b89e092718c961062ffb9e95ec6ac Parents: ea1173a Author: Jesus Camacho Rodriguez <jcama...@apache.org> Authored: Wed Nov 14 20:57:16 2018 -0800 Committer: Jesus Camacho Rodriguez <jcama...@apache.org> Committed: Mon Nov 19 11:38:32 2018 -0800 ---------------------------------------------------------------------- .../calcite/stats/HiveRelMdRowCount.java | 77 +- .../perf/tez/constraints/cbo_query11.q.out | 56 +- .../perf/tez/constraints/cbo_query14.q.out | 330 ++-- .../perf/tez/constraints/cbo_query17.q.out | 32 +- .../perf/tez/constraints/cbo_query18.q.out | 56 +- .../perf/tez/constraints/cbo_query24.q.out | 46 +- .../perf/tez/constraints/cbo_query25.q.out | 32 +- .../perf/tez/constraints/cbo_query29.q.out | 32 +- .../perf/tez/constraints/cbo_query35.q.out | 15 +- .../perf/tez/constraints/cbo_query4.q.out | 72 +- .../perf/tez/constraints/cbo_query46.q.out | 50 +- .../perf/tez/constraints/cbo_query47.q.out | 66 +- .../perf/tez/constraints/cbo_query54.q.out | 62 +- .../perf/tez/constraints/cbo_query57.q.out | 40 +- .../perf/tez/constraints/cbo_query58.q.out | 35 +- .../perf/tez/constraints/cbo_query6.q.out | 44 +- .../perf/tez/constraints/cbo_query64.q.out | 230 ++- .../perf/tez/constraints/cbo_query68.q.out | 50 +- .../perf/tez/constraints/cbo_query72.q.out | 68 +- .../perf/tez/constraints/cbo_query74.q.out | 56 +- .../perf/tez/constraints/cbo_query76.q.out | 40 +- .../perf/tez/constraints/cbo_query83.q.out | 33 +- .../perf/tez/constraints/cbo_query85.q.out | 24 +- .../perf/tez/constraints/cbo_query91.q.out | 42 +- .../perf/tez/constraints/cbo_query99.q.out | 14 +- .../perf/tez/constraints/query11.q.out | 266 ++-- .../perf/tez/constraints/query14.q.out | 1500 +++++++++--------- .../perf/tez/constraints/query17.q.out | 402 ++--- .../perf/tez/constraints/query18.q.out | 276 ++-- .../perf/tez/constraints/query24.q.out | 350 ++-- .../perf/tez/constraints/query25.q.out | 406 ++--- .../perf/tez/constraints/query29.q.out | 404 ++--- .../perf/tez/constraints/query35.q.out | 246 +-- .../perf/tez/constraints/query4.q.out | 352 ++-- .../perf/tez/constraints/query45.q.out | 218 +-- .../perf/tez/constraints/query46.q.out | 256 +-- .../perf/tez/constraints/query47.q.out | 48 +- .../perf/tez/constraints/query54.q.out | 494 +++--- .../perf/tez/constraints/query57.q.out | 18 +- .../perf/tez/constraints/query58.q.out | 408 ++--- .../perf/tez/constraints/query6.q.out | 247 ++- .../perf/tez/constraints/query64.q.out | 968 +++++------ .../perf/tez/constraints/query67.q.out | 6 +- .../perf/tez/constraints/query68.q.out | 256 +-- .../perf/tez/constraints/query70.q.out | 254 +-- .../perf/tez/constraints/query72.q.out | 386 +++-- .../perf/tez/constraints/query74.q.out | 266 ++-- .../perf/tez/constraints/query76.q.out | 62 +- .../perf/tez/constraints/query83.q.out | 282 ++-- .../perf/tez/constraints/query85.q.out | 216 ++- .../perf/tez/constraints/query91.q.out | 176 +- .../perf/tez/constraints/query99.q.out | 26 +- 52 files changed, 5229 insertions(+), 5162 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/5553c59e/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdRowCount.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdRowCount.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdRowCount.java index 563260e..be34673 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdRowCount.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdRowCount.java @@ -17,6 +17,7 @@ */ package org.apache.hadoop.hive.ql.optimizer.calcite.stats; +import com.google.common.collect.ImmutableList; import java.util.ArrayList; import java.util.List; import java.util.Set; @@ -33,6 +34,7 @@ import org.apache.calcite.rel.core.SemiJoin; import org.apache.calcite.rel.core.Sort; import org.apache.calcite.rel.core.TableScan; import org.apache.calcite.rel.metadata.ReflectiveRelMetadataProvider; +import org.apache.calcite.rel.metadata.RelColumnOrigin; import org.apache.calcite.rel.metadata.RelMdRowCount; import org.apache.calcite.rel.metadata.RelMdUtil; import org.apache.calcite.rel.metadata.RelMetadataProvider; @@ -43,12 +45,16 @@ import org.apache.calcite.rex.RexInputRef; import org.apache.calcite.rex.RexLiteral; import org.apache.calcite.rex.RexNode; import org.apache.calcite.rex.RexUtil; +import org.apache.calcite.sql.SqlKind; import org.apache.calcite.sql.fun.SqlStdOperatorTable; import org.apache.calcite.util.BuiltInMethod; import org.apache.calcite.util.ImmutableBitSet; import org.apache.calcite.util.Pair; +import org.apache.hadoop.hive.metastore.api.ColumnStatistics; import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelOptUtil; import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelOptUtil.PKFKJoinInfo; +import org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable; +import org.apache.hadoop.hive.ql.plan.ColStatistics; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan; @@ -57,6 +63,7 @@ public class HiveRelMdRowCount extends RelMdRowCount { protected static final Logger LOG = LoggerFactory.getLogger(HiveRelMdRowCount.class.getName()); + private static final Long HIVE_PK_FK_NO_FILTERING_COST_INCREASING_FACTOR = 1000000000000000L; public static final RelMetadataProvider SOURCE = ReflectiveRelMetadataProvider .reflectiveSource(BuiltInMethod.ROW_COUNT.method, new HiveRelMdRowCount()); @@ -75,7 +82,12 @@ public class HiveRelMdRowCount extends RelMdRowCount { double joinSelectivity = Math.min(1.0, constraintBasedResult.left.pkInfo.selectivity * constraintBasedResult.left.ndvScalingFactor); double residualSelectivity = RelMdUtil.guessSelectivity(constraintBasedResult.right); - double rowCount = constraintBasedResult.left.fkInfo.rowCount * joinSelectivity * residualSelectivity; + double rowCount; + if (constraintBasedResult.left.isPKSideSimple) { + rowCount = constraintBasedResult.left.pkInfo.rowCount + HIVE_PK_FK_NO_FILTERING_COST_INCREASING_FACTOR; + } else { + rowCount = constraintBasedResult.left.fkInfo.rowCount * joinSelectivity * residualSelectivity; + } if (LOG.isDebugEnabled()) { LOG.debug("Identified Primary - Foreign Key relation from constraints:\n {} {} Row count for join: {}\n" + " Join selectivity: {}\n Residual selectivity: {}\n", RelOptUtil.toString(join), constraintBasedResult.left, @@ -274,8 +286,8 @@ public class HiveRelMdRowCount extends RelMdRowCount { } int pkSide = leftIsKey ? 0 : 1; - boolean isPKSideSimpleTree = leftIsKey ? SimpleTreeOnJoinKey.check(false, left, lBitSet, mq) : - SimpleTreeOnJoinKey.check(false, right, rBitSet, mq); + boolean isPKSideSimpleTree = leftIsKey ? SimpleTreeOnJoinKey.check(false, left, lBitSet, mq).left : + SimpleTreeOnJoinKey.check(false, right, rBitSet, mq).left; double leftNDV = isPKSideSimpleTree ? mq.getDistinctRowCount(left, lBitSet, leftPred) : -1; double rightNDV = isPKSideSimpleTree ? mq.getDistinctRowCount(right, rBitSet, rightPred) : -1; @@ -389,8 +401,10 @@ public class HiveRelMdRowCount extends RelMdRowCount { // 4) Extract additional information on the PK-FK relationship int pkSide = leftIsKey ? 0 : 1; - boolean isPKSideSimpleTree = leftIsKey ? SimpleTreeOnJoinKey.check(true, left, lBitSet, mq) : + Pair<Boolean,Boolean> simpleTree = leftIsKey ? SimpleTreeOnJoinKey.check(true, left, lBitSet, mq) : SimpleTreeOnJoinKey.check(true, right, rBitSet, mq); + boolean isPKSideSimpleTree = simpleTree.left; + boolean isNoFilteringPKSideTree = simpleTree.right; RexBuilder rexBuilder = join.getCluster().getRexBuilder(); RexNode leftPred = RexUtil.composeConjunction( rexBuilder, leftFilters, true); @@ -415,10 +429,7 @@ public class HiveRelMdRowCount extends RelMdRowCount { join.getJoinType().generatesNullsOnRight() ? 1.0 : pkSelectivity); double ndvScalingFactor = isPKSideSimpleTree ? leftNDV/rightNDV : 1.0; - if (isPKSideSimpleTree) { - ndvScalingFactor = leftNDV/rightNDV; - } - return Pair.of(new PKFKRelationInfo(1, fkInfo, pkInfo, ndvScalingFactor, isPKSideSimpleTree), + return Pair.of(new PKFKRelationInfo(1, fkInfo, pkInfo, ndvScalingFactor, isNoFilteringPKSideTree), residualCond); } else { // pkSide == 1 FKSideInfo fkInfo = new FKSideInfo(leftRowCount, @@ -429,7 +440,7 @@ public class HiveRelMdRowCount extends RelMdRowCount { join.getJoinType().generatesNullsOnLeft() ? 1.0 : pkSelectivity); double ndvScalingFactor = isPKSideSimpleTree ? rightNDV/leftNDV : 1.0; - return Pair.of(new PKFKRelationInfo(0, fkInfo, pkInfo, ndvScalingFactor, isPKSideSimpleTree), + return Pair.of(new PKFKRelationInfo(0, fkInfo, pkInfo, ndvScalingFactor, isNoFilteringPKSideTree), residualCond); } } @@ -531,12 +542,13 @@ public class HiveRelMdRowCount extends RelMdRowCount { boolean constraintsBased; ImmutableBitSet joinKey; boolean simpleTree; + boolean nonFilteringTree; RelMetadataQuery mq; - static boolean check(boolean constraintsBased, RelNode r, ImmutableBitSet joinKey, RelMetadataQuery mq) { + static Pair<Boolean,Boolean> check(boolean constraintsBased, RelNode r, ImmutableBitSet joinKey, RelMetadataQuery mq) { SimpleTreeOnJoinKey v = new SimpleTreeOnJoinKey(constraintsBased, joinKey, mq); v.go(r); - return v.simpleTree; + return Pair.of(v.simpleTree, v.nonFilteringTree); } SimpleTreeOnJoinKey(boolean constraintsBased, ImmutableBitSet joinKey, RelMetadataQuery mq) { @@ -545,6 +557,7 @@ public class HiveRelMdRowCount extends RelMdRowCount { this.joinKey = joinKey; this.mq = mq; simpleTree = true; + nonFilteringTree = true; } @Override @@ -558,10 +571,19 @@ public class HiveRelMdRowCount extends RelMdRowCount { simpleTree = true; } else if (node instanceof Project) { simpleTree = isSimple((Project) node); + nonFilteringTree &= simpleTree; } else if (node instanceof Filter) { - simpleTree = isSimple((Filter) node, mq); + // Remove is not null from condition if it does not filter anything. + Filter filterOp = (Filter) node; + List<RexNode> conjs = extractFilterPreds(filterOp); + ImmutableBitSet condBits = RelOptUtil.InputFinder.bits(conjs, null); + // For simple tree, we want to know whether filter is only on + // key columns. + simpleTree = isSimple(condBits, filterOp, mq); + nonFilteringTree &= conjs.isEmpty(); } else { simpleTree = false; + nonFilteringTree = false; } if (simpleTree) { @@ -582,14 +604,41 @@ public class HiveRelMdRowCount extends RelMdRowCount { return true; } - private boolean isSimple(Filter filter, RelMetadataQuery mq) { - ImmutableBitSet condBits = RelOptUtil.InputFinder.bits(filter.getCondition()); + private boolean isSimple(ImmutableBitSet condBits, Filter filter, RelMetadataQuery mq) { + // Returns whether the filter is only applied on the key columns if (constraintsBased) { return mq.areColumnsUnique(filter, condBits); } return isKey(condBits, filter, mq); } + private List<RexNode> extractFilterPreds(Filter filterOp) { + List<RexNode> conjs = new ArrayList<>(); + for (RexNode r : HiveRelOptUtil.conjunctions(filterOp.getCondition())) { + if (r.getKind() == SqlKind.IS_NOT_NULL) { + RexCall isNotNullNode = (RexCall) r; + if (RexUtil.isReferenceOrAccess(isNotNullNode.getOperands().get(0), true)) { + ImmutableBitSet ref = RelOptUtil.InputFinder.bits(isNotNullNode); + RelColumnOrigin co = mq.getColumnOrigin(filterOp, ref.nextSetBit(0)); + if (co == null) { + // We add it back + conjs.add(r); + continue; + } + RelOptHiveTable table = (RelOptHiveTable) co.getOriginTable(); + List<ColStatistics> colStats = table.getColStat(ImmutableList.of(co.getOriginColumnOrdinal()), true); + if (colStats == null || colStats.isEmpty() || colStats.get(0).getNumNulls() != 0) { + // We add it back + conjs.add(r); + } + } + } else { + conjs.add(r); + } + } + return conjs; + } + } } http://git-wip-us.apache.org/repos/asf/hive/blob/5553c59e/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query11.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query11.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query11.q.out index 24bd6fd..f315e3c 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query11.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query11.q.out @@ -159,7 +159,7 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### CBO PLAN: HiveSortLimit(sort0=[$0], dir0=[ASC], fetch=[100]) HiveProject(c_preferred_cust_flag=[$1]) - HiveJoin(condition=[AND(=($0, $5), CASE(CAST(IS NOT NULL($6)):BOOLEAN, CASE($9, >(/($4, $8), /($2, $6)), >(null, /($2, $6))), CASE($9, >(/($4, $8), null), null)))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[AND(=($0, $8), CASE(CAST(IS NOT NULL($9)):BOOLEAN, CASE($7, >(/($4, $6), /($2, $9)), >(null, /($2, $9))), CASE($7, >(/($4, $6), null), null)))], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject($f0=[$0], $f4=[$3], $f9=[$7]) HiveAggregate(group=[{1, 2, 3, 4, 5, 6, 7}], agg#0=[sum($10)]) HiveJoin(condition=[=($0, $9)], joinType=[inner], algorithm=[none], cost=[not available]) @@ -172,20 +172,33 @@ HiveSortLimit(sort0=[$0], dir0=[ASC], fetch=[100]) HiveProject(d_date_sk=[$0]) HiveFilter(condition=[=($6, 2002)]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveJoin(condition=[=($2, $4)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject($f0=[$0], $f8=[$7]) - HiveAggregate(group=[{1, 2, 3, 4, 5, 6, 7}], agg#0=[sum($10)]) - HiveJoin(condition=[=($0, $9)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9], c_preferred_cust_flag=[$10], c_birth_country=[$14], c_login=[$15], c_email_address=[$16]) - HiveTableScan(table=[[default, customer]], table:alias=[customer]) - HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ws_sold_date_sk=[$0], ws_bill_customer_sk=[$4], -=[-($25, $22)]) - HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($0))]) - HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[=($6, 2002)]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveJoin(condition=[=($5, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject($f0=[$0], $f8=[$7]) + HiveAggregate(group=[{1, 2, 3, 4, 5, 6, 7}], agg#0=[sum($10)]) + HiveJoin(condition=[=($0, $9)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9], c_preferred_cust_flag=[$10], c_birth_country=[$14], c_login=[$15], c_email_address=[$16]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], ws_bill_customer_sk=[$4], -=[-($25, $22)]) + HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($0))]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[=($6, 2002)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(customer_id=[$0], year_total=[$7], CAST=[CAST(IS NOT NULL($7)):BOOLEAN]) + HiveFilter(condition=[>($7, 0)]) + HiveAggregate(group=[{1, 2, 3, 4, 5, 6, 7}], agg#0=[sum($10)]) + HiveJoin(condition=[=($0, $9)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9], c_preferred_cust_flag=[$10], c_birth_country=[$14], c_login=[$15], c_email_address=[$16]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], ws_bill_customer_sk=[$4], -=[-($25, $22)]) + HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($0))]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[=($6, 2001)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject($f0=[$0], $f9=[$7]) HiveFilter(condition=[>($7, 0)]) HiveAggregate(group=[{1, 2, 3, 4, 5, 6, 7}], agg#0=[sum($10)]) @@ -199,17 +212,4 @@ HiveSortLimit(sort0=[$0], dir0=[ASC], fetch=[100]) HiveProject(d_date_sk=[$0]) HiveFilter(condition=[=($6, 2001)]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(customer_id=[$0], year_total=[$7], CAST=[CAST(IS NOT NULL($7)):BOOLEAN]) - HiveFilter(condition=[>($7, 0)]) - HiveAggregate(group=[{1, 2, 3, 4, 5, 6, 7}], agg#0=[sum($10)]) - HiveJoin(condition=[=($0, $9)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9], c_preferred_cust_flag=[$10], c_birth_country=[$14], c_login=[$15], c_email_address=[$16]) - HiveTableScan(table=[[default, customer]], table:alias=[customer]) - HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ws_sold_date_sk=[$0], ws_bill_customer_sk=[$4], -=[-($25, $22)]) - HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($0))]) - HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[=($6, 2001)]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) http://git-wip-us.apache.org/repos/asf/hive/blob/5553c59e/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query14.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query14.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query14.q.out index 457cdce..9abcb05 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query14.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query14.q.out @@ -1,9 +1,9 @@ -Warning: Shuffle Join MERGEJOIN[1431][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 5' is a cross product -Warning: Shuffle Join MERGEJOIN[1443][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 6' is a cross product -Warning: Shuffle Join MERGEJOIN[1433][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 13' is a cross product -Warning: Shuffle Join MERGEJOIN[1456][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 14' is a cross product -Warning: Shuffle Join MERGEJOIN[1435][tables = [$hdt$_2, $hdt$_3]] in Stage 'Reducer 18' is a cross product -Warning: Shuffle Join MERGEJOIN[1469][tables = [$hdt$_2, $hdt$_3, $hdt$_1]] in Stage 'Reducer 19' is a cross product +Warning: Shuffle Join MERGEJOIN[1458][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 5' is a cross product +Warning: Shuffle Join MERGEJOIN[1470][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 6' is a cross product +Warning: Shuffle Join MERGEJOIN[1460][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 13' is a cross product +Warning: Shuffle Join MERGEJOIN[1483][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 14' is a cross product +Warning: Shuffle Join MERGEJOIN[1462][tables = [$hdt$_2, $hdt$_3]] in Stage 'Reducer 18' is a cross product +Warning: Shuffle Join MERGEJOIN[1496][tables = [$hdt$_2, $hdt$_3, $hdt$_1]] in Stage 'Reducer 19' is a cross product PREHOOK: query: explain cbo with cross_items as (select i_item_sk ss_item_sk @@ -232,60 +232,60 @@ HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], sort3=[$3], dir0=[ASC], dir1=[ HiveJoin(condition=[>($3, $6)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject($f0=[$0], $f1=[$1], $f2=[$2], $f3=[$3], $f4=[$4]) HiveAggregate(group=[{0, 1, 2}], agg#0=[sum($3)], agg#1=[count()]) - HiveProject($f0=[$2], $f1=[$3], $f2=[$4], $f3=[*(CAST($7):DECIMAL(10, 0), $8)]) + HiveProject($f0=[$1], $f1=[$2], $f2=[$3], $f3=[*(CAST($7):DECIMAL(10, 0), $8)]) HiveJoin(condition=[=($6, $0)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_item_sk=[$0]) - HiveJoin(condition=[AND(AND(=($1, $4), =($2, $5)), =($3, $6))], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11]) - HiveFilter(condition=[AND(IS NOT NULL($7), IS NOT NULL($9), IS NOT NULL($11))]) - HiveTableScan(table=[[default, item]], table:alias=[item]) - HiveProject($f0=[$0], $f1=[$1], $f2=[$2]) - HiveFilter(condition=[=($3, 3)]) - HiveAggregate(group=[{0, 1, 2}], agg#0=[count($3)]) - HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], $f3=[$3]) - HiveUnion(all=[true]) - HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], $f3=[$3]) - HiveAggregate(group=[{4, 5, 6}], agg#0=[count()]) - HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2]) - HiveFilter(condition=[IS NOT NULL($0)]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[BETWEEN(false, $6, 1999, 2001)]) - HiveTableScan(table=[[default, date_dim]], table:alias=[d1]) - HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11]) - HiveFilter(condition=[AND(IS NOT NULL($7), IS NOT NULL($9), IS NOT NULL($11))]) - HiveTableScan(table=[[default, item]], table:alias=[iss]) - HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], $f3=[$3]) - HiveAggregate(group=[{4, 5, 6}], agg#0=[count()]) - HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(cs_sold_date_sk=[$0], cs_item_sk=[$15]) - HiveFilter(condition=[IS NOT NULL($0)]) - HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[BETWEEN(false, $6, 1999, 2001)]) - HiveTableScan(table=[[default, date_dim]], table:alias=[d2]) - HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11]) - HiveFilter(condition=[AND(IS NOT NULL($7), IS NOT NULL($9), IS NOT NULL($11))]) - HiveTableScan(table=[[default, item]], table:alias=[ics]) - HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], $f3=[$3]) - HiveAggregate(group=[{4, 5, 6}], agg#0=[count()]) - HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ws_sold_date_sk=[$0], ws_item_sk=[$3]) - HiveFilter(condition=[IS NOT NULL($0)]) - HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[BETWEEN(false, $6, 1999, 2001)]) - HiveTableScan(table=[[default, date_dim]], table:alias=[d3]) - HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11]) - HiveFilter(condition=[AND(IS NOT NULL($7), IS NOT NULL($9), IS NOT NULL($11))]) - HiveTableScan(table=[[default, item]], table:alias=[iws]) - HiveJoin(condition=[=($5, $0)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11]) - HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_item_sk=[$0]) + HiveJoin(condition=[AND(AND(=($1, $4), =($2, $5)), =($3, $6))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11]) + HiveFilter(condition=[AND(IS NOT NULL($7), IS NOT NULL($9), IS NOT NULL($11))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject($f0=[$0], $f1=[$1], $f2=[$2]) + HiveFilter(condition=[=($3, 3)]) + HiveAggregate(group=[{0, 1, 2}], agg#0=[count($3)]) + HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], $f3=[$3]) + HiveUnion(all=[true]) + HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], $f3=[$3]) + HiveAggregate(group=[{4, 5, 6}], agg#0=[count()]) + HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[BETWEEN(false, $6, 1999, 2001)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[d1]) + HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11]) + HiveFilter(condition=[AND(IS NOT NULL($7), IS NOT NULL($9), IS NOT NULL($11))]) + HiveTableScan(table=[[default, item]], table:alias=[iss]) + HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], $f3=[$3]) + HiveAggregate(group=[{4, 5, 6}], agg#0=[count()]) + HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_item_sk=[$15]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[BETWEEN(false, $6, 1999, 2001)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[d2]) + HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11]) + HiveFilter(condition=[AND(IS NOT NULL($7), IS NOT NULL($9), IS NOT NULL($11))]) + HiveTableScan(table=[[default, item]], table:alias=[ics]) + HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], $f3=[$3]) + HiveAggregate(group=[{4, 5, 6}], agg#0=[count()]) + HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], ws_item_sk=[$3]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[BETWEEN(false, $6, 1999, 2001)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[d3]) + HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11]) + HiveFilter(condition=[AND(IS NOT NULL($7), IS NOT NULL($9), IS NOT NULL($11))]) + HiveTableScan(table=[[default, item]], table:alias=[iws]) HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_quantity=[$10], ss_list_price=[$12]) HiveFilter(condition=[IS NOT NULL($0)]) @@ -359,60 +359,60 @@ HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], sort3=[$3], dir0=[ASC], dir1=[ HiveJoin(condition=[>($3, $6)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject($f0=[$0], $f1=[$1], $f2=[$2], $f3=[$3], $f4=[$4]) HiveAggregate(group=[{0, 1, 2}], agg#0=[sum($3)], agg#1=[count()]) - HiveProject($f0=[$2], $f1=[$3], $f2=[$4], $f3=[*(CAST($7):DECIMAL(10, 0), $8)]) + HiveProject($f0=[$1], $f1=[$2], $f2=[$3], $f3=[*(CAST($7):DECIMAL(10, 0), $8)]) HiveJoin(condition=[=($6, $0)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_item_sk=[$0]) - HiveJoin(condition=[AND(AND(=($1, $4), =($2, $5)), =($3, $6))], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11]) - HiveFilter(condition=[AND(IS NOT NULL($7), IS NOT NULL($9), IS NOT NULL($11))]) - HiveTableScan(table=[[default, item]], table:alias=[item]) - HiveProject($f0=[$0], $f1=[$1], $f2=[$2]) - HiveFilter(condition=[=($3, 3)]) - HiveAggregate(group=[{0, 1, 2}], agg#0=[count($3)]) - HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], $f3=[$3]) - HiveUnion(all=[true]) - HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], $f3=[$3]) - HiveAggregate(group=[{4, 5, 6}], agg#0=[count()]) - HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2]) - HiveFilter(condition=[IS NOT NULL($0)]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[BETWEEN(false, $6, 1999, 2001)]) - HiveTableScan(table=[[default, date_dim]], table:alias=[d1]) - HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11]) - HiveFilter(condition=[AND(IS NOT NULL($7), IS NOT NULL($9), IS NOT NULL($11))]) - HiveTableScan(table=[[default, item]], table:alias=[iss]) - HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], $f3=[$3]) - HiveAggregate(group=[{4, 5, 6}], agg#0=[count()]) - HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(cs_sold_date_sk=[$0], cs_item_sk=[$15]) - HiveFilter(condition=[IS NOT NULL($0)]) - HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[BETWEEN(false, $6, 1999, 2001)]) - HiveTableScan(table=[[default, date_dim]], table:alias=[d2]) - HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11]) - HiveFilter(condition=[AND(IS NOT NULL($7), IS NOT NULL($9), IS NOT NULL($11))]) - HiveTableScan(table=[[default, item]], table:alias=[ics]) - HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], $f3=[$3]) - HiveAggregate(group=[{4, 5, 6}], agg#0=[count()]) - HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ws_sold_date_sk=[$0], ws_item_sk=[$3]) - HiveFilter(condition=[IS NOT NULL($0)]) - HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[BETWEEN(false, $6, 1999, 2001)]) - HiveTableScan(table=[[default, date_dim]], table:alias=[d3]) - HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11]) - HiveFilter(condition=[AND(IS NOT NULL($7), IS NOT NULL($9), IS NOT NULL($11))]) - HiveTableScan(table=[[default, item]], table:alias=[iws]) - HiveJoin(condition=[=($5, $0)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11]) - HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_item_sk=[$0]) + HiveJoin(condition=[AND(AND(=($1, $4), =($2, $5)), =($3, $6))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11]) + HiveFilter(condition=[AND(IS NOT NULL($7), IS NOT NULL($9), IS NOT NULL($11))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject($f0=[$0], $f1=[$1], $f2=[$2]) + HiveFilter(condition=[=($3, 3)]) + HiveAggregate(group=[{0, 1, 2}], agg#0=[count($3)]) + HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], $f3=[$3]) + HiveUnion(all=[true]) + HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], $f3=[$3]) + HiveAggregate(group=[{4, 5, 6}], agg#0=[count()]) + HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[BETWEEN(false, $6, 1999, 2001)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[d1]) + HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11]) + HiveFilter(condition=[AND(IS NOT NULL($7), IS NOT NULL($9), IS NOT NULL($11))]) + HiveTableScan(table=[[default, item]], table:alias=[iss]) + HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], $f3=[$3]) + HiveAggregate(group=[{4, 5, 6}], agg#0=[count()]) + HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_item_sk=[$15]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[BETWEEN(false, $6, 1999, 2001)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[d2]) + HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11]) + HiveFilter(condition=[AND(IS NOT NULL($7), IS NOT NULL($9), IS NOT NULL($11))]) + HiveTableScan(table=[[default, item]], table:alias=[ics]) + HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], $f3=[$3]) + HiveAggregate(group=[{4, 5, 6}], agg#0=[count()]) + HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], ws_item_sk=[$3]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[BETWEEN(false, $6, 1999, 2001)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[d3]) + HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11]) + HiveFilter(condition=[AND(IS NOT NULL($7), IS NOT NULL($9), IS NOT NULL($11))]) + HiveTableScan(table=[[default, item]], table:alias=[iws]) HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(cs_sold_date_sk=[$0], cs_item_sk=[$15], cs_quantity=[$18], cs_list_price=[$20]) HiveFilter(condition=[IS NOT NULL($0)]) @@ -486,60 +486,60 @@ HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], sort3=[$3], dir0=[ASC], dir1=[ HiveJoin(condition=[>($3, $6)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject($f0=[$0], $f1=[$1], $f2=[$2], $f3=[$3], $f4=[$4]) HiveAggregate(group=[{0, 1, 2}], agg#0=[sum($3)], agg#1=[count()]) - HiveProject($f0=[$2], $f1=[$3], $f2=[$4], $f3=[*(CAST($7):DECIMAL(10, 0), $8)]) + HiveProject($f0=[$1], $f1=[$2], $f2=[$3], $f3=[*(CAST($7):DECIMAL(10, 0), $8)]) HiveJoin(condition=[=($6, $0)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_item_sk=[$0]) - HiveJoin(condition=[AND(AND(=($1, $4), =($2, $5)), =($3, $6))], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11]) - HiveFilter(condition=[AND(IS NOT NULL($7), IS NOT NULL($9), IS NOT NULL($11))]) - HiveTableScan(table=[[default, item]], table:alias=[item]) - HiveProject($f0=[$0], $f1=[$1], $f2=[$2]) - HiveFilter(condition=[=($3, 3)]) - HiveAggregate(group=[{0, 1, 2}], agg#0=[count($3)]) - HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], $f3=[$3]) - HiveUnion(all=[true]) - HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], $f3=[$3]) - HiveAggregate(group=[{4, 5, 6}], agg#0=[count()]) - HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2]) - HiveFilter(condition=[IS NOT NULL($0)]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[BETWEEN(false, $6, 1999, 2001)]) - HiveTableScan(table=[[default, date_dim]], table:alias=[d1]) - HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11]) - HiveFilter(condition=[AND(IS NOT NULL($7), IS NOT NULL($9), IS NOT NULL($11))]) - HiveTableScan(table=[[default, item]], table:alias=[iss]) - HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], $f3=[$3]) - HiveAggregate(group=[{4, 5, 6}], agg#0=[count()]) - HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(cs_sold_date_sk=[$0], cs_item_sk=[$15]) - HiveFilter(condition=[IS NOT NULL($0)]) - HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[BETWEEN(false, $6, 1999, 2001)]) - HiveTableScan(table=[[default, date_dim]], table:alias=[d2]) - HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11]) - HiveFilter(condition=[AND(IS NOT NULL($7), IS NOT NULL($9), IS NOT NULL($11))]) - HiveTableScan(table=[[default, item]], table:alias=[ics]) - HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], $f3=[$3]) - HiveAggregate(group=[{4, 5, 6}], agg#0=[count()]) - HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ws_sold_date_sk=[$0], ws_item_sk=[$3]) - HiveFilter(condition=[IS NOT NULL($0)]) - HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[BETWEEN(false, $6, 1999, 2001)]) - HiveTableScan(table=[[default, date_dim]], table:alias=[d3]) - HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11]) - HiveFilter(condition=[AND(IS NOT NULL($7), IS NOT NULL($9), IS NOT NULL($11))]) - HiveTableScan(table=[[default, item]], table:alias=[iws]) - HiveJoin(condition=[=($5, $0)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11]) - HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_item_sk=[$0]) + HiveJoin(condition=[AND(AND(=($1, $4), =($2, $5)), =($3, $6))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11]) + HiveFilter(condition=[AND(IS NOT NULL($7), IS NOT NULL($9), IS NOT NULL($11))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject($f0=[$0], $f1=[$1], $f2=[$2]) + HiveFilter(condition=[=($3, 3)]) + HiveAggregate(group=[{0, 1, 2}], agg#0=[count($3)]) + HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], $f3=[$3]) + HiveUnion(all=[true]) + HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], $f3=[$3]) + HiveAggregate(group=[{4, 5, 6}], agg#0=[count()]) + HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[BETWEEN(false, $6, 1999, 2001)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[d1]) + HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11]) + HiveFilter(condition=[AND(IS NOT NULL($7), IS NOT NULL($9), IS NOT NULL($11))]) + HiveTableScan(table=[[default, item]], table:alias=[iss]) + HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], $f3=[$3]) + HiveAggregate(group=[{4, 5, 6}], agg#0=[count()]) + HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_item_sk=[$15]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[BETWEEN(false, $6, 1999, 2001)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[d2]) + HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11]) + HiveFilter(condition=[AND(IS NOT NULL($7), IS NOT NULL($9), IS NOT NULL($11))]) + HiveTableScan(table=[[default, item]], table:alias=[ics]) + HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], $f3=[$3]) + HiveAggregate(group=[{4, 5, 6}], agg#0=[count()]) + HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], ws_item_sk=[$3]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[BETWEEN(false, $6, 1999, 2001)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[d3]) + HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11]) + HiveFilter(condition=[AND(IS NOT NULL($7), IS NOT NULL($9), IS NOT NULL($11))]) + HiveTableScan(table=[[default, item]], table:alias=[iws]) HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(ws_sold_date_sk=[$0], ws_item_sk=[$3], ws_quantity=[$18], ws_list_price=[$20]) HiveFilter(condition=[IS NOT NULL($0)]) http://git-wip-us.apache.org/repos/asf/hive/blob/5553c59e/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query17.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query17.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query17.q.out index 3d190e3..120c8d2 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query17.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query17.q.out @@ -104,20 +104,18 @@ CBO PLAN: HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], dir0=[ASC], dir1=[ASC], dir2=[ASC], fetch=[100]) HiveProject(i_item_id=[$0], i_item_desc=[$1], s_state=[$2], store_sales_quantitycount=[$3], store_sales_quantityave=[/(CAST($4):DOUBLE, $3)], store_sales_quantitystdev=[POWER(/(-($5, /(*($6, $6), $3)), CASE(=($3, 1), null, -($3, 1))), 0.5)], store_sales_quantitycov=[/(POWER(/(-($5, /(*($6, $6), $3)), CASE(=($3, 1), null, -($3, 1))), 0.5), /(CAST($4):DOUBLE, $3))], as_store_returns_quantitycount=[$7], as_store_returns_quantityave=[/(CAST($8):DOUBLE, $7)], as_store_returns_quantitystdev=[POWER(/(-($9, /(*($10, $10), $7)), CASE(=($7, 1), null, -($7, 1))), 0.5)], store_returns_quantitycov=[/(POWER(/(-($9, /(*($10, $10), $7)), CASE(=($7, 1), null, -($7, 1))), 0.5), /(CAST($8):DOUBLE, $7))], catalog_sales_quantitycount=[$11], catalog_sales_quantityave=[/(CAST($12):DOUBLE, $11)], catalog_sales_quantitystdev=[/(POWER(/(-($13, /(*($14, $14), $11)), CASE(=($11, 1), null, -($11, 1))), 0.5), /(CAST($12):DOUBLE, $11))], catalog_sales_quantitycov=[/(POWER(/(-($13, /(*($14, $14), $11)), CASE(=($ 11, 1), null, -($11, 1))), 0.5), /(CAST($12):DOUBLE, $11))]) HiveAggregate(group=[{0, 1, 2}], agg#0=[count($3)], agg#1=[sum($3)], agg#2=[sum($7)], agg#3=[sum($6)], agg#4=[count($4)], agg#5=[sum($4)], agg#6=[sum($9)], agg#7=[sum($8)], agg#8=[count($5)], agg#9=[sum($5)], agg#10=[sum($11)], agg#11=[sum($10)]) - HiveProject($f0=[$6], $f1=[$7], $f2=[$22], $f3=[$13], $f4=[$19], $f5=[$3], $f30=[CAST($13):DOUBLE], $f7=[*(CAST($13):DOUBLE, CAST($13):DOUBLE)], $f40=[CAST($19):DOUBLE], $f9=[*(CAST($19):DOUBLE, CAST($19):DOUBLE)], $f50=[CAST($3):DOUBLE], $f11=[*(CAST($3):DOUBLE, CAST($3):DOUBLE)]) - HiveJoin(condition=[AND(=($17, $1), =($16, $2))], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(cs_sold_date_sk=[$0], cs_bill_customer_sk=[$3], cs_item_sk=[$15], cs_quantity=[$18]) - HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($0))]) - HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[IN($15, _UTF-16LE'2000Q1', _UTF-16LE'2000Q2', _UTF-16LE'2000Q3')]) - HiveTableScan(table=[[default, date_dim]], table:alias=[d3]) - HiveProject(i_item_sk=[$0], i_item_id=[$1], i_item_desc=[$2], ss_sold_date_sk=[$3], ss_item_sk=[$4], ss_customer_sk=[$5], ss_store_sk=[$6], ss_ticket_number=[$7], ss_quantity=[$8], d_date_sk=[$9], sr_returned_date_sk=[$10], sr_item_sk=[$11], sr_customer_sk=[$12], sr_ticket_number=[$13], sr_return_quantity=[$14], d_date_sk0=[$15], s_store_sk=[$16], s_state=[$17]) - HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(i_item_sk=[$0], i_item_id=[$1], i_item_desc=[$4]) - HiveTableScan(table=[[default, item]], table:alias=[item]) - HiveJoin(condition=[=($13, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject($f0=[$21], $f1=[$22], $f2=[$19], $f3=[$10], $f4=[$16], $f5=[$3], $f30=[CAST($10):DOUBLE], $f7=[*(CAST($10):DOUBLE, CAST($10):DOUBLE)], $f40=[CAST($16):DOUBLE], $f9=[*(CAST($16):DOUBLE, CAST($16):DOUBLE)], $f50=[CAST($3):DOUBLE], $f11=[*(CAST($3):DOUBLE, CAST($3):DOUBLE)]) + HiveJoin(condition=[=($20, $6)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($18, $8)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[AND(=($14, $1), =($13, $2))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_bill_customer_sk=[$3], cs_item_sk=[$15], cs_quantity=[$18]) + HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($0))]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[IN($15, _UTF-16LE'2000Q1', _UTF-16LE'2000Q2', _UTF-16LE'2000Q3')]) + HiveTableScan(table=[[default, date_dim]], table:alias=[d3]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$1], ss_customer_sk=[$2], ss_store_sk=[$3], ss_ticket_number=[$4], ss_quantity=[$5], d_date_sk=[$6], sr_returned_date_sk=[$7], sr_item_sk=[$8], sr_customer_sk=[$9], sr_ticket_number=[$10], sr_return_quantity=[$11], d_date_sk0=[$12]) HiveJoin(condition=[AND(AND(=($2, $9), =($1, $8)), =($4, $10))], joinType=[inner], algorithm=[none], cost=[not available]) HiveJoin(condition=[=($6, $0)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_customer_sk=[$3], ss_store_sk=[$7], ss_ticket_number=[$9], ss_quantity=[$10]) @@ -134,6 +132,8 @@ HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], dir0=[ASC], dir1=[ASC], dir2=[ HiveProject(d_date_sk=[$0]) HiveFilter(condition=[IN($15, _UTF-16LE'2000Q1', _UTF-16LE'2000Q2', _UTF-16LE'2000Q3')]) HiveTableScan(table=[[default, date_dim]], table:alias=[d2]) - HiveProject(s_store_sk=[$0], s_state=[$24]) - HiveTableScan(table=[[default, store]], table:alias=[store]) + HiveProject(s_store_sk=[$0], s_state=[$24]) + HiveTableScan(table=[[default, store]], table:alias=[store]) + HiveProject(i_item_sk=[$0], i_item_id=[$1], i_item_desc=[$4]) + HiveTableScan(table=[[default, item]], table:alias=[item]) http://git-wip-us.apache.org/repos/asf/hive/blob/5553c59e/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query18.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query18.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query18.q.out index 72c7628..1aeb29a 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query18.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query18.q.out @@ -80,32 +80,32 @@ POSTHOOK: Input: default@item POSTHOOK: Output: hdfs://### HDFS PATH ### CBO PLAN: HiveSortLimit(sort0=[$1], sort1=[$2], sort2=[$3], sort3=[$0], dir0=[ASC], dir1=[ASC], dir2=[ASC], dir3=[ASC], fetch=[100]) - HiveProject($f0=[$3], $f1=[$2], $f2=[$1], $f3=[$0], $f4=[/($4, $5)], $f5=[/($6, $7)], $f6=[/($8, $9)], $f7=[/($10, $11)], $f8=[/($12, $13)], $f9=[/($14, $15)], $f10=[/($16, $17)]) - HiveAggregate(group=[{5, 6, 7, 10}], groups=[[{5, 6, 7, 10}, {6, 7, 10}, {7, 10}, {10}, {}]], agg#0=[sum($15)], agg#1=[count($15)], agg#2=[sum($16)], agg#3=[count($16)], agg#4=[sum($17)], agg#5=[count($17)], agg#6=[sum($18)], agg#7=[count($18)], agg#8=[sum($19)], agg#9=[count($19)], agg#10=[sum($3)], agg#11=[count($3)], agg#12=[sum($22)], agg#13=[count($22)]) - HiveJoin(condition=[=($12, $0)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($1, $8)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($2, $4)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(c_customer_sk=[$0], c_current_cdemo_sk=[$2], c_current_addr_sk=[$4], CAST=[CAST($13):DECIMAL(12, 2)]) - HiveFilter(condition=[AND(IN($12, 9, 5, 12, 4, 1, 10), IS NOT NULL($2), IS NOT NULL($4))]) - HiveTableScan(table=[[default, customer]], table:alias=[customer]) - HiveProject(ca_address_sk=[$0], ca_county=[$7], ca_state=[$8], ca_country=[$10]) - HiveFilter(condition=[IN($8, _UTF-16LE'ND', _UTF-16LE'WI', _UTF-16LE'AL', _UTF-16LE'NC', _UTF-16LE'OK', _UTF-16LE'MS', _UTF-16LE'TN')]) - HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) - HiveProject(cd_demo_sk=[$0]) - HiveTableScan(table=[[default, customer_demographics]], table:alias=[cd2]) - HiveProject(i_item_sk=[$0], i_item_id=[$1], cs_sold_date_sk=[$2], cs_bill_customer_sk=[$3], cs_bill_cdemo_sk=[$4], cs_item_sk=[$5], CAST=[$6], CAST5=[$7], CAST6=[$8], CAST7=[$9], CAST8=[$10], d_date_sk=[$11], cd_demo_sk=[$12], CAST0=[$13]) - HiveJoin(condition=[=($5, $0)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(i_item_sk=[$0], i_item_id=[$1]) - HiveTableScan(table=[[default, item]], table:alias=[item]) - HiveJoin(condition=[=($2, $10)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($0, $9)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(cs_sold_date_sk=[$0], cs_bill_customer_sk=[$3], cs_bill_cdemo_sk=[$4], cs_item_sk=[$15], CAST=[CAST($18):DECIMAL(12, 2)], CAST5=[CAST($20):DECIMAL(12, 2)], CAST6=[CAST($27):DECIMAL(12, 2)], CAST7=[CAST($21):DECIMAL(12, 2)], CAST8=[CAST($33):DECIMAL(12, 2)]) - HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($3), IS NOT NULL($0))]) - HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[=($6, 2001)]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(cd_demo_sk=[$0], CAST=[CAST($6):DECIMAL(12, 2)]) - HiveFilter(condition=[AND(=($1, _UTF-16LE'M'), =($3, _UTF-16LE'College'))]) - HiveTableScan(table=[[default, customer_demographics]], table:alias=[cd1]) + HiveProject($f0=[$0], $f1=[$3], $f2=[$2], $f3=[$1], $f4=[/($4, $5)], $f5=[/($6, $7)], $f6=[/($8, $9)], $f7=[/($10, $11)], $f8=[/($12, $13)], $f9=[/($14, $15)], $f10=[/($16, $17)]) + HiveAggregate(group=[{1, 7, 8, 9}], groups=[[{1, 7, 8, 9}, {1, 8, 9}, {1, 9}, {1}, {}]], agg#0=[sum($14)], agg#1=[count($14)], agg#2=[sum($15)], agg#3=[count($15)], agg#4=[sum($16)], agg#5=[count($16)], agg#6=[sum($17)], agg#7=[count($17)], agg#8=[sum($18)], agg#9=[count($18)], agg#10=[sum($5)], agg#11=[count($5)], agg#12=[sum($21)], agg#13=[count($21)]) + HiveJoin(condition=[=($3, $22)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($13, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(i_item_sk=[$0], i_item_id=[$1]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveJoin(condition=[=($9, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($2, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_current_cdemo_sk=[$2], c_current_addr_sk=[$4], CAST=[CAST($13):DECIMAL(12, 2)]) + HiveFilter(condition=[AND(IN($12, 9, 5, 12, 4, 1, 10), IS NOT NULL($2), IS NOT NULL($4))]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveProject(ca_address_sk=[$0], ca_county=[$7], ca_state=[$8], ca_country=[$10]) + HiveFilter(condition=[IN($8, _UTF-16LE'ND', _UTF-16LE'WI', _UTF-16LE'AL', _UTF-16LE'NC', _UTF-16LE'OK', _UTF-16LE'MS', _UTF-16LE'TN')]) + HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) + HiveProject(cs_sold_date_sk=[$0], cs_bill_customer_sk=[$1], cs_bill_cdemo_sk=[$2], cs_item_sk=[$3], CAST=[$4], CAST5=[$5], CAST6=[$6], CAST7=[$7], CAST8=[$8], d_date_sk=[$9], cd_demo_sk=[$10], CAST0=[$11]) + HiveJoin(condition=[=($2, $10)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $9)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_bill_customer_sk=[$3], cs_bill_cdemo_sk=[$4], cs_item_sk=[$15], CAST=[CAST($18):DECIMAL(12, 2)], CAST5=[CAST($20):DECIMAL(12, 2)], CAST6=[CAST($27):DECIMAL(12, 2)], CAST7=[CAST($21):DECIMAL(12, 2)], CAST8=[CAST($33):DECIMAL(12, 2)]) + HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($3), IS NOT NULL($0))]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[=($6, 2001)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(cd_demo_sk=[$0], CAST=[CAST($6):DECIMAL(12, 2)]) + HiveFilter(condition=[AND(=($1, _UTF-16LE'M'), =($3, _UTF-16LE'College'))]) + HiveTableScan(table=[[default, customer_demographics]], table:alias=[cd1]) + HiveProject(cd_demo_sk=[$0]) + HiveTableScan(table=[[default, customer_demographics]], table:alias=[cd2]) http://git-wip-us.apache.org/repos/asf/hive/blob/5553c59e/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query24.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query24.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query24.q.out index 1be3932..41d96ea 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query24.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query24.q.out @@ -1,4 +1,4 @@ -Warning: Shuffle Join MERGEJOIN[298][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 7' is a cross product +Warning: Shuffle Join MERGEJOIN[297][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 6' is a cross product PREHOOK: query: explain cbo with ssales as (select c_last_name @@ -117,32 +117,32 @@ CBO PLAN: HiveProject($f0=[$0], $f1=[$1], $f2=[$2], $f3=[$3]) HiveJoin(condition=[>($3, $4)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(c_last_name=[$1], c_first_name=[$0], s_store_name=[$2], $f3=[$3]) - HiveAggregate(group=[{0, 1, 7}], agg#0=[sum($9)]) - HiveProject(c_first_name=[$0], c_last_name=[$1], ca_state=[$2], i_current_price=[$3], i_size=[$4], i_units=[$5], i_manager_id=[$6], s_store_name=[$7], s_state=[$8], $f9=[$9]) - HiveAggregate(group=[{4, 5, 8, 17, 18, 19, 20, 22, 23}], agg#0=[sum($15)]) - HiveJoin(condition=[AND(=($14, $1), =($11, $0))], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(sr_item_sk=[$2], sr_ticket_number=[$9]) - HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns]) - HiveJoin(condition=[AND(=($22, $7), =($11, $19))], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($10, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveAggregate(group=[{0, 1, 3}], agg#0=[sum($9)]) + HiveProject(c_first_name=[$0], c_last_name=[$1], ca_state=[$2], s_store_name=[$3], s_state=[$4], i_current_price=[$5], i_size=[$6], i_units=[$7], i_manager_id=[$8], $f9=[$9]) + HiveAggregate(group=[{9, 10, 13, 17, 18, 21, 22, 23, 24}], agg#0=[sum($4)]) + HiveJoin(condition=[=($0, $20)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[AND(=($1, $7), =($2, $16))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[AND(=($3, $6), =($0, $5))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_item_sk=[$2], ss_customer_sk=[$3], ss_store_sk=[$7], ss_ticket_number=[$9], ss_sales_price=[$13]) + HiveFilter(condition=[AND(IS NOT NULL($7), IS NOT NULL($3))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(sr_item_sk=[$2], sr_ticket_number=[$9]) + HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns]) + HiveProject(c_customer_sk=[$0], c_current_addr_sk=[$1], c_first_name=[$2], c_last_name=[$3], c_birth_country=[$4], ca_address_sk=[$5], ca_state=[$6], ca_zip=[$7], UPPER=[$8], s_store_sk=[$9], s_store_name=[$10], s_state=[$11], s_zip=[$12]) HiveJoin(condition=[AND(=($1, $5), <>($4, $8))], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(c_customer_sk=[$0], c_current_addr_sk=[$4], c_first_name=[$8], c_last_name=[$9], c_birth_country=[$14]) HiveFilter(condition=[IS NOT NULL($4)]) HiveTableScan(table=[[default, customer]], table:alias=[customer]) - HiveProject(ca_address_sk=[$0], ca_state=[$8], ca_zip=[$9], UPPER=[UPPER($10)]) - HiveFilter(condition=[IS NOT NULL($9)]) - HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) - HiveProject(ss_item_sk=[$0], ss_customer_sk=[$1], ss_store_sk=[$2], ss_ticket_number=[$3], ss_sales_price=[$4], i_item_sk=[$5], i_current_price=[$6], i_size=[$7], i_units=[$8], i_manager_id=[$9]) - HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_item_sk=[$2], ss_customer_sk=[$3], ss_store_sk=[$7], ss_ticket_number=[$9], ss_sales_price=[$13]) - HiveFilter(condition=[AND(IS NOT NULL($7), IS NOT NULL($3))]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveProject(i_item_sk=[$0], i_current_price=[$5], i_size=[$15], i_units=[$18], i_manager_id=[$20]) - HiveFilter(condition=[=($17, _UTF-16LE'orchid')]) - HiveTableScan(table=[[default, item]], table:alias=[item]) - HiveProject(s_store_sk=[$0], s_store_name=[$5], s_state=[$24], s_zip=[$25]) - HiveFilter(condition=[AND(=($10, 7), IS NOT NULL($25))]) - HiveTableScan(table=[[default, store]], table:alias=[store]) + HiveJoin(condition=[=($7, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ca_address_sk=[$0], ca_state=[$8], ca_zip=[$9], UPPER=[UPPER($10)]) + HiveFilter(condition=[IS NOT NULL($9)]) + HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) + HiveProject(s_store_sk=[$0], s_store_name=[$5], s_state=[$24], s_zip=[$25]) + HiveFilter(condition=[AND(=($10, 7), IS NOT NULL($25))]) + HiveTableScan(table=[[default, store]], table:alias=[store]) + HiveProject(i_item_sk=[$0], i_current_price=[$5], i_size=[$15], i_units=[$18], i_manager_id=[$20]) + HiveFilter(condition=[=($17, _UTF-16LE'orchid')]) + HiveTableScan(table=[[default, item]], table:alias=[item]) HiveProject(_o__c0=[*(0.05, /($0, $1))]) HiveAggregate(group=[{}], agg#0=[sum($10)], agg#1=[count($10)]) HiveProject(c_first_name=[$0], c_last_name=[$1], ca_state=[$2], s_store_name=[$3], s_state=[$4], i_current_price=[$5], i_size=[$6], i_color=[$7], i_units=[$8], i_manager_id=[$9], $f10=[$10]) http://git-wip-us.apache.org/repos/asf/hive/blob/5553c59e/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query25.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query25.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query25.q.out index 658410c..958033e 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query25.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query25.q.out @@ -108,21 +108,19 @@ POSTHOOK: Input: default@store_sales POSTHOOK: Output: hdfs://### HDFS PATH ### CBO PLAN: HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], sort3=[$3], dir0=[ASC], dir1=[ASC], dir2=[ASC], dir3=[ASC], fetch=[100]) - HiveProject(i_item_id=[$0], i_item_desc=[$1], s_store_id=[$2], s_store_name=[$3], $f4=[$4], $f5=[$5], $f6=[$6]) - HiveAggregate(group=[{6, 7, 22, 23}], agg#0=[sum($13)], agg#1=[sum($19)], agg#2=[sum($3)]) - HiveJoin(condition=[AND(=($17, $1), =($16, $2))], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(cs_sold_date_sk=[$0], cs_bill_customer_sk=[$3], cs_item_sk=[$15], cs_net_profit=[$33]) - HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($0))]) - HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(BETWEEN(false, $8, 4, 10), =($6, 2000))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[d3]) - HiveProject(i_item_sk=[$0], i_item_id=[$1], i_item_desc=[$2], ss_sold_date_sk=[$3], ss_item_sk=[$4], ss_customer_sk=[$5], ss_store_sk=[$6], ss_ticket_number=[$7], ss_net_profit=[$8], d_date_sk=[$9], sr_returned_date_sk=[$10], sr_item_sk=[$11], sr_customer_sk=[$12], sr_ticket_number=[$13], sr_net_loss=[$14], d_date_sk0=[$15], s_store_sk=[$16], s_store_id=[$17], s_store_name=[$18]) - HiveJoin(condition=[=($16, $6)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(i_item_id=[$2], i_item_desc=[$3], s_store_id=[$0], s_store_name=[$1], $f4=[$4], $f5=[$5], $f6=[$6]) + HiveAggregate(group=[{19, 20, 22, 23}], agg#0=[sum($10)], agg#1=[sum($16)], agg#2=[sum($3)]) + HiveJoin(condition=[=($21, $6)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($18, $8)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[AND(=($14, $1), =($13, $2))], joinType=[inner], algorithm=[none], cost=[not available]) HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(i_item_sk=[$0], i_item_id=[$1], i_item_desc=[$4]) - HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(cs_sold_date_sk=[$0], cs_bill_customer_sk=[$3], cs_item_sk=[$15], cs_net_profit=[$33]) + HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($0))]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(BETWEEN(false, $8, 4, 10), =($6, 2000))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[d3]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$1], ss_customer_sk=[$2], ss_store_sk=[$3], ss_ticket_number=[$4], ss_net_profit=[$5], d_date_sk=[$6], sr_returned_date_sk=[$7], sr_item_sk=[$8], sr_customer_sk=[$9], sr_ticket_number=[$10], sr_net_loss=[$11], d_date_sk0=[$12]) HiveJoin(condition=[AND(AND(=($2, $9), =($1, $8)), =($4, $10))], joinType=[inner], algorithm=[none], cost=[not available]) HiveJoin(condition=[=($6, $0)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_customer_sk=[$3], ss_store_sk=[$7], ss_ticket_number=[$9], ss_net_profit=[$22]) @@ -139,6 +137,8 @@ HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], sort3=[$3], dir0=[ASC], dir1=[ HiveProject(d_date_sk=[$0]) HiveFilter(condition=[AND(BETWEEN(false, $8, 4, 10), =($6, 2000))]) HiveTableScan(table=[[default, date_dim]], table:alias=[d2]) - HiveProject(s_store_sk=[$0], s_store_id=[$1], s_store_name=[$5]) - HiveTableScan(table=[[default, store]], table:alias=[store]) + HiveProject(s_store_sk=[$0], s_store_id=[$1], s_store_name=[$5]) + HiveTableScan(table=[[default, store]], table:alias=[store]) + HiveProject(i_item_sk=[$0], i_item_id=[$1], i_item_desc=[$4]) + HiveTableScan(table=[[default, item]], table:alias=[item]) http://git-wip-us.apache.org/repos/asf/hive/blob/5553c59e/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query29.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query29.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query29.q.out index 8134a46..3e7c680 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query29.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query29.q.out @@ -106,21 +106,19 @@ POSTHOOK: Input: default@store_sales POSTHOOK: Output: hdfs://### HDFS PATH ### CBO PLAN: HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], sort3=[$3], dir0=[ASC], dir1=[ASC], dir2=[ASC], dir3=[ASC], fetch=[100]) - HiveProject(i_item_id=[$0], i_item_desc=[$1], s_store_id=[$2], s_store_name=[$3], $f4=[$4], $f5=[$5], $f6=[$6]) - HiveAggregate(group=[{6, 7, 22, 23}], agg#0=[sum($13)], agg#1=[sum($19)], agg#2=[sum($3)]) - HiveJoin(condition=[AND(=($17, $1), =($16, $2))], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(cs_sold_date_sk=[$0], cs_bill_customer_sk=[$3], cs_item_sk=[$15], cs_quantity=[$18]) - HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($0))]) - HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[IN($6, 1999, 2000, 2001)]) - HiveTableScan(table=[[default, date_dim]], table:alias=[d3]) - HiveProject(i_item_sk=[$0], i_item_id=[$1], i_item_desc=[$2], ss_sold_date_sk=[$3], ss_item_sk=[$4], ss_customer_sk=[$5], ss_store_sk=[$6], ss_ticket_number=[$7], ss_quantity=[$8], d_date_sk=[$9], sr_returned_date_sk=[$10], sr_item_sk=[$11], sr_customer_sk=[$12], sr_ticket_number=[$13], sr_return_quantity=[$14], d_date_sk0=[$15], s_store_sk=[$16], s_store_id=[$17], s_store_name=[$18]) - HiveJoin(condition=[=($16, $6)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(i_item_id=[$2], i_item_desc=[$3], s_store_id=[$0], s_store_name=[$1], $f4=[$4], $f5=[$5], $f6=[$6]) + HiveAggregate(group=[{19, 20, 22, 23}], agg#0=[sum($10)], agg#1=[sum($16)], agg#2=[sum($3)]) + HiveJoin(condition=[=($21, $6)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($18, $8)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[AND(=($14, $1), =($13, $2))], joinType=[inner], algorithm=[none], cost=[not available]) HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(i_item_sk=[$0], i_item_id=[$1], i_item_desc=[$4]) - HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(cs_sold_date_sk=[$0], cs_bill_customer_sk=[$3], cs_item_sk=[$15], cs_quantity=[$18]) + HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($0))]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[IN($6, 1999, 2000, 2001)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[d3]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$1], ss_customer_sk=[$2], ss_store_sk=[$3], ss_ticket_number=[$4], ss_quantity=[$5], d_date_sk=[$6], sr_returned_date_sk=[$7], sr_item_sk=[$8], sr_customer_sk=[$9], sr_ticket_number=[$10], sr_return_quantity=[$11], d_date_sk0=[$12]) HiveJoin(condition=[AND(AND(=($2, $9), =($1, $8)), =($4, $10))], joinType=[inner], algorithm=[none], cost=[not available]) HiveJoin(condition=[=($6, $0)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_customer_sk=[$3], ss_store_sk=[$7], ss_ticket_number=[$9], ss_quantity=[$10]) @@ -137,6 +135,8 @@ HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], sort3=[$3], dir0=[ASC], dir1=[ HiveProject(d_date_sk=[$0]) HiveFilter(condition=[AND(BETWEEN(false, $8, 4, 7), =($6, 1999))]) HiveTableScan(table=[[default, date_dim]], table:alias=[d2]) - HiveProject(s_store_sk=[$0], s_store_id=[$1], s_store_name=[$5]) - HiveTableScan(table=[[default, store]], table:alias=[store]) + HiveProject(s_store_sk=[$0], s_store_id=[$1], s_store_name=[$5]) + HiveTableScan(table=[[default, store]], table:alias=[store]) + HiveProject(i_item_sk=[$0], i_item_id=[$1], i_item_desc=[$4]) + HiveTableScan(table=[[default, item]], table:alias=[item])