HIVE-16934: Transform COUNT(x) into COUNT() when x is not nullable (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/4ad4ceb6 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/4ad4ceb6 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/4ad4ceb6 Branch: refs/heads/master Commit: 4ad4ceb665af73ba5a0fbad71670af8f357c8f1b Parents: 287113e Author: Jesus Camacho Rodriguez <[email protected]> Authored: Wed Jun 21 19:34:00 2017 +0100 Committer: Jesus Camacho Rodriguez <[email protected]> Committed: Sat Jun 24 10:49:00 2017 +0100 ---------------------------------------------------------------------- .../hive/ql/optimizer/StatsOptimizer.java | 17 +- .../calcite/rules/HiveAggregateReduceRule.java | 126 +++ .../calcite/rules/HiveSemiJoinRule.java | 96 +- .../hadoop/hive/ql/parse/CalcitePlanner.java | 5 +- .../results/clientpositive/auto_join26.q.out | 8 +- .../results/clientpositive/auto_join27.q.out | 8 +- .../test/results/clientpositive/combine2.q.out | 6 +- .../test/results/clientpositive/constGby.q.out | 2 +- .../clientpositive/correlationoptimizer10.q.out | 4 +- .../clientpositive/correlationoptimizer11.q.out | 8 +- .../clientpositive/correlationoptimizer13.q.out | 32 +- .../clientpositive/correlationoptimizer15.q.out | 4 +- .../clientpositive/correlationoptimizer7.q.out | 24 +- .../clientpositive/correlationoptimizer8.q.out | 252 ++--- .../clientpositive/correlationoptimizer9.q.out | 156 ++- .../clientpositive/count_dist_rewrite.q.out | 12 +- .../results/clientpositive/create_view.q.out | 4 +- .../clientpositive/dynamic_rdd_cache.q.out | 10 +- .../results/clientpositive/except_all.q.out | 48 +- .../clientpositive/explain_logical.q.out | 32 +- .../test/results/clientpositive/fold_case.q.out | 14 +- .../results/clientpositive/groupby4_map.q.out | 4 +- .../clientpositive/groupby4_map_skew.q.out | 4 +- .../results/clientpositive/groupby_cube1.q.out | 20 +- .../clientpositive/groupby_position.q.out | 24 +- .../clientpositive/groupby_rollup1.q.out | 12 +- .../clientpositive/groupby_sort_11.q.out | 18 +- .../clientpositive/groupby_sort_1_23.q.out | 254 ++--- .../results/clientpositive/groupby_sort_2.q.out | 6 +- .../results/clientpositive/groupby_sort_3.q.out | 12 +- .../results/clientpositive/groupby_sort_4.q.out | 12 +- .../results/clientpositive/groupby_sort_5.q.out | 18 +- .../results/clientpositive/groupby_sort_6.q.out | 20 +- .../results/clientpositive/groupby_sort_7.q.out | 6 +- .../results/clientpositive/groupby_sort_9.q.out | 6 +- .../clientpositive/groupby_sort_skew_1_23.q.out | 254 ++--- .../clientpositive/groupby_sort_test_1.q.out | 6 +- .../infer_bucket_sort_grouping_operators.q.out | 18 +- .../test/results/clientpositive/input30.q.out | 6 +- .../test/results/clientpositive/input31.q.out | 4 +- .../test/results/clientpositive/input32.q.out | 4 +- .../test/results/clientpositive/input39.q.out | 2 +- .../test/results/clientpositive/input41.q.out | 2 +- ql/src/test/results/clientpositive/join29.q.out | 48 +- ql/src/test/results/clientpositive/join30.q.out | 8 +- ql/src/test/results/clientpositive/join31.q.out | 6 +- ql/src/test/results/clientpositive/join35.q.out | 64 +- ql/src/test/results/clientpositive/join38.q.out | 28 +- ql/src/test/results/clientpositive/join40.q.out | 6 +- .../list_bucket_query_oneskew_2.q.out | 10 +- .../clientpositive/llap/bucket_groupby.q.out | 72 +- .../llap/correlationoptimizer1.q.out | 186 ++- .../llap/correlationoptimizer2.q.out | 4 +- .../llap/correlationoptimizer3.q.out | 156 ++- .../llap/correlationoptimizer4.q.out | 220 ++-- .../llap/correlationoptimizer6.q.out | 92 +- .../results/clientpositive/llap/count.q.out | 124 +- .../llap/count_dist_rewrite.q.out | 12 +- .../llap/dynamic_partition_pruning.q.out | 261 +++-- .../clientpositive/llap/except_distinct.q.out | 44 +- .../clientpositive/llap/explainuser_1.q.out | 220 ++-- .../clientpositive/llap/explainuser_2.q.out | 148 +-- .../clientpositive/llap/intersect_all.q.out | 40 +- .../llap/intersect_distinct.q.out | 40 +- .../clientpositive/llap/intersect_merge.q.out | 194 ++-- .../clientpositive/llap/limit_pushdown.q.out | 6 +- .../results/clientpositive/llap/lineage2.q.out | 2 +- .../results/clientpositive/llap/merge1.q.out | 6 +- .../results/clientpositive/llap/merge2.q.out | 6 +- .../llap/metadata_only_queries.q.out | 112 +- .../clientpositive/llap/multiMapJoin2.q.out | 142 ++- .../llap/offset_limit_ppd_optimizer.q.out | 14 +- .../clientpositive/llap/skewjoinopt15.q.out | 4 +- .../clientpositive/llap/subquery_in.q.out | 46 +- .../clientpositive/llap/subquery_multi.q.out | 255 ++--- .../clientpositive/llap/subquery_null_agg.q.out | 51 +- .../clientpositive/llap/subquery_scalar.q.out | 54 +- .../clientpositive/llap/subquery_views.q.out | 2 +- .../llap/table_access_keys_stats.q.out | 36 +- .../llap/tez_union_multiinsert.q.out | 40 +- .../results/clientpositive/llap/union2.q.out | 4 +- .../results/clientpositive/llap/union4.q.out | 6 +- .../results/clientpositive/llap/union5.q.out | 4 +- .../results/clientpositive/llap/union6.q.out | 4 +- .../results/clientpositive/llap/union7.q.out | 20 +- .../results/clientpositive/llap/union9.q.out | 6 +- .../clientpositive/llap/unionDistinct_1.q.out | 156 ++- .../clientpositive/llap/union_remove_26.q.out | 54 +- .../clientpositive/llap/vector_between_in.q.out | 16 +- .../clientpositive/llap/vector_count.q.out | 66 +- .../llap/vector_groupby_cube1.q.out | 20 +- .../llap/vector_groupby_rollup1.q.out | 12 +- .../llap/vector_mr_diff_schema_alias.q.out | 26 +- .../vectorized_dynamic_partition_pruning.q.out | 284 +++-- ql/src/test/results/clientpositive/merge1.q.out | 6 +- ql/src/test/results/clientpositive/merge2.q.out | 6 +- .../clientpositive/metadata_only_queries.q.out | 112 +- .../results/clientpositive/notable_alias1.q.out | 24 +- .../results/clientpositive/notable_alias2.q.out | 24 +- .../test/results/clientpositive/nullgroup.q.out | 8 +- .../results/clientpositive/nullgroup2.q.out | 76 +- .../results/clientpositive/nullgroup3.q.out | 8 +- .../results/clientpositive/nullgroup4.q.out | 26 +- .../nullgroup4_multi_distinct.q.out | 12 +- .../clientpositive/partition_boolexpr.q.out | 4 +- .../results/clientpositive/perf/query14.q.out | 1056 +++++++++--------- .../results/clientpositive/perf/query38.q.out | 254 +++-- .../results/clientpositive/perf/query72.q.out | 34 +- .../results/clientpositive/perf/query8.q.out | 4 +- .../results/clientpositive/perf/query83.q.out | 271 ++--- .../results/clientpositive/perf/query87.q.out | 232 ++-- .../test/results/clientpositive/plan_json.q.out | 2 +- .../results/clientpositive/ppd_gby_join.q.out | 4 +- .../reduce_deduplicate_extended2.q.out | 89 +- .../results/clientpositive/setop_subq.q.out | 36 +- .../clientpositive/skewjoin_mapjoin1.q.out | 16 +- .../clientpositive/skewjoin_mapjoin10.q.out | 16 +- .../clientpositive/skewjoin_mapjoin5.q.out | 24 +- .../results/clientpositive/skewjoinopt1.q.out | 8 +- .../results/clientpositive/skewjoinopt2.q.out | 8 +- .../results/clientpositive/skewjoinopt9.q.out | 24 +- .../clientpositive/spark/auto_join26.q.out | 4 +- .../clientpositive/spark/auto_join27.q.out | 2 +- .../results/clientpositive/spark/count.q.out | 124 +- .../spark/dynamic_rdd_cache.q.out | 10 +- .../clientpositive/spark/groupby4_map.q.out | 4 +- .../spark/groupby4_map_skew.q.out | 4 +- .../clientpositive/spark/groupby_cube1.q.out | 20 +- .../clientpositive/spark/groupby_position.q.out | 24 +- .../clientpositive/spark/groupby_rollup1.q.out | 12 +- .../spark/groupby_sort_1_23.q.out | 258 ++--- .../spark/groupby_sort_skew_1_23.q.out | 258 ++--- .../results/clientpositive/spark/join29.q.out | 48 +- .../results/clientpositive/spark/join30.q.out | 4 +- .../results/clientpositive/spark/join31.q.out | 2 +- .../results/clientpositive/spark/join35.q.out | 64 +- .../results/clientpositive/spark/join38.q.out | 24 +- .../clientpositive/spark/limit_pushdown.q.out | 12 +- .../results/clientpositive/spark/merge1.q.out | 6 +- .../results/clientpositive/spark/merge2.q.out | 6 +- .../spark/metadata_only_queries.q.out | 112 +- .../clientpositive/spark/nullgroup.q.out | 8 +- .../clientpositive/spark/nullgroup2.q.out | 76 +- .../clientpositive/spark/nullgroup4.q.out | 26 +- .../spark/nullgroup4_multi_distinct.q.out | 12 +- .../clientpositive/spark/ppd_gby_join.q.out | 4 +- .../clientpositive/spark/skewjoinopt1.q.out | 8 +- .../clientpositive/spark/skewjoinopt15.q.out | 8 +- .../clientpositive/spark/skewjoinopt2.q.out | 8 +- .../clientpositive/spark/skewjoinopt9.q.out | 24 +- .../results/clientpositive/spark/stats1.q.out | 4 +- .../clientpositive/spark/subquery_in.q.out | 43 +- .../spark/table_access_keys_stats.q.out | 36 +- .../results/clientpositive/spark/union10.q.out | 8 +- .../results/clientpositive/spark/union11.q.out | 6 +- .../results/clientpositive/spark/union12.q.out | 8 +- .../results/clientpositive/spark/union14.q.out | 4 +- .../results/clientpositive/spark/union15.q.out | 6 +- .../results/clientpositive/spark/union16.q.out | 2 +- .../results/clientpositive/spark/union17.q.out | 8 +- .../results/clientpositive/spark/union18.q.out | 8 +- .../results/clientpositive/spark/union19.q.out | 8 +- .../results/clientpositive/spark/union2.q.out | 4 +- .../results/clientpositive/spark/union20.q.out | 4 +- .../results/clientpositive/spark/union24.q.out | 56 +- .../results/clientpositive/spark/union25.q.out | 4 +- .../results/clientpositive/spark/union31.q.out | 18 +- .../results/clientpositive/spark/union4.q.out | 6 +- .../results/clientpositive/spark/union5.q.out | 4 +- .../results/clientpositive/spark/union6.q.out | 4 +- .../results/clientpositive/spark/union7.q.out | 4 +- .../results/clientpositive/spark/union9.q.out | 6 +- .../clientpositive/spark/union_remove_1.q.out | 6 +- .../clientpositive/spark/union_remove_10.q.out | 6 +- .../clientpositive/spark/union_remove_13.q.out | 6 +- .../clientpositive/spark/union_remove_15.q.out | 6 +- .../clientpositive/spark/union_remove_16.q.out | 6 +- .../clientpositive/spark/union_remove_18.q.out | 6 +- .../clientpositive/spark/union_remove_19.q.out | 54 +- .../clientpositive/spark/union_remove_2.q.out | 6 +- .../clientpositive/spark/union_remove_20.q.out | 6 +- .../clientpositive/spark/union_remove_22.q.out | 6 +- .../clientpositive/spark/union_remove_23.q.out | 10 +- .../clientpositive/spark/union_remove_24.q.out | 6 +- .../clientpositive/spark/union_remove_25.q.out | 6 +- .../clientpositive/spark/union_remove_4.q.out | 6 +- .../clientpositive/spark/union_remove_5.q.out | 6 +- .../clientpositive/spark/union_remove_6.q.out | 6 +- .../spark/union_remove_6_subq.q.out | 14 +- .../clientpositive/spark/union_remove_7.q.out | 6 +- .../clientpositive/spark/union_remove_8.q.out | 6 +- .../clientpositive/spark/union_remove_9.q.out | 6 +- .../clientpositive/spark/union_view.q.out | 24 +- .../spark/vector_between_in.q.out | 16 +- ql/src/test/results/clientpositive/stats1.q.out | 4 +- ql/src/test/results/clientpositive/subq2.q.out | 24 +- .../clientpositive/subquery_in_having.q.out | 600 +++++----- .../symlink_text_input_format.q.out | 4 +- .../clientpositive/tez/explainanalyze_2.q.out | 116 +- .../results/clientpositive/udtf_explode.q.out | 4 +- .../test/results/clientpositive/union10.q.out | 8 +- .../test/results/clientpositive/union11.q.out | 6 +- .../test/results/clientpositive/union12.q.out | 8 +- .../test/results/clientpositive/union14.q.out | 4 +- .../test/results/clientpositive/union15.q.out | 6 +- .../test/results/clientpositive/union16.q.out | 50 +- .../test/results/clientpositive/union17.q.out | 8 +- .../test/results/clientpositive/union18.q.out | 8 +- .../test/results/clientpositive/union19.q.out | 8 +- ql/src/test/results/clientpositive/union2.q.out | 4 +- .../test/results/clientpositive/union20.q.out | 4 +- .../test/results/clientpositive/union24.q.out | 38 +- .../test/results/clientpositive/union25.q.out | 4 +- .../test/results/clientpositive/union31.q.out | 18 +- ql/src/test/results/clientpositive/union4.q.out | 6 +- ql/src/test/results/clientpositive/union5.q.out | 4 +- ql/src/test/results/clientpositive/union6.q.out | 4 +- ql/src/test/results/clientpositive/union7.q.out | 4 +- ql/src/test/results/clientpositive/union9.q.out | 6 +- .../clientpositive/union_pos_alias.q.out | 10 +- .../results/clientpositive/union_remove_1.q.out | 12 +- .../clientpositive/union_remove_10.q.out | 6 +- .../clientpositive/union_remove_13.q.out | 6 +- .../clientpositive/union_remove_15.q.out | 12 +- .../clientpositive/union_remove_16.q.out | 12 +- .../clientpositive/union_remove_18.q.out | 12 +- .../clientpositive/union_remove_19.q.out | 108 +- .../results/clientpositive/union_remove_2.q.out | 6 +- .../clientpositive/union_remove_20.q.out | 12 +- .../clientpositive/union_remove_22.q.out | 12 +- .../clientpositive/union_remove_23.q.out | 10 +- .../clientpositive/union_remove_24.q.out | 12 +- .../clientpositive/union_remove_25.q.out | 12 +- .../results/clientpositive/union_remove_4.q.out | 12 +- .../results/clientpositive/union_remove_5.q.out | 6 +- .../results/clientpositive/union_remove_6.q.out | 12 +- .../clientpositive/union_remove_6_subq.q.out | 28 +- .../results/clientpositive/union_remove_7.q.out | 12 +- .../results/clientpositive/union_remove_8.q.out | 6 +- .../results/clientpositive/union_remove_9.q.out | 6 +- .../results/clientpositive/union_view.q.out | 24 +- .../results/clientpositive/vector_count.q.out | 66 +- .../vector_mr_diff_schema_alias.q.out | 26 +- .../clientpositive/vectorized_mapjoin2.q.out | 8 +- .../test/results/clientpositive/view_cbo.q.out | 12 +- 245 files changed, 5058 insertions(+), 5396 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/4ad4ceb6/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java index 32d1de1..7b2a1a9 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java @@ -728,19 +728,6 @@ public class StatsOptimizer extends Transform { cselOpTocgbyOp.put(index, nameToIndex.get(exprColumnNodeDesc.getColumn())); } } - // cselOpTocgbyOp may be 0 to 1, where the 0th position of cgbyOp is '1' and 1st position of cgbyOp is count('1') - // Thus, we need to adjust it to the correct position. - List<Entry<Integer, Integer>> list = new ArrayList<>(cselOpTocgbyOp.entrySet()); - Collections.sort(list, new Comparator<Entry<Integer, Integer>>() { - public int compare(Entry<Integer, Integer> o1, Entry<Integer, Integer> o2) { - return (o1.getValue()).compareTo(o2.getValue()); - } - }); - cselOpTocgbyOp.clear(); - // adjust cselOpTocgbyOp - for (int index = 0; index < list.size(); index++) { - cselOpTocgbyOp.put(list.get(index).getKey(), index); - } List<Object> oneRowWithConstant = new ArrayList<>(); for (int pos = 0; pos < cselOp.getSchema().getSignature().size(); pos++) { if (posToConstant.containsKey(pos)) { @@ -748,7 +735,9 @@ public class StatsOptimizer extends Transform { oneRowWithConstant.add(posToConstant.get(pos)); } else { // This position is an aggregation. - oneRowWithConstant.add(oneRow.get(cselOpTocgbyOp.get(pos))); + // As we store in oneRow only the aggregate results, we need to adjust to the correct position + // if there are keys in the GBy operator. + oneRowWithConstant.add(oneRow.get(cselOpTocgbyOp.get(pos) - cgbyOp.getConf().getKeys().size())); } ColumnInfo colInfo = cselOp.getSchema().getSignature().get(pos); colNames.add(colInfo.getInternalName()); http://git-wip-us.apache.org/repos/asf/hive/blob/4ad4ceb6/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveAggregateReduceRule.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveAggregateReduceRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveAggregateReduceRule.java new file mode 100644 index 0000000..5c5c0a0 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveAggregateReduceRule.java @@ -0,0 +1,126 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to you under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.optimizer.calcite.rules; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import org.apache.calcite.plan.RelOptRule; +import org.apache.calcite.plan.RelOptRuleCall; +import org.apache.calcite.rel.core.Aggregate; +import org.apache.calcite.rel.core.AggregateCall; +import org.apache.calcite.rex.RexBuilder; +import org.apache.calcite.rex.RexNode; +import org.apache.calcite.sql.SqlKind; +import org.apache.calcite.tools.RelBuilder; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelFactories; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate; + +import com.google.common.collect.Lists; + +/** + * Planner rule that reduces aggregate functions in + * {@link org.apache.calcite.rel.core.Aggregate}s to simpler forms. + * + * <p>Rewrites: + * <ul> + * + * <li>COUNT(x) → COUNT(*) if x is not nullable + * </ul> + */ +public class HiveAggregateReduceRule extends RelOptRule { + + /** The singleton. */ + public static final HiveAggregateReduceRule INSTANCE = + new HiveAggregateReduceRule(); + + /** Private constructor. */ + private HiveAggregateReduceRule() { + super(operand(HiveAggregate.class, any()), + HiveRelFactories.HIVE_BUILDER, null); + } + + @Override + public void onMatch(RelOptRuleCall call) { + final RelBuilder relBuilder = call.builder(); + final Aggregate aggRel = (Aggregate) call.rel(0); + final RexBuilder rexBuilder = aggRel.getCluster().getRexBuilder(); + + // We try to rewrite COUNT(x) into COUNT(*) if x is not nullable. + // We remove duplicate aggregate calls as well. + boolean rewrite = false; + boolean identity = true; + final Map<AggregateCall, Integer> mapping = new HashMap<>(); + final List<Integer> indexes = new ArrayList<>(); + final List<AggregateCall> aggCalls = aggRel.getAggCallList(); + final List<AggregateCall> newAggCalls = new ArrayList<>(aggCalls.size()); + int nextIdx = aggRel.getGroupCount() + aggRel.getIndicatorCount(); + for (int i = 0; i < aggCalls.size(); i++) { + AggregateCall aggCall = aggCalls.get(i); + if (aggCall.getAggregation().getKind() == SqlKind.COUNT && !aggCall.isDistinct()) { + final List<Integer> args = aggCall.getArgList(); + final List<Integer> nullableArgs = new ArrayList<>(args.size()); + for (int arg : args) { + if (aggRel.getInput().getRowType().getFieldList().get(arg).getType().isNullable()) { + nullableArgs.add(arg); + } + } + if (nullableArgs.size() != args.size()) { + aggCall = aggCall.copy(nullableArgs, aggCall.filterArg); + rewrite = true; + } + } + Integer idx = mapping.get(aggCall); + if (idx == null) { + newAggCalls.add(aggCall); + idx = nextIdx++; + mapping.put(aggCall, idx); + } else { + rewrite = true; + identity = false; + } + indexes.add(idx); + } + + if (rewrite) { + // We trigger the transform + final Aggregate newAggregate = aggRel.copy(aggRel.getTraitSet(), aggRel.getInput(), + aggRel.indicator, aggRel.getGroupSet(), aggRel.getGroupSets(), + newAggCalls); + if (identity) { + call.transformTo(newAggregate); + } else { + final int offset = aggRel.getGroupCount() + aggRel.getIndicatorCount(); + final List<RexNode> projList = Lists.newArrayList(); + for (int i = 0; i < offset; ++i) { + projList.add( + rexBuilder.makeInputRef( + aggRel.getRowType().getFieldList().get(i).getType(), i)); + } + for (int i = offset; i < aggRel.getRowType().getFieldCount(); ++i) { + projList.add( + rexBuilder.makeInputRef( + aggRel.getRowType().getFieldList().get(i).getType(), indexes.get(i-offset))); + } + call.transformTo(relBuilder.push(newAggregate).project(projList).build()); + } + } + } + +} http://git-wip-us.apache.org/repos/asf/hive/blob/4ad4ceb6/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSemiJoinRule.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSemiJoinRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSemiJoinRule.java index e400896..7799090 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSemiJoinRule.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSemiJoinRule.java @@ -19,10 +19,12 @@ package org.apache.hadoop.hive.ql.optimizer.calcite.rules; import org.apache.calcite.plan.RelOptCluster; import org.apache.calcite.plan.RelOptRule; import org.apache.calcite.plan.RelOptRuleCall; +import org.apache.calcite.plan.RelOptRuleOperand; import org.apache.calcite.plan.RelOptUtil; import org.apache.calcite.plan.hep.HepRelVertex; import org.apache.calcite.rel.RelNode; import org.apache.calcite.rel.core.Aggregate; +import org.apache.calcite.rel.core.AggregateCall; import org.apache.calcite.rel.core.Join; import org.apache.calcite.rel.core.JoinInfo; import org.apache.calcite.rel.core.JoinRelType; @@ -36,6 +38,7 @@ import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelFactories; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import com.google.common.collect.ImmutableList; import com.google.common.collect.Lists; import java.util.ArrayList; @@ -49,34 +52,29 @@ import java.util.List; * TODO Remove this rule and use Calcite's SemiJoinRule. Not possible currently * since Calcite doesnt use RelBuilder for this rule and we want to generate HiveSemiJoin rel here. */ -public class HiveSemiJoinRule extends RelOptRule { +public abstract class HiveSemiJoinRule extends RelOptRule { - public static final HiveSemiJoinRule INSTANCE = new HiveSemiJoinRule(HiveRelFactories.HIVE_BUILDER); protected static final Logger LOG = LoggerFactory.getLogger(HiveSemiJoinRule.class); - private HiveSemiJoinRule(RelBuilderFactory relBuilder) { - super( - operand(Project.class, - some( - operand(Join.class, - some(operand(RelNode.class, any()), - operand(Aggregate.class, any()))))), relBuilder, null); + public static final HiveProjectToSemiJoinRule INSTANCE_PROJECT = + new HiveProjectToSemiJoinRule(HiveRelFactories.HIVE_BUILDER); + + public static final HiveAggregateToSemiJoinRule INSTANCE_AGGREGATE = + new HiveAggregateToSemiJoinRule(HiveRelFactories.HIVE_BUILDER); + + private HiveSemiJoinRule(RelOptRuleOperand operand, RelBuilderFactory relBuilder) { + super(operand, relBuilder, null); } - @Override public void onMatch(RelOptRuleCall call) { + protected void perform(RelOptRuleCall call, ImmutableBitSet topRefs, + RelNode topOperator, Join join, RelNode left, Aggregate aggregate) { LOG.debug("Matched HiveSemiJoinRule"); - final Project project = call.rel(0); - final Join join = call.rel(1); - final RelNode left = call.rel(2); - final Aggregate aggregate = call.rel(3); final RelOptCluster cluster = join.getCluster(); final RexBuilder rexBuilder = cluster.getRexBuilder(); - final ImmutableBitSet bits = - RelOptUtil.InputFinder.bits(project.getProjects(), null); final ImmutableBitSet rightBits = ImmutableBitSet.range(left.getRowType().getFieldCount(), join.getRowType().getFieldCount()); - if (bits.intersects(rightBits)) { + if (topRefs.intersects(rightBits)) { return; } final JoinInfo joinInfo = join.analyzeCondition(); @@ -88,7 +86,7 @@ public class HiveSemiJoinRule extends RelOptRule { } if(join.getJoinType() == JoinRelType.LEFT) { // since for LEFT join we are only interested in rows from LEFT we can get rid of right side - call.transformTo(call.builder().push(left).project(project.getProjects(), project.getRowType().getFieldNames()).build()); + call.transformTo(topOperator.copy(topOperator.getTraitSet(), ImmutableList.of(left))); return; } if (join.getJoinType() != JoinRelType.INNER) { @@ -126,8 +124,68 @@ public class HiveSemiJoinRule extends RelOptRule { else { semi = call.builder().push(left).push(aggregate.getInput()).semiJoin(newCondition).build(); } - call.transformTo(call.builder().push(semi).project(project.getProjects(), project.getRowType().getFieldNames()).build()); + call.transformTo(topOperator.copy(topOperator.getTraitSet(), ImmutableList.of(semi))); + } + + /** SemiJoinRule that matches a Project on top of a Join with an Aggregate + * as its right child. */ + public static class HiveProjectToSemiJoinRule extends HiveSemiJoinRule { + + /** Creates a HiveProjectToSemiJoinRule. */ + public HiveProjectToSemiJoinRule(RelBuilderFactory relBuilder) { + super( + operand(Project.class, + some(operand(Join.class, + some( + operand(RelNode.class, any()), + operand(Aggregate.class, any()))))), + relBuilder); + } + + @Override public void onMatch(RelOptRuleCall call) { + final Project project = call.rel(0); + final Join join = call.rel(1); + final RelNode left = call.rel(2); + final Aggregate aggregate = call.rel(3); + final ImmutableBitSet topRefs = + RelOptUtil.InputFinder.bits(project.getChildExps(), null); + perform(call, topRefs, project, join, left, aggregate); + } } + + /** SemiJoinRule that matches a Aggregate on top of a Join with an Aggregate + * as its right child. */ + public static class HiveAggregateToSemiJoinRule extends HiveSemiJoinRule { + + /** Creates a HiveAggregateToSemiJoinRule. */ + public HiveAggregateToSemiJoinRule(RelBuilderFactory relBuilder) { + super( + operand(Aggregate.class, + some(operand(Join.class, + some( + operand(RelNode.class, any()), + operand(Aggregate.class, any()))))), + relBuilder); + } + + @Override public void onMatch(RelOptRuleCall call) { + final Aggregate topAggregate = call.rel(0); + final Join join = call.rel(1); + final RelNode left = call.rel(2); + final Aggregate aggregate = call.rel(3); + // Gather columns used by aggregate operator + final ImmutableBitSet.Builder topRefs = ImmutableBitSet.builder(); + topRefs.addAll(topAggregate.getGroupSet()); + for (AggregateCall aggCall : topAggregate.getAggCallList()) { + topRefs.addAll(aggCall.getArgList()); + if (aggCall.filterArg != -1) { + topRefs.set(aggCall.filterArg); + } + } + perform(call, topRefs.build(), topAggregate, join, left, aggregate); + } + } + } // End SemiJoinRule.java http://git-wip-us.apache.org/repos/asf/hive/blob/4ad4ceb6/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java index 931e074..88054e7 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java @@ -171,6 +171,7 @@ import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveUnion; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveAggregateJoinTransposeRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveAggregateProjectMergeRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveAggregatePullUpConstantsRule; +import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveAggregateReduceRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveDruidProjectFilterTransposeRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveExceptRewriteRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveExpandDistinctAggregatesRule; @@ -1523,7 +1524,8 @@ public class CalcitePlanner extends SemanticAnalyzer { // run this rule at later stages, since many calcite rules cant deal with semijoin if (conf.getBoolVar(ConfVars.SEMIJOIN_CONVERSION)) { perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER); - calciteOptimizedPlan = hepPlan(calciteOptimizedPlan, false, mdProvider.getMetadataProvider(), null, HiveSemiJoinRule.INSTANCE); + calciteOptimizedPlan = hepPlan(calciteOptimizedPlan, false, mdProvider.getMetadataProvider(), null, + HiveSemiJoinRule.INSTANCE_PROJECT, HiveSemiJoinRule.INSTANCE_AGGREGATE); perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, "Calcite: Semijoin conversion"); } @@ -1682,6 +1684,7 @@ public class CalcitePlanner extends SemanticAnalyzer { rules.add(HiveReduceExpressionsRule.PROJECT_INSTANCE); rules.add(HiveReduceExpressionsRule.FILTER_INSTANCE); rules.add(HiveReduceExpressionsRule.JOIN_INSTANCE); + rules.add(HiveAggregateReduceRule.INSTANCE); if (conf.getBoolVar(HiveConf.ConfVars.HIVEPOINTLOOKUPOPTIMIZER)) { rules.add(new HivePointLookupOptimizerRule.FilterCondition(minNumORClauses)); rules.add(new HivePointLookupOptimizerRule.JoinCondition(minNumORClauses)); http://git-wip-us.apache.org/repos/asf/hive/blob/4ad4ceb6/ql/src/test/results/clientpositive/auto_join26.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/auto_join26.q.out b/ql/src/test/results/clientpositive/auto_join26.q.out index b05145d..e6d966f 100644 --- a/ql/src/test/results/clientpositive/auto_join26.q.out +++ b/ql/src/test/results/clientpositive/auto_join26.q.out @@ -24,11 +24,11 @@ STAGE PLANS: Stage: Stage-6 Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_0:$hdt$_0:x + $hdt$_0:x Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $hdt$_0:$hdt$_0:x + $hdt$_0:x TableScan alias: x Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE @@ -66,7 +66,7 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(1) + aggregations: count() keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 @@ -124,7 +124,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Input: default@src1 POSTHOOK: Output: default@dest_j1 -POSTHOOK: Lineage: dest_j1.cnt EXPRESSION [(src1)x.null, ] +POSTHOOK: Lineage: dest_j1.cnt EXPRESSION [(src1)x.null, (src)y.null, ] POSTHOOK: Lineage: dest_j1.key EXPRESSION [(src1)x.FieldSchema(name:key, type:string, comment:default), ] PREHOOK: query: select * from dest_j1 PREHOOK: type: QUERY http://git-wip-us.apache.org/repos/asf/hive/blob/4ad4ceb6/ql/src/test/results/clientpositive/auto_join27.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/auto_join27.q.out b/ql/src/test/results/clientpositive/auto_join27.q.out index 3d4d355..ba67d1a 100644 --- a/ql/src/test/results/clientpositive/auto_join27.q.out +++ b/ql/src/test/results/clientpositive/auto_join27.q.out @@ -69,11 +69,11 @@ STAGE PLANS: Stage: Stage-7 Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_0:$hdt$_1:src + $hdt$_1:src Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $hdt$_0:$hdt$_1:src + $hdt$_1:src TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -112,7 +112,7 @@ STAGE PLANS: 1 _col0 (type: string) Statistics: Num rows: 273 Data size: 2908 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(1) + aggregations: count() mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE @@ -133,7 +133,7 @@ STAGE PLANS: 1 _col0 (type: string) Statistics: Num rows: 273 Data size: 2908 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(1) + aggregations: count() mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE http://git-wip-us.apache.org/repos/asf/hive/blob/4ad4ceb6/ql/src/test/results/clientpositive/combine2.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/combine2.q.out b/ql/src/test/results/clientpositive/combine2.q.out index 6188345..d4194c8 100644 --- a/ql/src/test/results/clientpositive/combine2.q.out +++ b/ql/src/test/results/clientpositive/combine2.q.out @@ -164,11 +164,11 @@ STAGE PLANS: Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ds (type: string) - outputColumnNames: _col0 + outputColumnNames: ds Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(1) - keys: _col0 (type: string) + aggregations: count() + keys: ds (type: string) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE http://git-wip-us.apache.org/repos/asf/hive/blob/4ad4ceb6/ql/src/test/results/clientpositive/constGby.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/constGby.q.out b/ql/src/test/results/clientpositive/constGby.q.out index fd8ecc2..7115be3 100644 --- a/ql/src/test/results/clientpositive/constGby.q.out +++ b/ql/src/test/results/clientpositive/constGby.q.out @@ -40,7 +40,7 @@ STAGE PLANS: Select Operator Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE Group By Operator - aggregations: count(1) + aggregations: count() keys: 1 (type: int) mode: hash outputColumnNames: _col0, _col1 http://git-wip-us.apache.org/repos/asf/hive/blob/4ad4ceb6/ql/src/test/results/clientpositive/correlationoptimizer10.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/correlationoptimizer10.q.out b/ql/src/test/results/clientpositive/correlationoptimizer10.q.out index 6745eb4..8a8920e 100644 --- a/ql/src/test/results/clientpositive/correlationoptimizer10.q.out +++ b/ql/src/test/results/clientpositive/correlationoptimizer10.q.out @@ -62,7 +62,7 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 27 Data size: 210 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(1) + aggregations: count() keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 @@ -271,7 +271,7 @@ STAGE PLANS: Mux Operator Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Group By Operator - aggregations: count(1) + aggregations: count() keys: _col0 (type: string) mode: complete outputColumnNames: _col0, _col1 http://git-wip-us.apache.org/repos/asf/hive/blob/4ad4ceb6/ql/src/test/results/clientpositive/correlationoptimizer11.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/correlationoptimizer11.q.out b/ql/src/test/results/clientpositive/correlationoptimizer11.q.out index 00006a6..cf22507 100644 --- a/ql/src/test/results/clientpositive/correlationoptimizer11.q.out +++ b/ql/src/test/results/clientpositive/correlationoptimizer11.q.out @@ -93,7 +93,7 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 110 Data size: 1177 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(1) + aggregations: count() keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 @@ -225,7 +225,7 @@ STAGE PLANS: Mux Operator Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Group By Operator - aggregations: count(1) + aggregations: count() keys: _col0 (type: string) mode: complete outputColumnNames: _col0, _col1 @@ -330,7 +330,7 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 27 Data size: 210 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(1) + aggregations: count() keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 @@ -473,7 +473,7 @@ STAGE PLANS: Mux Operator Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Group By Operator - aggregations: count(1) + aggregations: count() keys: _col0 (type: string) mode: complete outputColumnNames: _col0, _col1 http://git-wip-us.apache.org/repos/asf/hive/blob/4ad4ceb6/ql/src/test/results/clientpositive/correlationoptimizer13.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/correlationoptimizer13.q.out b/ql/src/test/results/clientpositive/correlationoptimizer13.q.out index 21219ca..6a8fe70 100644 --- a/ql/src/test/results/clientpositive/correlationoptimizer13.q.out +++ b/ql/src/test/results/clientpositive/correlationoptimizer13.q.out @@ -51,22 +51,18 @@ STAGE PLANS: Filter Operator predicate: ((c1 < 120) and c3 is not null) (type: boolean) Statistics: Num rows: 342 Data size: 7639 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: c3 (type: string), c1 (type: int) - outputColumnNames: _col0, _col1 + Group By Operator + aggregations: count() + keys: c1 (type: int), c3 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 342 Data size: 7639 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: _col1 (type: int), _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: string) Statistics: Num rows: 342 Data size: 7639 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: string) - Statistics: Num rows: 342 Data size: 7639 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: bigint) + value expressions: _col2 (type: bigint) TableScan alias: x1 Statistics: Num rows: 1028 Data size: 22964 Basic stats: COMPLETE Column stats: NONE @@ -74,12 +70,12 @@ STAGE PLANS: predicate: ((c2 > 100) and (c1 < 120) and c3 is not null) (type: boolean) Statistics: Num rows: 114 Data size: 2546 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: c3 (type: string), c1 (type: int) - outputColumnNames: _col0, _col1 + expressions: c1 (type: int), c3 (type: string) + outputColumnNames: c1, c3 Statistics: Num rows: 114 Data size: 2546 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(1) - keys: _col1 (type: int), _col0 (type: string) + aggregations: count() + keys: c1 (type: int), c3 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 114 Data size: 2546 Basic stats: COMPLETE Column stats: NONE http://git-wip-us.apache.org/repos/asf/hive/blob/4ad4ceb6/ql/src/test/results/clientpositive/correlationoptimizer15.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/correlationoptimizer15.q.out b/ql/src/test/results/clientpositive/correlationoptimizer15.q.out index a142867..2d813b2 100644 --- a/ql/src/test/results/clientpositive/correlationoptimizer15.q.out +++ b/ql/src/test/results/clientpositive/correlationoptimizer15.q.out @@ -63,7 +63,7 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 27 Data size: 210 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(1) + aggregations: count() keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 @@ -303,7 +303,7 @@ STAGE PLANS: Mux Operator Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Group By Operator - aggregations: count(1) + aggregations: count() keys: _col0 (type: string) mode: complete outputColumnNames: _col0, _col1 http://git-wip-us.apache.org/repos/asf/hive/blob/4ad4ceb6/ql/src/test/results/clientpositive/correlationoptimizer7.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/correlationoptimizer7.q.out b/ql/src/test/results/clientpositive/correlationoptimizer7.q.out index efcb46b..82fecab 100644 --- a/ql/src/test/results/clientpositive/correlationoptimizer7.q.out +++ b/ql/src/test/results/clientpositive/correlationoptimizer7.q.out @@ -25,11 +25,11 @@ STAGE PLANS: Stage: Stage-9 Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_0:$hdt$_0:$hdt$_1:y + $hdt$_0:$hdt$_1:y Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $hdt$_0:$hdt$_0:$hdt$_1:y + $hdt$_0:$hdt$_1:y TableScan alias: y Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE @@ -67,7 +67,7 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(1) + aggregations: count() keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 @@ -205,11 +205,11 @@ STAGE PLANS: Stage: Stage-6 Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_0:$hdt$_0:$hdt$_1:y + $hdt$_0:$hdt$_1:y Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $hdt$_0:$hdt$_0:$hdt$_1:y + $hdt$_0:$hdt$_1:y TableScan alias: y Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE @@ -263,7 +263,7 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(1) + aggregations: count() keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 @@ -388,11 +388,11 @@ STAGE PLANS: Stage: Stage-9 Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_0:$hdt$_0:$hdt$_1:y + $hdt$_0:$hdt$_1:y Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $hdt$_0:$hdt$_0:$hdt$_1:y + $hdt$_0:$hdt$_1:y TableScan alias: y Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE @@ -430,7 +430,7 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(1) + aggregations: count() keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 @@ -568,11 +568,11 @@ STAGE PLANS: Stage: Stage-6 Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_0:$hdt$_0:$hdt$_1:y + $hdt$_0:$hdt$_1:y Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $hdt$_0:$hdt$_0:$hdt$_1:y + $hdt$_0:$hdt$_1:y TableScan alias: y Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE @@ -626,7 +626,7 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(1) + aggregations: count() keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 http://git-wip-us.apache.org/repos/asf/hive/blob/4ad4ceb6/ql/src/test/results/clientpositive/correlationoptimizer8.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/correlationoptimizer8.q.out b/ql/src/test/results/clientpositive/correlationoptimizer8.q.out index 1d930f8..f3cb988 100644 --- a/ql/src/test/results/clientpositive/correlationoptimizer8.q.out +++ b/ql/src/test/results/clientpositive/correlationoptimizer8.q.out @@ -32,22 +32,18 @@ STAGE PLANS: Filter Operator predicate: (UDFToDouble(key) < 20.0) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 + Group By Operator + aggregations: count() + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -129,22 +125,18 @@ STAGE PLANS: Filter Operator predicate: (UDFToDouble(key) > 100.0) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 + Group By Operator + aggregations: count() + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -232,44 +224,36 @@ STAGE PLANS: Filter Operator predicate: (UDFToDouble(key) < 20.0) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 + Group By Operator + aggregations: count() + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) + value expressions: _col1 (type: bigint) TableScan alias: x1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (UDFToDouble(key) > 100.0) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 + Group By Operator + aggregations: count() + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) + value expressions: _col1 (type: bigint) TableScan alias: x Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE @@ -444,22 +428,18 @@ STAGE PLANS: Filter Operator predicate: (UDFToDouble(key) < 20.0) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 + Group By Operator + aggregations: count() + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -536,11 +516,11 @@ STAGE PLANS: Statistics: Num rows: 8 Data size: 61 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: value (type: string) - outputColumnNames: _col0 + outputColumnNames: value Statistics: Num rows: 8 Data size: 61 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(1) - keys: _col0 (type: string) + aggregations: count() + keys: value (type: string) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 8 Data size: 61 Basic stats: COMPLETE Column stats: NONE @@ -657,22 +637,18 @@ STAGE PLANS: Filter Operator predicate: (UDFToDouble(key) < 20.0) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 + Group By Operator + aggregations: count() + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) + value expressions: _col1 (type: bigint) TableScan alias: x1 Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE @@ -681,11 +657,11 @@ STAGE PLANS: Statistics: Num rows: 8 Data size: 61 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: value (type: string) - outputColumnNames: _col0 + outputColumnNames: value Statistics: Num rows: 8 Data size: 61 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(1) - keys: _col0 (type: string) + aggregations: count() + keys: value (type: string) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 8 Data size: 61 Basic stats: COMPLETE Column stats: NONE @@ -874,22 +850,18 @@ STAGE PLANS: Filter Operator predicate: (UDFToDouble(key) < 20.0) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 + Group By Operator + aggregations: count() + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -971,22 +943,18 @@ STAGE PLANS: Filter Operator predicate: (UDFToDouble(key) > 100.0) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + Group By Operator + aggregations: count() + keys: key (type: string), value (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: _col0 (type: string), _col1 (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: bigint) + value expressions: _col2 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -1046,22 +1014,18 @@ STAGE PLANS: Filter Operator predicate: (UDFToDouble(key) < 20.0) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 + Group By Operator + aggregations: count() + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -1140,22 +1104,18 @@ STAGE PLANS: Filter Operator predicate: (UDFToDouble(key) > 100.0) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 + Group By Operator + aggregations: count() + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) http://git-wip-us.apache.org/repos/asf/hive/blob/4ad4ceb6/ql/src/test/results/clientpositive/correlationoptimizer9.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/correlationoptimizer9.q.out b/ql/src/test/results/clientpositive/correlationoptimizer9.q.out index e3f11ef..be54d33 100644 --- a/ql/src/test/results/clientpositive/correlationoptimizer9.q.out +++ b/ql/src/test/results/clientpositive/correlationoptimizer9.q.out @@ -52,22 +52,18 @@ STAGE PLANS: Filter Operator predicate: ((c1 < 120) and (c1 > 100)) (type: boolean) Statistics: Num rows: 114 Data size: 2546 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: c1 (type: int) - outputColumnNames: _col0 + Group By Operator + aggregations: count() + keys: c1 (type: int) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 114 Data size: 2546 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 114 Data size: 2546 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 114 Data size: 2546 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -129,22 +125,18 @@ STAGE PLANS: Filter Operator predicate: ((c2 > 100) and (c2 < 120)) (type: boolean) Statistics: Num rows: 114 Data size: 2546 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: c2 (type: int) - outputColumnNames: _col0 + Group By Operator + aggregations: count() + keys: c2 (type: int) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 114 Data size: 2546 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 114 Data size: 2546 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 114 Data size: 2546 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -222,44 +214,36 @@ STAGE PLANS: Filter Operator predicate: ((c1 < 120) and (c1 > 100)) (type: boolean) Statistics: Num rows: 114 Data size: 2546 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: c1 (type: int) - outputColumnNames: _col0 + Group By Operator + aggregations: count() + keys: c1 (type: int) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 114 Data size: 2546 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 114 Data size: 2546 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 114 Data size: 2546 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) + value expressions: _col1 (type: bigint) TableScan alias: x1 Statistics: Num rows: 1028 Data size: 22964 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((c2 > 100) and (c2 < 120)) (type: boolean) Statistics: Num rows: 114 Data size: 2546 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: c2 (type: int) - outputColumnNames: _col0 + Group By Operator + aggregations: count() + keys: c2 (type: int) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 114 Data size: 2546 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 114 Data size: 2546 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 114 Data size: 2546 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) + value expressions: _col1 (type: bigint) Reduce Operator Tree: Demux Operator Statistics: Num rows: 228 Data size: 5092 Basic stats: COMPLETE Column stats: NONE @@ -383,22 +367,18 @@ STAGE PLANS: Filter Operator predicate: ((c1 < 120) and c3 is not null) (type: boolean) Statistics: Num rows: 342 Data size: 7639 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: c1 (type: int), c3 (type: string) - outputColumnNames: _col0, _col1 + Group By Operator + aggregations: count() + keys: c1 (type: int), c3 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 342 Data size: 7639 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: _col0 (type: int), _col1 (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: string) Statistics: Num rows: 342 Data size: 7639 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: string) - Statistics: Num rows: 342 Data size: 7639 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: bigint) + value expressions: _col2 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -462,11 +442,11 @@ STAGE PLANS: Statistics: Num rows: 114 Data size: 2546 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: c1 (type: int), c3 (type: string) - outputColumnNames: _col0, _col1 + outputColumnNames: c1, c3 Statistics: Num rows: 114 Data size: 2546 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(1) - keys: _col0 (type: int), _col1 (type: string) + aggregations: count() + keys: c1 (type: int), c3 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 114 Data size: 2546 Basic stats: COMPLETE Column stats: NONE @@ -553,22 +533,18 @@ STAGE PLANS: Filter Operator predicate: ((c1 < 120) and c3 is not null) (type: boolean) Statistics: Num rows: 342 Data size: 7639 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: c1 (type: int), c3 (type: string) - outputColumnNames: _col0, _col1 + Group By Operator + aggregations: count() + keys: c1 (type: int), c3 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 342 Data size: 7639 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: _col0 (type: int), _col1 (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: string) Statistics: Num rows: 342 Data size: 7639 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: string) - Statistics: Num rows: 342 Data size: 7639 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: bigint) + value expressions: _col2 (type: bigint) TableScan alias: x1 Statistics: Num rows: 1028 Data size: 22964 Basic stats: COMPLETE Column stats: NONE @@ -577,11 +553,11 @@ STAGE PLANS: Statistics: Num rows: 114 Data size: 2546 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: c1 (type: int), c3 (type: string) - outputColumnNames: _col0, _col1 + outputColumnNames: c1, c3 Statistics: Num rows: 114 Data size: 2546 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(1) - keys: _col0 (type: int), _col1 (type: string) + aggregations: count() + keys: c1 (type: int), c3 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 114 Data size: 2546 Basic stats: COMPLETE Column stats: NONE http://git-wip-us.apache.org/repos/asf/hive/blob/4ad4ceb6/ql/src/test/results/clientpositive/count_dist_rewrite.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/count_dist_rewrite.q.out b/ql/src/test/results/clientpositive/count_dist_rewrite.q.out index ceda918..d6ff5b7 100644 --- a/ql/src/test/results/clientpositive/count_dist_rewrite.q.out +++ b/ql/src/test/results/clientpositive/count_dist_rewrite.q.out @@ -365,11 +365,11 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) - outputColumnNames: _col1 + outputColumnNames: key Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(1) - keys: _col1 (type: string) + aggregations: count() + keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -565,11 +565,11 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) - outputColumnNames: _col1 + outputColumnNames: key Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(1), stddev(_col1) - keys: _col1 (type: string) + aggregations: count(), stddev(key) + keys: key (type: string) mode: hash outputColumnNames: _col0, _col1, _col3 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE http://git-wip-us.apache.org/repos/asf/hive/blob/4ad4ceb6/ql/src/test/results/clientpositive/create_view.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/create_view.q.out b/ql/src/test/results/clientpositive/create_view.q.out index d3b858a..823b716 100644 --- a/ql/src/test/results/clientpositive/create_view.q.out +++ b/ql/src/test/results/clientpositive/create_view.q.out @@ -1279,8 +1279,8 @@ POSTHOOK: Output: database:default POSTHOOK: Output: default@view14 POSTHOOK: Lineage: view14.k1 EXPRESSION [(src)s2.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: view14.k2 EXPRESSION [(src)s4.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: view14.v1 EXPRESSION [(src)s2.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: view14.v2 EXPRESSION [(src)s4.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: view14.v1 EXPRESSION [(src)s1.null, (src)s2.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: view14.v2 EXPRESSION [(src)s3.null, (src)s4.FieldSchema(name:value, type:string, comment:default), ] PREHOOK: query: DESCRIBE EXTENDED view14 PREHOOK: type: DESCTABLE PREHOOK: Input: default@view14 http://git-wip-us.apache.org/repos/asf/hive/blob/4ad4ceb6/ql/src/test/results/clientpositive/dynamic_rdd_cache.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/dynamic_rdd_cache.q.out b/ql/src/test/results/clientpositive/dynamic_rdd_cache.q.out index 1a0e46c..873a41d 100644 --- a/ql/src/test/results/clientpositive/dynamic_rdd_cache.q.out +++ b/ql/src/test/results/clientpositive/dynamic_rdd_cache.q.out @@ -418,7 +418,7 @@ STAGE PLANS: Select Operator Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: count(1) + aggregations: count() mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE @@ -553,7 +553,7 @@ STAGE PLANS: Select Operator Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: count(1) + aggregations: count() mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE @@ -587,7 +587,7 @@ STAGE PLANS: Select Operator Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: count(1) + aggregations: count() mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE @@ -662,7 +662,7 @@ STAGE PLANS: Select Operator Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: count(1) + aggregations: count() mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE @@ -770,7 +770,7 @@ STAGE PLANS: Select Operator Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: count(1) + aggregations: count() mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
