HIVE-13638: CBO rule to pull up constants through Sort/Limit (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/b04dc95f Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/b04dc95f Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/b04dc95f Branch: refs/heads/java8 Commit: b04dc95f4fa7dda9d4806c45dbe52aed4b9f1a18 Parents: 2d33d09 Author: Jesus Camacho Rodriguez <jcama...@apache.org> Authored: Sat Apr 30 11:49:47 2016 +0100 Committer: Jesus Camacho Rodriguez <jcama...@apache.org> Committed: Wed May 4 18:57:30 2016 +0100 ---------------------------------------------------------------------- .../rules/HiveReduceExpressionsRule.java | 125 ++++ .../rules/HiveSortLimitPullUpConstantsRule.java | 157 +++++ .../hadoop/hive/ql/parse/CalcitePlanner.java | 3 + .../test/queries/clientpositive/cbo_input26.q | 54 ++ .../results/clientpositive/cbo_input26.q.out | 596 +++++++++++++++++++ .../clientpositive/load_dyn_part14.q.out | 6 +- .../clientpositive/spark/load_dyn_part14.q.out | 6 +- .../clientpositive/spark/union_remove_25.q.out | 60 +- .../clientpositive/union_remove_25.q.out | 20 +- 9 files changed, 985 insertions(+), 42 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/b04dc95f/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveReduceExpressionsRule.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveReduceExpressionsRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveReduceExpressionsRule.java index 9006f45..2fe9b75 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveReduceExpressionsRule.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveReduceExpressionsRule.java @@ -396,6 +396,131 @@ public abstract class HiveReduceExpressionsRule extends RelOptRule { assert constExps.size() == 
addCasts.size(); } + /** Creates a map containing each (e, constant) pair that occurs within + * a predicate list. + * + * @param clazz Class of expression that is considered constant + * @param rexBuilder Rex builder + * @param predicates Predicate list + * @param <C> what to consider a constant: {@link RexLiteral} to use a narrow + * definition of constant, or {@link RexNode} to use + * {@link RexUtil#isConstant(RexNode)} + * @return Map from values to constants + */ + public static <C extends RexNode> ImmutableMap<RexNode, C> predicateConstants( + Class<C> clazz, RexBuilder rexBuilder, RelOptPredicateList predicates) { + // We cannot fill an ImmutableMap.Builder directly: the same key may occur + // several times (e.g. "WHERE deptno = 1 AND deptno = 2"), so constraints are + // first gathered in a mutable map and copied into the builder at the end. + // The basic idea is to find all the pairs of RexNode = RexLiteral + // (1) A predicate that is not a binary EQUALS is handed to decompose() and yields no pair. + // (2) It is OK if a RexNode is equal to the same RexLiteral several times, + // (e.g. "WHERE deptno = 1 AND deptno = 1") + // (3) A RexNode with conflicting literal constraints is dropped from the map (e.g. 
+ // "WHERE deptno = 1 AND deptno = 2") + final Map<RexNode, C> map = new HashMap<>(); + final Set<RexNode> excludeSet = new HashSet<>(); + for (RexNode predicate : predicates.pulledUpPredicates) { + gatherConstraints(clazz, predicate, map, excludeSet, rexBuilder); + } + final ImmutableMap.Builder<RexNode, C> builder = + ImmutableMap.builder(); + for (Map.Entry<RexNode, C> entry : map.entrySet()) { + RexNode rexNode = entry.getKey(); + if (!overlap(rexNode, excludeSet)) { + builder.put(rexNode, entry.getValue()); + } + } + return builder.build(); + } + + private static <C extends RexNode> void gatherConstraints(Class<C> clazz, + RexNode predicate, Map<RexNode, C> map, Set<RexNode> excludeSet, + RexBuilder rexBuilder) { + if (predicate.getKind() != SqlKind.EQUALS) { + decompose(excludeSet, predicate); + return; + } + final List<RexNode> operands = ((RexCall) predicate).getOperands(); + if (operands.size() != 2) { + decompose(excludeSet, predicate); + return; + } + // if it reaches here, we have rexNode equals rexNode + final RexNode left = operands.get(0); + final RexNode right = operands.get(1); + // note that literals are immutable too and they can only be compared through + // values. + gatherConstraint(clazz, left, right, map, excludeSet, rexBuilder); + gatherConstraint(clazz, right, left, map, excludeSet, rexBuilder); + } + + /** Returns whether a value of {@code type2} can be assigned to a variable + * of {@code type1}. 
+ * + * <p>For example: + * <ul> + * <li>{@code canAssignFrom(BIGINT, TINYINT)} returns {@code true}</li> + * <li>{@code canAssignFrom(TINYINT, BIGINT)} returns {@code false}</li> + * <li>{@code canAssignFrom(BIGINT, VARCHAR)} returns {@code false}</li> + * </ul> + */ + private static boolean canAssignFrom(RelDataType type1, RelDataType type2) { + final SqlTypeName name1 = type1.getSqlTypeName(); + final SqlTypeName name2 = type2.getSqlTypeName(); + if (name1.getFamily() == name2.getFamily()) { + switch (name1.getFamily()) { + case NUMERIC: + return name1.compareTo(name2) >= 0; + default: + return true; + } + } + return false; + } + + private static <C extends RexNode> void gatherConstraint(Class<C> clazz, + RexNode left, RexNode right, Map<RexNode, C> map, Set<RexNode> excludeSet, + RexBuilder rexBuilder) { + if (!clazz.isInstance(right)) { + return; + } + if (!RexUtil.isConstant(right)) { + return; + } + C constant = clazz.cast(right); + if (excludeSet.contains(left)) { + return; + } + final C existedValue = map.get(left); + if (existedValue == null) { + switch (left.getKind()) { + case CAST: + // Convert "CAST(c) = literal" to "c = literal", as long as it is a + // widening cast. + final RexNode operand = ((RexCall) left).getOperands().get(0); + if (canAssignFrom(left.getType(), operand.getType())) { + final RexNode castRight = + rexBuilder.makeCast(operand.getType(), constant); + if (castRight instanceof RexLiteral) { + left = operand; + constant = clazz.cast(castRight); + } + } + } + map.put(left, constant); + } else { + if (existedValue instanceof RexLiteral + && constant instanceof RexLiteral + && !((RexLiteral) existedValue).getValue() + .equals(((RexLiteral) constant).getValue())) { + // we found conflicting values, e.g. left = 10 and left = 20 + map.remove(left); + excludeSet.add(left); + } + } + } + protected static ImmutableMap<RexNode, RexLiteral> predicateConstants( RelOptPredicateList predicates) { // We cannot use an ImmutableMap.Builder here. 
If there are multiple entries http://git-wip-us.apache.org/repos/asf/hive/blob/b04dc95f/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortLimitPullUpConstantsRule.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortLimitPullUpConstantsRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortLimitPullUpConstantsRule.java new file mode 100644 index 0000000..d14b0ba --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortLimitPullUpConstantsRule.java @@ -0,0 +1,157 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hive.ql.optimizer.calcite.rules; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import org.apache.calcite.plan.RelOptPredicateList; +import org.apache.calcite.plan.RelOptRule; +import org.apache.calcite.plan.RelOptRuleCall; +import org.apache.calcite.plan.RelOptUtil; +import org.apache.calcite.rel.RelCollations; +import org.apache.calcite.rel.RelFieldCollation; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.core.Sort; +import org.apache.calcite.rel.metadata.RelMetadataQuery; +import org.apache.calcite.rel.type.RelDataTypeField; +import org.apache.calcite.rex.RexBuilder; +import org.apache.calcite.rex.RexLiteral; +import org.apache.calcite.rex.RexNode; +import org.apache.calcite.rex.RexUtil; +import org.apache.calcite.tools.RelBuilder; +import org.apache.calcite.tools.RelBuilderFactory; +import org.apache.calcite.util.Pair; +import org.apache.calcite.util.mapping.Mappings; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelFactories; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSortLimit; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.google.common.collect.ImmutableList; + +/** + * Planner rule that pulls up constant keys through a SortLimit operator. + * + * This rule is only applied on SortLimit operators that are not the root + * of the plan tree. This is done because the interaction of this rule + * with the AST conversion may cause some optimizations to not kick in + * e.g. SimpleFetchOptimizer. Nevertheless, this will not have any + * performance impact in the resulting plans. 
+ */ +public class HiveSortLimitPullUpConstantsRule extends RelOptRule { + + protected static final Logger LOG = LoggerFactory.getLogger(HiveSortLimitPullUpConstantsRule.class); + + + public static final HiveSortLimitPullUpConstantsRule INSTANCE = + new HiveSortLimitPullUpConstantsRule(HiveSortLimit.class, + HiveRelFactories.HIVE_BUILDER); + + private HiveSortLimitPullUpConstantsRule( + Class<? extends Sort> sortClass, + RelBuilderFactory relBuilderFactory) { + super(operand(RelNode.class, + operand(sortClass, any())), + relBuilderFactory, null); + } + + @Override + public void onMatch(RelOptRuleCall call) { + final RelNode parent = call.rel(0); + final Sort sort = call.rel(1); + + final int count = sort.getInput().getRowType().getFieldCount(); + if (count == 1) { + // No room for optimization since we cannot convert to an empty + // Project operator. + return; + } + + final RexBuilder rexBuilder = sort.getCluster().getRexBuilder(); + final RelMetadataQuery mq = RelMetadataQuery.instance(); + final RelOptPredicateList predicates = mq.getPulledUpPredicates(sort.getInput()); + if (predicates == null) { + return; + } + + Map<RexNode, RexNode> constants = HiveReduceExpressionsRule.predicateConstants( + RexNode.class, rexBuilder, predicates); + + // None of the expressions are constant. Nothing to do. + if (constants.isEmpty()) { + return; + } + + if (count == constants.size()) { + // At least a single item in project is required. 
+ final Map<RexNode, RexNode> map = new HashMap<>(constants); + map.remove(map.keySet().iterator().next()); + constants = map; + } + + // Create expressions for Project operators before and after the Sort + List<RelDataTypeField> fields = sort.getInput().getRowType().getFieldList(); + List<Pair<RexNode, String>> newChildExprs = new ArrayList<>(); + List<RexNode> topChildExprs = new ArrayList<>(); + List<String> topChildExprsFields = new ArrayList<>(); + for (int i = 0; i < count ; i++) { + RexNode expr = rexBuilder.makeInputRef(sort.getInput(), i); + RelDataTypeField field = fields.get(i); + if (constants.containsKey(expr)) { + topChildExprs.add(constants.get(expr)); + topChildExprsFields.add(field.getName()); + } else { + newChildExprs.add(Pair.<RexNode,String>of(expr, field.getName())); + topChildExprs.add(expr); + topChildExprsFields.add(field.getName()); + } + } + + // Update field collations + final Mappings.TargetMapping mapping = + RelOptUtil.permutation(Pair.left(newChildExprs), sort.getInput().getRowType()).inverse(); + List<RelFieldCollation> fieldCollations = new ArrayList<>(); + for (RelFieldCollation fc : sort.getCollation().getFieldCollations()) { + final int target = mapping.getTargetOpt(fc.getFieldIndex()); + if (target < 0) { + // It is a constant, we can ignore it + continue; + } + fieldCollations.add(fc.copy(target)); + } + + // Update top Project positions + topChildExprs = ImmutableList.copyOf(RexUtil.apply(mapping, topChildExprs)); + + // Create new Project-Sort-Project sequence + final RelBuilder relBuilder = call.builder(); + relBuilder.push(sort.getInput()); + relBuilder.project(Pair.left(newChildExprs), Pair.right(newChildExprs)); + final ImmutableList<RexNode> sortFields = + relBuilder.fields(RelCollations.of(fieldCollations)); + relBuilder.sortLimit(sort.offset == null ? -1 : RexLiteral.intValue(sort.offset), + sort.fetch == null ? 
-1 : RexLiteral.intValue(sort.fetch), sortFields); + relBuilder.project(topChildExprs, topChildExprsFields); + + call.transformTo(parent.copy(parent.getTraitSet(), ImmutableList.of(relBuilder.build()))); + } + +} http://git-wip-us.apache.org/repos/asf/hive/blob/b04dc95f/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java index 8e00e0b..377573b 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java @@ -153,6 +153,7 @@ import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveJoinCommuteRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveJoinProjectTransposeRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveJoinPushTransitivePredicatesRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveJoinToMultiJoinRule; +import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveSortLimitPullUpConstantsRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HivePartitionPruneRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HivePointLookupOptimizerRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HivePreFilteringRule; @@ -1163,6 +1164,8 @@ public class CalcitePlanner extends SemanticAnalyzer { rules.add(HiveJoinAddNotNullRule.INSTANCE_SEMIJOIN); rules.add(HiveJoinPushTransitivePredicatesRule.INSTANCE_JOIN); rules.add(HiveJoinPushTransitivePredicatesRule.INSTANCE_SEMIJOIN); + rules.add(HiveSortMergeRule.INSTANCE); + rules.add(HiveSortLimitPullUpConstantsRule.INSTANCE); perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER); basePlan = hepPlan(basePlan, true, mdProvider, executorProvider, HepMatchOrder.BOTTOM_UP, rules.toArray(new RelOptRule[rules.size()])); 
http://git-wip-us.apache.org/repos/asf/hive/blob/b04dc95f/ql/src/test/queries/clientpositive/cbo_input26.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/cbo_input26.q b/ql/src/test/queries/clientpositive/cbo_input26.q new file mode 100644 index 0000000..40050f9 --- /dev/null +++ b/ql/src/test/queries/clientpositive/cbo_input26.q @@ -0,0 +1,54 @@ +set hive.mapred.mode=nonstrict; +set hive.optimize.constant.propagation=false; + +explain +select * from ( + select * from (select * from srcpart a where a.ds = '2008-04-08' and a.hr = '11' order by a.key limit 5)pa + union all + select * from (select * from srcpart b where b.ds = '2008-04-08' and b.hr = '14' limit 5)pb +)subq; + +select * from ( + select * from (select * from srcpart a where a.ds = '2008-04-08' and a.hr = '11' order by a.key limit 5)pa + union all + select * from (select * from srcpart b where b.ds = '2008-04-08' and b.hr = '14' limit 5)pb +)subq; + +explain +select * from ( + select * from (select a.ds, a.key, a.hr from srcpart a where a.ds = '2008-04-08' and a.hr = '11' order by a.key limit 5)pa + union all + select * from (select b.ds, b.key, b.hr from srcpart b where b.ds = '2008-04-08' and b.hr = '14' limit 5)pb +)subq; + +select * from ( + select * from (select a.ds, a.key, a.hr from srcpart a where a.ds = '2008-04-08' and a.hr = '11' order by a.key limit 5)pa + union all + select * from (select b.ds, b.key, b.hr from srcpart b where b.ds = '2008-04-08' and b.hr = '14' limit 5)pb +)subq; + +explain +select * from ( + select * from (select a.ds, a.key, a.hr from srcpart a where a.ds = '2008-04-08' and a.hr = '11' order by a.hr,a.key limit 5)pa + union all + select * from (select b.ds, b.key, b.hr from srcpart b where b.ds = '2008-04-08' and b.hr = '14' limit 5)pb +)subq; + +select * from ( + select * from (select a.ds, a.key, a.hr from srcpart a where a.ds = '2008-04-08' and a.hr = '11' order by a.hr,a.key limit 5)pa + union all 
+ select * from (select b.ds, b.key, b.hr from srcpart b where b.ds = '2008-04-08' and b.hr = '14' limit 5)pb +)subq; + +explain +select * from ( + select * from (select a.key, a.ds, a.value from srcpart a where a.ds = '2008-04-08' and a.hr = '11' order by a.ds limit 5)pa + union all + select * from (select b.key, b.ds, b.value from srcpart b where b.ds = '2008-04-08' and b.hr = '14' limit 5)pb +)subq; + +select * from ( + select * from (select a.key, a.ds, a.value from srcpart a where a.ds = '2008-04-08' and a.hr = '11' order by a.ds limit 5)pa + union all + select * from (select b.key, b.ds, b.value from srcpart b where b.ds = '2008-04-08' and b.hr = '14' limit 5)pb +)subq; http://git-wip-us.apache.org/repos/asf/hive/blob/b04dc95f/ql/src/test/results/clientpositive/cbo_input26.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/cbo_input26.q.out b/ql/src/test/results/clientpositive/cbo_input26.q.out new file mode 100644 index 0000000..5c4c771 --- /dev/null +++ b/ql/src/test/results/clientpositive/cbo_input26.q.out @@ -0,0 +1,596 @@ +PREHOOK: query: explain +select * from ( + select * from (select * from srcpart a where a.ds = '2008-04-08' and a.hr = '11' order by a.key limit 5)pa + union all + select * from (select * from srcpart b where b.ds = '2008-04-08' and b.hr = '14' limit 5)pb +)subq +PREHOOK: type: QUERY +POSTHOOK: query: explain +select * from ( + select * from (select * from srcpart a where a.ds = '2008-04-08' and a.hr = '11' order by a.key limit 5)pa + union all + select * from (select * from srcpart b where b.ds = '2008-04-08' and b.hr = '14' limit 5)pb +)subq +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1, Stage-3 + Stage-3 is a root stage + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: string) + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 5 + Statistics: Num rows: 5 Data size: 50 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), '11' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 5 Data size: 50 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Union + Statistics: Num rows: 6 Data size: 50 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 6 Data size: 50 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TableScan + Union + Statistics: Num rows: 6 Data size: 50 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 6 Data size: 50 Basic 
stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: ((ds = '2008-04-08') and (hr = '14')) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Limit + Number of rows: 5 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col0 (type: string), _col1 (type: string) + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), VALUE._col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Limit + Number of rows: 5 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), '14' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + 
+PREHOOK: query: select * from ( + select * from (select * from srcpart a where a.ds = '2008-04-08' and a.hr = '11' order by a.key limit 5)pa + union all + select * from (select * from srcpart b where b.ds = '2008-04-08' and b.hr = '14' limit 5)pb +)subq +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +#### A masked pattern was here #### +POSTHOOK: query: select * from ( + select * from (select * from srcpart a where a.ds = '2008-04-08' and a.hr = '11' order by a.key limit 5)pa + union all + select * from (select * from srcpart b where b.ds = '2008-04-08' and b.hr = '14' limit 5)pb +)subq +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +#### A masked pattern was here #### +0 val_0 2008-04-08 11 +0 val_0 2008-04-08 11 +0 val_0 2008-04-08 11 +10 val_10 2008-04-08 11 +100 val_100 2008-04-08 11 +PREHOOK: query: explain +select * from ( + select * from (select a.ds, a.key, a.hr from srcpart a where a.ds = '2008-04-08' and a.hr = '11' order by a.key limit 5)pa + union all + select * from (select b.ds, b.key, b.hr from srcpart b where b.ds = '2008-04-08' and b.hr = '14' limit 5)pb +)subq +PREHOOK: type: QUERY +POSTHOOK: query: explain +select * from ( + select * from (select a.ds, a.key, a.hr from srcpart a where a.ds = '2008-04-08' and a.hr = '11' order by a.key limit 5)pa + union all + select * from (select b.ds, b.key, b.hr from srcpart b where b.ds = '2008-04-08' and b.hr = '14' limit 5)pb +)subq +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1, Stage-3 + Stage-3 is a root stage + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 
500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 5 + Statistics: Num rows: 5 Data size: 50 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: '2008-04-08' (type: string), _col0 (type: string), '11' (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 5 Data size: 50 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Union + Statistics: Num rows: 6 Data size: 50 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 6 Data size: 50 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TableScan + Union + Statistics: Num rows: 6 Data size: 50 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 6 Data size: 50 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-3 + Map 
Reduce + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: ((ds = '2008-04-08') and (hr = '14')) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Limit + Number of rows: 5 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col0 (type: string) + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Limit + Number of rows: 5 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: '2008-04-08' (type: string), _col0 (type: string), '14' (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select * from ( + select * from (select a.ds, a.key, a.hr from srcpart a where a.ds = '2008-04-08' and a.hr = '11' order by a.key limit 5)pa + union all + select * from (select b.ds, b.key, b.hr from srcpart b where b.ds = '2008-04-08' and b.hr = '14' limit 5)pb +)subq +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart +PREHOOK: Input: 
default@srcpart@ds=2008-04-08/hr=11 +#### A masked pattern was here #### +POSTHOOK: query: select * from ( + select * from (select a.ds, a.key, a.hr from srcpart a where a.ds = '2008-04-08' and a.hr = '11' order by a.key limit 5)pa + union all + select * from (select b.ds, b.key, b.hr from srcpart b where b.ds = '2008-04-08' and b.hr = '14' limit 5)pb +)subq +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +#### A masked pattern was here #### +2008-04-08 0 11 +2008-04-08 0 11 +2008-04-08 0 11 +2008-04-08 10 11 +2008-04-08 100 11 +PREHOOK: query: explain +select * from ( + select * from (select a.ds, a.key, a.hr from srcpart a where a.ds = '2008-04-08' and a.hr = '11' order by a.hr,a.key limit 5)pa + union all + select * from (select b.ds, b.key, b.hr from srcpart b where b.ds = '2008-04-08' and b.hr = '14' limit 5)pb +)subq +PREHOOK: type: QUERY +POSTHOOK: query: explain +select * from ( + select * from (select a.ds, a.key, a.hr from srcpart a where a.ds = '2008-04-08' and a.hr = '11' order by a.hr,a.key limit 5)pa + union all + select * from (select b.ds, b.key, b.hr from srcpart b where b.ds = '2008-04-08' and b.hr = '14' limit 5)pb +)subq +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1, Stage-3 + Stage-3 is a root stage + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + Reduce Operator Tree: + Select Operator + expressions: 
KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 5 + Statistics: Num rows: 5 Data size: 50 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: '2008-04-08' (type: string), _col0 (type: string), '11' (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 5 Data size: 50 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Union + Statistics: Num rows: 6 Data size: 50 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 6 Data size: 50 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TableScan + Union + Statistics: Num rows: 6 Data size: 50 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 6 Data size: 50 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: ((ds = '2008-04-08') and (hr = '14')) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + 
expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Limit + Number of rows: 5 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col0 (type: string) + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Limit + Number of rows: 5 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: '2008-04-08' (type: string), _col0 (type: string), '14' (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select * from ( + select * from (select a.ds, a.key, a.hr from srcpart a where a.ds = '2008-04-08' and a.hr = '11' order by a.hr,a.key limit 5)pa + union all + select * from (select b.ds, b.key, b.hr from srcpart b where b.ds = '2008-04-08' and b.hr = '14' limit 5)pb +)subq +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +#### A masked pattern was here #### +POSTHOOK: query: select * from ( + select * from (select a.ds, a.key, a.hr from srcpart a where a.ds = '2008-04-08' and a.hr = '11' order by a.hr,a.key limit 5)pa + union all + select * from (select b.ds, b.key, b.hr from srcpart b where b.ds = '2008-04-08' and b.hr = 
'14' limit 5)pb +)subq +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +#### A masked pattern was here #### +2008-04-08 0 11 +2008-04-08 0 11 +2008-04-08 0 11 +2008-04-08 10 11 +2008-04-08 100 11 +PREHOOK: query: explain +select * from ( + select * from (select a.key, a.ds, a.value from srcpart a where a.ds = '2008-04-08' and a.hr = '11' order by a.ds limit 5)pa + union all + select * from (select b.key, b.ds, b.value from srcpart b where b.ds = '2008-04-08' and b.hr = '14' limit 5)pb +)subq +PREHOOK: type: QUERY +POSTHOOK: query: explain +select * from ( + select * from (select a.key, a.ds, a.value from srcpart a where a.ds = '2008-04-08' and a.hr = '11' order by a.ds limit 5)pa + union all + select * from (select b.key, b.ds, b.value from srcpart b where b.ds = '2008-04-08' and b.hr = '14' limit 5)pb +)subq +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1, Stage-3 + Stage-3 is a root stage + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 5 + Statistics: Num rows: 5 Data size: 50 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 5 Data size: 50 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col0 (type: string), _col1 (type: string) + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), VALUE._col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 5 Data size: 50 Basic stats: COMPLETE Column stats: NONE + Limit + Number 
of rows: 5 + Statistics: Num rows: 5 Data size: 50 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), '2008-04-08' (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 5 Data size: 50 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Union + Statistics: Num rows: 6 Data size: 50 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 6 Data size: 50 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TableScan + Union + Statistics: Num rows: 6 Data size: 50 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 6 Data size: 50 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: ((ds = '2008-04-08') and (hr = '14')) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: 
NONE + Limit + Number of rows: 5 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col0 (type: string), _col1 (type: string) + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), VALUE._col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Limit + Number of rows: 5 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: _col0 (type: string), '2008-04-08' (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select * from ( + select * from (select a.key, a.ds, a.value from srcpart a where a.ds = '2008-04-08' and a.hr = '11' order by a.ds limit 5)pa + union all + select * from (select b.key, b.ds, b.value from srcpart b where b.ds = '2008-04-08' and b.hr = '14' limit 5)pb +)subq +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +#### A masked pattern was here #### +POSTHOOK: query: select * from ( + select * from (select a.key, a.ds, a.value from srcpart a where a.ds = '2008-04-08' and a.hr = '11' order by a.ds limit 5)pa + union all + select * from (select b.key, b.ds, b.value from srcpart b where b.ds = '2008-04-08' and b.hr = '14' limit 5)pb +)subq +POSTHOOK: type: QUERY +POSTHOOK: Input: 
default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +#### A masked pattern was here #### +165 2008-04-08 val_165 +27 2008-04-08 val_27 +311 2008-04-08 val_311 +86 2008-04-08 val_86 +238 2008-04-08 val_238 http://git-wip-us.apache.org/repos/asf/hive/blob/b04dc95f/ql/src/test/results/clientpositive/load_dyn_part14.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/load_dyn_part14.q.out b/ql/src/test/results/clientpositive/load_dyn_part14.q.out index 53e9df3..57c4287 100644 --- a/ql/src/test/results/clientpositive/load_dyn_part14.q.out +++ b/ql/src/test/results/clientpositive/load_dyn_part14.q.out @@ -74,13 +74,13 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - Statistics: Num rows: 500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 2 - Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 Reduce Operator Tree: Limit http://git-wip-us.apache.org/repos/asf/hive/blob/b04dc95f/ql/src/test/results/clientpositive/spark/load_dyn_part14.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/spark/load_dyn_part14.q.out b/ql/src/test/results/clientpositive/spark/load_dyn_part14.q.out index 84d99c3..1940561 100644 --- a/ql/src/test/results/clientpositive/spark/load_dyn_part14.q.out +++ b/ql/src/test/results/clientpositive/spark/load_dyn_part14.q.out @@ -73,13 +73,13 @@ STAGE PLANS: 
alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - Statistics: Num rows: 500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 2 - Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 Reducer 2 Reduce Operator Tree: http://git-wip-us.apache.org/repos/asf/hive/blob/b04dc95f/ql/src/test/results/clientpositive/spark/union_remove_25.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/spark/union_remove_25.q.out b/ql/src/test/results/clientpositive/spark/union_remove_25.q.out index 253bf8f..190bea5 100644 --- a/ql/src/test/results/clientpositive/spark/union_remove_25.q.out +++ b/ql/src/test/results/clientpositive/spark/union_remove_25.q.out @@ -438,7 +438,7 @@ STAGE PLANS: Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string), hr (type: string) - outputColumnNames: _col0, _col1, _col3 + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 1000 @@ -447,49 +447,57 @@ STAGE PLANS: sort order: Statistics: Num rows: 1000 Data size: 10000 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 - value expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string) + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) Reducer 2 Reduce 
Operator Tree: Select Operator - expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col3 (type: string) - outputColumnNames: _col0, _col1, _col3 + expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1000 Data size: 10000 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 1000 Statistics: Num rows: 1000 Data size: 10000 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), UDFToLong(_col1) (type: bigint), '2008-04-08' (type: string), _col3 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2000 Data size: 20000 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 1000 Data size: 10000 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), UDFToLong(_col1) (type: bigint), '2008-04-08' (type: string), _col3 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 2000 Data size: 20000 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl3 + File Output Operator + compressed: false + Statistics: Num rows: 2000 Data size: 20000 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.outputtbl3 Reducer 4 Reduce Operator Tree: Select Operator - expressions: VALUE._col0 (type: string), VALUE._col1 (type: 
string), VALUE._col3 (type: string) - outputColumnNames: _col0, _col1, _col3 + expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1000 Data size: 10000 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 1000 Statistics: Num rows: 1000 Data size: 10000 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), UDFToLong(_col1) (type: bigint), '2008-04-08' (type: string), _col3 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2000 Data size: 20000 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 1000 Data size: 10000 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), UDFToLong(_col1) (type: bigint), '2008-04-08' (type: string), _col3 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 2000 Data size: 20000 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl3 + File Output Operator + compressed: false + Statistics: Num rows: 2000 Data size: 20000 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.outputtbl3 Stage: Stage-0 Move Operator http://git-wip-us.apache.org/repos/asf/hive/blob/b04dc95f/ql/src/test/results/clientpositive/union_remove_25.q.out 
---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/union_remove_25.q.out b/ql/src/test/results/clientpositive/union_remove_25.q.out index 54ddf56..3869735 100644 --- a/ql/src/test/results/clientpositive/union_remove_25.q.out +++ b/ql/src/test/results/clientpositive/union_remove_25.q.out @@ -461,7 +461,7 @@ STAGE PLANS: Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string), hr (type: string) - outputColumnNames: _col0, _col1, _col3 + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 1000 @@ -470,17 +470,17 @@ STAGE PLANS: sort order: Statistics: Num rows: 1000 Data size: 10000 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 - value expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string) + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) Reduce Operator Tree: Select Operator - expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col3 (type: string) - outputColumnNames: _col0, _col1, _col3 + expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1000 Data size: 10000 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 1000 Statistics: Num rows: 1000 Data size: 10000 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), UDFToLong(_col1) (type: bigint), '2008-04-08' (type: string), _col3 (type: string) + expressions: _col0 (type: string), UDFToLong(_col1) (type: bigint), '2008-04-08' (type: string), _col2 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1000 Data size: 10000 Basic stats: COMPLETE Column stats: NONE File 
Output Operator @@ -513,7 +513,7 @@ STAGE PLANS: Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string), hr (type: string) - outputColumnNames: _col0, _col1, _col3 + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 1000 @@ -522,17 +522,17 @@ STAGE PLANS: sort order: Statistics: Num rows: 1000 Data size: 10000 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 - value expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string) + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) Reduce Operator Tree: Select Operator - expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col3 (type: string) - outputColumnNames: _col0, _col1, _col3 + expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1000 Data size: 10000 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 1000 Statistics: Num rows: 1000 Data size: 10000 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), UDFToLong(_col1) (type: bigint), '2008-04-08' (type: string), _col3 (type: string) + expressions: _col0 (type: string), UDFToLong(_col1) (type: bigint), '2008-04-08' (type: string), _col2 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1000 Data size: 10000 Basic stats: COMPLETE Column stats: NONE File Output Operator