This is an automated email from the ASF dual-hosted git repository.

vgarg pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
     new 7414645  HIVE-21539: GroupBy + where clause on same column results in incorrect query rewrite(Vineet Garg, reviewed by Jesus Camacho Rodriguez)
7414645 is described below

commit 7414645f2e0e21d15a29528442e2c0d274618927
Author: Vineet Garg <vg...@apache.org>
AuthorDate: Mon Apr 8 10:17:56 2019 -0700

    HIVE-21539: GroupBy + where clause on same column results in incorrect query rewrite(Vineet Garg, reviewed by Jesus Camacho Rodriguez)
---
 .../calcite/rules/HiveRelFieldTrimmer.java         | 63 ++++++++++++----------
 ql/src/test/queries/clientpositive/groupby13.q     |  6 +++
 ql/src/test/results/clientpositive/groupby13.q.out | 50 +++++++++++++++++
 3 files changed, 92 insertions(+), 27 deletions(-)

diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelFieldTrimmer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelFieldTrimmer.java
index 3759ed6..c570356 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelFieldTrimmer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelFieldTrimmer.java
@@ -173,7 +173,7 @@ public class HiveRelFieldTrimmer extends RelFieldTrimmer {
       }
 
       Set<RelDataTypeField> inputExtraFields =
-              Collections.<RelDataTypeField>emptySet();
+          Collections.<RelDataTypeField>emptySet();
       TrimResult trimResult =
           trimChild(join, input, inputFieldsUsed.build(), inputExtraFields);
       newInputs.add(trimResult.left);
@@ -212,7 +212,7 @@ public class HiveRelFieldTrimmer extends RelFieldTrimmer {
 
     // Build new join.
     final RexVisitor<RexNode> shuttle = new RexPermuteInputsShuttle(
-            mapping, newInputs.toArray(new RelNode[newInputs.size()]));
+        mapping, newInputs.toArray(new RelNode[newInputs.size()]));
     RexNode newConditionExpr = conditionExpr.accept(shuttle);
 
     List<RexNode> newJoinFilters = Lists.newArrayList();
@@ -222,14 +222,14 @@ public class HiveRelFieldTrimmer extends RelFieldTrimmer {
     }
 
     final RelDataType newRowType = RelOptUtil.permute(join.getCluster().getTypeFactory(),
-            join.getRowType(), mapping);
+        join.getRowType(), mapping);
     final RelNode newJoin = new HiveMultiJoin(join.getCluster(),
-            newInputs,
-            newConditionExpr,
-            newRowType,
-            join.getJoinInputs(),
-            join.getJoinTypes(),
-            newJoinFilters);
+        newInputs,
+        newConditionExpr,
+        newRowType,
+        join.getJoinInputs(),
+        join.getJoinTypes(),
+        newJoinFilters);
 
     return new TrimResult(newJoin, mapping);
   }
@@ -271,7 +271,7 @@ public class HiveRelFieldTrimmer extends RelFieldTrimmer {
   }
 
   private static RelNode project(DruidQuery dq, ImmutableBitSet fieldsUsed,
-          Set<RelDataTypeField> extraFields, RelBuilder relBuilder) {
+      Set<RelDataTypeField> extraFields, RelBuilder relBuilder) {
     final int fieldCount = dq.getRowType().getFieldCount();
     if (fieldsUsed.equals(ImmutableBitSet.range(fieldCount))
         && extraFields.isEmpty()) {
@@ -321,7 +321,7 @@ public class HiveRelFieldTrimmer extends RelFieldTrimmer {
   // because if not and it consist of keys (unique + not null OR pk), we can safely remove rest of the columns
   // if those are columns are not being used further up
   private ImmutableBitSet generateGroupSetIfCardinalitySame(final Aggregate aggregate,
-          final ImmutableBitSet originalGroupSet, final ImmutableBitSet fieldsUsed) {
+      final ImmutableBitSet originalGroupSet, final ImmutableBitSet fieldsUsed) {
 
     RexBuilder rexBuilder = aggregate.getCluster().getRexBuilder();
     RelMetadataQuery mq = aggregate.getCluster().getMetadataQuery();
@@ -473,7 +473,7 @@ public class HiveRelFieldTrimmer extends RelFieldTrimmer {
    *
    */
   private Aggregate rewriteGBConstantKeys(Aggregate aggregate, ImmutableBitSet fieldsUsed,
-      Set<RelDataTypeField> extraFields) {
+      ImmutableBitSet aggCallFields) {
     if ((aggregate.getIndicatorCount() > 0)
         || (aggregate.getGroupSet().isEmpty())
         || fieldsUsed.contains(aggregate.getGroupSet())) {
@@ -503,7 +503,7 @@ public class HiveRelFieldTrimmer extends RelFieldTrimmer {
 
     if (allConstants) {
       for (int i = 0; i < rowType.getFieldCount(); i++) {
-        if (aggregate.getGroupSet().get(i)) {
+        if (aggregate.getGroupSet().get(i) && !aggCallFields.get(i)) {
           newProjects.add(rexBuilder.makeLiteral(true));
         } else {
           newProjects.add(rexBuilder.makeInputRef(input, i));
@@ -512,7 +512,7 @@ public class HiveRelFieldTrimmer extends RelFieldTrimmer {
       relBuilder.push(input);
       relBuilder.project(newProjects);
       Aggregate newAggregate = new HiveAggregate(aggregate.getCluster(), aggregate.getTraitSet(), relBuilder.build(),
-              aggregate.getGroupSet(), null, aggregate.getAggCallList());
+          aggregate.getGroupSet(), null, aggregate.getAggCallList());
       return newAggregate;
     }
     return aggregate;
@@ -533,24 +533,33 @@ public class HiveRelFieldTrimmer extends RelFieldTrimmer {
     //
     // But group and indicator fields stay, even if they are not used.
 
-    aggregate = rewriteGBConstantKeys(aggregate, fieldsUsed, extraFields);
+    // Compute which input fields are used.
 
-    final RelDataType rowType = aggregate.getRowType();
-    // Compute which input fields are used.
-    // 1. group fields are always used
-    final ImmutableBitSet.Builder inputFieldsUsed =
-        aggregate.getGroupSet().rebuild();
-    // 2. agg functions
+    // agg functions
+    // agg functions are added first (before group sets) because rewriteGBConstantsKeys
+    // needs it
+    final ImmutableBitSet.Builder aggCallFieldsUsedBuilder = ImmutableBitSet.builder();
     for (AggregateCall aggCall : aggregate.getAggCallList()) {
       for (int i : aggCall.getArgList()) {
-        inputFieldsUsed.set(i);
+        aggCallFieldsUsedBuilder.set(i);
       }
       if (aggCall.filterArg >= 0) {
-        inputFieldsUsed.set(aggCall.filterArg);
+        aggCallFieldsUsedBuilder.set(aggCall.filterArg);
       }
     }
+    // transform if group by contain constant keys
+    ImmutableBitSet aggCallFieldsUsed = aggCallFieldsUsedBuilder.build();
+    aggregate = rewriteGBConstantKeys(aggregate, fieldsUsed, aggCallFieldsUsed);
+
+    // add group fields
+    final ImmutableBitSet.Builder inputFieldsUsed = aggregate.getGroupSet().rebuild();
+    inputFieldsUsed.addAll(aggCallFieldsUsed);
+
+
+    final RelDataType rowType = aggregate.getRowType();
+
     // Create input with trimmed columns.
     final RelNode input = aggregate.getInput();
     final Set<RelDataTypeField> inputExtraFields = Collections.emptySet();
@@ -582,7 +591,7 @@ public class HiveRelFieldTrimmer extends RelFieldTrimmer {
     if (input == newInput
         && fieldsUsed.equals(ImmutableBitSet.range(rowType.getFieldCount()))) {
       return result(aggregate,
-              Mappings.createIdentity(rowType.getFieldCount()));
+          Mappings.createIdentity(rowType.getFieldCount()));
     }
 
     // update the group by keys based on inputMapping
@@ -615,7 +624,7 @@ public class HiveRelFieldTrimmer extends RelFieldTrimmer {
     } else {
       newGroupSets = ImmutableList.copyOf(
           Iterables.transform(aggregate.getGroupSets(),
-                  input1 -> Mappings.apply(inputMapping, input1)));
+              input1 -> Mappings.apply(inputMapping, input1)));
     }
 
     // Populate mapping of where to find the fields. System, group key and
@@ -641,8 +650,8 @@ public class HiveRelFieldTrimmer extends RelFieldTrimmer {
           : relBuilder.field(Mappings.apply(inputMapping, aggCall.filterArg));
       RelBuilder.AggCall newAggCall =
           relBuilder.aggregateCall(aggCall.getAggregation(),
-              aggCall.isDistinct(), aggCall.isApproximate(),
-              filterArg, aggCall.name, args);
+          aggCall.isDistinct(), aggCall.isApproximate(),
+          filterArg, aggCall.name, args);
       mapping.set(j, updatedGroupCount + newAggCallList.size());
       newAggCallList.add(newAggCall);
     }
diff --git a/ql/src/test/queries/clientpositive/groupby13.q b/ql/src/test/queries/clientpositive/groupby13.q
index 53feaed..900f557 100644
--- a/ql/src/test/queries/clientpositive/groupby13.q
+++ b/ql/src/test/queries/clientpositive/groupby13.q
@@ -14,3 +14,9 @@ int_col_7,
 int_col_7,
 LEAST(COALESCE(int_col_5, -279),
 COALESCE(int_col_7, 476));
+
+create table aGBY (i int, j string);
+insert into aGBY values ( 1, 'a'),(2,'b');
+explain cbo select min(j) from aGBY where j='a' group by j;
+select min(j) from aGBY where j='a' group by j;
+drop table aGBY;
diff --git a/ql/src/test/results/clientpositive/groupby13.q.out b/ql/src/test/results/clientpositive/groupby13.q.out
index d7fcc68..0af8530 100644
--- a/ql/src/test/results/clientpositive/groupby13.q.out
+++ b/ql/src/test/results/clientpositive/groupby13.q.out
@@ -90,3 +90,53 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
+PREHOOK: query: create table aGBY (i int, j string)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@aGBY
+POSTHOOK: query: create table aGBY (i int, j string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@aGBY
+PREHOOK: query: insert into aGBY values ( 1, 'a'),(2,'b')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@agby
+POSTHOOK: query: insert into aGBY values ( 1, 'a'),(2,'b')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@agby
+POSTHOOK: Lineage: agby.i SCRIPT []
+POSTHOOK: Lineage: agby.j SCRIPT []
+PREHOOK: query: explain cbo select min(j) from aGBY where j='a' group by j
+PREHOOK: type: QUERY
+PREHOOK: Input: default@agby
+#### A masked pattern was here ####
+POSTHOOK: query: explain cbo select min(j) from aGBY where j='a' group by j
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@agby
+#### A masked pattern was here ####
+CBO PLAN:
+HiveProject(_o__c0=[$1])
+  HiveAggregate(group=[{0}], agg#0=[min($0)])
+    HiveProject($f0=[CAST(_UTF-16LE'a':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"):VARCHAR(2147483647) CHARACTER SET "UTF-16LE"])
+      HiveFilter(condition=[=($1, _UTF-16LE'a')])
+        HiveTableScan(table=[[default, agby]], table:alias=[agby])
+
+PREHOOK: query: select min(j) from aGBY where j='a' group by j
+PREHOOK: type: QUERY
+PREHOOK: Input: default@agby
+#### A masked pattern was here ####
+POSTHOOK: query: select min(j) from aGBY where j='a' group by j
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@agby
+#### A masked pattern was here ####
+a
+PREHOOK: query: drop table aGBY
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@agby
+PREHOOK: Output: default@agby
+POSTHOOK: query: drop table aGBY
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@agby
+POSTHOOK: Output: default@agby
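A note on the fix for readers who do not want to load the whole class: rewriteGBConstantKeys replaces every constant GROUP BY key in the Aggregate's input with a TRUE literal, and before this patch it did so even when an aggregate call read the same column, so a query like "select min(j) from aGBY where j='a' group by j" ended up computing min over TRUE instead of over 'a'. The patch threads the set of aggregate-call argument fields (aggCallFields) into the rewrite and keeps the original column for those fields. The sketch below is a minimal, dependency-free model of that guard, not the committed code: the class name, the String stand-ins for RexNode projections, and the main() driver are invented for illustration, while the real method builds expressions with Calcite's RexBuilder and tracks fields with ImmutableBitSet.

    import java.util.ArrayList;
    import java.util.BitSet;
    import java.util.List;

    // Illustrative model of the guarded constant-group-key rewrite.
    public class ConstantGroupKeySketch {

      // Builds the projection fed into the new Aggregate. A constant GROUP BY
      // key may be swapped for a TRUE literal, but only when no aggregate call
      // reads that field -- the extra !aggCallFields.get(i) test is the fix.
      static List<String> newProjects(int fieldCount, BitSet groupSet, BitSet aggCallFields) {
        List<String> projects = new ArrayList<>();
        for (int i = 0; i < fieldCount; i++) {
          if (groupSet.get(i) && !aggCallFields.get(i)) {
            projects.add("literal(true)");        // constant key, unused by any agg call
          } else {
            projects.add("inputRef(" + i + ")");  // keep the original column
          }
        }
        return projects;
      }

      public static void main(String[] args) {
        BitSet groupSet = new BitSet();
        groupSet.set(1);          // GROUP BY j  (field 1 of aGBY(i, j))
        BitSet aggCallFields = new BitSet();
        aggCallFields.set(1);     // min(j) also reads field 1

        // Prints [inputRef(0), inputRef(1)]: field 1 survives, so min(j)
        // still sees 'a'. Without the guard, field 1 would become
        // literal(true) and the aggregate would run over TRUE.
        System.out.println(newProjects(2, groupSet, aggCallFields));
      }
    }

This also explains the reordering in the @@ -533 hunk: the trimmer now collects the aggregate-call fields first, because rewriteGBConstantKeys needs that bitset before the group fields are merged into inputFieldsUsed.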