This is an automated email from the ASF dual-hosted git repository. englefly pushed a commit to branch sum-literal-cast in repository https://gitbox.apache.org/repos/asf/doris.git
commit 8619e768ee7f3ba83057db7b134165654e3136d3 Author: englefly <[email protected]> AuthorDate: Wed Mar 11 15:56:19 2026 +0800 [opt](Nereids) strip redundant widening integer cast in SumLiteralRewrite SumLiteralRewrite transforms SUM(expr +/- literal) into SUM(expr) +/- literal * COUNT(expr). When type coercion has introduced an implicit widening cast (e.g. CAST(smallint_col AS INT)), the rewritten SUM/COUNT still operates on the wider type, forcing unnecessary wider data reads. This is redundant because SUM always returns BIGINT for any integer input (TINYINT/SMALLINT/INT/BIGINT). Strip implicit widening integer casts in extractSumLiteral() so the aggregate operates on the original narrow column directly. This benefits ClickBench Q29-style queries where SUM(col), SUM(col+1), SUM(col+2) share a narrow integer column — after stripping the cast, SUM(col+1) and SUM(col+2) reuse the existing SUM(col). --- .../nereids/rules/rewrite/SumLiteralRewrite.java | 32 ++++++++++++ .../rules/rewrite/SumLiteralRewriteTest.java | 60 ++++++++++++++++++++++ 2 files changed, 92 insertions(+) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/SumLiteralRewrite.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/SumLiteralRewrite.java index 09be00a5819..ee48de3e84d 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/SumLiteralRewrite.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/SumLiteralRewrite.java @@ -23,6 +23,7 @@ import org.apache.doris.nereids.rules.RuleType; import org.apache.doris.nereids.trees.expressions.Add; import org.apache.doris.nereids.trees.expressions.Alias; import org.apache.doris.nereids.trees.expressions.BinaryArithmetic; +import org.apache.doris.nereids.trees.expressions.Cast; import org.apache.doris.nereids.trees.expressions.Expression; import org.apache.doris.nereids.trees.expressions.Multiply; import org.apache.doris.nereids.trees.expressions.NamedExpression; @@ -204,10 +205,41 @@ public class SumLiteralRewrite extends OneRewriteRuleFactory { // only support integer or float types return null; } + // Strip redundant widening integer cast introduced by type coercion. + // e.g. SUM(CAST(smallint_col AS INT) + 1) → after rewrite becomes SUM(CAST(smallint_col AS INT)). + // Since SUM always returns BIGINT for any integer input, CAST(smallint→int) is unnecessary + // and forces wider data reads. Strip it so we get SUM(smallint_col) directly. + left = stripWideningIntegerCast(left); SumInfo info = new SumInfo(left, ((Sum) func).isDistinct(), ((Sum) func).isAlwaysNullable()); return Pair.of(namedExpression, Pair.of(info, (Literal) right)); } + /** + * Strip a widening integer cast that is redundant for SUM/COUNT. + * For example, CAST(smallint_col AS INT) → smallint_col. + * + * This is safe because: + * - SUM returns BIGINT for all integer inputs (TINYINT/SMALLINT/INT/BIGINT), + * so widening the input before aggregation does not change the result. + * - COUNT just counts non-null values, unaffected by widening. + * + * Only implicit (type-coercion) casts between integer-like types are stripped. + */ + private static Expression stripWideningIntegerCast(Expression expr) { + if (!(expr instanceof Cast)) { + return expr; + } + Cast cast = (Cast) expr; + if (cast.isExplicitType()) { + return expr; + } + Expression inner = cast.child(); + if (inner.getDataType().isIntegerLikeType() && cast.getDataType().isIntegerLikeType()) { + return inner; + } + return expr; + } + static class SumInfo { Expression expr; boolean isDistinct; diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/SumLiteralRewriteTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/SumLiteralRewriteTest.java index 19ea7b864fb..5b918c62a59 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/SumLiteralRewriteTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/SumLiteralRewriteTest.java @@ -19,12 +19,14 @@ package org.apache.doris.nereids.rules.rewrite; import org.apache.doris.nereids.trees.expressions.Add; import org.apache.doris.nereids.trees.expressions.Alias; +import org.apache.doris.nereids.trees.expressions.Cast; import org.apache.doris.nereids.trees.expressions.Slot; import org.apache.doris.nereids.trees.expressions.Subtract; import org.apache.doris.nereids.trees.expressions.functions.agg.Sum; import org.apache.doris.nereids.trees.expressions.literal.Literal; import org.apache.doris.nereids.trees.plans.logical.LogicalAggregate; import org.apache.doris.nereids.trees.plans.logical.LogicalOlapScan; +import org.apache.doris.nereids.types.BigIntType; import org.apache.doris.nereids.util.MemoPatternMatchSupported; import org.apache.doris.nereids.util.MemoTestUtils; import org.apache.doris.nereids.util.PlanChecker; @@ -143,4 +145,62 @@ class SumLiteralRewriteTest implements MemoPatternMatchSupported { .matches(logicalAggregate().when(p -> p.getOutputs().size() == 3)); } + + @Test + void testStripWideningIntegerCast() { + Slot slot1 = scan1.getOutput().get(0); + // Simulate type coercion's implicit widening cast: CAST(int_col AS BIGINT) + Cast castSlot = new Cast(slot1, BigIntType.INSTANCE); + Alias add1 = new Alias(new Sum(new Add(castSlot, Literal.of(1)))); + Alias add2 = new Alias(new Sum(new Add(castSlot, Literal.of(2)))); + LogicalAggregate<?> agg = new LogicalAggregate<>( + ImmutableList.of(), ImmutableList.of(add1, add2), scan1); + PlanChecker.from(MemoTestUtils.createConnectContext(), agg) + .applyTopDown(ImmutableList.of(new SumLiteralRewrite().build())) + .printlnTree() + // After stripping the implicit widening cast, Sum and Count should use + // slot1 directly (not Cast(slot1 AS BIGINT)), so no Cast in aggregate outputs + .matches(logicalAggregate().when(a -> + a.getOutputExpressions().stream().noneMatch( + e -> e.anyMatch(expr -> expr instanceof Cast)))); + + // Verify explicit cast is NOT stripped + Cast explicitCast = new Cast(slot1, BigIntType.INSTANCE, true); + Alias addExplicit1 = new Alias(new Sum(new Add(explicitCast, Literal.of(1)))); + Alias addExplicit2 = new Alias(new Sum(new Add(explicitCast, Literal.of(2)))); + agg = new LogicalAggregate<>( + ImmutableList.of(), ImmutableList.of(addExplicit1, addExplicit2), scan1); + PlanChecker.from(MemoTestUtils.createConnectContext(), agg) + .applyTopDown(ImmutableList.of(new SumLiteralRewrite().build())) + .printlnTree() + // Explicit cast should be preserved — aggregate outputs should still contain Cast + .matches(logicalAggregate().when(a -> + a.getOutputExpressions().stream().anyMatch( + e -> e.anyMatch(expr -> expr instanceof Cast)))); + } + + @Test + void testStripWideningCastWithExistingSum() { + // Simulates ClickBench Q29: SELECT SUM(col), SUM(col+1), SUM(col+2) + // where col is a narrow integer type and type coercion introduces implicit widening cast. + Slot slot1 = scan1.getOutput().get(0); + // Pre-existing plain SUM(slot) — no cast, no literal + Alias sum = new Alias(new Sum(slot1)); + // Simulate type coercion widening: SUM(CAST(int_col AS BIGINT) + 1) etc. + Cast castSlot = new Cast(slot1, BigIntType.INSTANCE); + Alias add1 = new Alias(new Sum(new Add(castSlot, Literal.of(1)))); + Alias add2 = new Alias(new Sum(new Add(castSlot, Literal.of(2)))); + LogicalAggregate<?> agg = new LogicalAggregate<>( + ImmutableList.of(), ImmutableList.of(sum, add1, add2), scan1); + PlanChecker.from(MemoTestUtils.createConnectContext(), agg) + .applyTopDown(ImmutableList.of(new SumLiteralRewrite().build())) + .printlnTree() + // After stripping widening cast, the base expr of SUM(CAST(slot AS BIGINT) + n) + // becomes slot — matching the pre-existing SUM(slot). Rewrite reuses it and only + // adds COUNT(slot). Aggregate outputs: sum(slot) + count(slot) = 2. + .matches(logicalAggregate().when(a -> + a.getOutputExpressions().size() == 2 + && a.getOutputExpressions().stream().noneMatch( + e -> e.anyMatch(expr -> expr instanceof Cast)))); + } } --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
