This is an automated email from the ASF dual-hosted git repository.
yashmayya pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git
The following commit(s) were added to refs/heads/master by this push:
new a9159035932 Avoid per-row CAST on TIMESTAMP column when compared to a
BIGINT literal (#18396)
a9159035932 is described below
commit a9159035932a7436ad8dd1b96d4867f6d50c1094
Author: Yash Mayya <[email protected]>
AuthorDate: Mon May 4 14:30:45 2026 -0700
Avoid per-row CAST on TIMESTAMP column when compared to a BIGINT literal
(#18396)
---
.../pinot/query/validate/PinotTypeCoercion.java | 44 ++++-
.../query/validate/PinotTypeCoercionTest.java | 193 +++++++++++++++++++++
2 files changed, 228 insertions(+), 9 deletions(-)
diff --git
a/pinot-query-planner/src/main/java/org/apache/pinot/query/validate/PinotTypeCoercion.java
b/pinot-query-planner/src/main/java/org/apache/pinot/query/validate/PinotTypeCoercion.java
index d76d3120478..be906726c8e 100644
---
a/pinot-query-planner/src/main/java/org/apache/pinot/query/validate/PinotTypeCoercion.java
+++
b/pinot-query-planner/src/main/java/org/apache/pinot/query/validate/PinotTypeCoercion.java
@@ -21,7 +21,9 @@ package org.apache.pinot.query.validate;
import org.apache.calcite.rel.type.RelDataType;
import org.apache.calcite.rel.type.RelDataTypeFactory;
import org.apache.calcite.sql.SqlCallBinding;
+import org.apache.calcite.sql.SqlIdentifier;
import org.apache.calcite.sql.SqlKind;
+import org.apache.calcite.sql.SqlNode;
import org.apache.calcite.sql.SqlOperator;
import org.apache.calcite.sql.type.SqlTypeFamily;
import org.apache.calcite.sql.type.SqlTypeName;
@@ -33,14 +35,20 @@ import
org.apache.calcite.sql.validate.implicit.TypeCoercionImpl;
/**
* Custom implementation of Calcite's default type coercion implementation to
add Pinot specific type coercion rules.
* Currently, the only additional rule we add is to for TIMESTAMP / BIGINT
types. For binary arithmetic and binary
- * comparison operators, we add implicit casts to convert TIMESTAMP to BIGINT.
For other standard operators, we add
+ * comparison operators, we add implicit casts between TIMESTAMP and BIGINT.
For other standard operators, we add
* implicit casts in both directions as and when needed (TIMESTAMP -> BIGINT
and BIGINT -> TIMESTAMP).
* <p>
* This always works since Pinot's execution type for the TIMESTAMP SQL type
is LONG (i.e., BIGINT). We add these
* implicit casts for convenience since the single-stage engine already treats
the two types as interchangeable and
- * many common user query patterns include things like TIMESTAMP + LONG or
TIMESTAMP - LONG or TIMESTAMP > LONG. For
- * such operations, we're always adding the cast on the TIMESTAMP side, but
users can choose to add explicit casts
- * instead, for performance reasons (adding cast on literal side instead of
column side) and index applicability.
+ * many common user query patterns include things like TIMESTAMP + LONG or
TIMESTAMP - LONG or TIMESTAMP > LONG.
+ * <p>
+ * For binary arithmetic, the cast is always added on the TIMESTAMP side so
that the result is BIGINT (long) arithmetic
+ * (e.g., TIMESTAMP - LONG -> BIGINT). For binary comparisons, we prefer to
add the cast on the operand that is not a
+ * column reference whenever possible. For example, given a TIMESTAMP column
{@code ts} and a BIGINT literal {@code L},
+ * {@code ts > L} is rewritten as {@code ts > CAST(L AS TIMESTAMP)} rather
than {@code CAST(ts AS BIGINT) > L}. This
+ * avoids wrapping the column in a per-row CAST (which is expensive on the
query path and breaks index applicability)
+ * while remaining semantically equivalent. If both sides are column
references (or neither is), we fall back to
+ * casting the TIMESTAMP side to BIGINT to preserve the long-standing default
behavior.
*/
public class PinotTypeCoercion extends TypeCoercionImpl {
public PinotTypeCoercion(RelDataTypeFactory typeFactory,
@@ -77,17 +85,35 @@ public class PinotTypeCoercion extends TypeCoercionImpl {
final RelDataType type1 = binding.getOperandType(0);
final RelDataType type2 = binding.getOperandType(1);
- if (SqlTypeUtil.isTimestamp(type1) && SqlTypeUtil.isIntType(type2)) {
- return coerceOperandType(binding.getScope(), binding.getCall(), 0,
factory.createSqlType(SqlTypeName.BIGINT));
- }
- if (SqlTypeUtil.isIntType(type1) && SqlTypeUtil.isTimestamp(type2)) {
- return coerceOperandType(binding.getScope(), binding.getCall(), 1,
factory.createSqlType(SqlTypeName.BIGINT));
+ boolean firstIsTimestamp = SqlTypeUtil.isTimestamp(type1);
+ boolean secondIsTimestamp = SqlTypeUtil.isTimestamp(type2);
+ boolean firstIsInt = SqlTypeUtil.isIntType(type1);
+ boolean secondIsInt = SqlTypeUtil.isIntType(type2);
+
+ if ((firstIsTimestamp && secondIsInt) || (firstIsInt &&
secondIsTimestamp)) {
+ int timestampIdx = firstIsTimestamp ? 0 : 1;
+ int intIdx = firstIsTimestamp ? 1 : 0;
+ // Prefer casting the non-column-reference operand to keep the column
unwrapped.
+ // When the TIMESTAMP operand is a column and the BIGINT operand is
not, cast the BIGINT operand to TIMESTAMP
+ // rather than wrapping the column in a per-row CAST. This is
semantically equivalent because Pinot's
+ // execution type for TIMESTAMP is LONG.
+ if (isColumnReference(binding.operand(timestampIdx))
+ && !isColumnReference(binding.operand(intIdx))) {
+ return coerceOperandType(binding.getScope(), binding.getCall(),
intIdx,
+ factory.createSqlType(SqlTypeName.TIMESTAMP));
+ }
+ return coerceOperandType(binding.getScope(), binding.getCall(),
timestampIdx,
+ factory.createSqlType(SqlTypeName.BIGINT));
}
}
return super.binaryComparisonCoercion(binding);
}
+ private static boolean isColumnReference(SqlNode node) {
+ return node instanceof SqlIdentifier && !((SqlIdentifier) node).isStar();
+ }
+
@Override
public RelDataType implicitCast(RelDataType in, SqlTypeFamily expected) {
if (in.getSqlTypeName() == SqlTypeName.TIMESTAMP && expected ==
SqlTypeFamily.NUMERIC) {
diff --git
a/pinot-query-planner/src/test/java/org/apache/pinot/query/validate/PinotTypeCoercionTest.java
b/pinot-query-planner/src/test/java/org/apache/pinot/query/validate/PinotTypeCoercionTest.java
new file mode 100644
index 00000000000..89285a1a0bc
--- /dev/null
+++
b/pinot-query-planner/src/test/java/org/apache/pinot/query/validate/PinotTypeCoercionTest.java
@@ -0,0 +1,193 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.query.validate;
+
+import org.apache.pinot.query.QueryEnvironment;
+import org.apache.pinot.query.QueryEnvironmentTestBase;
+import org.testng.annotations.Test;
+
+import static org.testng.Assert.assertEquals;
+import static org.testng.Assert.assertFalse;
+import static org.testng.Assert.assertTrue;
+
+
+/**
+ * Tests for {@link PinotTypeCoercion} — in particular, the rule that prefers
casting the non-column-reference operand
+ * in TIMESTAMP-vs-BIGINT binary comparisons. This avoids wrapping a column in
a per-row CAST when the other side is a
+ * literal or constant subexpression, which is both faster on the query path
and preserves index applicability.
+ */
+public class PinotTypeCoercionTest extends QueryEnvironmentTestBase {
+
+ // 1746022135000 ms == 2025-04-30 14:08:55 UTC (used for deterministic
literal-vs-TIMESTAMP comparisons).
+ private static final long TS_LITERAL_MS = 1746022135000L;
+ private static final String TS_LITERAL_RENDERED = "2025-04-30 14:08:55";
+ // RelNode digest uses positional ordinals ($N). Column ordinals depend on
the test schema in
+ // QueryEnvironmentTestBase#getSchemaBuilder, so pin down the ones we assert
on. If the schema
+ // changes, this constant fails loudly with a clear message rather than
letting cascade failures
+ // make the assertions inscrutable.
+ private static final String TS_TIMESTAMP_ORD = "$8";
+ private static final String COL7_ORD = "$6";
+
+ private String explain(String query) {
+ try (QueryEnvironment.CompiledQuery compiled =
_queryEnvironment.compile("EXPLAIN PLAN FOR " + query)) {
+ return compiled.explain(RANDOM_REQUEST_ID_GEN.nextLong(),
null).getExplainPlan();
+ }
+ }
+
+ /**
+ * Sanity: the test schema ordinals we hard-code below match the actual
columns. If
+ * {@code QueryEnvironmentTestBase#getSchemaBuilder} changes column order,
this test fails first
+ * with a clear message instead of cascading failures across the suite.
+ */
+ @Test
+ public void testSchemaOrdinalsAreStable() {
+ String plan = explain("SELECT ts_timestamp, col7 FROM a");
+ assertTrue(plan.contains("ts_timestamp=[" + TS_TIMESTAMP_ORD + "]"),
+ "Expected ts_timestamp to project from " + TS_TIMESTAMP_ORD + ".
Got:\n" + plan);
+ assertTrue(plan.contains("col7=[" + COL7_ORD + "]"),
+ "Expected col7 to project from " + COL7_ORD + ". Got:\n" + plan);
+ }
+
+ /**
+ * When a TIMESTAMP column is compared to a BIGINT literal, the cast must
land on the literal side so that constant
+ * folding produces a TIMESTAMP literal and the column is not wrapped in a
per-row CAST.
+ */
+ @Test
+ public void testTimestampColumnVsBigintLiteralKeepsColumnUnwrapped() {
+ // Cover all binary comparison operators in both orientations. Calcite
renders != as <>.
+ String[] sqlOps = {">", "<", ">=", "<=", "=", "!="};
+ for (String op : sqlOps) {
+ String rendered = "!=".equals(op) ? "<>" : op;
+
+ // Column on the left.
+ String q1 = "SELECT ts_timestamp FROM a WHERE ts_timestamp " + op + " "
+ TS_LITERAL_MS;
+ String plan1 = explain(q1);
+ assertTrue(plan1.contains(rendered + "(" + TS_TIMESTAMP_ORD + ", " +
TS_LITERAL_RENDERED + ")"),
+ "Expected column to remain unwrapped and literal cast to TIMESTAMP
for query: " + q1 + "\nGot:\n" + plan1);
+ assertFalse(plan1.contains("CAST(" + TS_TIMESTAMP_ORD + ")"),
+ "TIMESTAMP column should not be wrapped in CAST for query: " + q1 +
"\nGot:\n" + plan1);
+
+ // Column on the right.
+ String q2 = "SELECT ts_timestamp FROM a WHERE " + TS_LITERAL_MS + " " +
op + " ts_timestamp";
+ String plan2 = explain(q2);
+ assertTrue(plan2.contains(rendered + "(" + TS_LITERAL_RENDERED + ", " +
TS_TIMESTAMP_ORD + ")"),
+ "Expected literal cast to TIMESTAMP and column unwrapped for query:
" + q2 + "\nGot:\n" + plan2);
+ assertFalse(plan2.contains("CAST(" + TS_TIMESTAMP_ORD + ")"),
+ "TIMESTAMP column should not be wrapped in CAST for query: " + q2 +
"\nGot:\n" + plan2);
+ }
+ }
+
+ /**
+ * Symmetric case: when a BIGINT column is compared to a TIMESTAMP literal,
the cast must land on the literal side
+ * so that constant folding produces a BIGINT literal and the column is not
wrapped in a per-row CAST.
+ */
+ @Test
+ public void testBigintColumnVsTimestampLiteralKeepsColumnUnwrapped() {
+ String plan =
+ explain("SELECT col7 FROM a WHERE col7 < TIMESTAMP '" +
TS_LITERAL_RENDERED + "'");
+ assertFalse(plan.contains("CAST(" + COL7_ORD + ")"),
+ "BIGINT column should not be wrapped in CAST. Got:\n" + plan);
+ assertTrue(plan.contains("<(" + COL7_ORD + ", " + TS_LITERAL_MS + ")"),
+ "Expected TIMESTAMP literal folded to BIGINT and column unwrapped.
Got:\n" + plan);
+ }
+
+ /**
+ * When a TIMESTAMP column is compared to a non-column TIMESTAMP-typed
expression (e.g. {@code NOW() - 1000}, which
+ * after binary-arithmetic coercion is BIGINT-typed and then folded back to
TIMESTAMP for the comparison), the
+ * resulting plan must keep the column unwrapped and constant-fold the
right-hand side to a TIMESTAMP literal.
+ */
+ @Test
+ public void testTimestampColumnVsConstantSubexpressionKeepsColumnUnwrapped()
{
+ // NOW() - 1000 is a constant for the lifetime of the query: arithmetic
coercion casts NOW() to BIGINT, the
+ // subtraction is BIGINT - INT = BIGINT, and our comparison rule then
casts that BIGINT result to TIMESTAMP.
+ // After constant folding, the right-hand side is rendered as a single
TIMESTAMP literal.
+ String plan = explain("SELECT ts_timestamp FROM a WHERE ts_timestamp >
NOW() - 1000");
+ assertFalse(plan.contains("CAST(" + TS_TIMESTAMP_ORD + ")"),
+ "TIMESTAMP column should not be wrapped in CAST when right-hand side
is constant. Got:\n" + plan);
+ assertTrue(plan.matches("(?s).*>\\(\\" + TS_TIMESTAMP_ORD + ",
\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2}\\).*"),
+ "Right-hand side should be constant-folded to a TIMESTAMP literal.
Got:\n" + plan);
+ }
+
+ /**
+ * When a BIGINT column is compared to a non-column TIMESTAMP-typed
expression (e.g. {@code NOW()}), the existing
+ * behavior of casting the TIMESTAMP side to BIGINT must be preserved: the
BIGINT column stays unwrapped, and the
+ * TIMESTAMP function is folded to a BIGINT literal.
+ */
+ @Test
+ public void testBigintColumnVsTimestampFunctionKeepsColumnUnwrapped() {
+ String plan = explain("SELECT col7 FROM a WHERE col7 < NOW()");
+ assertFalse(plan.contains("CAST(" + COL7_ORD + ")"),
+ "BIGINT column should not be wrapped in CAST. Got:\n" + plan);
+ assertTrue(plan.matches("(?s).*<\\(\\" + COL7_ORD + ", \\d+\\).*"),
+ "Right-hand side should be folded to a BIGINT literal. Got:\n" + plan);
+ }
+
+ /**
+ * When both operands are column references (TIMESTAMP vs BIGINT), neither
side wins the "non-column" tie-breaker, so
+ * we fall back to the long-standing default of casting the TIMESTAMP side
to BIGINT.
+ */
+ @Test
+ public void testBothColumnReferencesFallsBackToCastTimestampToBigint() {
+ String plan = explain("SELECT ts_timestamp FROM a WHERE ts_timestamp >
col7");
+ assertTrue(plan.contains(">(CAST(" + TS_TIMESTAMP_ORD + "):BIGINT"),
+ "Expected fallback to cast TIMESTAMP column to BIGINT when both
operands are columns. Got:\n" + plan);
+ }
+
+ /**
+ * TIMESTAMP-column vs TIMESTAMP-function shouldn't trigger any CAST — both
sides are already TIMESTAMP, so the
+ * coercion rule should not fire and Calcite should compare directly.
+ */
+ @Test
+ public void testTimestampColumnVsTimestampFunctionHasNoCast() {
+ String plan = explain("SELECT ts_timestamp FROM a WHERE ts_timestamp >
NOW()");
+ assertFalse(plan.contains("CAST(" + TS_TIMESTAMP_ORD + ")"),
+ "TIMESTAMP column should not be wrapped in CAST when compared to a
TIMESTAMP function. Got:\n" + plan);
+ assertEquals(plan.lines().filter(line -> line.contains("CAST(")).count(),
0L,
+ "No CAST should appear in the plan. Got:\n" + plan);
+ }
+
+ /**
+ * Regression for the {@code ago()}-style use case: {@code __time >
ago('PT5M')} should keep the column unwrapped.
+ * {@code ago(String)} is a scalar function that returns {@code long}
(rendered as BIGINT in SQL), so the comparison
+ * is TIMESTAMP-vs-BIGINT and the new rule applies. Before this rule, the
column was wrapped in {@code CAST(.. AS
+ * BIGINT)} per row, which made the query significantly slower than the
workaround {@code __time > concat(ago(...),
+ * '')} that happened to push the cast onto the literal side via the VARCHAR
coercion path.
+ */
+ @Test
+ public void testTimestampColumnVsAgoFunctionKeepsColumnUnwrapped() {
+ String plan = explain("SELECT ts_timestamp FROM a WHERE ts_timestamp >
ago('PT5M')");
+ assertFalse(plan.contains("CAST(" + TS_TIMESTAMP_ORD + ")"),
+ "TIMESTAMP column should not be wrapped in CAST when compared to
ago(...). Got:\n" + plan);
+ assertTrue(plan.matches("(?s).*>\\(\\" + TS_TIMESTAMP_ORD + ",
\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2}\\).*"),
+ "Right-hand side should be constant-folded to a TIMESTAMP literal.
Got:\n" + plan);
+ }
+
+ /**
+ * Binary-arithmetic coercion (TIMESTAMP +/- BIGINT) is unchanged: the
result must be BIGINT (long arithmetic), so the
+ * TIMESTAMP side is always cast to BIGINT regardless of which side is a
column.
+ */
+ @Test
+ public void testBinaryArithmeticStillCastsTimestampToBigint() {
+ String plan = explain("SELECT ts_timestamp FROM a WHERE ts_timestamp +
1000 > 0");
+ // Column ts_timestamp is wrapped in CAST(..):BIGINT for the arithmetic,
then the result is compared to the BIGINT
+ // literal. We do not change arithmetic coercion here.
+ assertTrue(plan.contains("CAST(" + TS_TIMESTAMP_ORD + "):BIGINT"),
+ "TIMESTAMP column must be cast to BIGINT for binary arithmetic.
Got:\n" + plan);
+ }
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]