This is an automated email from the ASF dual-hosted git repository.

yashmayya pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
     new a9159035932 Avoid per-row CAST on TIMESTAMP column when compared to a 
BIGINT literal (#18396)
a9159035932 is described below

commit a9159035932a7436ad8dd1b96d4867f6d50c1094
Author: Yash Mayya <[email protected]>
AuthorDate: Mon May 4 14:30:45 2026 -0700

    Avoid per-row CAST on TIMESTAMP column when compared to a BIGINT literal 
(#18396)
---
 .../pinot/query/validate/PinotTypeCoercion.java    |  44 ++++-
 .../query/validate/PinotTypeCoercionTest.java      | 193 +++++++++++++++++++++
 2 files changed, 228 insertions(+), 9 deletions(-)

diff --git 
a/pinot-query-planner/src/main/java/org/apache/pinot/query/validate/PinotTypeCoercion.java
 
b/pinot-query-planner/src/main/java/org/apache/pinot/query/validate/PinotTypeCoercion.java
index d76d3120478..be906726c8e 100644
--- 
a/pinot-query-planner/src/main/java/org/apache/pinot/query/validate/PinotTypeCoercion.java
+++ 
b/pinot-query-planner/src/main/java/org/apache/pinot/query/validate/PinotTypeCoercion.java
@@ -21,7 +21,9 @@ package org.apache.pinot.query.validate;
 import org.apache.calcite.rel.type.RelDataType;
 import org.apache.calcite.rel.type.RelDataTypeFactory;
 import org.apache.calcite.sql.SqlCallBinding;
+import org.apache.calcite.sql.SqlIdentifier;
 import org.apache.calcite.sql.SqlKind;
+import org.apache.calcite.sql.SqlNode;
 import org.apache.calcite.sql.SqlOperator;
 import org.apache.calcite.sql.type.SqlTypeFamily;
 import org.apache.calcite.sql.type.SqlTypeName;
@@ -33,14 +35,20 @@ import 
org.apache.calcite.sql.validate.implicit.TypeCoercionImpl;
 /**
  * Custom implementation of Calcite's default type coercion implementation to 
add Pinot specific type coercion rules.
  * Currently, the only additional rule we add is to for TIMESTAMP / BIGINT 
types. For binary arithmetic and binary
- * comparison operators, we add implicit casts to convert TIMESTAMP to BIGINT. 
For other standard operators, we add
+ * comparison operators, we add implicit casts between TIMESTAMP and BIGINT. 
For other standard operators, we add
  * implicit casts in both directions as and when needed (TIMESTAMP -> BIGINT 
and BIGINT -> TIMESTAMP).
  * <p>
  * This always works since Pinot's execution type for the TIMESTAMP SQL type 
is LONG (i.e., BIGINT). We add these
  * implicit casts for convenience since the single-stage engine already treats 
the two types as interchangeable and
- * many common user query patterns include things like TIMESTAMP + LONG or 
TIMESTAMP - LONG or TIMESTAMP > LONG. For
- * such operations, we're always adding the cast on the TIMESTAMP side, but 
users can choose to add explicit casts
- * instead, for performance reasons (adding cast on literal side instead of 
column side) and index applicability.
+ * many common user query patterns include things like TIMESTAMP + LONG or 
TIMESTAMP - LONG or TIMESTAMP > LONG.
+ * <p>
+ * For binary arithmetic, the cast is always added on the TIMESTAMP side so 
that the result is BIGINT (long) arithmetic
+ * (e.g., TIMESTAMP - LONG -> BIGINT). For binary comparisons, we prefer to 
add the cast on the operand that is not a
+ * column reference whenever possible. For example, given a TIMESTAMP column 
{@code ts} and a BIGINT literal {@code L},
+ * {@code ts > L} is rewritten as {@code ts > CAST(L AS TIMESTAMP)} rather 
than {@code CAST(ts AS BIGINT) > L}. This
+ * avoids wrapping the column in a per-row CAST (which is expensive on the 
query path and breaks index applicability)
+ * while remaining semantically equivalent. If both sides are column 
references (or neither is), we fall back to
+ * casting the TIMESTAMP side to BIGINT to preserve the long-standing default 
behavior.
  */
 public class PinotTypeCoercion extends TypeCoercionImpl {
   public PinotTypeCoercion(RelDataTypeFactory typeFactory,
@@ -77,17 +85,35 @@ public class PinotTypeCoercion extends TypeCoercionImpl {
       final RelDataType type1 = binding.getOperandType(0);
       final RelDataType type2 = binding.getOperandType(1);
 
-      if (SqlTypeUtil.isTimestamp(type1) && SqlTypeUtil.isIntType(type2)) {
-        return coerceOperandType(binding.getScope(), binding.getCall(), 0, 
factory.createSqlType(SqlTypeName.BIGINT));
-      }
-      if (SqlTypeUtil.isIntType(type1) && SqlTypeUtil.isTimestamp(type2)) {
-        return coerceOperandType(binding.getScope(), binding.getCall(), 1, 
factory.createSqlType(SqlTypeName.BIGINT));
+      boolean firstIsTimestamp = SqlTypeUtil.isTimestamp(type1);
+      boolean secondIsTimestamp = SqlTypeUtil.isTimestamp(type2);
+      boolean firstIsInt = SqlTypeUtil.isIntType(type1);
+      boolean secondIsInt = SqlTypeUtil.isIntType(type2);
+
+      if ((firstIsTimestamp && secondIsInt) || (firstIsInt && 
secondIsTimestamp)) {
+        int timestampIdx = firstIsTimestamp ? 0 : 1;
+        int intIdx = firstIsTimestamp ? 1 : 0;
+        // Prefer casting the non-column-reference operand to keep the column 
unwrapped.
+        // When the TIMESTAMP operand is a column and the BIGINT operand is 
not, cast the BIGINT operand to TIMESTAMP
+        // rather than wrapping the column in a per-row CAST. This is 
semantically equivalent because Pinot's
+        // execution type for TIMESTAMP is LONG.
+        if (isColumnReference(binding.operand(timestampIdx))
+            && !isColumnReference(binding.operand(intIdx))) {
+          return coerceOperandType(binding.getScope(), binding.getCall(), 
intIdx,
+              factory.createSqlType(SqlTypeName.TIMESTAMP));
+        }
+        return coerceOperandType(binding.getScope(), binding.getCall(), 
timestampIdx,
+            factory.createSqlType(SqlTypeName.BIGINT));
       }
     }
 
     return super.binaryComparisonCoercion(binding);
   }
 
+  private static boolean isColumnReference(SqlNode node) {
+    return node instanceof SqlIdentifier && !((SqlIdentifier) node).isStar();
+  }
+
   @Override
   public RelDataType implicitCast(RelDataType in, SqlTypeFamily expected) {
     if (in.getSqlTypeName() == SqlTypeName.TIMESTAMP && expected == 
SqlTypeFamily.NUMERIC) {
diff --git 
a/pinot-query-planner/src/test/java/org/apache/pinot/query/validate/PinotTypeCoercionTest.java
 
b/pinot-query-planner/src/test/java/org/apache/pinot/query/validate/PinotTypeCoercionTest.java
new file mode 100644
index 00000000000..89285a1a0bc
--- /dev/null
+++ 
b/pinot-query-planner/src/test/java/org/apache/pinot/query/validate/PinotTypeCoercionTest.java
@@ -0,0 +1,193 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.query.validate;
+
+import org.apache.pinot.query.QueryEnvironment;
+import org.apache.pinot.query.QueryEnvironmentTestBase;
+import org.testng.annotations.Test;
+
+import static org.testng.Assert.assertEquals;
+import static org.testng.Assert.assertFalse;
+import static org.testng.Assert.assertTrue;
+
+
+/**
+ * Tests for {@link PinotTypeCoercion} — in particular, the rule that prefers 
casting the non-column-reference operand
+ * in TIMESTAMP-vs-BIGINT binary comparisons. This avoids wrapping a column in 
a per-row CAST when the other side is a
+ * literal or constant subexpression, which is both faster on the query path 
and preserves index applicability.
+ */
+public class PinotTypeCoercionTest extends QueryEnvironmentTestBase {
+
+  // 1746022135000 ms == 2025-04-30 14:08:55 UTC (used for deterministic 
literal-vs-TIMESTAMP comparisons).
+  private static final long TS_LITERAL_MS = 1746022135000L;
+  private static final String TS_LITERAL_RENDERED = "2025-04-30 14:08:55";
+  // RelNode digest uses positional ordinals ($N). Column ordinals depend on 
the test schema in
+  // QueryEnvironmentTestBase#getSchemaBuilder, so pin down the ones we assert 
on. If the schema
+  // changes, this constant fails loudly with a clear message rather than 
letting cascade failures
+  // make the assertions inscrutable.
+  private static final String TS_TIMESTAMP_ORD = "$8";
+  private static final String COL7_ORD = "$6";
+
+  private String explain(String query) {
+    try (QueryEnvironment.CompiledQuery compiled = 
_queryEnvironment.compile("EXPLAIN PLAN FOR " + query)) {
+      return compiled.explain(RANDOM_REQUEST_ID_GEN.nextLong(), 
null).getExplainPlan();
+    }
+  }
+
+  /**
+   * Sanity: the test schema ordinals we hard-code below match the actual 
columns. If
+   * {@code QueryEnvironmentTestBase#getSchemaBuilder} changes column order, 
this test fails first
+   * with a clear message instead of cascading failures across the suite.
+   */
+  @Test
+  public void testSchemaOrdinalsAreStable() {
+    String plan = explain("SELECT ts_timestamp, col7 FROM a");
+    assertTrue(plan.contains("ts_timestamp=[" + TS_TIMESTAMP_ORD + "]"),
+        "Expected ts_timestamp to project from " + TS_TIMESTAMP_ORD + ". 
Got:\n" + plan);
+    assertTrue(plan.contains("col7=[" + COL7_ORD + "]"),
+        "Expected col7 to project from " + COL7_ORD + ". Got:\n" + plan);
+  }
+
+  /**
+   * When a TIMESTAMP column is compared to a BIGINT literal, the cast must 
land on the literal side so that constant
+   * folding produces a TIMESTAMP literal and the column is not wrapped in a 
per-row CAST.
+   */
+  @Test
+  public void testTimestampColumnVsBigintLiteralKeepsColumnUnwrapped() {
+    // Cover all binary comparison operators in both orientations. Calcite 
renders != as <>.
+    String[] sqlOps = {">", "<", ">=", "<=", "=", "!="};
+    for (String op : sqlOps) {
+      String rendered = "!=".equals(op) ? "<>" : op;
+
+      // Column on the left.
+      String q1 = "SELECT ts_timestamp FROM a WHERE ts_timestamp " + op + " " 
+ TS_LITERAL_MS;
+      String plan1 = explain(q1);
+      assertTrue(plan1.contains(rendered + "(" + TS_TIMESTAMP_ORD + ", " + 
TS_LITERAL_RENDERED + ")"),
+          "Expected column to remain unwrapped and literal cast to TIMESTAMP 
for query: " + q1 + "\nGot:\n" + plan1);
+      assertFalse(plan1.contains("CAST(" + TS_TIMESTAMP_ORD + ")"),
+          "TIMESTAMP column should not be wrapped in CAST for query: " + q1 + 
"\nGot:\n" + plan1);
+
+      // Column on the right.
+      String q2 = "SELECT ts_timestamp FROM a WHERE " + TS_LITERAL_MS + " " + 
op + " ts_timestamp";
+      String plan2 = explain(q2);
+      assertTrue(plan2.contains(rendered + "(" + TS_LITERAL_RENDERED + ", " + 
TS_TIMESTAMP_ORD + ")"),
+          "Expected literal cast to TIMESTAMP and column unwrapped for query: 
" + q2 + "\nGot:\n" + plan2);
+      assertFalse(plan2.contains("CAST(" + TS_TIMESTAMP_ORD + ")"),
+          "TIMESTAMP column should not be wrapped in CAST for query: " + q2 + 
"\nGot:\n" + plan2);
+    }
+  }
+
+  /**
+   * Symmetric case: when a BIGINT column is compared to a TIMESTAMP literal, 
the cast must land on the literal side
+   * so that constant folding produces a BIGINT literal and the column is not 
wrapped in a per-row CAST.
+   */
+  @Test
+  public void testBigintColumnVsTimestampLiteralKeepsColumnUnwrapped() {
+    String plan =
+        explain("SELECT col7 FROM a WHERE col7 < TIMESTAMP '" + 
TS_LITERAL_RENDERED + "'");
+    assertFalse(plan.contains("CAST(" + COL7_ORD + ")"),
+        "BIGINT column should not be wrapped in CAST. Got:\n" + plan);
+    assertTrue(plan.contains("<(" + COL7_ORD + ", " + TS_LITERAL_MS + ")"),
+        "Expected TIMESTAMP literal folded to BIGINT and column unwrapped. 
Got:\n" + plan);
+  }
+
+  /**
+   * When a TIMESTAMP column is compared to a non-column TIMESTAMP-typed 
expression (e.g. {@code NOW() - 1000}, which
+   * after binary-arithmetic coercion is BIGINT-typed and then folded back to 
TIMESTAMP for the comparison), the
+   * resulting plan must keep the column unwrapped and constant-fold the 
right-hand side to a TIMESTAMP literal.
+   */
+  @Test
+  public void testTimestampColumnVsConstantSubexpressionKeepsColumnUnwrapped() 
{
+    // NOW() - 1000 is a constant for the lifetime of the query: arithmetic 
coercion casts NOW() to BIGINT, the
+    // subtraction is BIGINT - INT = BIGINT, and our comparison rule then 
casts that BIGINT result to TIMESTAMP.
+    // After constant folding, the right-hand side is rendered as a single 
TIMESTAMP literal.
+    String plan = explain("SELECT ts_timestamp FROM a WHERE ts_timestamp > 
NOW() - 1000");
+    assertFalse(plan.contains("CAST(" + TS_TIMESTAMP_ORD + ")"),
+        "TIMESTAMP column should not be wrapped in CAST when right-hand side 
is constant. Got:\n" + plan);
+    assertTrue(plan.matches("(?s).*>\\(\\" + TS_TIMESTAMP_ORD + ", 
\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2}\\).*"),
+        "Right-hand side should be constant-folded to a TIMESTAMP literal. 
Got:\n" + plan);
+  }
+
+  /**
+   * When a BIGINT column is compared to a non-column TIMESTAMP-typed 
expression (e.g. {@code NOW()}), the existing
+   * behavior of casting the TIMESTAMP side to BIGINT must be preserved: the 
BIGINT column stays unwrapped, and the
+   * TIMESTAMP function is folded to a BIGINT literal.
+   */
+  @Test
+  public void testBigintColumnVsTimestampFunctionKeepsColumnUnwrapped() {
+    String plan = explain("SELECT col7 FROM a WHERE col7 < NOW()");
+    assertFalse(plan.contains("CAST(" + COL7_ORD + ")"),
+        "BIGINT column should not be wrapped in CAST. Got:\n" + plan);
+    assertTrue(plan.matches("(?s).*<\\(\\" + COL7_ORD + ", \\d+\\).*"),
+        "Right-hand side should be folded to a BIGINT literal. Got:\n" + plan);
+  }
+
+  /**
+   * When both operands are column references (TIMESTAMP vs BIGINT), neither 
side wins the "non-column" tie-breaker, so
+   * we fall back to the long-standing default of casting the TIMESTAMP side 
to BIGINT.
+   */
+  @Test
+  public void testBothColumnReferencesFallsBackToCastTimestampToBigint() {
+    String plan = explain("SELECT ts_timestamp FROM a WHERE ts_timestamp > 
col7");
+    assertTrue(plan.contains(">(CAST(" + TS_TIMESTAMP_ORD + "):BIGINT"),
+        "Expected fallback to cast TIMESTAMP column to BIGINT when both 
operands are columns. Got:\n" + plan);
+  }
+
+  /**
+   * TIMESTAMP-column vs TIMESTAMP-function shouldn't trigger any CAST — both 
sides are already TIMESTAMP, so the
+   * coercion rule should not fire and Calcite should compare directly.
+   */
+  @Test
+  public void testTimestampColumnVsTimestampFunctionHasNoCast() {
+    String plan = explain("SELECT ts_timestamp FROM a WHERE ts_timestamp > 
NOW()");
+    assertFalse(plan.contains("CAST(" + TS_TIMESTAMP_ORD + ")"),
+        "TIMESTAMP column should not be wrapped in CAST when compared to a 
TIMESTAMP function. Got:\n" + plan);
+    assertEquals(plan.lines().filter(line -> line.contains("CAST(")).count(), 
0L,
+        "No CAST should appear in the plan. Got:\n" + plan);
+  }
+
+  /**
+   * Regression for the {@code ago()}-style use case: {@code __time > 
ago('PT5M')} should keep the column unwrapped.
+   * {@code ago(String)} is a scalar function that returns {@code long} 
(rendered as BIGINT in SQL), so the comparison
+   * is TIMESTAMP-vs-BIGINT and the new rule applies. Before this rule, the 
column was wrapped in {@code CAST(.. AS
+   * BIGINT)} per row, which made the query significantly slower than the 
workaround {@code __time > concat(ago(...),
+   * '')} that happened to push the cast onto the literal side via the VARCHAR 
coercion path.
+   */
+  @Test
+  public void testTimestampColumnVsAgoFunctionKeepsColumnUnwrapped() {
+    String plan = explain("SELECT ts_timestamp FROM a WHERE ts_timestamp > 
ago('PT5M')");
+    assertFalse(plan.contains("CAST(" + TS_TIMESTAMP_ORD + ")"),
+        "TIMESTAMP column should not be wrapped in CAST when compared to 
ago(...). Got:\n" + plan);
+    assertTrue(plan.matches("(?s).*>\\(\\" + TS_TIMESTAMP_ORD + ", 
\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2}\\).*"),
+        "Right-hand side should be constant-folded to a TIMESTAMP literal. 
Got:\n" + plan);
+  }
+
+  /**
+   * Binary-arithmetic coercion (TIMESTAMP +/- BIGINT) is unchanged: the 
result must be BIGINT (long arithmetic), so the
+   * TIMESTAMP side is always cast to BIGINT regardless of which side is a 
column.
+   */
+  @Test
+  public void testBinaryArithmeticStillCastsTimestampToBigint() {
+    String plan = explain("SELECT ts_timestamp FROM a WHERE ts_timestamp + 
1000 > 0");
+    // Column ts_timestamp is wrapped in CAST(..):BIGINT for the arithmetic, 
then the result is compared to the BIGINT
+    // literal. We do not change arithmetic coercion here.
+    assertTrue(plan.contains("CAST(" + TS_TIMESTAMP_ORD + "):BIGINT"),
+        "TIMESTAMP column must be cast to BIGINT for binary arithmetic. 
Got:\n" + plan);
+  }
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to