This is an automated email from the ASF dual-hosted git repository.
gortiz pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git
The following commit(s) were added to refs/heads/master by this push:
new a04a748a738 Fix QueryFingerprintVisitor mutating the input SqlNode
(#18603)
a04a748a738 is described below
commit a04a748a738be54bd33c5200f32830532bc822b0
Author: Yash Mayya <[email protected]>
AuthorDate: Thu May 28 04:26:45 2026 -0700
Fix QueryFingerprintVisitor mutating the input SqlNode (#18603)
---
.../utils/request/QueryFingerprintVisitor.java | 157 +++++++++++----------
.../utils/request/QueryFingerprintUtilsTest.java | 92 ++++++++++++
2 files changed, 175 insertions(+), 74 deletions(-)
diff --git
a/pinot-common/src/main/java/org/apache/pinot/common/utils/request/QueryFingerprintVisitor.java
b/pinot-common/src/main/java/org/apache/pinot/common/utils/request/QueryFingerprintVisitor.java
index 607829dc868..807d1adead9 100644
---
a/pinot-common/src/main/java/org/apache/pinot/common/utils/request/QueryFingerprintVisitor.java
+++
b/pinot-common/src/main/java/org/apache/pinot/common/utils/request/QueryFingerprintVisitor.java
@@ -53,6 +53,10 @@ import org.apache.calcite.sql.util.SqlShuttle;
*
* <p><b>Note:</b> This visitor maintains internal state (dynamic parameter
index) that is not reset between visits.
* A new instance should be created for each query fingerprint.</p>
+ *
+ * <p>The visitor honors {@link SqlShuttle}'s non-mutating contract: the input
{@link SqlNode} tree is left
+ * unchanged and a new tree is returned. This matters because callers (the
broker) compile the same SqlNode
+ * to a PinotQuery after computing the fingerprint.</p>
*/
public class QueryFingerprintVisitor extends SqlShuttle {
// SqlSelect operand index for hints, see {@link
org.apache.calcite.sql.SqlSelect}.
@@ -117,12 +121,7 @@ public class QueryFingerprintVisitor extends SqlShuttle {
result = visitOrderBy((SqlOrderBy) call);
break;
case OVER:
- // Window functions: only visit the aggregate function (operand 0).
- // Skip the window specification (operand 1) due to its complex
structure
- // with ORDER BY and frame clauses. This means literals in PARTITION
BY,
- // ORDER BY, and window frames are preserved rather than replaced.
- call.setOperand(0, call.getOperandList().get(0).accept(this));
- result = call;
+ result = visitOver(call);
break;
case IN:
case NOT_IN:
@@ -134,98 +133,114 @@ public class QueryFingerprintVisitor extends SqlShuttle {
return result;
}
+ /**
+ * Rebuilds {@code original} with the supplied operands, mirroring the
pattern used by Calcite's
+ * {@link SqlShuttle.CallCopyingArgHandler#result()}. This is what allows
the visitor to honor the
+ * non-mutating {@link SqlShuttle} contract.
+ */
+ private static SqlNode rebuild(SqlCall original, SqlNode... newOperands) {
+ return original.getOperator().createCall(
+ original.getFunctionQuantifier(), original.getParserPosition(),
newOperands);
+ }
+
@Nullable
private SqlNode visitCase(SqlCase sqlCase) {
- List<SqlNode> newOperands = new ArrayList<>();
- for (SqlNode child : sqlCase.getOperandList()) {
- if (child == null) {
- newOperands.add(null);
- continue;
- }
- newOperands.add(child.accept(this));
- }
- int i = 0;
- for (SqlNode operand : newOperands) {
- sqlCase.setOperand(i++, operand);
+ List<SqlNode> operands = sqlCase.getOperandList();
+ SqlNode[] newOperands = new SqlNode[operands.size()];
+ for (int i = 0; i < operands.size(); i++) {
+ SqlNode child = operands.get(i);
+ newOperands[i] = child != null ? child.accept(this) : null;
}
- return sqlCase;
+ return rebuild(sqlCase, newOperands);
}
@Nullable
private SqlNode visitSelect(SqlSelect select) {
- List<SqlNode> newOperands = new ArrayList<>();
- for (SqlNode child : select.getOperandList()) {
- newOperands.add(child != null ? child.accept(this) : null);
- }
- int i = 0;
- for (SqlNode operand : newOperands) {
- // Preserve hints.
+ List<SqlNode> operands = select.getOperandList();
+ SqlNode[] newOperands = new SqlNode[operands.size()];
+ for (int i = 0; i < operands.size(); i++) {
+ SqlNode child = operands.get(i);
+ // Preserve hints (their literals are structural metadata, not data).
if (i == SQLSELECT_HINTS_OPERAND_INDEX) {
- break;
+ newOperands[i] = child;
+ continue;
}
- select.setOperand(i++, operand);
+ newOperands[i] = child != null ? child.accept(this) : null;
}
- return select;
+ return rebuild(select, newOperands);
}
@Nullable
private SqlNode visitJoin(SqlJoin join) {
- List<SqlNode> newOperands = new ArrayList<>();
- for (SqlNode child : join.getOperandList()) {
- newOperands.add(child != null ? child.accept(this) : null);
- }
- int i = 0;
- for (SqlNode operand : newOperands) {
+ List<SqlNode> operands = join.getOperandList();
+ SqlNode[] newOperands = new SqlNode[operands.size()];
+ for (int i = 0; i < operands.size(); i++) {
+ SqlNode child = operands.get(i);
// Preserve join metadata literals:
- // natural (true/false), joinType (INNER/LEFT/RIGHT/FULL) and
conditionType (ON/USING)
+ // natural (true/false), joinType (INNER/LEFT/RIGHT/FULL) and
conditionType (ON/USING).
// These are structural keywords, not data literals.
if (i == SQLJOIN_NATURAL_OPERAND_INDEX
|| i == SQLJOIN_JOIN_TYPE_OPERAND_INDEX
|| i == SQLJOIN_CONDITION_TYPE_OPERAND_INDEX) {
- i++;
+ newOperands[i] = child;
continue;
}
- join.setOperand(i++, operand);
+ newOperands[i] = child != null ? child.accept(this) : null;
}
- return join;
+ return rebuild(join, newOperands);
}
@Nullable
private SqlNode visitWith(SqlWith with) {
List<SqlNode> newList = new ArrayList<>();
for (SqlNode node : with.withList.getList()) {
- newList.add(node.accept(this));
+ newList.add(node != null ? node.accept(this) : null);
}
SqlNode newBody = with.body.accept(this);
- with.setOperand(0, new SqlNodeList(newList,
with.withList.getParserPosition()));
- with.setOperand(1, newBody);
- return with;
+ return rebuild(with, new SqlNodeList(newList,
with.withList.getParserPosition()), newBody);
}
/**
- * SqlWithItem has four fields: SqlIdentifier name, SqlNodeList
- * columnList, SqlNode query, and SqlLiteral recursive.
- * We will visit only the columnList and query since:
- * - name has already been visited in the SqlWith visit method.
- * - recursive is a literal which is a property of the WITH item and not the
query itself.
+ * SqlWithItem always has four operands: name, columnList, query, recursive.
We only rewrite
+ * columnList and query; name and recursive are preserved as-is.
*/
@Nullable
private SqlNode visitWithItem(SqlWithItem withItem) {
- if (withItem.columnList != null) {
- for (int i = 0; i < withItem.columnList.size(); i++) {
- SqlNode column = withItem.columnList.get(i);
- if (column != null) {
- withItem.columnList.set(i, column.accept(this));
- }
+ List<SqlNode> operands = withItem.getOperandList();
+ SqlNode[] newOperands = new SqlNode[operands.size()];
+ // operand 0: name — preserved as-is
+ newOperands[0] = operands.get(0);
+ // operand 1: columnList
+ SqlNode columnListOperand = operands.get(1);
+ if (columnListOperand instanceof SqlNodeList) {
+ SqlNodeList columnList = (SqlNodeList) columnListOperand;
+ List<SqlNode> newColumns = new ArrayList<>(columnList.size());
+ for (SqlNode column : columnList.getList()) {
+ newColumns.add(column != null ? column.accept(this) : null);
}
+ newOperands[1] = new SqlNodeList(newColumns,
columnList.getParserPosition());
+ } else {
+ newOperands[1] = columnListOperand;
}
+ // operand 2: query
+ SqlNode queryOperand = operands.get(2);
+ newOperands[2] = queryOperand != null ? queryOperand.accept(this) : null;
+ // operand 3: recursive — preserved as-is
+ newOperands[3] = operands.get(3);
+ return rebuild(withItem, newOperands);
+ }
- if (withItem.query != null) {
- SqlNode newQuery = withItem.query.accept(this);
- withItem.query = newQuery;
- }
-
- return withItem;
+ /**
+ * Window functions: only visit the aggregate function (operand 0). Skip the
window specification
+ * (operand 1) due to its complex structure with ORDER BY and frame clauses.
Literals in PARTITION BY,
+ * ORDER BY, and window frames are preserved rather than replaced.
+ */
+ @Nullable
+ private SqlNode visitOver(SqlCall call) {
+ List<SqlNode> operands = call.getOperandList();
+ SqlNode aggFunc = operands.get(0);
+ SqlNode newAggFunc = aggFunc != null ? aggFunc.accept(this) : null;
+ return rebuild(call, newAggFunc, operands.get(1));
}
@Nullable
@@ -300,31 +315,25 @@ public class QueryFingerprintVisitor extends SqlShuttle {
if (allDataLiterals && valueList.size() > 0) {
SqlDynamicParam singleParam = new
SqlDynamicParam(_dynamicParamIndex++, inCall.getParserPosition());
SqlNodeList newValueList = new SqlNodeList(List.of(singleParam),
valueList.getParserPosition());
- inCall.setOperand(0, leftOperand);
- inCall.setOperand(1, newValueList);
- return inCall;
+ return rebuild(inCall, leftOperand, newValueList);
}
// Otherwise, visit each value in the list normally
List<SqlNode> newValues = new ArrayList<>();
for (SqlNode value : valueList.getList()) {
- newValues.add(value.accept(this));
+ newValues.add(value != null ? value.accept(this) : null);
}
- inCall.setOperand(0, leftOperand);
- inCall.setOperand(1, new SqlNodeList(newValues,
valueList.getParserPosition()));
- return inCall;
+ return rebuild(inCall, leftOperand, new SqlNodeList(newValues,
valueList.getParserPosition()));
}
// Fallback: for subqueries or other non-SqlNodeList cases, visit all
operands normally
- List<SqlNode> newOperands = new ArrayList<>();
- for (SqlNode operand : operands) {
- newOperands.add(operand.accept(this));
- }
- int i = 0;
- for (SqlNode operand : newOperands) {
- inCall.setOperand(i++, operand);
+ SqlNode[] newOperands = new SqlNode[operands.size()];
+ newOperands[0] = leftOperand;
+ for (int i = 1; i < operands.size(); i++) {
+ SqlNode op = operands.get(i);
+ newOperands[i] = op != null ? op.accept(this) : null;
}
- return inCall;
+ return rebuild(inCall, newOperands);
}
/**
diff --git
a/pinot-common/src/test/java/org/apache/pinot/common/utils/request/QueryFingerprintUtilsTest.java
b/pinot-common/src/test/java/org/apache/pinot/common/utils/request/QueryFingerprintUtilsTest.java
index 979bc8d9139..7f0203abad1 100644
---
a/pinot-common/src/test/java/org/apache/pinot/common/utils/request/QueryFingerprintUtilsTest.java
+++
b/pinot-common/src/test/java/org/apache/pinot/common/utils/request/QueryFingerprintUtilsTest.java
@@ -18,6 +18,9 @@
*/
package org.apache.pinot.common.utils.request;
+import org.apache.calcite.sql.SqlNode;
+import org.apache.calcite.sql.dialect.AnsiSqlDialect;
+import org.apache.pinot.common.request.PinotQuery;
import org.apache.pinot.spi.trace.QueryFingerprint;
import org.apache.pinot.sql.parsers.CalciteSqlParser;
import org.apache.pinot.sql.parsers.SqlNodeAndOptions;
@@ -282,6 +285,95 @@ public class QueryFingerprintUtilsTest {
"Fingerprint should not contain newlines");
}
+ /**
+ * Data provider for queries whose original SqlNode tree must not be mutated
by fingerprint generation.
+ * The broker compiles the same SqlNode to a PinotQuery after computing the
fingerprint, so any in-place
+ * mutation (e.g. literal -> SqlDynamicParam) breaks query execution
downstream.
+ */
+ @DataProvider(name = "noMutationQueries")
+ public Object[][] provideNoMutationQueries() {
+ return new Object[][]{
+ // simple SELECT with WHERE literal
+ {"SELECT col1 FROM table1 WHERE col2 = 100"},
+ // IN list
+ {"SELECT col1 FROM table1 WHERE col2 IN (1, 2, 3)"},
+ // NOT IN list
+ {"SELECT col1 FROM table1 WHERE col2 NOT IN (1, 2, 3)"},
+ // IN with subquery (exercises the visitIn fallback branch)
+ {"SELECT col1 FROM table1 WHERE col2 IN (SELECT col2 FROM table2 WHERE
col3 = 100)"},
+ // JOIN with literal in ON / WHERE
+ {"SELECT t1.col1, t2.col2 FROM table1 t1 JOIN table2 t2 ON t1.id =
t2.id WHERE t1.col3 > 100"},
+ // BETWEEN
+ {"SELECT col1 FROM table1 WHERE col2 BETWEEN 10 AND 100"},
+ // CASE expression
+ {"SELECT CASE WHEN col1 > 100 THEN 'high' ELSE 'low' END FROM table1"},
+ // CTE
+ {"WITH cte AS (SELECT col1 FROM table1 WHERE col2 = 100) SELECT * FROM
cte WHERE col1 > 50"},
+ // Window function with literal in aggregate
+ {"SELECT SUM(amount + 10) OVER (PARTITION BY category) FROM sales
WHERE date_col = '2024-01-01'"},
+ // ORDER BY + LIMIT (top-level SqlOrderBy wraps SqlSelect)
+ {"SELECT col1 FROM table1 WHERE col2 = 100 ORDER BY col1 LIMIT 5"},
+ };
+ }
+
+ /**
+ * Regression: {@link
QueryFingerprintUtils#generateFingerprint(SqlNodeAndOptions)} must not mutate
the
+ * input {@link SqlNodeAndOptions#getSqlNode()}. The broker passes the same
SqlNode to the compiler after
+ * computing the fingerprint, so any in-place replacement of literals with
dynamic params would poison
+ * the parse tree and break downstream query compilation.
+ */
+ @Test(dataProvider = "noMutationQueries")
+ public void testFingerprintDoesNotMutateInputSqlNode(String query) throws
Exception {
+ SqlNodeAndOptions sqlNodeAndOptions =
CalciteSqlParser.compileToSqlNodeAndOptions(query);
+ SqlNode originalSqlNode = sqlNodeAndOptions.getSqlNode();
+ String beforeFingerprint = originalSqlNode.toSqlString(c ->
c.withDialect(AnsiSqlDialect.DEFAULT)).getSql();
+
+ QueryFingerprint fingerprint =
QueryFingerprintUtils.generateFingerprint(sqlNodeAndOptions);
+ Assert.assertNotNull(fingerprint, "Fingerprint should be generated for
query: " + query);
+
+ // Same object reference — no replacement was expected, just no mutation.
+ Assert.assertSame(sqlNodeAndOptions.getSqlNode(), originalSqlNode,
+ "SqlNodeAndOptions.getSqlNode() reference should be unchanged");
+
+ String afterFingerprint = originalSqlNode.toSqlString(c ->
c.withDialect(AnsiSqlDialect.DEFAULT)).getSql();
+ Assert.assertEquals(afterFingerprint, beforeFingerprint,
+ "Original SqlNode must not be mutated by generateFingerprint. Query: "
+ query);
+ Assert.assertFalse(afterFingerprint.contains("?"),
+ "Original SqlNode should not contain dynamic params after
fingerprinting. Query: " + query);
+ }
+
+ /**
+ * Data provider for single-stage queries that the broker's SSE path must be
able to compile to a
+ * PinotQuery after fingerprint generation. JOIN/CTE/OVER are excluded here
because they are MSE-only
+ * constructs in {@link CalciteSqlParser#compileToPinotQuery}.
+ */
+ @DataProvider(name = "noMutationSseQueries")
+ public Object[][] provideNoMutationSseQueries() {
+ return new Object[][]{
+ {"SELECT col1 FROM table1 WHERE col2 = 100"},
+ {"SELECT col1 FROM table1 WHERE col2 IN (1, 2, 3)"},
+ {"SELECT col1 FROM table1 WHERE col2 NOT IN (1, 2, 3)"},
+ {"SELECT col1 FROM table1 WHERE col2 BETWEEN 10 AND 100"},
+ {"SELECT col1 FROM table1 WHERE col2 = 100 ORDER BY col1 LIMIT 5"},
+ {"SELECT col1, COUNT(*) FROM table1 WHERE col2 > 100 GROUP BY col1
HAVING COUNT(*) > 10"},
+ {"SELECT CASE WHEN col1 > 100 THEN 'high' ELSE 'low' END FROM table1"},
+ };
+ }
+
+ /**
+ * End-to-end regression for the SSE broker flow: generate a fingerprint,
then compile the same
+ * SqlNodeAndOptions to a PinotQuery. With the visitor mutating literals in
place this used to fail
+ * because compileToPinotQuery saw SqlDynamicParam nodes where literals were
expected.
+ */
+ @Test(dataProvider = "noMutationSseQueries")
+ public void testFingerprintThenSseCompileSucceeds(String query) throws
Exception {
+ SqlNodeAndOptions sqlNodeAndOptions =
CalciteSqlParser.compileToSqlNodeAndOptions(query);
+ QueryFingerprint fingerprint =
QueryFingerprintUtils.generateFingerprint(sqlNodeAndOptions);
+ Assert.assertNotNull(fingerprint, "Fingerprint should be generated for
query: " + query);
+ PinotQuery pinotQuery =
CalciteSqlParser.compileToPinotQuery(sqlNodeAndOptions);
+ Assert.assertNotNull(pinotQuery, "PinotQuery compilation should succeed
after fingerprint. Query: " + query);
+ }
+
@Test
public void testComplexMultiStageQuery() throws Exception {
// Test a complex multi-stage query with JOINs, subqueries, and
aggregations
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]