This is an automated email from the ASF dual-hosted git repository.
yashmayya pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git
The following commit(s) were added to refs/heads/master by this push:
new 1515f49c4c Fix semi-join / pipeline breaker on BYTES and BIG_DECIMAL (#15531)
1515f49c4c is described below
commit 1515f49c4cc5c761e07bc7c4bf914173ae3e018e
Author: Yash Mayya <[email protected]>
AuthorDate: Mon Apr 14 15:12:05 2025 +0100
Fix semi-join / pipeline breaker on BYTES and BIG_DECIMAL (#15531)
---
.../plan/server/ServerPlanRequestUtils.java | 24 ++++++-
.../src/test/resources/queries/SemiJoins.json | 75 ++++++++++++++++++++++
2 files changed, 98 insertions(+), 1 deletion(-)
diff --git a/pinot-query-runtime/src/main/java/org/apache/pinot/query/runtime/plan/server/ServerPlanRequestUtils.java b/pinot-query-runtime/src/main/java/org/apache/pinot/query/runtime/plan/server/ServerPlanRequestUtils.java
index 7e6f925e30..adcae1f30b 100644
--- a/pinot-query-runtime/src/main/java/org/apache/pinot/query/runtime/plan/server/ServerPlanRequestUtils.java
+++ b/pinot-query-runtime/src/main/java/org/apache/pinot/query/runtime/plan/server/ServerPlanRequestUtils.java
@@ -20,6 +20,7 @@ package org.apache.pinot.query.runtime.plan.server;
import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableList;
+import java.math.BigDecimal;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
@@ -54,6 +55,7 @@ import org.apache.pinot.segment.local.segment.index.loader.IndexLoadingConfig;
import org.apache.pinot.spi.config.table.TableType;
import org.apache.pinot.spi.data.FieldSpec;
import org.apache.pinot.spi.query.QueryThreadContext;
+import org.apache.pinot.spi.utils.ByteArray;
import org.apache.pinot.spi.utils.CommonConstants;
import org.apache.pinot.spi.utils.builder.TableNameBuilder;
import org.apache.pinot.sql.FilterKind;
@@ -357,8 +359,28 @@ public class ServerPlanRequestUtils {
expressions.add(RequestUtils.getLiteralExpression(arrString[rowIdx]));
}
break;
+ case BIG_DECIMAL:
+ BigDecimal[] arrBigDecimal = new BigDecimal[numRows];
+ for (int rowIdx = 0; rowIdx < numRows; rowIdx++) {
+ arrBigDecimal[rowIdx] = (BigDecimal) dataContainer.get(rowIdx)[colIdx];
+ }
+ Arrays.sort(arrBigDecimal);
+ for (int rowIdx = 0; rowIdx < numRows; rowIdx++) {
+ expressions.add(RequestUtils.getLiteralExpression(arrBigDecimal[rowIdx]));
+ }
+ break;
+ case BYTES:
+ ByteArray[] arrBytes = new ByteArray[numRows];
+ for (int rowIdx = 0; rowIdx < numRows; rowIdx++) {
+ arrBytes[rowIdx] = (ByteArray) dataContainer.get(rowIdx)[colIdx];
+ }
+ Arrays.sort(arrBytes);
+ for (int rowIdx = 0; rowIdx < numRows; rowIdx++) {
+ expressions.add(RequestUtils.getLiteralExpression(arrBytes[rowIdx].getBytes()));
+ }
+ break;
default:
- throw new IllegalStateException("Illegal SV data type for ID_SET aggregation function: " + storedType);
+ throw new IllegalStateException("Illegal SV data type for IN filter: " + storedType);
}
return expressions;
}
diff --git a/pinot-query-runtime/src/test/resources/queries/SemiJoins.json b/pinot-query-runtime/src/test/resources/queries/SemiJoins.json
new file mode 100644
index 0000000000..4dc851aca9
--- /dev/null
+++ b/pinot-query-runtime/src/test/resources/queries/SemiJoins.json
@@ -0,0 +1,75 @@
+{
+ "semi_join_queries": {
+ "tables": {
+ "tbl": {
+ "schema": [
+ {"name": "int_col", "type": "INT"},
+ {"name": "long_col", "type": "LONG"},
+ {"name": "float_col", "type": "FLOAT"},
+ {"name": "double_col", "type": "DOUBLE"},
+ {"name": "string_col", "type": "STRING"},
+ {"name": "big_decimal_col", "type": "BIG_DECIMAL"},
+ {"name": "bytes_col", "type": "BYTES"}
+ ],
+ "inputs": [
+ [1, 1, 1.0, 1.0, "a", 1.0, "AA"],
+ [2, 2, 2.0, 2.0, "b", 2.0, "BB"],
+ [3, 3, 3.0, 3.0, "c", 3.0, "CC"],
+ [4, 4, 4.0, 4.0, "d", 4.0, "DD"],
+ [5, 5, 5.0, 5.0, "e", 5.0, "EE"]
+ ]
+ }
+ },
+ "queries": [
+ {
+ "description": "Semi join using IN clause with INT",
+ "sql": "SELECT int_col FROM {tbl} WHERE int_col IN (SELECT int_col FROM {tbl} WHERE string_col = 'c')",
+ "outputs": [
+ [3]
+ ]
+ },
+ {
+ "description": "Semi join using IN clause with LONG",
+ "sql": "SELECT long_col FROM {tbl} WHERE long_col IN (SELECT long_col FROM {tbl} WHERE string_col = 'c')",
+ "outputs": [
+ [3]
+ ]
+ },
+ {
+ "description": "Semi join using IN clause with FLOAT",
+ "sql": "SELECT float_col FROM {tbl} WHERE float_col IN (SELECT float_col FROM {tbl} WHERE string_col = 'c')",
+ "outputs": [
+ [3.0]
+ ]
+ },
+ {
+ "description": "Semi join using IN clause with DOUBLE",
+ "sql": "SELECT double_col FROM {tbl} WHERE double_col IN (SELECT double_col FROM {tbl} WHERE string_col = 'c')",
+ "outputs": [
+ [3.0]
+ ]
+ },
+ {
+ "description": "Semi join using IN clause with STRING",
+ "sql": "SELECT string_col FROM {tbl} WHERE string_col IN (SELECT string_col FROM {tbl} WHERE int_col = 3)",
+ "outputs": [
+ ["c"]
+ ]
+ },
+ {
+ "description": "Semi join using IN clause with BIG_DECIMAL",
+ "sql": "SELECT big_decimal_col FROM {tbl} WHERE big_decimal_col IN (SELECT big_decimal_col FROM {tbl} WHERE string_col = 'c')",
+ "outputs": [
+ ["3.0"]
+ ]
+ },
+ {
+ "description": "Semi join using IN clause with BYTES",
+ "sql": "SELECT bytes_col FROM {tbl} WHERE bytes_col IN (SELECT bytes_col FROM {tbl} WHERE string_col = 'c')",
+ "outputs": [
+ ["cc"]
+ ]
+ }
+ ]
+ }
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]