Copilot commented on code in PR #17659: URL: https://github.com/apache/pinot/pull/17659#discussion_r2893395869
########## pinot-core/src/main/java/org/apache/pinot/core/function/scalar/FilterMvScalarFunction.java: ########## @@ -0,0 +1,278 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.pinot.core.function.scalar; + +import com.google.common.cache.Cache; +import com.google.common.cache.CacheBuilder; +import java.util.EnumMap; +import java.util.Map; +import java.util.Set; +import javax.annotation.Nullable; +import org.apache.pinot.common.function.FunctionInfo; +import org.apache.pinot.common.function.PinotScalarFunction; +import org.apache.pinot.common.function.sql.PinotSqlFunction; +import org.apache.pinot.common.utils.DataSchema.ColumnDataType; +import org.apache.pinot.core.operator.transform.function.FilterMvPredicateEvaluator; +import org.apache.pinot.spi.annotations.ScalarFunction; +import org.apache.pinot.spi.data.FieldSpec.DataType; + + +/** + * Scalar wrapper for filterMv so FunctionRegistry can expose type signatures for query planning and execution paths + * that resolve scalar functions. + */ +@ScalarFunction(names = {"filterMv"}) +public class FilterMvScalarFunction implements PinotScalarFunction { + private static final int MAX_CACHED_EVALUATORS = 10_000; + private static final Map<ColumnDataType, FunctionInfo> TYPE_FUNCTION_INFO_MAP = + new EnumMap<>(ColumnDataType.class); + private static final Cache<CacheKey, FilterMvPredicateEvaluator> EVALUATOR_CACHE = + CacheBuilder.newBuilder().maximumSize(MAX_CACHED_EVALUATORS).build(); + + static { + try { + TYPE_FUNCTION_INFO_MAP.put(ColumnDataType.INT_ARRAY, + new FunctionInfo(FilterMvScalarFunction.class.getMethod("filterMv", int[].class, String.class), + FilterMvScalarFunction.class, false)); + TYPE_FUNCTION_INFO_MAP.put(ColumnDataType.LONG_ARRAY, + new FunctionInfo(FilterMvScalarFunction.class.getMethod("filterMv", long[].class, String.class), + FilterMvScalarFunction.class, false)); + TYPE_FUNCTION_INFO_MAP.put(ColumnDataType.FLOAT_ARRAY, + new FunctionInfo(FilterMvScalarFunction.class.getMethod("filterMv", float[].class, String.class), + FilterMvScalarFunction.class, false)); + TYPE_FUNCTION_INFO_MAP.put(ColumnDataType.DOUBLE_ARRAY, + new FunctionInfo(FilterMvScalarFunction.class.getMethod("filterMv", double[].class, String.class), + FilterMvScalarFunction.class, false)); + TYPE_FUNCTION_INFO_MAP.put(ColumnDataType.STRING_ARRAY, + new FunctionInfo(FilterMvScalarFunction.class.getMethod("filterMv", String[].class, String.class), + FilterMvScalarFunction.class, false)); + TYPE_FUNCTION_INFO_MAP.put(ColumnDataType.BYTES_ARRAY, + new FunctionInfo(FilterMvScalarFunction.class.getMethod("filterMv", byte[][].class, String.class), + FilterMvScalarFunction.class, false)); + } catch (NoSuchMethodException e) { + throw new RuntimeException(e); + } + } + + @Override + public String getName() { + return "filterMv"; + } + + @Override + public Set<String> getNames() { + return Set.of("filterMv"); + } + + @Nullable + @Override + public PinotSqlFunction toPinotSqlFunction() { + // Should already be registered in PinotOperatorTable by the transform function implementation + return null; + } + + @Nullable + @Override + public FunctionInfo getFunctionInfo(ColumnDataType[] argumentTypes) { + if (argumentTypes.length != 2) { + return null; + } + if (argumentTypes[1] != ColumnDataType.STRING) { + return null; + } + return TYPE_FUNCTION_INFO_MAP.get(argumentTypes[0].getStoredType()); + } Review Comment: `TYPE_FUNCTION_INFO_MAP.get(argumentTypes[0].getStoredType())` collapses logical aliases like `BOOLEAN_ARRAY`→`INT_ARRAY` and `TIMESTAMP_ARRAY`→`LONG_ARRAY`. In multistage execution (which runs scalar functions), this can route boolean/timestamp arrays to the INT/LONG implementations, causing predicates like `v = true` or timestamp literals to fail or behave incorrectly. Consider mapping on the logical `ColumnDataType` (explicit entries for `BOOLEAN_ARRAY`/`TIMESTAMP_ARRAY`) and using `DataType.BOOLEAN` / `DataType.TIMESTAMP` when building the `FilterMvPredicateEvaluator` while still consuming the stored array representation. ########## pinot-core/src/main/java/org/apache/pinot/core/function/scalar/FilterMvScalarFunction.java: ########## @@ -0,0 +1,278 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.pinot.core.function.scalar; + +import com.google.common.cache.Cache; +import com.google.common.cache.CacheBuilder; +import java.util.EnumMap; +import java.util.Map; +import java.util.Set; +import javax.annotation.Nullable; +import org.apache.pinot.common.function.FunctionInfo; +import org.apache.pinot.common.function.PinotScalarFunction; +import org.apache.pinot.common.function.sql.PinotSqlFunction; +import org.apache.pinot.common.utils.DataSchema.ColumnDataType; +import org.apache.pinot.core.operator.transform.function.FilterMvPredicateEvaluator; +import org.apache.pinot.spi.annotations.ScalarFunction; +import org.apache.pinot.spi.data.FieldSpec.DataType; + + +/** + * Scalar wrapper for filterMv so FunctionRegistry can expose type signatures for query planning and execution paths + * that resolve scalar functions. + */ +@ScalarFunction(names = {"filterMv"}) +public class FilterMvScalarFunction implements PinotScalarFunction { + private static final int MAX_CACHED_EVALUATORS = 10_000; + private static final Map<ColumnDataType, FunctionInfo> TYPE_FUNCTION_INFO_MAP = + new EnumMap<>(ColumnDataType.class); + private static final Cache<CacheKey, FilterMvPredicateEvaluator> EVALUATOR_CACHE = + CacheBuilder.newBuilder().maximumSize(MAX_CACHED_EVALUATORS).build(); + + static { + try { + TYPE_FUNCTION_INFO_MAP.put(ColumnDataType.INT_ARRAY, + new FunctionInfo(FilterMvScalarFunction.class.getMethod("filterMv", int[].class, String.class), + FilterMvScalarFunction.class, false)); + TYPE_FUNCTION_INFO_MAP.put(ColumnDataType.LONG_ARRAY, + new FunctionInfo(FilterMvScalarFunction.class.getMethod("filterMv", long[].class, String.class), + FilterMvScalarFunction.class, false)); + TYPE_FUNCTION_INFO_MAP.put(ColumnDataType.FLOAT_ARRAY, + new FunctionInfo(FilterMvScalarFunction.class.getMethod("filterMv", float[].class, String.class), + FilterMvScalarFunction.class, false)); + TYPE_FUNCTION_INFO_MAP.put(ColumnDataType.DOUBLE_ARRAY, + new FunctionInfo(FilterMvScalarFunction.class.getMethod("filterMv", double[].class, String.class), + FilterMvScalarFunction.class, false)); + TYPE_FUNCTION_INFO_MAP.put(ColumnDataType.STRING_ARRAY, + new FunctionInfo(FilterMvScalarFunction.class.getMethod("filterMv", String[].class, String.class), + FilterMvScalarFunction.class, false)); + TYPE_FUNCTION_INFO_MAP.put(ColumnDataType.BYTES_ARRAY, + new FunctionInfo(FilterMvScalarFunction.class.getMethod("filterMv", byte[][].class, String.class), + FilterMvScalarFunction.class, false)); + } catch (NoSuchMethodException e) { + throw new RuntimeException(e); + } + } + + @Override + public String getName() { + return "filterMv"; + } + + @Override + public Set<String> getNames() { + return Set.of("filterMv"); + } + + @Nullable + @Override + public PinotSqlFunction toPinotSqlFunction() { + // Should already be registered in PinotOperatorTable by the transform function implementation + return null; + } + + @Nullable + @Override + public FunctionInfo getFunctionInfo(ColumnDataType[] argumentTypes) { + if (argumentTypes.length != 2) { + return null; + } + if (argumentTypes[1] != ColumnDataType.STRING) { + return null; + } + return TYPE_FUNCTION_INFO_MAP.get(argumentTypes[0].getStoredType()); + } + + @Nullable + @Override + public FunctionInfo getFunctionInfo(int numArguments) { + if (numArguments != 2) { + return null; + } + // Fall back to string + return getFunctionInfo(new ColumnDataType[]{ColumnDataType.STRING_ARRAY, ColumnDataType.STRING}); + } + + public static int[] filterMv(int[] values, String predicate) { + FilterMvPredicateEvaluator evaluator = evaluatorFor(predicate, DataType.INT); + int numValues = values.length; Review Comment: The `filterMv(int[] ...)` overload always builds the evaluator with `DataType.INT`. If this overload is used to back `BOOLEAN_ARRAY` (stored as `INT_ARRAY`), predicates like `v = true/false` will be parsed with INT semantics and can fail at runtime. Add dedicated overload(s) for stored-alias arrays (e.g. boolean/timestamp) that pass the correct logical `DataType` into `evaluatorFor()`. ########## pinot-core/src/test/java/org/apache/pinot/core/operator/transform/function/FilterMvTransformFunctionTest.java: ########## @@ -0,0 +1,165 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.pinot.core.operator.transform.function; + +import it.unimi.dsi.fastutil.ints.IntArrayList; +import it.unimi.dsi.fastutil.ints.IntList; +import java.util.function.IntPredicate; +import org.apache.pinot.common.request.context.ExpressionContext; +import org.apache.pinot.common.request.context.RequestContextUtils; +import org.apache.pinot.core.operator.transform.TransformResultMetadata; +import org.apache.pinot.segment.spi.index.reader.Dictionary; +import org.apache.pinot.spi.data.FieldSpec.DataType; +import org.apache.pinot.spi.exception.BadQueryRequestException; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; + +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertFalse; +import static org.testng.Assert.assertTrue; + + +public class FilterMvTransformFunctionTest extends BaseTransformFunctionTest { + + @Test + public void testFilterMvTransformFunctionInt() { + assertFilterMvTransformFunctionInt("v > 5", value -> value > 5); + } + Review Comment: Test coverage here exercises INT (dictionary-encoded) and STRING cases, but the new implementation also has dedicated paths for raw MV values and `BYTES` (`transformToBytesValuesMV`) plus stored-alias array types (BOOLEAN/TIMESTAMP). Adding unit tests for at least one no-dictionary MV column and a BYTES MV column would help catch regressions in those new code paths, especially for multistage execution which relies on scalar function evaluation. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
