Jackie-Jiang commented on code in PR #13573: URL: https://github.com/apache/pinot/pull/13573#discussion_r1674529450
########## pinot-common/src/main/java/org/apache/pinot/common/function/FunctionRegistry.java: ########## @@ -95,108 +153,162 @@ public static void init() { } /** - * Registers a method with the name of the method. + * Registers a {@link PinotScalarFunction} under the given canonical name. */ - public static void registerFunction(Method method, boolean nullableParameters, boolean isPlaceholder, - boolean isVarArg) { - registerFunction(method.getName(), method, nullableParameters, isPlaceholder, isVarArg); + private static void register(String canonicalName, PinotScalarFunction function, + Map<String, PinotScalarFunction> functionMap) { + Preconditions.checkState(functionMap.put(canonicalName, function) == null, "Function: %s is already registered", + canonicalName); } /** - * Registers a method with the given function name. + * Registers a {@link FunctionInfo} under the given canonical name. */ - public static void registerFunction(String functionName, Method method, boolean nullableParameters, - boolean isPlaceholder, boolean isVarArg) { - if (!isPlaceholder) { - registerFunctionInfoMap(functionName, method, nullableParameters, isVarArg); - } - registerCalciteNamedFunctionMap(functionName, method, nullableParameters, isVarArg); - } - - private static void registerFunctionInfoMap(String functionName, Method method, boolean nullableParameters, - boolean isVarArg) { - FunctionInfo functionInfo = new FunctionInfo(method, method.getDeclaringClass(), nullableParameters); - String canonicalName = canonicalize(functionName); - Map<Integer, FunctionInfo> functionInfoMap = FUNCTION_INFO_MAP.computeIfAbsent(canonicalName, k -> new HashMap<>()); - if (isVarArg) { - FunctionInfo existFunctionInfo = functionInfoMap.put(VAR_ARG_KEY, functionInfo); - Preconditions.checkState(existFunctionInfo == null || existFunctionInfo.getMethod() == functionInfo.getMethod(), - "Function: %s with variable number of parameters is already registered", functionName); - } else { - FunctionInfo existFunctionInfo = functionInfoMap.put(method.getParameterCount(), functionInfo); - Preconditions.checkState(existFunctionInfo == null || existFunctionInfo.getMethod() == functionInfo.getMethod(), - "Function: %s with %s parameters is already registered", functionName, method.getParameterCount()); - } - } - - private static void registerCalciteNamedFunctionMap(String functionName, Method method, boolean nullableParameters, - boolean isVarArg) { - if (method.getAnnotation(Deprecated.class) == null) { - FUNCTION_MAP.put(functionName, ScalarFunctionImpl.create(method)); - } + private static void register(String canonicalName, FunctionInfo functionInfo, int numArguments, + Map<String, Map<Integer, FunctionInfo>> functionInfoMap) { + Preconditions.checkState( + functionInfoMap.computeIfAbsent(canonicalName, k -> new HashMap<>()).put(numArguments, functionInfo) == null, + "Function: %s with %s arguments is already registered", canonicalName, + numArguments == VAR_ARG_KEY ? "variable" : numArguments); } - public static Map<String, List<Function>> getRegisteredCalciteFunctionMap() { - return FUNCTION_MAP.map(); + /** + * Returns {@code true} if the given canonical name is registered, {@code false} otherwise. + */ + public static boolean contains(String canonicalName) { + return FUNCTION_MAP.containsKey(canonicalName); } - public static Set<String> getRegisteredCalciteFunctionNames() { - return FUNCTION_MAP.map().keySet(); + /** + * @deprecated For performance concern, use {@link #contains(String)} instead to avoid invoking + * {@link #canonicalize(String)} multiple times. + */ + @Deprecated + public static boolean containsFunction(String name) { + return contains(canonicalize(name)); } /** - * Returns {@code true} if the given function name is registered, {@code false} otherwise. + * Returns the {@link FunctionInfo} associated with the given canonical name and argument types, or {@code null} if + * there is no matching method. This method should be called after the FunctionRegistry is initialized and all methods + * are already registered. */ - public static boolean containsFunction(String functionName) { - return FUNCTION_INFO_MAP.containsKey(canonicalize(functionName)); + @Nullable + public static FunctionInfo lookupFunctionInfo(String canonicalName, ColumnDataType[] argumentTypes) { + PinotScalarFunction function = FUNCTION_MAP.get(canonicalName); + return function != null ? function.getFunctionInfo(argumentTypes) : null; } /** - * Returns the {@link FunctionInfo} associated with the given function name and number of parameters, or {@code null} + * Returns the {@link FunctionInfo} associated with the given canonical name and number of arguments, or {@code null} * if there is no matching method. This method should be called after the FunctionRegistry is initialized and all * methods are already registered. + * TODO: Move all usages to {@link #lookupFunctionInfo(String, ColumnDataType[])}. */ @Nullable - public static FunctionInfo getFunctionInfo(String functionName, int numParameters) { - Map<Integer, FunctionInfo> functionInfoMap = FUNCTION_INFO_MAP.get(canonicalize(functionName)); - if (functionInfoMap != null) { - FunctionInfo functionInfo = functionInfoMap.get(numParameters); - if (functionInfo != null) { - return functionInfo; - } - return functionInfoMap.get(VAR_ARG_KEY); - } - return null; - } - - private static String canonicalize(String functionName) { - return StringUtils.remove(functionName, '_').toLowerCase(); + public static FunctionInfo lookupFunctionInfo(String canonicalName, int numArguments) { + PinotScalarFunction function = FUNCTION_MAP.get(canonicalName); + return function != null ? function.getFunctionInfo(numArguments) : null; } /** - * Placeholders for scalar function, they register and represents the signature for transform and filter predicate - * so that v2 engine can understand and plan them correctly. + * @deprecated For performance concern, use {@link #lookupFunctionInfo(String, int)} instead to avoid invoking + * {@link #canonicalize(String)} multiple times. */ - private static class PlaceholderScalarFunctions { + @Deprecated + @Nullable + public static FunctionInfo getFunctionInfo(String name, int numArguments) { + return lookupFunctionInfo(canonicalize(name), numArguments); + } + + public static String canonicalize(String name) { + return StringUtils.remove(name, '_').toLowerCase(); + } + + public static class ArgumentCountBasedScalarFunction implements PinotScalarFunction { + private final String _name; + private final Map<Integer, FunctionInfo> _functionInfoMap; - @ScalarFunction(names = {"textContains", "text_contains"}, isPlaceholder = true) - public static boolean textContains(String text, String pattern) { - throw new UnsupportedOperationException("Placeholder scalar function, should not reach here"); + private ArgumentCountBasedScalarFunction(String name, Map<Integer, FunctionInfo> functionInfoMap) { + _name = name; + _functionInfoMap = functionInfoMap; } - @ScalarFunction(names = {"textMatch", "text_match"}, isPlaceholder = true) - public static boolean textMatch(String text, String pattern) { - throw new UnsupportedOperationException("Placeholder scalar function, should not reach here"); + @Override + public String getName() { + return _name; + } + + @Override + public PinotSqlFunction toPinotSqlFunction() { + return new PinotSqlFunction(_name, getReturnTypeInference(), getOperandTypeChecker()); + } + + private SqlReturnTypeInference getReturnTypeInference() { + return opBinding -> { + int numArguments = opBinding.getOperandCount(); + FunctionInfo functionInfo = getFunctionInfo(numArguments); + Preconditions.checkState(functionInfo != null, "Failed to find function: %s with %s arguments", _name, + numArguments); + RelDataTypeFactory typeFactory = opBinding.getTypeFactory(); + Method method = functionInfo.getMethod(); + RelDataType returnType = FunctionUtils.getRelDataType(opBinding.getTypeFactory(), method.getReturnType()); + + if (!functionInfo.hasNullableParameters()) { + // When any parameter is null, return is null + for (RelDataType type : opBinding.collectOperandTypes()) { + if (type.isNullable()) { + return typeFactory.createTypeWithNullability(returnType, true); + } + } + } + + return method.isAnnotationPresent(Nullable.class) ? typeFactory.createTypeWithNullability(returnType, true) + : returnType; + }; + } + + private SqlOperandTypeChecker getOperandTypeChecker() { + if (_functionInfoMap.containsKey(VAR_ARG_KEY)) { + return OperandTypes.VARIADIC; + } + int numCheckers = _functionInfoMap.size(); + if (numCheckers == 1) { + return getOperandTypeChecker(_functionInfoMap.values().iterator().next().getMethod()); + } + SqlOperandTypeChecker[] operandTypeCheckers = new SqlOperandTypeChecker[numCheckers]; + int index = 0; + for (FunctionInfo functionInfo : _functionInfoMap.values()) { + operandTypeCheckers[index++] = getOperandTypeChecker(functionInfo.getMethod()); + } + return OperandTypes.or(operandTypeCheckers); + } + + private static SqlOperandTypeChecker getOperandTypeChecker(Method method) { + Class<?>[] parameterTypes = method.getParameterTypes(); + int length = parameterTypes.length; + SqlTypeFamily[] typeFamilies = new SqlTypeFamily[length]; + for (int i = 0; i < length; i++) { + typeFamilies[i] = getSqlTypeFamily(parameterTypes[i]); + } + return OperandTypes.family(typeFamilies); } - @ScalarFunction(names = {"jsonMatch", "json_match"}, isPlaceholder = true) - public static boolean jsonMatch(String text, String pattern) { - throw new UnsupportedOperationException("Placeholder scalar function, should not reach here"); + private static SqlTypeFamily getSqlTypeFamily(Class<?> clazz) { + // NOTE: Pinot allows some non-standard type conversions such as Timestamp <-> long, boolean <-> int etc. Do not + // restrict the type family for now. We only restrict the type family for String so that cast can be added. + // Explicit cast is required to correctly convert boolean and Timestamp to String. Without explicit case, + // BOOLEAN and TIMESTAMP type will be converted with their internal stored format which is INT and LONG + // respectively. E.g. true will be converted to "1", timestamp will be converted to long value string. + // TODO: Revisit this. Review Comment: I didn't dig into this part as Pinot can handle implicit cast even without `cast` except for BOOLEAN/TIMESTAMP to STRING. We should revisit this. Currently using boolean/timestamp as string will have different behavior in V1 and V2 -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For additional commands, e-mail: commits-h...@pinot.apache.org