Jackie-Jiang commented on code in PR #13146: URL: https://github.com/apache/pinot/pull/13146#discussion_r1609052651
########## pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/function/array/ListAggDistinctFunction.java: ########## @@ -0,0 +1,58 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.pinot.core.query.aggregation.function.array; + +import it.unimi.dsi.fastutil.objects.AbstractObjectCollection; +import it.unimi.dsi.fastutil.objects.ObjectLinkedOpenHashSet; +import org.apache.pinot.common.request.context.ExpressionContext; +import org.apache.pinot.core.query.aggregation.AggregationResultHolder; +import org.apache.pinot.core.query.aggregation.groupby.GroupByResultHolder; + + +/** + * The {@code ListAggDistinctFunction} extends the {@link ListAggFunction} to use {@link ObjectLinkedOpenHashSet} as + * the intermediate result to hold distinct values for aggregation. 
+ */ +public class ListAggDistinctFunction extends ListAggFunction { + + public ListAggDistinctFunction(ExpressionContext expression, String separator, boolean nullHandlingEnabled) { + super(expression, separator, nullHandlingEnabled); + } + + @Override + protected AbstractObjectCollection<String> getObjectCollection(AggregationResultHolder aggregationResultHolder) { + ObjectLinkedOpenHashSet<String> valueSet = aggregationResultHolder.getResult(); Review Comment: Currently `ObjectSerDeUtils` will ser/de this the same way as `ObjectOpenHashSet`, and lose the ordering. We need to separate them in `ObjectSerDeUtils`. It would be good if we can make a test that fails when the intermediate result is serialized as `ObjectOpenHashSet` ########## pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/function/AggregationFunctionFactory.java: ########## @@ -245,6 +247,24 @@ public static AggregationFunction getAggregationFunction(FunctionContext functio throw new IllegalArgumentException("Unsupported data type for FIRST_WITH_TIME: " + dataType); } } + case LISTAGG: + Preconditions.checkArgument(numArguments == 2 || numArguments == 3, + "LISTAGG expects 2 arguments, got: %s. 
The function can be used as " + + "listAgg(['distinct'] expression, 'separator')", numArguments); Review Comment: ```suggestion + "listAgg([distinct] expression, 'separator')", numArguments); ``` ########## pinot-query-runtime/src/main/java/org/apache/pinot/query/runtime/operator/AggregateOperator.java: ########## @@ -296,11 +299,25 @@ private TransferableBlock consumeAggregation() { arguments.add(PLACEHOLDER_IDENTIFIER); } } + handleListAggDistinctArg(functionName, functionCall, arguments); return AggregationFunctionFactory.getAggregationFunction( new FunctionContext(FunctionContext.Type.AGGREGATION, functionName, arguments), true); } } + private static void handleListAggDistinctArg(String functionName, RexExpression.FunctionCall functionCall, + List<ExpressionContext> arguments) { + String upperCaseFunctionName = + AggregationFunctionType.getNormalizedAggregationFunctionName(functionName); + if (upperCaseFunctionName.equals("LISTAGG")) { + if (functionCall.isDistinct()) { + arguments.add(ExpressionContext.forLiteralContext(Literal.boolValue(true))); + } else { + arguments.add(ExpressionContext.forLiteralContext(Literal.boolValue(false))); Review Comment: (minor) Do we need to add a third argument? ########## pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/function/AggregationFunctionFactory.java: ########## @@ -254,15 +274,15 @@ public static AggregationFunction getAggregationFunction(FunctionContext functio "ARRAY_AGG expects the 2nd argument to be literal, got: %s. The function can be used as " + "arrayAgg(dataColumn, 'dataType', ['isDistinct'])", dataTypeExp.getType()); DataType dataType = DataType.valueOf(dataTypeExp.getLiteral().getStringValue().toUpperCase()); - boolean isDistinct = false; + boolean isDistinctArrayAgg = false; Review Comment: (minor) You may revert this change if you put braces around the `LISTAGG` case. 
I feel the code is easier to read when these variables are kept local to the case ########## pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/function/AggregationFunctionFactory.java: ########## @@ -245,6 +247,24 @@ public static AggregationFunction getAggregationFunction(FunctionContext functio throw new IllegalArgumentException("Unsupported data type for FIRST_WITH_TIME: " + dataType); } } + case LISTAGG: + Preconditions.checkArgument(numArguments == 2 || numArguments == 3, + "LISTAGG expects 2 arguments, got: %s. The function can be used as " + + "listAgg(['distinct'] expression, 'separator')", numArguments); + ExpressionContext separatorExpression = arguments.get(1); + Preconditions.checkArgument(separatorExpression.getType() == ExpressionContext.Type.LITERAL, + "LISTAGG expects the 2nd argument to be literal, got: %s. The function can be used as " + + "listAgg(['distinct'] expression, 'separator')", separatorExpression.getType()); Review Comment: ```suggestion + "listAgg([distinct] expression, 'separator')", separatorExpression.getType()); ``` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
