Jackie-Jiang commented on a change in pull request #7664:
URL: https://github.com/apache/pinot/pull/7664#discussion_r740447764
##########
File path:
pinot-broker/src/main/java/org/apache/pinot/broker/requesthandler/BaseBrokerRequestHandler.java
##########
@@ -1026,6 +1029,39 @@ private String getActualTableName(String tableName) {
return tableName;
}
+ private static Set<String> getSegmentPartitionedColumns(TableCache
tableCache, String tableName) {
+ final TableConfig offlineTableConfig =
+
tableCache.getTableConfig(TableNameBuilder.OFFLINE.tableNameWithType(tableName));
+ final TableConfig realtimeTableConfig =
+
tableCache.getTableConfig(TableNameBuilder.REALTIME.tableNameWithType(tableName));
+ if (offlineTableConfig == null) {
+ return getSegmentPartitionedColumns(realtimeTableConfig);
+ }
+ if (realtimeTableConfig == null) {
+ return getSegmentPartitionedColumns(offlineTableConfig);
+ }
+ Set<String> segmentPartitionedColumns =
getSegmentPartitionedColumns(offlineTableConfig);
+
segmentPartitionedColumns.retainAll(getSegmentPartitionedColumns(realtimeTableConfig));
+ return segmentPartitionedColumns;
+ }
+
+ private static Set<String> getSegmentPartitionedColumns(TableConfig
tableConfig) {
Review comment:
(minor)
```suggestion
private static Set<String> getSegmentPartitionedColumns(@Nullable
TableConfig tableConfig) {
```
##########
File path:
pinot-broker/src/main/java/org/apache/pinot/broker/requesthandler/BaseBrokerRequestHandler.java
##########
@@ -1026,6 +1029,39 @@ private String getActualTableName(String tableName) {
return tableName;
}
+ private static Set<String> getSegmentPartitionedColumns(TableCache
tableCache, String tableName) {
+ final TableConfig offlineTableConfig =
+
tableCache.getTableConfig(TableNameBuilder.OFFLINE.tableNameWithType(tableName));
+ final TableConfig realtimeTableConfig =
+
tableCache.getTableConfig(TableNameBuilder.REALTIME.tableNameWithType(tableName));
+ if (offlineTableConfig == null) {
+ return getSegmentPartitionedColumns(realtimeTableConfig);
+ }
+ if (realtimeTableConfig == null) {
+ return getSegmentPartitionedColumns(offlineTableConfig);
+ }
+ Set<String> segmentPartitionedColumns =
getSegmentPartitionedColumns(offlineTableConfig);
+
segmentPartitionedColumns.retainAll(getSegmentPartitionedColumns(realtimeTableConfig));
+ return segmentPartitionedColumns;
+ }
+
+ private static Set<String> getSegmentPartitionedColumns(TableConfig
tableConfig) {
+ Set<String> segmentPartitionedColumns = new HashSet<>();
+ if (tableConfig == null) {
+ return segmentPartitionedColumns;
+ }
+ List<FieldConfig> fieldConfigs = tableConfig.getFieldConfigList();
+ if (fieldConfigs != null) {
+ for (FieldConfig fieldConfig : fieldConfigs) {
+ if (fieldConfig.getProperties() != null && "true".equalsIgnoreCase(
+
fieldConfig.getProperties().getOrDefault(FieldConfig.IS_SEGMENT_PARTITIONED_COLUMN_KEY,
"false"))) {
Review comment:
(minor)
```suggestion
if (fieldConfig.getProperties() != null && Boolean.parseBoolean(
fieldConfig.getProperties().get(FieldConfig.IS_SEGMENT_PARTITIONED_COLUMN_KEY)))
{
```
##########
File path:
pinot-broker/src/main/java/org/apache/pinot/broker/requesthandler/BaseBrokerRequestHandler.java
##########
@@ -1155,6 +1217,58 @@ private static void
handleDistinctCountBitmapOverride(BrokerRequest brokerReques
}
}
+ /**
+ * Rewrites 'DistinctCount' to 'SegmentPartitionDistinctCount' for the given
SQL query.
+ */
+ @VisibleForTesting
+ static void handleSegmentPartitionedDistinctCountOverride(PinotQuery
pinotQuery,
+ Set<String> segmentPartitionedColumns) {
+ if (segmentPartitionedColumns.isEmpty()) {
+ return;
+ }
+ for (Expression expression : pinotQuery.getSelectList()) {
+ handleSegmentPartitionedDistinctCountOverride(expression,
segmentPartitionedColumns);
+ }
+ List<Expression> orderByExpressions = pinotQuery.getOrderByList();
+ if (orderByExpressions != null) {
+ for (Expression expression : orderByExpressions) {
+ // NOTE: Order-by is always a Function with the ordering of the
Expression
+
handleSegmentPartitionedDistinctCountOverride(expression.getFunctionCall().getOperands().get(0),
+ segmentPartitionedColumns);
+ }
+ }
+ Expression havingExpression = pinotQuery.getHavingExpression();
+ if (havingExpression != null) {
+ handleSegmentPartitionedDistinctCountOverride(havingExpression,
segmentPartitionedColumns);
+ }
+ }
+
+ /**
+ * Rewrites 'DistinctCount' to 'SegmentPartitionDistinctCount' for the given
SQL expression.
+ */
+ private static void handleSegmentPartitionedDistinctCountOverride(Expression
expression,
+ Set<String> segmentPartitionedColumns) {
+ Function function = expression.getFunctionCall();
+ if (function == null) {
+ return;
+ }
+ if (StringUtils.remove(function.getOperator(), '_')
+ .equalsIgnoreCase(AggregationFunctionType.DISTINCTCOUNT.name())) {
+ final Set<String> identifiers =
+
CalciteSqlParser.extractIdentifiers(expression.getFunctionCall().getOperands(),
true);
Review comment:
(major) This is incorrect. We should not extract all the identifiers; we
should only check whether the single top-level operand is an identifier. If the
column is wrapped in a UDF, there is no guarantee that the result is still
segment partitioned.
##########
File path:
pinot-broker/src/main/java/org/apache/pinot/broker/requesthandler/BaseBrokerRequestHandler.java
##########
@@ -1026,6 +1029,39 @@ private String getActualTableName(String tableName) {
return tableName;
}
+ private static Set<String> getSegmentPartitionedColumns(TableCache
tableCache, String tableName) {
Review comment:
Could you add some Javadoc explaining why we intersect the offline and
realtime columns?
##########
File path:
pinot-broker/src/main/java/org/apache/pinot/broker/requesthandler/BaseBrokerRequestHandler.java
##########
@@ -1155,6 +1218,59 @@ private static void
handleDistinctCountBitmapOverride(BrokerRequest brokerReques
}
}
+ /**
+ * Rewrites 'DistinctCount' to 'SegmentPartitionDistinctCount' for the given
SQL query.
+ */
+ @VisibleForTesting
+ static void handleSegmentPartitionedDistinctCountOverride(PinotQuery
pinotQuery,
+ Set<String> segmentPartitionedColumns) {
+ if (segmentPartitionedColumns.isEmpty()) {
+ return;
+ }
+ for (Expression expression : pinotQuery.getSelectList()) {
+ handleSegmentPartitionedDistinctCountOverride(expression,
segmentPartitionedColumns);
+ }
+ List<Expression> orderByExpressions = pinotQuery.getOrderByList();
+ if (orderByExpressions != null) {
+ for (Expression expression : orderByExpressions) {
+ // NOTE: Order-by is always a Function with the ordering of the
Expression
+
handleSegmentPartitionedDistinctCountOverride(expression.getFunctionCall().getOperands().get(0),
+ segmentPartitionedColumns);
+ }
+ }
+ Expression havingExpression = pinotQuery.getHavingExpression();
+ if (havingExpression != null) {
+ handleSegmentPartitionedDistinctCountOverride(havingExpression,
segmentPartitionedColumns);
+ }
+ }
+
+ /**
+ * Rewrites 'DistinctCount' to 'SegmentPartitionDistinctCount' for the given
SQL expression.
+ */
+ private static void handleSegmentPartitionedDistinctCountOverride(Expression
expression,
+ Set<String> segmentPartitionedColumns) {
+ Function function = expression.getFunctionCall();
+ if (function == null) {
+ return;
+ }
+ if (StringUtils.remove(function.getOperator(), '_')
+ .equalsIgnoreCase(AggregationFunctionType.DISTINCTCOUNT.name())) {
+ final Set<String> identifiers =
+ expression.getFunctionCall().getOperands().stream().filter(expr ->
expr.isSetIdentifier())
+ .map(expr ->
expr.getIdentifier().getName()).collect(Collectors.toUnmodifiableSet());
Review comment:
Suggest checking that there is exactly one operand and that it is an
identifier, to avoid an unexpected rewrite
```suggestion
List<Expression> operands = function.getOperands();
if (operands.size() == 1 && operands.get(0).getType() == IDENTIFIER &&
segmentPartitionedColumns.contains(operands.get(0).getIdentifier().getName())) {
function.setOperator(AggregationFunctionType.SEGMENTPARTITIONEDDISTINCTCOUNT.name());
}
```
##########
File path:
pinot-broker/src/main/java/org/apache/pinot/broker/requesthandler/BaseBrokerRequestHandler.java
##########
@@ -1139,6 +1176,32 @@ static void handleQueryLimitOverride(PinotQuery
pinotQuery, int queryLimit) {
}
}
+ /**
+ * Rewrites 'DistinctCount' to 'SegmentPartitionDistinctCount' for the given
PQL broker request.
+ */
+ @Deprecated
+ @VisibleForTesting
+ static void handleSegmentPartitionedDistinctCountOverride(BrokerRequest
brokerRequest,
+ Set<String> segmentPartitionedColumns) {
+ if (segmentPartitionedColumns.isEmpty()) {
+ return;
+ }
+ List<AggregationInfo> aggregationsInfo =
brokerRequest.getAggregationsInfo();
+ if (aggregationsInfo != null) {
+ for (AggregationInfo aggregationInfo : aggregationsInfo) {
+ if (StringUtils.remove(aggregationInfo.getAggregationType(), '_')
+ .equalsIgnoreCase(AggregationFunctionType.DISTINCTCOUNT.name())) {
+ for (String expr : aggregationInfo.getExpressions()) {
Review comment:
Suggest checking that there is exactly one expression and that it is a
segment-partitioned column, to avoid an unexpected rewrite
```suggestion
List<String> expressions = aggregationInfo.getExpressions();
if (expressions.size() == 1 &&
segmentPartitionedColumns.contains(expressions.get(0))) {
aggregationInfo.setAggregationType(AggregationFunctionType.SEGMENTPARTITIONEDDISTINCTCOUNT.name());
}
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]