This is an automated email from the ASF dual-hosted git repository.

jackie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
     new c0056dc278 Allow broker to automatically rewrite expensive function to 
its approximate counterpart (#8655)
c0056dc278 is described below

commit c0056dc278e6bac6fedeee51aa6cccca80310398
Author: Xiaotian (Jackie) Jiang <[email protected]>
AuthorDate: Mon May 9 15:19:45 2022 -0700

    Allow broker to automatically rewrite expensive function to its approximate 
counterpart (#8655)
    
    - Add broker config `pinot.broker.use.approximate.function` to turn the 
feature on (off by default)
    - Add table-level query config `useApproximateFunction` (set in the table 
config's `QueryConfig`) to override the broker-level config
---
 .../requesthandler/BaseBrokerRequestHandler.java   | 234 +++++++++++++++------
 .../requesthandler/DistinctCountRewriteTest.java   |  44 ----
 .../requesthandler/QueryLimitOverrideTest.java     |  50 -----
 .../broker/requesthandler/QueryOverrideTest.java   | 123 +++++++++++
 .../common/utils/config/TableConfigSerDeTest.java  |   6 +-
 .../tests/OfflineClusterIntegrationTest.java       |   6 +-
 .../apache/pinot/spi/config/table/QueryConfig.java |  23 +-
 .../apache/pinot/spi/utils/CommonConstants.java    |   5 +
 8 files changed, 312 insertions(+), 179 deletions(-)

diff --git 
a/pinot-broker/src/main/java/org/apache/pinot/broker/requesthandler/BaseBrokerRequestHandler.java
 
b/pinot-broker/src/main/java/org/apache/pinot/broker/requesthandler/BaseBrokerRequestHandler.java
index 402f33b66b..64d7f0db2b 100644
--- 
a/pinot-broker/src/main/java/org/apache/pinot/broker/requesthandler/BaseBrokerRequestHandler.java
+++ 
b/pinot-broker/src/main/java/org/apache/pinot/broker/requesthandler/BaseBrokerRequestHandler.java
@@ -119,6 +119,7 @@ public abstract class BaseBrokerRequestHandler implements 
BrokerRequestHandler {
   private final AtomicInteger _numDroppedLog;
 
   private final boolean _disableGroovy;
+  private final boolean _useApproximateFunction;
   private final int _defaultHllLog2m;
   private final boolean _enableQueryLimitOverride;
   private final boolean _enableDistinctCountBitmapOverride;
@@ -133,6 +134,7 @@ public abstract class BaseBrokerRequestHandler implements 
BrokerRequestHandler {
     _tableCache = tableCache;
     _brokerMetrics = brokerMetrics;
     _disableGroovy = 
_config.getProperty(CommonConstants.Broker.DISABLE_GROOVY, false);
+    _useApproximateFunction = 
_config.getProperty(Broker.USE_APPROXIMATE_FUNCTION, false);
     _defaultHllLog2m = 
_config.getProperty(CommonConstants.Helix.DEFAULT_HYPERLOGLOG_LOG2M_KEY,
         CommonConstants.Helix.DEFAULT_HYPERLOGLOG_LOG2M);
     _enableQueryLimitOverride = 
_config.getProperty(Broker.CONFIG_OF_ENABLE_QUERY_LIMIT_OVERRIDE, false);
@@ -325,7 +327,7 @@ public abstract class BaseBrokerRequestHandler implements 
BrokerRequestHandler {
     TableConfig realtimeTableConfig =
         
_tableCache.getTableConfig(TableNameBuilder.REALTIME.tableNameWithType(rawTableName));
 
-    if ((offlineTableName == null) && (realtimeTableName == null)) {
+    if (offlineTableName == null && realtimeTableName == null) {
       // No table matches the request
       if (realtimeTableConfig == null && offlineTableConfig == null) {
         LOGGER.info("Table not found for request {}: {}", requestId, query);
@@ -338,10 +340,20 @@ public abstract class BaseBrokerRequestHandler implements 
BrokerRequestHandler {
       return BrokerResponseNative.NO_TABLE_RESULT;
     }
 
-    if (isDisableGroovy(offlineTableName != null ? offlineTableConfig : null,
-        realtimeTableName != null ? realtimeTableConfig : null)) {
+    // Handle query rewrite that can be overridden by the table configs
+    if (offlineTableName == null) {
+      offlineTableConfig = null;
+    }
+    if (realtimeTableName == null) {
+      realtimeTableConfig = null;
+    }
+    HandlerContext handlerContext = getHandlerContext(offlineTableConfig, 
realtimeTableConfig);
+    if (handlerContext._disableGroovy) {
       rejectGroovyQuery(serverPinotQuery);
     }
+    if (handlerContext._useApproximateFunction) {
+      handleApproximateFunctionOverride(serverPinotQuery);
+    }
 
     // Validate QPS quota
     if (!_queryQuotaManager.acquire(tableName)) {
@@ -860,70 +872,6 @@ public abstract class BaseBrokerRequestHandler implements 
BrokerRequestHandler {
     }
   }
 
-  private boolean isDisableGroovy(@Nullable TableConfig offlineTableConfig, 
@Nullable TableConfig realtimeTableConfig) {
-    Boolean offlineTableDisableGroovyQuery = null;
-    if (offlineTableConfig != null && offlineTableConfig.getQueryConfig() != 
null) {
-      offlineTableDisableGroovyQuery = 
offlineTableConfig.getQueryConfig().getDisableGroovy();
-    }
-
-    Boolean realtimeTableDisableGroovyQuery = null;
-    if (realtimeTableConfig != null && realtimeTableConfig.getQueryConfig() != 
null) {
-      realtimeTableDisableGroovyQuery = 
realtimeTableConfig.getQueryConfig().getDisableGroovy();
-    }
-
-    if (offlineTableDisableGroovyQuery == null && 
realtimeTableDisableGroovyQuery == null) {
-      return _disableGroovy;
-    }
-
-    // If offline or online table config disables Groovy, then Groovy should 
be disabled
-    return Boolean.TRUE.equals(offlineTableDisableGroovyQuery) || 
Boolean.TRUE.equals(realtimeTableDisableGroovyQuery);
-  }
-
-  /**
-   * Verifies that no groovy is present in the PinotQuery when disabled.
-   */
-  @VisibleForTesting
-  static void rejectGroovyQuery(PinotQuery pinotQuery) {
-    List<Expression> selectList = pinotQuery.getSelectList();
-    for (Expression expression : selectList) {
-      rejectGroovyQuery(expression);
-    }
-    List<Expression> orderByList = pinotQuery.getOrderByList();
-    if (orderByList != null) {
-      for (Expression expression : orderByList) {
-        // NOTE: Order-by is always a Function with the ordering of the 
Expression
-        rejectGroovyQuery(expression.getFunctionCall().getOperands().get(0));
-      }
-    }
-    Expression havingExpression = pinotQuery.getHavingExpression();
-    if (havingExpression != null) {
-      rejectGroovyQuery(havingExpression);
-    }
-    Expression filterExpression = pinotQuery.getFilterExpression();
-    if (filterExpression != null) {
-      rejectGroovyQuery(filterExpression);
-    }
-    List<Expression> groupByList = pinotQuery.getGroupByList();
-    if (groupByList != null) {
-      for (Expression expression : groupByList) {
-        rejectGroovyQuery(expression);
-      }
-    }
-  }
-
-  private static void rejectGroovyQuery(Expression expression) {
-    Function function = expression.getFunctionCall();
-    if (function == null) {
-      return;
-    }
-    if (function.getOperator().equals("groovy")) {
-      throw new BadQueryRequestException("Groovy transform functions are 
disabled for queries");
-    }
-    for (Expression operandExpression : function.getOperands()) {
-      rejectGroovyQuery(operandExpression);
-    }
-  }
-
   /**
    * Sets HyperLogLog log2m for DistinctCountHLL functions if not explicitly 
set for the given expression.
    */
@@ -1044,6 +992,158 @@ public abstract class BaseBrokerRequestHandler 
implements BrokerRequestHandler {
     }
   }
 
+  private HandlerContext getHandlerContext(@Nullable TableConfig 
offlineTableConfig,
+      @Nullable TableConfig realtimeTableConfig) {
+    boolean offlineTableDisableGroovyQuery = _disableGroovy;
+    boolean offlineTableUseApproximateFunction = _useApproximateFunction;
+    if (offlineTableConfig != null && offlineTableConfig.getQueryConfig() != 
null) {
+      Boolean disableGroovyOverride = 
offlineTableConfig.getQueryConfig().getDisableGroovy();
+      if (disableGroovyOverride != null) {
+        offlineTableDisableGroovyQuery = disableGroovyOverride;
+      }
+      Boolean useApproximateFunctionOverride = 
offlineTableConfig.getQueryConfig().getUseApproximateFunction();
+      if (useApproximateFunctionOverride != null) {
+        offlineTableUseApproximateFunction = useApproximateFunctionOverride;
+      }
+    }
+
+    boolean realtimeTableDisableGroovyQuery = _disableGroovy;
+    boolean realtimeTableUseApproximateFunction = _useApproximateFunction;
+    if (realtimeTableConfig != null && realtimeTableConfig.getQueryConfig() != 
null) {
+      Boolean disableGroovyOverride = 
realtimeTableConfig.getQueryConfig().getDisableGroovy();
+      if (disableGroovyOverride != null) {
+        realtimeTableDisableGroovyQuery = disableGroovyOverride;
+      }
+      Boolean useApproximateFunctionOverride = 
realtimeTableConfig.getQueryConfig().getUseApproximateFunction();
+      if (useApproximateFunctionOverride != null) {
+        realtimeTableUseApproximateFunction = useApproximateFunctionOverride;
+      }
+    }
+
+    // Disable Groovy if either offline or realtime table config disables 
Groovy
+    boolean disableGroovy = offlineTableDisableGroovyQuery | 
realtimeTableDisableGroovyQuery;
+    // Use approximate function if both offline and realtime table config uses 
approximate function
+    boolean useApproximateFunction = offlineTableUseApproximateFunction & 
realtimeTableUseApproximateFunction;
+
+    return new HandlerContext(disableGroovy, useApproximateFunction);
+  }
+
+  private static class HandlerContext {
+    final boolean _disableGroovy;
+    final boolean _useApproximateFunction;
+
+    HandlerContext(boolean disableGroovy, boolean useApproximateFunction) {
+      _disableGroovy = disableGroovy;
+      _useApproximateFunction = useApproximateFunction;
+    }
+  }
+
+  /**
+   * Verifies that no groovy is present in the PinotQuery when disabled.
+   */
+  @VisibleForTesting
+  static void rejectGroovyQuery(PinotQuery pinotQuery) {
+    List<Expression> selectList = pinotQuery.getSelectList();
+    for (Expression expression : selectList) {
+      rejectGroovyQuery(expression);
+    }
+    List<Expression> orderByList = pinotQuery.getOrderByList();
+    if (orderByList != null) {
+      for (Expression expression : orderByList) {
+        // NOTE: Order-by is always a Function with the ordering of the 
Expression
+        rejectGroovyQuery(expression.getFunctionCall().getOperands().get(0));
+      }
+    }
+    Expression havingExpression = pinotQuery.getHavingExpression();
+    if (havingExpression != null) {
+      rejectGroovyQuery(havingExpression);
+    }
+    Expression filterExpression = pinotQuery.getFilterExpression();
+    if (filterExpression != null) {
+      rejectGroovyQuery(filterExpression);
+    }
+    List<Expression> groupByList = pinotQuery.getGroupByList();
+    if (groupByList != null) {
+      for (Expression expression : groupByList) {
+        rejectGroovyQuery(expression);
+      }
+    }
+  }
+
+  private static void rejectGroovyQuery(Expression expression) {
+    Function function = expression.getFunctionCall();
+    if (function == null) {
+      return;
+    }
+    if (function.getOperator().equals("groovy")) {
+      throw new BadQueryRequestException("Groovy transform functions are 
disabled for queries");
+    }
+    for (Expression operandExpression : function.getOperands()) {
+      rejectGroovyQuery(operandExpression);
+    }
+  }
+
+  /**
+   * Rewrites potential expensive functions to their approximation 
counterparts.
+   * - DISTINCT_COUNT -> DISTINCT_COUNT_SMART_HLL
+   * - PERCENTILE -> PERCENTILE_SMART_TDIGEST
+   */
+  @VisibleForTesting
+  static void handleApproximateFunctionOverride(PinotQuery pinotQuery) {
+    for (Expression expression : pinotQuery.getSelectList()) {
+      handleApproximateFunctionOverride(expression);
+    }
+    List<Expression> orderByExpressions = pinotQuery.getOrderByList();
+    if (orderByExpressions != null) {
+      for (Expression expression : orderByExpressions) {
+        // NOTE: Order-by is always a Function with the ordering of the 
Expression
+        
handleApproximateFunctionOverride(expression.getFunctionCall().getOperands().get(0));
+      }
+    }
+    Expression havingExpression = pinotQuery.getHavingExpression();
+    if (havingExpression != null) {
+      handleApproximateFunctionOverride(havingExpression);
+    }
+  }
+
+  private static void handleApproximateFunctionOverride(Expression expression) 
{
+    Function function = expression.getFunctionCall();
+    if (function == null) {
+      return;
+    }
+    String functionName = function.getOperator();
+    if (functionName.equals("distinctcount") || 
functionName.equals("distinctcountmv")) {
+      function.setOperator("distinctcountsmarthll");
+    } else if (functionName.startsWith("percentile")) {
+      String remainingFunctionName = functionName.substring(10);
+      if (remainingFunctionName.isEmpty() || 
remainingFunctionName.equals("mv")) {
+        function.setOperator("percentilesmarttdigest");
+      } else if (remainingFunctionName.matches("\\d+")) {
+        try {
+          int percentile = Integer.parseInt(remainingFunctionName);
+          function.setOperator("percentilesmarttdigest");
+          function.setOperands(
+              Arrays.asList(function.getOperands().get(0), 
RequestUtils.getLiteralExpression(percentile)));
+        } catch (Exception e) {
+          throw new BadQueryRequestException("Illegal function name: " + 
functionName);
+        }
+      } else if (remainingFunctionName.matches("\\d+mv")) {
+        try {
+          int percentile = Integer.parseInt(remainingFunctionName.substring(0, 
remainingFunctionName.length() - 2));
+          function.setOperator("percentilesmarttdigest");
+          function.setOperands(
+              Arrays.asList(function.getOperands().get(0), 
RequestUtils.getLiteralExpression(percentile)));
+        } catch (Exception e) {
+          throw new BadQueryRequestException("Illegal function name: " + 
functionName);
+        }
+      }
+    } else {
+      for (Expression operand : function.getOperands()) {
+        handleApproximateFunctionOverride(operand);
+      }
+    }
+  }
+
   private static void handleExpressionOverride(PinotQuery pinotQuery,
       @Nullable Map<Expression, Expression> expressionOverrideMap) {
     if (expressionOverrideMap == null) {
diff --git 
a/pinot-broker/src/test/java/org/apache/pinot/broker/requesthandler/DistinctCountRewriteTest.java
 
b/pinot-broker/src/test/java/org/apache/pinot/broker/requesthandler/DistinctCountRewriteTest.java
deleted file mode 100644
index 1ca37feb9a..0000000000
--- 
a/pinot-broker/src/test/java/org/apache/pinot/broker/requesthandler/DistinctCountRewriteTest.java
+++ /dev/null
@@ -1,44 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.pinot.broker.requesthandler;
-
-import com.google.common.collect.ImmutableSet;
-import org.apache.pinot.common.request.PinotQuery;
-import org.apache.pinot.segment.spi.AggregationFunctionType;
-import org.apache.pinot.sql.parsers.CalciteSqlParser;
-import org.testng.Assert;
-import org.testng.annotations.Test;
-
-
-public class DistinctCountRewriteTest {
-
-  @Test
-  public void testSql() {
-    String sql = "SELECT distinctCount(col1) FROM myTable";
-    PinotQuery pinotQuery = CalciteSqlParser.compileToPinotQuery(sql);
-    
BaseBrokerRequestHandler.handleSegmentPartitionedDistinctCountOverride(pinotQuery,
 ImmutableSet.of("col2", "col3"));
-    
Assert.assertEquals(pinotQuery.getSelectList().get(0).getFunctionCall().getOperator(),
-        AggregationFunctionType.DISTINCTCOUNT.name().toLowerCase());
-    
BaseBrokerRequestHandler.handleSegmentPartitionedDistinctCountOverride(pinotQuery,
-        ImmutableSet.of("col1", "col2", "col3"));
-    
Assert.assertEquals(pinotQuery.getSelectList().get(0).getFunctionCall().getOperator(),
-        
AggregationFunctionType.SEGMENTPARTITIONEDDISTINCTCOUNT.name().toLowerCase());
-  }
-}
diff --git 
a/pinot-broker/src/test/java/org/apache/pinot/broker/requesthandler/QueryLimitOverrideTest.java
 
b/pinot-broker/src/test/java/org/apache/pinot/broker/requesthandler/QueryLimitOverrideTest.java
deleted file mode 100644
index d9d0d4ef00..0000000000
--- 
a/pinot-broker/src/test/java/org/apache/pinot/broker/requesthandler/QueryLimitOverrideTest.java
+++ /dev/null
@@ -1,50 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.pinot.broker.requesthandler;
-
-import org.apache.pinot.common.request.PinotQuery;
-import org.apache.pinot.sql.parsers.CalciteSqlParser;
-import org.testng.annotations.Test;
-
-import static org.testng.Assert.assertEquals;
-
-
-public class QueryLimitOverrideTest {
-
-  @Test
-  public void testLimitOverride() {
-    // Selections
-    testLimitOverride("select * from vegetables LIMIT 999", 1000, 999);
-    testLimitOverride("select * from vegetables LIMIT 1000", 1000, 1000);
-    testLimitOverride("select * from vegetables LIMIT 1001", 1000, 1000);
-    testLimitOverride("select * from vegetables LIMIT 10000", 1000, 1000);
-
-    // Group-bys
-    testLimitOverride("select count(*) from vegetables group by a LIMIT 999", 
1000, 999);
-    testLimitOverride("select count(*) from vegetables group by a LIMIT 1000", 
1000, 1000);
-    testLimitOverride("select count(*) from vegetables group by a LIMIT 1001", 
1000, 1000);
-    testLimitOverride("select count(*) from vegetables group by a LIMIT 
10000", 1000, 1000);
-  }
-
-  private void testLimitOverride(String query, int maxQuerySelectionLimit, int 
expectedLimit) {
-    PinotQuery pinotQuery = CalciteSqlParser.compileToPinotQuery(query);
-    BaseBrokerRequestHandler.handleQueryLimitOverride(pinotQuery, 
maxQuerySelectionLimit);
-    assertEquals(pinotQuery.getLimit(), expectedLimit);
-  }
-}
diff --git 
a/pinot-broker/src/test/java/org/apache/pinot/broker/requesthandler/QueryOverrideTest.java
 
b/pinot-broker/src/test/java/org/apache/pinot/broker/requesthandler/QueryOverrideTest.java
new file mode 100644
index 0000000000..8ed7ceb3b0
--- /dev/null
+++ 
b/pinot-broker/src/test/java/org/apache/pinot/broker/requesthandler/QueryOverrideTest.java
@@ -0,0 +1,123 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.broker.requesthandler;
+
+import com.google.common.collect.ImmutableSet;
+import java.util.Arrays;
+import org.apache.pinot.common.request.PinotQuery;
+import org.apache.pinot.common.utils.request.RequestUtils;
+import org.apache.pinot.sql.parsers.CalciteSqlParser;
+import org.testng.annotations.Test;
+
+import static org.testng.Assert.assertEquals;
+
+
+public class QueryOverrideTest {
+  private static final int QUERY_LIMIT = 1000;
+
+  @Test
+  public void testLimitOverride() {
+    // Selections
+    testLimitOverride("SELECT * FROM vegetables LIMIT 999", 999);
+    testLimitOverride("select * from vegetables limit 1000", 1000);
+    testLimitOverride("SeLeCt * FrOm vegetables LiMit 1001", 1000);
+    testLimitOverride("sElEcT * fRoM vegetables lImIt 10000", 1000);
+
+    // Group-bys
+    testLimitOverride("SELECT COUNT(*) FROM vegetables GROUP BY a LIMIT 999", 
999);
+    testLimitOverride("select count(*) from vegetables group by a limit 1000", 
1000);
+    testLimitOverride("SeLeCt CoUnT(*) FrOm vegetables GrOuP By a LiMit 1001", 
1000);
+    testLimitOverride("sElEcT cOuNt(*) fRoM vegetables gRoUp bY a lImIt 
10000", 1000);
+  }
+
+  private void testLimitOverride(String query, int expectedLimit) {
+    PinotQuery pinotQuery = CalciteSqlParser.compileToPinotQuery(query);
+    BaseBrokerRequestHandler.handleQueryLimitOverride(pinotQuery, QUERY_LIMIT);
+    assertEquals(pinotQuery.getLimit(), expectedLimit);
+  }
+
+  @Test
+  public void testDistinctCountOverride() {
+    String query = "SELECT DISTINCT_COUNT(col1) FROM myTable";
+    PinotQuery pinotQuery = CalciteSqlParser.compileToPinotQuery(query);
+    
BaseBrokerRequestHandler.handleSegmentPartitionedDistinctCountOverride(pinotQuery,
 ImmutableSet.of("col2", "col3"));
+    
assertEquals(pinotQuery.getSelectList().get(0).getFunctionCall().getOperator(), 
"distinctcount");
+    
BaseBrokerRequestHandler.handleSegmentPartitionedDistinctCountOverride(pinotQuery,
+        ImmutableSet.of("col1", "col2", "col3"));
+    
assertEquals(pinotQuery.getSelectList().get(0).getFunctionCall().getOperator(), 
"segmentpartitioneddistinctcount");
+  }
+
+  @Test
+  public void testApproximateFunctionOverride() {
+    {
+      String query = "SELECT DISTINCT_COUNT(col1) FROM myTable GROUP BY col2 
HAVING DISTINCT_COUNT(col1) > 10 "
+          + "ORDER BY DISTINCT_COUNT(col1) DESC";
+      PinotQuery pinotQuery = CalciteSqlParser.compileToPinotQuery(query);
+      BaseBrokerRequestHandler.handleApproximateFunctionOverride(pinotQuery);
+      
assertEquals(pinotQuery.getSelectList().get(0).getFunctionCall().getOperator(), 
"distinctcountsmarthll");
+      assertEquals(
+          
pinotQuery.getOrderByList().get(0).getFunctionCall().getOperands().get(0).getFunctionCall().getOperator(),
+          "distinctcountsmarthll");
+      assertEquals(
+          
pinotQuery.getHavingExpression().getFunctionCall().getOperands().get(0).getFunctionCall().getOperator(),
+          "distinctcountsmarthll");
+
+      query = "SELECT DISTINCT_COUNT_MV(col1) FROM myTable";
+      pinotQuery = CalciteSqlParser.compileToPinotQuery(query);
+      BaseBrokerRequestHandler.handleApproximateFunctionOverride(pinotQuery);
+      
assertEquals(pinotQuery.getSelectList().get(0).getFunctionCall().getOperator(), 
"distinctcountsmarthll");
+
+      query = "SELECT DISTINCT col1 FROM myTable";
+      pinotQuery = CalciteSqlParser.compileToPinotQuery(query);
+      BaseBrokerRequestHandler.handleApproximateFunctionOverride(pinotQuery);
+      
assertEquals(pinotQuery.getSelectList().get(0).getFunctionCall().getOperator(), 
"distinct");
+
+      query = "SELECT DISTINCT_COUNT_HLL(col1) FROM myTable";
+      pinotQuery = CalciteSqlParser.compileToPinotQuery(query);
+      BaseBrokerRequestHandler.handleApproximateFunctionOverride(pinotQuery);
+      
assertEquals(pinotQuery.getSelectList().get(0).getFunctionCall().getOperator(), 
"distinctcounthll");
+
+      query = "SELECT DISTINCT_COUNT_BITMAP(col1) FROM myTable";
+      pinotQuery = CalciteSqlParser.compileToPinotQuery(query);
+      BaseBrokerRequestHandler.handleApproximateFunctionOverride(pinotQuery);
+      
assertEquals(pinotQuery.getSelectList().get(0).getFunctionCall().getOperator(), 
"distinctcountbitmap");
+    }
+
+    for (String query : Arrays.asList("SELECT PERCENTILE(col1, 95) FROM 
myTable",
+        "SELECT PERCENTILE_MV(col1, 95) FROM myTable", "SELECT 
PERCENTILE95(col1) FROM myTable",
+        "SELECT PERCENTILE95MV(col1) FROM myTable")) {
+      PinotQuery pinotQuery = CalciteSqlParser.compileToPinotQuery(query);
+      BaseBrokerRequestHandler.handleApproximateFunctionOverride(pinotQuery);
+      
assertEquals(pinotQuery.getSelectList().get(0).getFunctionCall().getOperator(), 
"percentilesmarttdigest");
+      
assertEquals(pinotQuery.getSelectList().get(0).getFunctionCall().getOperands().get(1),
+          RequestUtils.getLiteralExpression(95));
+    }
+    {
+      String query = "SELECT PERCENTILE_TDIGEST(col1, 95) FROM myTable";
+      PinotQuery pinotQuery = CalciteSqlParser.compileToPinotQuery(query);
+      BaseBrokerRequestHandler.handleApproximateFunctionOverride(pinotQuery);
+      
assertEquals(pinotQuery.getSelectList().get(0).getFunctionCall().getOperator(), 
"percentiletdigest");
+
+      query = "SELECT PERCENTILE_EST(col1, 95) FROM myTable";
+      pinotQuery = CalciteSqlParser.compileToPinotQuery(query);
+      BaseBrokerRequestHandler.handleApproximateFunctionOverride(pinotQuery);
+      
assertEquals(pinotQuery.getSelectList().get(0).getFunctionCall().getOperator(), 
"percentileest");
+    }
+  }
+}
diff --git 
a/pinot-common/src/test/java/org/apache/pinot/common/utils/config/TableConfigSerDeTest.java
 
b/pinot-common/src/test/java/org/apache/pinot/common/utils/config/TableConfigSerDeTest.java
index 0d19eaafe8..1a2e39531d 100644
--- 
a/pinot-common/src/test/java/org/apache/pinot/common/utils/config/TableConfigSerDeTest.java
+++ 
b/pinot-common/src/test/java/org/apache/pinot/common/utils/config/TableConfigSerDeTest.java
@@ -189,7 +189,7 @@ public class TableConfigSerDeTest {
     }
     {
       // With query config
-      QueryConfig queryConfig = new QueryConfig(1000L, true, 
Collections.singletonMap("func(a)", "b"));
+      QueryConfig queryConfig = new QueryConfig(1000L, true, true, 
Collections.singletonMap("func(a)", "b"));
       TableConfig tableConfig = 
tableConfigBuilder.setQueryConfig(queryConfig).build();
 
       checkQueryConfig(tableConfig);
@@ -282,8 +282,8 @@ public class TableConfigSerDeTest {
       IngestionConfig ingestionConfig =
           new IngestionConfig(new BatchIngestionConfig(batchConfigMaps, 
"APPEND", "HOURLY"),
               new StreamIngestionConfig(streamConfigMaps), new 
FilterConfig("filterFunc(foo)"), transformConfigs,
-              new ComplexTypeConfig(fieldsToUnnest, ".",
-                      
ComplexTypeConfig.CollectionNotUnnestedToJson.NON_PRIMITIVE, prefixesToRename));
+              new ComplexTypeConfig(fieldsToUnnest, ".", 
ComplexTypeConfig.CollectionNotUnnestedToJson.NON_PRIMITIVE,
+                  prefixesToRename));
       TableConfig tableConfig = 
tableConfigBuilder.setIngestionConfig(ingestionConfig).build();
 
       checkIngestionConfig(tableConfig);
diff --git 
a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/OfflineClusterIntegrationTest.java
 
b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/OfflineClusterIntegrationTest.java
index b1b52d89c6..9bb4853283 100644
--- 
a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/OfflineClusterIntegrationTest.java
+++ 
b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/OfflineClusterIntegrationTest.java
@@ -281,7 +281,7 @@ public class OfflineClusterIntegrationTest extends 
BaseClusterIntegrationTestSet
       throws Exception {
     // Set timeout as 5ms so that query will timeout
     TableConfig tableConfig = getOfflineTableConfig();
-    tableConfig.setQueryConfig(new QueryConfig(5L));
+    tableConfig.setQueryConfig(new QueryConfig(5L, null, null, null));
     updateTableConfig(tableConfig);
 
     // Wait for at most 1 minute for broker to receive and process the table 
config refresh message
@@ -910,7 +910,7 @@ public class OfflineClusterIntegrationTest extends 
BaseClusterIntegrationTestSet
     String groovyQuery = "SELECT 
GROOVY('{\"returnType\":\"STRING\",\"isSingleValue\":true}', "
         + "'arg0 + arg1', FlightNum, Origin) FROM myTable";
     TableConfig tableConfig = getOfflineTableConfig();
-    tableConfig.setQueryConfig(new QueryConfig(true));
+    tableConfig.setQueryConfig(new QueryConfig(null, true, null, null));
     updateTableConfig(tableConfig);
 
     TestUtils.waitForCondition(aVoid -> {
@@ -1157,7 +1157,7 @@ public class OfflineClusterIntegrationTest extends 
BaseClusterIntegrationTestSet
 
     // Add expression override
     TableConfig tableConfig = getOfflineTableConfig();
-    tableConfig.setQueryConfig(new QueryConfig(
+    tableConfig.setQueryConfig(new QueryConfig(null, null, null,
         Collections.singletonMap("times(times(DaysSinceEpoch, 24), 3600)", 
"NewAddedDerivedSecondsSinceEpoch")));
     updateTableConfig(tableConfig);
 
diff --git 
a/pinot-spi/src/main/java/org/apache/pinot/spi/config/table/QueryConfig.java 
b/pinot-spi/src/main/java/org/apache/pinot/spi/config/table/QueryConfig.java
index 26d9d139d7..01c3993fe2 100644
--- a/pinot-spi/src/main/java/org/apache/pinot/spi/config/table/QueryConfig.java
+++ b/pinot-spi/src/main/java/org/apache/pinot/spi/config/table/QueryConfig.java
@@ -41,6 +41,9 @@ public class QueryConfig extends BaseJsonConfig {
   // Table config override for disable Groovy broker property
   private final Boolean _disableGroovy;
 
+  // Table config override for use approximate function property
+  private final Boolean _useApproximateFunction;
+
   // This map configures the expressions to override in the query. In certain 
scenarios, users cannot control the
   // queries sent to pinot (e.g. queries are auto-generated by some other 
tools), then they can use this map to override
   // the expressions within the query to the desired ones (e.g. override 
transform function to derived column).
@@ -49,25 +52,15 @@ public class QueryConfig extends BaseJsonConfig {
   @JsonCreator
   public QueryConfig(@JsonProperty("timeoutMs") @Nullable Long timeoutMs,
       @JsonProperty("disableGroovy") @Nullable Boolean disableGroovy,
+      @JsonProperty("useApproximateFunction") @Nullable Boolean 
useApproximateFunction,
       @JsonProperty("expressionOverrideMap") @Nullable Map<String, String> 
expressionOverrideMap) {
     Preconditions.checkArgument(timeoutMs == null || timeoutMs > 0, "Invalid 
'timeoutMs': %s", timeoutMs);
     _timeoutMs = timeoutMs;
     _disableGroovy = disableGroovy;
+    _useApproximateFunction = useApproximateFunction;
     _expressionOverrideMap = expressionOverrideMap;
   }
 
-  public QueryConfig(long timeoutMs) {
-    this(timeoutMs, null, null);
-  }
-
-  public QueryConfig(boolean disableGroovy) {
-    this(null, disableGroovy, null);
-  }
-
-  public QueryConfig(Map<String, String> expressionOverrideMap) {
-    this(null, null, expressionOverrideMap);
-  }
-
   @Nullable
   @JsonProperty("timeoutMs")
   public Long getTimeoutMs() {
@@ -80,6 +73,12 @@ public class QueryConfig extends BaseJsonConfig {
     return _disableGroovy;
   }
 
+  @Nullable
+  @JsonProperty("useApproximateFunction")
+  public Boolean getUseApproximateFunction() {
+    return _useApproximateFunction;
+  }
+
   @Nullable
   @JsonProperty("expressionOverrideMap")
   public Map<String, String> getExpressionOverrideMap() {
diff --git 
a/pinot-spi/src/main/java/org/apache/pinot/spi/utils/CommonConstants.java 
b/pinot-spi/src/main/java/org/apache/pinot/spi/utils/CommonConstants.java
index 15fdc312a5..f004e5644c 100644
--- a/pinot-spi/src/main/java/org/apache/pinot/spi/utils/CommonConstants.java
+++ b/pinot-spi/src/main/java/org/apache/pinot/spi/utils/CommonConstants.java
@@ -231,6 +231,11 @@ public class CommonConstants {
 
     public static final String DISABLE_GROOVY = 
"pinot.broker.disable.query.groovy";
 
+    // Rewrite potential expensive functions to their approximation 
counterparts
+    // - DISTINCT_COUNT -> DISTINCT_COUNT_SMART_HLL
+    // - PERCENTILE -> PERCENTILE_SMART_TDIGEST
+    public static final String USE_APPROXIMATE_FUNCTION = 
"pinot.broker.use.approximate.function";
+
     public static final String CONTROLLER_URL = "pinot.broker.controller.url";
 
     public static class Request {


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to