This is an automated email from the ASF dual-hosted git repository.

jlli pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
     new 5cc7231dc0 Backwards compatible theta sketch aggregation (#12288)
5cc7231dc0 is described below

commit 5cc7231dc0fc52d56b2a5435ba30489799013785
Author: David Cromberge <davecrombe...@gmail.com>
AuthorDate: Sun Jan 21 22:42:08 2024 +0000

    Backwards compatible theta sketch aggregation (#12288)
    
    * Backwards compatible theta sketch aggregation
    
    Servers running on versions before upgrading Pinot to the ThetaSketchAccumulator
    would return Sketches directly to the merge function.  This ensures that there
    is backwards compatibility between the two.
    
    * Add Theta Sketch distinct count queries to compatibility check queries
---
 .../config/queries/feature-test-1-sql.queries          |  6 +++---
 .../config/queries/feature-test-2-sql-realtime.queries |  4 ++--
 .../query-results/feature-test-1-rest-sql.results      |  6 +++---
 .../query-results/feature-test-2-sql-realtime.results  |  4 ++--
 .../DistinctCountThetaSketchAggregationFunction.java   | 18 ++++++++++++++++--
 5 files changed, 26 insertions(+), 12 deletions(-)

diff --git 
a/compatibility-verifier/sample-test-suite/config/queries/feature-test-1-sql.queries
 
b/compatibility-verifier/sample-test-suite/config/queries/feature-test-1-sql.queries
index 37a6120a5d..38b8484243 100644
--- 
a/compatibility-verifier/sample-test-suite/config/queries/feature-test-1-sql.queries
+++ 
b/compatibility-verifier/sample-test-suite/config/queries/feature-test-1-sql.queries
@@ -22,7 +22,7 @@ SELECT count(*) FROM FeatureTest1 WHERE generationNumber = 
__GENERATION_NUMBER__
 SELECT sum(intMetric1), sumMV(intDimMV1), min(intMetric1), minMV(intDimMV2), 
max(longDimSV1), maxMV(intDimMV1) FROM FeatureTest1 WHERE generationNumber = 
__GENERATION_NUMBER__
 SELECT count(longDimSV1), countMV(intDimMV1), avg(floatMetric1), 
avgMV(intDimMV2), minMaxRange(doubleMetric1), minMaxRangeMV(intDimMV2) FROM 
FeatureTest1 WHERE generationNumber = __GENERATION_NUMBER__
 SELECT percentile(longDimSV1, 80), percentileMV(intDimMV1, 90), 
percentileEst(longDimSV1, 80), percentileEstMV(intDimMV1, 90), 
percentileTDigest(longDimSV1, 80), percentileTDigestMV(intDimMV1, 90) FROM 
FeatureTest1 WHERE generationNumber = __GENERATION_NUMBER__
-SELECT distinctCount(longDimSV1), distinctCountMV(intDimMV1), 
distinctCountHLL(longDimSV1), distinctCountHLLMV(intDimMV1) FROM FeatureTest1 
WHERE generationNumber = __GENERATION_NUMBER__
+SELECT distinctCount(longDimSV1), distinctCountMV(intDimMV1), 
distinctCountHLL(longDimSV1), distinctCountHLLMV(intDimMV1), 
distinctCountThetaSketch(longDimSV1) FROM FeatureTest1 WHERE generationNumber = 
__GENERATION_NUMBER__
 
 # Selection
 SELECT longDimSV2, stringDimSV1, textDim1, bytesDimSV1 FROM FeatureTest1 WHERE 
generationNumber = __GENERATION_NUMBER__ ORDER BY longDimSV2 LIMIT 9
@@ -46,14 +46,14 @@ SELECT longDimSV1, intDimMV1, count(*) FROM FeatureTest1 
WHERE generationNumber
 SELECT longDimSV1, intDimMV1, sum(intMetric1), sumMV(intDimMV1), 
min(intMetric1), minMV(intDimMV2), max(longDimSV1), maxMV(intDimMV1) FROM 
FeatureTest1 WHERE generationNumber = __GENERATION_NUMBER__ GROUP BY 
longDimSV1, intDimMV1 ORDER BY longDimSV1 LIMIT 5
 SELECT longDimSV1, intDimMV1, count(longDimSV1), countMV(intDimMV1), 
avg(floatMetric1), avgMV(intDimMV2), minMaxRange(doubleMetric1), 
minMaxRangeMV(intDimMV2) FROM FeatureTest1 WHERE generationNumber = 
__GENERATION_NUMBER__ GROUP BY longDimSV1, intDimMV1 ORDER BY longDimSV1 LIMIT 5
 SELECT longDimSV1, intDimMV1, percentile(longDimSV1, 80), 
percentileMV(intDimMV1, 90), percentileEst(longDimSV1, 80), 
percentileEstMV(intDimMV1, 90), percentileTDigest(longDimSV1, 80), 
percentileTDigestMV(intDimMV1, 90) FROM FeatureTest1 WHERE generationNumber = 
__GENERATION_NUMBER__ GROUP BY longDimSV1, intDimMV1 ORDER BY longDimSV1 LIMIT 5
-SELECT longDimSV1, intDimMV1, distinctCount(longDimSV1), 
distinctCountMV(intDimMV1), distinctCountHLL(longDimSV1), 
distinctCountHLLMV(intDimMV1) FROM FeatureTest1 WHERE generationNumber = 
__GENERATION_NUMBER__ GROUP BY longDimSV1, intDimMV1 ORDER BY longDimSV1 LIMIT 5
+SELECT longDimSV1, intDimMV1, distinctCount(longDimSV1), 
distinctCountMV(intDimMV1), distinctCountHLL(longDimSV1), 
distinctCountHLLMV(intDimMV1), distinctCountThetaSketch(longDimSV1) FROM 
FeatureTest1 WHERE generationNumber = __GENERATION_NUMBER__ GROUP BY 
longDimSV1, intDimMV1 ORDER BY longDimSV1 LIMIT 5
 
 # Selection & Filtering & Grouping on Aggregation
 SELECT longDimSV1, intDimMV1, count(*) FROM FeatureTest1 WHERE 
generationNumber = __GENERATION_NUMBER__ AND (stringDimSV1 != 's1-6' AND 
longDimSV1 BETWEEN 10 AND 1000 OR (intDimMV1 < 42 AND stringDimMV2 IN 
('m2-0-0', 'm2-2-0') AND intDimMV2 NOT IN (6,72))) GROUP BY longDimSV1, 
intDimMV1 ORDER BY longDimSV1, intDimMV1 LIMIT 5
 SELECT longDimSV1, intDimMV1, sum(intMetric1), sumMV(intDimMV1), 
min(intMetric1), minMV(intDimMV2), max(longDimSV1), maxMV(intDimMV1) FROM 
FeatureTest1 WHERE generationNumber = __GENERATION_NUMBER__ AND (stringDimSV1 
!= 's1-6' AND longDimSV1 BETWEEN 10 AND 1000 OR (intDimMV1 < 42 AND 
stringDimMV2 IN ('m2-0-0', 'm2-2-0') AND intDimMV2 NOT IN (6,72))) GROUP BY 
longDimSV1, intDimMV1 ORDER BY longDimSV1, intDimMV1 LIMIT 5
 SELECT longDimSV1, intDimMV1, count(longDimSV1), countMV(intDimMV1), 
avg(floatMetric1), avgMV(intDimMV2), minMaxRange(doubleMetric1), 
minMaxRangeMV(intDimMV2) FROM FeatureTest1 WHERE generationNumber = 
__GENERATION_NUMBER__ AND (stringDimSV1 != 's1-6' AND longDimSV1 BETWEEN 10 AND 
1000 OR (intDimMV1 < 42 AND stringDimMV2 IN ('m2-0-0', 'm2-2-0') AND intDimMV2 
NOT IN (6,72))) GROUP BY longDimSV1, intDimMV1 ORDER BY longDimSV1, intDimMV1 
LIMIT 5
 SELECT longDimSV1, intDimMV1, percentile(longDimSV1, 80), 
percentileMV(intDimMV1, 90), percentileEst(longDimSV1, 80), 
percentileEstMV(intDimMV1, 90), percentileTDigest(longDimSV1, 80), 
percentileTDigestMV(intDimMV1, 90) FROM FeatureTest1 WHERE generationNumber = 
__GENERATION_NUMBER__ AND (stringDimSV1 != 's1-6' AND longDimSV1 BETWEEN 10 AND 
1000 OR (intDimMV1 < 42 AND stringDimMV2 IN ('m2-0-0', 'm2-2-0') AND intDimMV2 
NOT IN (6,72))) GROUP BY longDimSV1, intDimMV1 ORDER BY longDimSV1, in [...]
-SELECT longDimSV1, intDimMV1, distinctCount(longDimSV1), 
distinctCountMV(intDimMV1), distinctCountHLL(longDimSV1), 
distinctCountHLLMV(intDimMV1) FROM FeatureTest1 WHERE generationNumber = 
__GENERATION_NUMBER__ AND (stringDimSV1 != 's1-6' AND longDimSV1 BETWEEN 10 AND 
1000 OR (intDimMV1 < 42 AND stringDimMV2 IN ('m2-0-0', 'm2-2-0') AND intDimMV2 
NOT IN (6,72))) GROUP BY longDimSV1, intDimMV1 ORDER BY longDimSV1, intDimMV1 
LIMIT 5
+SELECT longDimSV1, intDimMV1, distinctCount(longDimSV1), 
distinctCountMV(intDimMV1), distinctCountHLL(longDimSV1), 
distinctCountHLLMV(intDimMV1), distinctCountThetaSketch(longDimSV1) FROM 
FeatureTest1 WHERE generationNumber = __GENERATION_NUMBER__ AND (stringDimSV1 
!= 's1-6' AND longDimSV1 BETWEEN 10 AND 1000 OR (intDimMV1 < 42 AND 
stringDimMV2 IN ('m2-0-0', 'm2-2-0') AND intDimMV2 NOT IN (6,72))) GROUP BY 
longDimSV1, intDimMV1 ORDER BY longDimSV1, intDimMV1 LIMIT 5
 
 # Transformation Functions
 SELECT add(longDimSV1, sub(longDimSV2, 3)), mod(intMetric1, 10), 
div(doubleMetric1, mult(floatMetric1, 5)) FROM FeatureTest1 WHERE 
generationNumber = __GENERATION_NUMBER__ ORDER BY add(longDimSV1, 
sub(longDimSV2, 3)) DESC, mod(intMetric1, 10)
diff --git 
a/compatibility-verifier/sample-test-suite/config/queries/feature-test-2-sql-realtime.queries
 
b/compatibility-verifier/sample-test-suite/config/queries/feature-test-2-sql-realtime.queries
index da9c43d7ad..3627205534 100644
--- 
a/compatibility-verifier/sample-test-suite/config/queries/feature-test-2-sql-realtime.queries
+++ 
b/compatibility-verifier/sample-test-suite/config/queries/feature-test-2-sql-realtime.queries
@@ -32,7 +32,7 @@ SELECT sum(intMetric1), sumMV(intDimMV1), min(intMetric1), 
minMV(intDimMV2), max
 SELECT count(longDimSV1), countMV(intDimMV1), avg(floatMetric1), 
avgMV(intDimMV2), minMaxRange(doubleMetric1), minMaxRangeMV(intDimMV2) FROM 
FeatureTest2 WHERE generationNumber = __GENERATION_NUMBER__
 SELECT percentile(longDimSV1, 80), percentileMV(intDimMV1, 90), 
percentileEst(longDimSV1, 80), percentileEstMV(intDimMV1, 90), 
percentileTDigest(longDimSV1, 80), percentileTDigestMV(intDimMV1, 90) FROM 
FeatureTest2 WHERE generationNumber = __GENERATION_NUMBER__
 SELECT percentile(longDimSV1, 80.01), percentileMV(intDimMV1, 99.99), 
percentileEst(longDimSV1, 80.01), percentileEstMV(intDimMV1, 99.99), 
percentileTDigest(longDimSV1, 80.01), percentileTDigestMV(intDimMV1, 99.99) 
FROM FeatureTest2 WHERE generationNumber = __GENERATION_NUMBER__
-SELECT distinctCount(longDimSV1), distinctCountMV(intDimMV1), 
distinctCountHLL(longDimSV1), distinctCountHLLMV(intDimMV1) FROM FeatureTest2 
WHERE generationNumber = __GENERATION_NUMBER__
+SELECT distinctCount(longDimSV1), distinctCountMV(intDimMV1), 
distinctCountHLL(longDimSV1), distinctCountHLLMV(intDimMV1), 
distinctCountThetaSketch(longDimSV1) FROM FeatureTest2 WHERE generationNumber = 
__GENERATION_NUMBER__
 
 # Selection & Filtering & Grouping on Aggregation
 SELECT longDimSV1, intDimMV1, count(*) FROM FeatureTest2 WHERE 
generationNumber = __GENERATION_NUMBER__ AND (stringDimSV1 != 's1-6' AND 
longDimSV1 BETWEEN 10 AND 1000 OR (intDimMV1 < 42 AND stringDimMV2 IN 
('m2-0-0', 'm2-2-0') AND intDimMV2 NOT IN (6,72))) GROUP BY longDimSV1, 
intDimMV1 ORDER BY longDimSV1, intDimMV1 LIMIT 20
@@ -40,7 +40,7 @@ SELECT longDimSV1, intDimMV1, sum(intMetric1), 
sumMV(intDimMV1), min(intMetric1)
 SELECT longDimSV1, intDimMV1, count(longDimSV1), countMV(intDimMV1), 
avg(floatMetric1), avgMV(intDimMV2), minMaxRange(doubleMetric1), 
minMaxRangeMV(intDimMV2) FROM FeatureTest2 WHERE generationNumber = 
__GENERATION_NUMBER__ AND (stringDimSV1 != 's1-6' AND longDimSV1 BETWEEN 10 AND 
1000 OR (intDimMV1 < 42 AND stringDimMV2 IN ('m2-0-0', 'm2-2-0') AND intDimMV2 
NOT IN (6,72))) GROUP BY longDimSV1, intDimMV1 ORDER BY longDimSV1, intDimMV1 
LIMIT 20
 SELECT longDimSV1, intDimMV1, percentile(longDimSV1, 80), 
percentileMV(intDimMV1, 90), percentileEst(longDimSV1, 80), 
percentileEstMV(intDimMV1, 90), percentileTDigest(longDimSV1, 80), 
percentileTDigestMV(intDimMV1, 90) FROM FeatureTest2 WHERE generationNumber = 
__GENERATION_NUMBER__ AND (stringDimSV1 != 's1-6' AND longDimSV1 BETWEEN 10 AND 
1000 OR (intDimMV1 < 42 AND stringDimMV2 IN ('m2-0-0', 'm2-2-0') AND intDimMV2 
NOT IN (6,72))) GROUP BY longDimSV1, intDimMV1 ORDER BY longDimSV1, in [...]
 SELECT longDimSV1, intDimMV1, percentile(longDimSV1, 80.01), 
percentileMV(intDimMV1, 99.99), percentileEst(longDimSV1, 80.01), 
percentileEstMV(intDimMV1, 99.99), percentileTDigest(longDimSV1, 80.01), 
percentileTDigestMV(intDimMV1, 99.99) FROM FeatureTest2 WHERE generationNumber 
= __GENERATION_NUMBER__ AND (stringDimSV1 != 's1-6' AND longDimSV1 BETWEEN 10 
AND 1000 OR (intDimMV1 < 42 AND stringDimMV2 IN ('m2-0-0', 'm2-2-0') AND 
intDimMV2 NOT IN (6,72))) GROUP BY longDimSV1, intDimMV1 ORDER [...]
-SELECT longDimSV1, intDimMV1, distinctCount(longDimSV1), 
distinctCountMV(intDimMV1), distinctCountHLL(longDimSV1), 
distinctCountHLLMV(intDimMV1) FROM FeatureTest2 WHERE generationNumber = 
__GENERATION_NUMBER__ AND (stringDimSV1 != 's1-6' AND longDimSV1 BETWEEN 10 AND 
1000 OR (intDimMV1 < 42 AND stringDimMV2 IN ('m2-0-0', 'm2-2-0') AND intDimMV2 
NOT IN (6,72))) GROUP BY longDimSV1, intDimMV1 ORDER BY longDimSV1, intDimMV1 
LIMIT 20
+SELECT longDimSV1, intDimMV1, distinctCount(longDimSV1), 
distinctCountMV(intDimMV1), distinctCountHLL(longDimSV1), 
distinctCountHLLMV(intDimMV1), distinctCountThetaSketch(longDimSV1) FROM 
FeatureTest2 WHERE generationNumber = __GENERATION_NUMBER__ AND (stringDimSV1 
!= 's1-6' AND longDimSV1 BETWEEN 10 AND 1000 OR (intDimMV1 < 42 AND 
stringDimMV2 IN ('m2-0-0', 'm2-2-0') AND intDimMV2 NOT IN (6,72))) GROUP BY 
longDimSV1, intDimMV1 ORDER BY longDimSV1, intDimMV1 LIMIT 20
 
 # Transformation Functions
 SELECT DISTINCT add(longDimSV1, sub(longDimSV2, 3)), mod(intMetric1, 10), 
div(doubleMetric1, mult(floatMetric1, 5)) FROM FeatureTest2 WHERE 
generationNumber = __GENERATION_NUMBER__ ORDER BY add(longDimSV1, 
sub(longDimSV2, 3)) DESC, mod(intMetric1, 10), div(doubleMetric1, 
mult(floatMetric1, 5))
diff --git 
a/compatibility-verifier/sample-test-suite/config/query-results/feature-test-1-rest-sql.results
 
b/compatibility-verifier/sample-test-suite/config/query-results/feature-test-1-rest-sql.results
index 83ae247116..aad84fc46e 100644
--- 
a/compatibility-verifier/sample-test-suite/config/query-results/feature-test-1-rest-sql.results
+++ 
b/compatibility-verifier/sample-test-suite/config/query-results/feature-test-1-rest-sql.results
@@ -22,7 +22,7 @@
 
{"resultTable":{"dataSchema":{"columnDataTypes":["DOUBLE","DOUBLE","DOUBLE","DOUBLE","DOUBLE","DOUBLE"],"columnNames":["sum(intMetric1)","summv(intDimMV1)","min(intMetric1)","minmv(intDimMV2)","max(longDimSV1)","maxmv(intDimMV1)"]},"rows":[[4.294967536E9,-2.147479976E9,0.0,6.0,7611.0,462.0]]},"exceptions":[],"numServersQueried":1,"numServersResponded":1,"numSegmentsQueried":1,"numSegmentsProcessed":1,"numSegmentsMatched":1,"numDocsScanned":10,"numEntriesScannedPostFilter":40,"numGroupsLi
 [...]
 
{"resultTable":{"dataSchema":{"columnDataTypes":["LONG","LONG","DOUBLE","DOUBLE","DOUBLE","DOUBLE"],"columnNames":["count(*)","countmv(intDimMV1)","avg(floatMetric1)","avgmv(intDimMV2)","minmaxrange(doubleMetric1)","minmaxrangemv(intDimMV2)"]},"rows":[[10,19,114.09000263214111,1516.9,250.00000000000003,6656.0]]},"exceptions":[],"numServersQueried":1,"numServersResponded":1,"numSegmentsQueried":1,"numSegmentsProcessed":1,"numSegmentsMatched":1,"numDocsScanned":10,"numEntriesScannedPostFil
 [...]
 
{"resultTable":{"dataSchema":{"columnDataTypes":["DOUBLE","DOUBLE","LONG","LONG","DOUBLE","DOUBLE"],"columnNames":["percentile(longDimSV1,
 80.0)","percentilemv(intDimMV1, 90.0)","percentileest(longDimSV1, 
80.0)","percentileestmv(intDimMV1, 90.0)","percentiletdigest(longDimSV1, 
80.0)","percentiletdigestmv(intDimMV1, 
90.0)"]},"rows":[[7611.0,462.0,7611,462,7611.0,462.0]]},"exceptions":[],"numServersQueried":1,"numServersResponded":1,"numSegmentsQueried":1,"numSegmentsProcessed":1,"numSegme
 [...]
-{"resultTable":{"dataSchema":{"columnDataTypes":["INT","INT","LONG","LONG"],"columnNames":["distinctcount(longDimSV1)","distinctcountmv(intDimMV1)","distinctcounthll(longDimSV1)","distinctcounthllmv(intDimMV1)"]},"rows":[[6,8,6,8]]},"exceptions":[],"numServersQueried":1,"numServersResponded":1,"numSegmentsQueried":1,"numSegmentsProcessed":1,"numSegmentsMatched":1,"numDocsScanned":10,"numEntriesScannedPostFilter":20,"numGroupsLimitReached":false,"totalDocs":10,"timeUsedMs":6,"segmentStati
 [...]
+{"resultTable":{"dataSchema":{"columnDataTypes":["INT","INT","LONG","LONG","LONG"],"columnNames":["distinctcount(longDimSV1)","distinctcountmv(intDimMV1)","distinctcounthll(longDimSV1)","distinctcounthllmv(intDimMV1)","distinctcountthetasketch(longDimSV1)"]},"rows":[[6,8,6,8,6]]},"exceptions":[],"numServersQueried":1,"numServersResponded":1,"numSegmentsQueried":1,"numSegmentsProcessed":1,"numSegmentsMatched":1,"numDocsScanned":10,"numEntriesScannedPostFilter":20,"numGroupsLimitReached":f
 [...]
 
 # Selection
 
{"resultTable":{"dataSchema":{"columnDataTypes":["LONG","STRING","STRING","BYTES"],"columnNames":["longDimSV2","stringDimSV1","textDim1","bytesDimSV1"]},"rows":[[2,"s1-0","Java
 C++ Python","4877625602"],[2,"s1-0","Java C++ 
Python","01a0bc"],[21,"s1-2","Java C++ golang","13225573e3f5"],[21,"s1-2","Java 
C++ golang","deadbeef"],[22,"s1-4","Java C++ 
golang","deed0507"],[32,"s1-5","golang shell bash",""],[6777,"s1-7","golang 
Java","d54d0507"],[7621,"s1-6","C++ golang python","deed0507"],[7621 [...]
@@ -42,14 +42,14 @@
 
{"resultTable":{"dataSchema":{"columnDataTypes":["LONG","INT","DOUBLE","DOUBLE","DOUBLE","DOUBLE","DOUBLE","DOUBLE"],"columnNames":["longDimSV1","intDimMV1","sum(intMetric1)","summv(intDimMV1)","min(intMetric1)","minmv(intDimMV2)","max(longDimSV1)","maxmv(intDimMV1)"]},"rows":[[-9223372036854775808,-2147483648,0.0,-2.147483648E9,0.0,22.0,-9.223372036854776E18,-2.147483648E9],[1,3,20.0,14.0,10.0,6.0,1.0,4.0],[1,4,20.0,14.0,10.0,6.0,1.0,4.0],[11,42,20.0,148.0,10.0,62.0,11.0,42.0],[11,32,20
 [...]
 
{"resultTable":{"dataSchema":{"columnDataTypes":["LONG","INT","LONG","LONG","DOUBLE","DOUBLE","DOUBLE","DOUBLE"],"columnNames":["longDimSV1","intDimMV1","count(*)","countmv(intDimMV1)","avg(floatMetric1)","avgmv(intDimMV2)","minmaxrange(doubleMetric1)","minmaxrangemv(intDimMV2)"]},"rows":[[-9223372036854775808,-2147483648,1,1,0.0,57.0,0.0,70.0],[1,3,2,4,12.100000381469727,6.5,0.0,1.0],[1,4,2,4,12.100000381469727,6.5,0.0,1.0],[11,42,2,4,22.100000381469727,67.0,0.0,10.0],[11,32,2,4,22.1000
 [...]
 
{"resultTable":{"dataSchema":{"columnDataTypes":["LONG","INT","DOUBLE","DOUBLE","LONG","LONG","DOUBLE","DOUBLE"],"columnNames":["longDimSV1","intDimMV1","percentile(longDimSV1,
 80.0)","percentilemv(intDimMV1, 90.0)","percentileest(longDimSV1, 
80.0)","percentileestmv(intDimMV1, 90.0)","percentiletdigest(longDimSV1, 
80.0)","percentiletdigestmv(intDimMV1, 
90.0)"]},"rows":[[-9223372036854775808,-2147483648,-9.223372036854776E18,-2.147483648E9,-9223372036854775808,-2147483648,-9.2233720368547
 [...]
-{"resultTable":{"dataSchema":{"columnDataTypes":["LONG","INT","INT","INT","LONG","LONG"],"columnNames":["longDimSV1","intDimMV1","distinctcount(longDimSV1)","distinctcountmv(intDimMV1)","distinctcounthll(longDimSV1)","distinctcounthllmv(intDimMV1)"]},"rows":[[-9223372036854775808,-2147483648,1,1,1,1],[1,3,1,2,1,2],[1,4,1,2,1,2],[11,42,1,2,1,2],[11,32,1,2,1,2]]},"exceptions":[],"numServersQueried":1,"numServersResponded":1,"numSegmentsQueried":1,"numSegmentsProcessed":1,"numSegmentsMatche
 [...]
+{"resultTable":{"dataSchema":{"columnDataTypes":["LONG","INT","INT","INT","LONG","LONG","LONG"],"columnNames":["longDimSV1","intDimMV1","distinctcount(longDimSV1)","distinctcountmv(intDimMV1)","distinctcounthll(longDimSV1)","distinctcounthllmv(intDimMV1)","distinctcountthetasketch(longDimSV1)"]},"rows":[[-9223372036854775808,-2147483648,1,1,1,1,1],[1,3,1,2,1,2,1],[1,4,1,2,1,2,1],[11,42,1,2,1,2,1],[11,32,1,2,1,2,1]]},"exceptions":[],"numServersQueried":1,"numServersResponded":1,"numSegmen
 [...]
 
 # Selection & Filtering & Grouping on Aggregation
 
{"resultTable":{"dataSchema":{"columnDataTypes":["LONG","INT","LONG"],"columnNames":["longDimSV1","intDimMV1","count(*)"]},"rows":[[-9223372036854775808,-2147483648,1],[11,32,2],[11,42,2],[41,42,1],[41,52,1]]},"exceptions":[],"numServersQueried":1,"numServersResponded":1,"numSegmentsQueried":1,"numSegmentsProcessed":1,"numSegmentsMatched":1,"numDocsScanned":4,"numEntriesScannedPostFilter":8,"numGroupsLimitReached":false,"totalDocs":10,"timeUsedMs":8,"segmentStatistics":[],"traceInfo":{},
 [...]
 
{"resultTable":{"dataSchema":{"columnDataTypes":["LONG","INT","DOUBLE","DOUBLE","DOUBLE","DOUBLE","DOUBLE","DOUBLE"],"columnNames":["longDimSV1","intDimMV1","sum(intMetric1)","summv(intDimMV1)","min(intMetric1)","minmv(intDimMV2)","max(longDimSV1)","maxmv(intDimMV1)"]},"rows":[[-9223372036854775808,-2147483648,0,-2147483648,0,22,-9223372036854776000,-2147483648],[11,32,20,148,10,62,11,42],[11,42,20,148,10,62,11,42],[41,42,14,94,14,72,41,52],[41,52,14,94,14,72,41,52]]},"exceptions":[],"nu
 [...]
 
{"resultTable":{"dataSchema":{"columnDataTypes":["LONG","INT","LONG","LONG","DOUBLE","DOUBLE","DOUBLE","DOUBLE"],"columnNames":["longDimSV1","intDimMV1","count(*)","countmv(intDimMV1)","avg(floatMetric1)","avgmv(intDimMV2)","minmaxrange(doubleMetric1)","minmaxrangemv(intDimMV2)"]},"rows":[[-9223372036854775808,-2147483648,1,1,0,57,0,70],[11,32,2,4,22.100000381469727,67,0,10],[11,42,2,4,22.100000381469727,67,0,10],[41,42,1,2,24.100000381469727,77,0,10],[41,52,1,2,24.100000381469727,77,0,1
 [...]
 
{"resultTable":{"dataSchema":{"columnDataTypes":["LONG","INT","DOUBLE","DOUBLE","LONG","LONG","DOUBLE","DOUBLE"],"columnNames":["longDimSV1","intDimMV1","percentile(longDimSV1,
 80.0)","percentilemv(intDimMV1, 90.0)","percentileest(longDimSV1, 
80.0)","percentileestmv(intDimMV1, 90.0)","percentiletdigest(longDimSV1, 
80.0)","percentiletdigestmv(intDimMV1, 
90.0)"]},"rows":[[-9223372036854775808,-2147483648,-9223372036854775808,-2147483648,-9223372036854775808,-2147483648,-9223372036854776000
 [...]
-{"resultTable":{"dataSchema":{"columnDataTypes":["LONG","INT","INT","INT","LONG","LONG"],"columnNames":["longDimSV1","intDimMV1","distinctcount(longDimSV1)","distinctcountmv(intDimMV1)","distinctcounthll(longDimSV1)","distinctcounthllmv(intDimMV1)"]},"rows":[[-9223372036854775808,-2147483648,1,1,1,1],[11,32,1,2,1,2],[11,42,1,2,1,2],[41,42,1,2,1,2],[41,52,1,2,1,2]]},"exceptions":[],"numServersQueried":1,"numServersResponded":1,"numSegmentsQueried":1,"numSegmentsProcessed":1,"numSegmentsMa
 [...]
+{"resultTable":{"dataSchema":{"columnDataTypes":["LONG","INT","INT","INT","LONG","LONG","LONG"],"columnNames":["longDimSV1","intDimMV1","distinctcount(longDimSV1)","distinctcountmv(intDimMV1)","distinctcounthll(longDimSV1)","distinctcounthllmv(intDimMV1)","distinctcountthetasketch(longDimSV1)"]},"rows":[[-9223372036854775808,-2147483648,1,1,1,1,1],[11,32,1,2,1,2,1],[11,42,1,2,1,2,1],[41,42,1,2,1,2,1],[41,52,1,2,1,2,1]]},"exceptions":[],"numServersQueried":1,"numServersResponded":1,"numSe
 [...]
 
 # Transformation Functions
 
{"resultTable":{"dataSchema":{"columnDataTypes":["DOUBLE","DOUBLE","DOUBLE"],"columnNames":["add(longDimSV1,sub(longDimSV2,'3'))","mod(intMetric1,'10')","div(doubleMetric1,mult(floatMetric1,'5'))"]},"rows":[[15229.0,1.0,0.20076306285631254],[15229.0,7.0,0.20076306285631254],[15229.0,7.0,0.20076306285631254],[13540.0,7.0,0.20076306285631254],[60.0,4.0,0.1999999968342762],[29.0,0.0,0.20904977014723267],[29.0,0.0,0.20904977014723267],[0.0,0.0,0.21652891879345226],[0.0,0.0,0.2165289187934522
 [...]
diff --git 
a/compatibility-verifier/sample-test-suite/config/query-results/feature-test-2-sql-realtime.results
 
b/compatibility-verifier/sample-test-suite/config/query-results/feature-test-2-sql-realtime.results
index 47f7a2805c..849020c104 100644
--- 
a/compatibility-verifier/sample-test-suite/config/query-results/feature-test-2-sql-realtime.results
+++ 
b/compatibility-verifier/sample-test-suite/config/query-results/feature-test-2-sql-realtime.results
@@ -29,7 +29,7 @@
 
{"resultTable":{"dataSchema":{"columnNames":["count(*)","countmv(intDimMV1)","avg(floatMetric1)","avgmv(intDimMV2)","minmaxrange(doubleMetric1)","minmaxrangemv(intDimMV2)"],"columnDataTypes":["LONG","LONG","DOUBLE","DOUBLE","DOUBLE","DOUBLE"]},"rows":[[66,125,105.11969939145175,1383.2575757575758,250.00000000000003,6656.0]]},"exceptions":[],"numServersQueried":1,"numServersResponded":1,"numSegmentsQueried":1,"numSegmentsProcessed":1,"numSegmentsMatched":1,"numDocsScanned":66,"numEntriesS
 [...]
 {"resultTable":{"dataSchema":{"columnNames":["percentile(longDimSV1, 
80.0)","percentilemv(intDimMV1, 90.0)","percentileest(longDimSV1, 
80.0)","percentileestmv(intDimMV1, 90.0)","percentiletdigest(longDimSV1, 
80.0)","percentiletdigestmv(intDimMV1, 
90.0)"],"columnDataTypes":["DOUBLE","DOUBLE","LONG","LONG","DOUBLE","DOUBLE"]},"rows":[[7611.0,462.0,7611,462,7611.0,462.0]]},"exceptions":[],"numServersQueried":1,"numServersResponded":1,"numSegmentsQueried":1,"numSegmentsProcessed":1,"numSegme
 [...]
 {"resultTable":{"dataSchema":{"columnNames":["percentile(longDimSV1, 
80.01)","percentilemv(intDimMV1, 99.99)","percentileest(longDimSV1, 
80.01)","percentileestmv(intDimMV1, 99.99)","percentiletdigest(longDimSV1, 
80.01)","percentiletdigestmv(intDimMV1, 
99.99)"],"columnDataTypes":["DOUBLE","DOUBLE","LONG","LONG","DOUBLE","DOUBLE"]},"rows":[[7611.0,462.0,7611,462,7611.0,462.0]]},"exceptions":[],"numServersQueried":1,"numServersResponded":1,"numSegmentsQueried":1,"numSegmentsProcessed":1,"nu
 [...]
-{"resultTable":{"dataSchema":{"columnNames":["distinctcount(longDimSV1)","distinctcountmv(intDimMV1)","distinctcounthll(longDimSV1)","distinctcounthllmv(intDimMV1)"],"columnDataTypes":["INT","INT","LONG","LONG"]},"rows":[[6,8,6,8]]},"exceptions":[],"numServersQueried":1,"numServersResponded":1,"numSegmentsQueried":1,"numSegmentsProcessed":1,"numSegmentsMatched":1,"numDocsScanned":66,"numEntriesScannedPostFilter":132,"numGroupsLimitReached":false,"totalDocs":66,"timeUsedMs":5,"offlineThre
 [...]
+{"resultTable":{"dataSchema":{"columnNames":["distinctcount(longDimSV1)","distinctcountmv(intDimMV1)","distinctcounthll(longDimSV1)","distinctcounthllmv(intDimMV1)","distinctcountthetasketch(longDimSV1)"],"columnDataTypes":["INT","INT","LONG","LONG","LONG"]},"rows":[[6,8,6,8,6]]},"exceptions":[],"numServersQueried":1,"numServersResponded":1,"numSegmentsQueried":1,"numSegmentsProcessed":1,"numSegmentsMatched":1,"numDocsScanned":66,"numEntriesScannedPostFilter":132,"numGroupsLimitReached":
 [...]
 
 # Selection & Filtering & Grouping on Aggregation
 
{"resultTable":{"dataSchema":{"columnNames":["longDimSV1","intDimMV1","count(*)"],"columnDataTypes":["LONG","INT","LONG"]},"rows":[[-9223372036854775808,-2147483648,7],[11,32,14],[11,42,14],[41,42,7],[41,52,7]]},"exceptions":[],"numServersQueried":1,"numServersResponded":1,"numSegmentsQueried":1,"numSegmentsProcessed":1,"numSegmentsMatched":1,"numDocsScanned":28,"numEntriesScannedPostFilter":56,"numGroupsLimitReached":false,"totalDocs":66,"timeUsedMs":6,"offlineThreadCpuTimeNs":0,"realti
 [...]
@@ -37,7 +37,7 @@
 
{"resultTable":{"dataSchema":{"columnNames":["longDimSV1","intDimMV1","count(*)","countmv(intDimMV1)","avg(floatMetric1)","avgmv(intDimMV2)","minmaxrange(doubleMetric1)","minmaxrangemv(intDimMV2)"],"columnDataTypes":["LONG","INT","LONG","LONG","DOUBLE","DOUBLE","DOUBLE","DOUBLE"]},"rows":[[-9223372036854775808,-2147483648,7,7,0.0,57.0,0.0,70.0],[11,32,14,28,22.100000381469727,67.0,0.0,10.0],[11,42,14,28,22.100000381469727,67.0,0.0,10.0],[41,42,7,14,24.100000381469727,77.0,0.0,10.0],[41,5
 [...]
 
{"resultTable":{"dataSchema":{"columnNames":["longDimSV1","intDimMV1","percentile(longDimSV1,
 80.0)","percentilemv(intDimMV1, 90.0)","percentileest(longDimSV1, 
80.0)","percentileestmv(intDimMV1, 90.0)","percentiletdigest(longDimSV1, 
80.0)","percentiletdigestmv(intDimMV1, 
90.0)"],"columnDataTypes":["LONG","INT","DOUBLE","DOUBLE","LONG","LONG","DOUBLE","DOUBLE"]},"rows":[[-9223372036854775808,-2147483648,-9.223372036854776E18,-2.147483648E9,-9223372036854775808,-2147483648,-9.2233720368547
 [...]
 
{"resultTable":{"dataSchema":{"columnNames":["longDimSV1","intDimMV1","percentile(longDimSV1,
 80.01)","percentilemv(intDimMV1, 99.99)","percentileest(longDimSV1, 
80.01)","percentileestmv(intDimMV1, 99.99)","percentiletdigest(longDimSV1, 
80.01)","percentiletdigestmv(intDimMV1, 
99.99)"],"columnDataTypes":["LONG","INT","DOUBLE","DOUBLE","LONG","LONG","DOUBLE","DOUBLE"]},"rows":[[-9223372036854775808,-2147483648,-9.223372036854776E18,-2.147483648E9,-9223372036854775808,-2147483648,-9.2233720
 [...]
-{"resultTable":{"dataSchema":{"columnNames":["longDimSV1","intDimMV1","distinctcount(longDimSV1)","distinctcountmv(intDimMV1)","distinctcounthll(longDimSV1)","distinctcounthllmv(intDimMV1)"],"columnDataTypes":["LONG","INT","INT","INT","LONG","LONG"]},"rows":[[-9223372036854775808,-2147483648,1,1,1,1],[11,32,1,2,1,2],[11,42,1,2,1,2],[41,42,1,2,1,2],[41,52,1,2,1,2]]},"exceptions":[],"numServersQueried":1,"numServersResponded":1,"numSegmentsQueried":1,"numSegmentsProcessed":1,"numSegmentsMa
 [...]
+{"resultTable":{"dataSchema":{"columnNames":["longDimSV1","intDimMV1","distinctcount(longDimSV1)","distinctcountmv(intDimMV1)","distinctcounthll(longDimSV1)","distinctcounthllmv(intDimMV1)","distinctcountthetasketch(longDimSV1)"],"columnDataTypes":["LONG","INT","INT","INT","LONG","LONG","LONG"]},"rows":[[-9223372036854775808,-2147483648,1,1,1,1,1],[11,32,1,2,1,2,1],[11,42,1,2,1,2,1],[41,42,1,2,1,2,1],[41,52,1,2,1,2,1]]},"exceptions":[],"numServersQueried":1,"numServersResponded":1,"numSe
 [...]
 
 # Transformation Functions
 
{"resultTable":{"dataSchema":{"columnNames":["add(longDimSV1,sub(longDimSV2,'3'))","mod(intMetric1,'10')","div(doubleMetric1,mult(floatMetric1,'5'))"],"columnDataTypes":["DOUBLE","DOUBLE","DOUBLE"]},"rows":[[15229.0,1.0,0.20076306285631254],[15229.0,7.0,0.20076306285631254],[13540.0,7.0,0.20076306285631254],[60.0,4.0,0.1999999968342762],[29.0,0.0,0.20904977014723267],[0.0,0.0,0.21652891879345226],[-9.223372036854776E18,0.0,"Infinity"]]},"exceptions":[],"numServersQueried":1,"numServersRe
 [...]
diff --git 
a/pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/function/DistinctCountThetaSketchAggregationFunction.java
 
b/pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/function/DistinctCountThetaSketchAggregationFunction.java
index 9cef1d1931..83709857f9 100644
--- 
a/pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/function/DistinctCountThetaSketchAggregationFunction.java
+++ 
b/pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/function/DistinctCountThetaSketchAggregationFunction.java
@@ -993,8 +993,8 @@ public class DistinctCountThetaSketchAggregationFunction
     int numAccumulators = acc1.size();
     List<ThetaSketchAccumulator> mergedAccumulators = new ArrayList<>(numAccumulators);
     for (int i = 0; i < numAccumulators; i++) {
-      ThetaSketchAccumulator thetaSketchAccumulator1 = acc1.get(i);
-      ThetaSketchAccumulator thetaSketchAccumulator2 = acc2.get(i);
+      ThetaSketchAccumulator thetaSketchAccumulator1 = convertSketchAccumulator(acc1.get(i));
+      ThetaSketchAccumulator thetaSketchAccumulator2 = convertSketchAccumulator(acc2.get(i));
       if (thetaSketchAccumulator1.isEmpty()) {
         mergedAccumulators.add(thetaSketchAccumulator2);
         continue;
@@ -1033,6 +1033,20 @@ public class DistinctCountThetaSketchAggregationFunction
     return Math.round(evaluatePostAggregationExpression(_postAggregationExpression, mergedSketches).getEstimate());
   }
 
+  // This ensures backward compatibility with servers that still return sketches directly.
+  // The AggregationDataTableReducer casts intermediate results to Objects and although the code compiles,
+  // types might still be incompatible at runtime due to type erasure.
+  // Due to performance overheads of redundant casts, this should be removed at some future point.
+  private ThetaSketchAccumulator convertSketchAccumulator(Object mergeResult) {
+    if (mergeResult instanceof Sketch) {
+      Sketch sketch = (Sketch) mergeResult;
+      ThetaSketchAccumulator accumulator = new ThetaSketchAccumulator(_setOperationBuilder, _accumulatorThreshold);
+      accumulator.apply(sketch);
+      return accumulator;
+    }
+    return (ThetaSketchAccumulator) mergeResult;
+  }
+
   /**
    * Helper method to collect expressions in the filter.
    */


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org
For additional commands, e-mail: commits-h...@pinot.apache.org

Reply via email to