This is an automated email from the ASF dual-hosted git repository.

twalthr pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/flink.git


The following commit(s) were added to refs/heads/master by this push:
     new 795a36ee854 [FLINK-39399][table-runtime] Fix integer overflow in 
HyperLogLogPlusPlus causing APPROX_COUNT_DISTINCT undercount
795a36ee854 is described below

commit 795a36ee85403b183514a0aeccc022af0ca6068c
Author: Jim Hughes <[email protected]>
AuthorDate: Wed Apr 8 10:39:38 2026 -0400

    [FLINK-39399][table-runtime] Fix integer overflow in HyperLogLogPlusPlus 
causing APPROX_COUNT_DISTINCT undercount
    
    This closes #27895.
---
 .../aggregate/hyperloglog/HyperLogLogPlusPlus.java |  4 +--
 .../hyperloglog/HyperLogLogPlusPlusTest.java       | 30 ++++++++++++++++++++++
 2 files changed, 32 insertions(+), 2 deletions(-)

diff --git 
a/flink-table/flink-table-runtime/src/main/java/org/apache/flink/table/runtime/functions/aggregate/hyperloglog/HyperLogLogPlusPlus.java
 
b/flink-table/flink-table-runtime/src/main/java/org/apache/flink/table/runtime/functions/aggregate/hyperloglog/HyperLogLogPlusPlus.java
index befa3b72a78..2c551d198bf 100644
--- 
a/flink-table/flink-table-runtime/src/main/java/org/apache/flink/table/runtime/functions/aggregate/hyperloglog/HyperLogLogPlusPlus.java
+++ 
b/flink-table/flink-table-runtime/src/main/java/org/apache/flink/table/runtime/functions/aggregate/hyperloglog/HyperLogLogPlusPlus.java
@@ -4946,8 +4946,8 @@ public class HyperLogLogPlusPlus {
             int i = 0;
             int shift = 0;
             while (idx < m && i < REGISTERS_PER_WORD) {
-                long mIdx = (word >>> shift) & REGISTER_WORD_MASK;
-                zInverse += 1.0 / (1 << mIdx);
+                int mIdx = (int) ((word >>> shift) & REGISTER_WORD_MASK);
+                zInverse += 1.0 / (1L << mIdx);
                 if (mIdx == 0) {
                     v += 1.0d;
                 }
diff --git 
a/flink-table/flink-table-runtime/src/test/java/org/apache/flink/table/runtime/functions/aggregate/hyperloglog/HyperLogLogPlusPlusTest.java
 
b/flink-table/flink-table-runtime/src/test/java/org/apache/flink/table/runtime/functions/aggregate/hyperloglog/HyperLogLogPlusPlusTest.java
index e06ad04ec93..af3a064068f 100644
--- 
a/flink-table/flink-table-runtime/src/test/java/org/apache/flink/table/runtime/functions/aggregate/hyperloglog/HyperLogLogPlusPlusTest.java
+++ 
b/flink-table/flink-table-runtime/src/test/java/org/apache/flink/table/runtime/functions/aggregate/hyperloglog/HyperLogLogPlusPlusTest.java
@@ -159,6 +159,36 @@ class HyperLogLogPlusPlusTest {
         }
     }
 
+    @Test
+    void testQueryWithRegisterValuesAbove32() {
+        // Directly construct an HLL buffer where every register holds value 
35 (>= 32).
+        // Before the fix, "1 << mIdx" used int shift which wraps for mIdx >= 
32
+        // (1 << 35 == 1 << 3 == 8), producing incorrect estimates.
+        // The fix uses "1L << mIdx" (long shift) so 1L << 35 == 34359738368.
+        int registerValue = 35;
+        HyperLogLogPlusPlus hll = new HyperLogLogPlusPlus(0.01);
+        HllBuffer buffer = createHllBuffer(hll);
+
+        // Pack each word with 10 registers (6 bits each) all set to 
registerValue.
+        for (int w = 0; w < hll.getNumWords(); w++) {
+            long word = 0L;
+            for (int r = 0; r < 10; r++) {
+                word |= ((long) registerValue) << (r * 6);
+            }
+            buffer.array[w] = word;
+        }
+
+        long estimate = hll.query(buffer);
+
+        // With correct long shift, the estimate should be astronomically large
+        // (on the order of alpha * m * 2^35 ~ 4e14).
+        // With the buggy int shift, the estimate would be around 95K.
+        // Assert the estimate is at least 1e12 to catch the int-shift bug.
+        assertThat(estimate)
+                .as("Estimate should reflect long-shift math for register 
values >= 32")
+                .isGreaterThan(1_000_000_000_000L);
+    }
+
     public HllBuffer createHllBuffer(HyperLogLogPlusPlus hll) {
         HllBuffer buffer = new HllBuffer();
         buffer.array = new long[hll.getNumWords()];

Reply via email to