This is an automated email from the ASF dual-hosted git repository.

reschke pushed a commit to branch trunk
in repository https://gitbox.apache.org/repos/asf/jackrabbit-oak.git


The following commit(s) were added to refs/heads/trunk by this push:
     new a9641650ac Revert "OAK-10674 Use Oak's Bloom filter (#2412)"
a9641650ac is described below

commit a9641650acb173d8a10183f14e8414519a4c646e
Author: Julian Reschke <[email protected]>
AuthorDate: Tue Aug 5 05:09:29 2025 +0100

    Revert "OAK-10674 Use Oak's Bloom filter (#2412)"
    
    This reverts commit c438a3f96a3b7c148a478b098aa59b7a57650c7d.
---
 .../jackrabbit/oak/spi/blob/split/BlobIdSet.java   |  16 +-
 .../oak/spi/blob/split/BlobIdSetTest.java          | 238 ---------------------
 oak-commons/pom.xml                                |   4 -
 .../oak/commons/collections/package-info.java      |   2 +-
 oak-run-commons/pom.xml                            |   5 -
 .../flatfile/analysis/modules/BinaryId.java        |   8 +-
 .../analysis/modules/DistinctBinarySize.java       |   4 +-
 .../modules/DistinctBinarySizeHistogram.java       |   2 +-
 .../flatfile/analysis/modules/PropertyStats.java   |   4 +-
 .../flatfile/analysis/utils}/BloomFilter.java      |  34 +--
 .../document/flatfile/analysis/utils/Hash.java     |  21 +-
 .../flatfile/analysis/utils}/HyperLogLog.java      |   4 +-
 .../flatfile/analysis/utils/TopKValues.java        |   3 +-
 .../index/indexer/document/tree/Prefetcher.java    |   6 +-
 .../analysis/modules/PropertyStatsTest.java        |   2 +-
 .../flatfile/analysis/utils}/BloomFilterTest.java  |  92 +-------
 .../analysis/utils/CountMinSketchTest.java         |   5 +-
 .../analysis/utils/HyperLogLog3Linear64Test.java   | 165 --------------
 .../flatfile/analysis/utils}/HyperLogLogTest.java  |  39 +++-
 .../flatfile/analysis/utils/TopKValuesTest.java    |  10 +-
 .../document/cache/CacheChangesTracker.java        |  44 ++--
 .../cache/CacheChangesTrackerConcurrencyTest.java  | 215 -------------------
 22 files changed, 108 insertions(+), 815 deletions(-)

diff --git 
a/oak-blob/src/main/java/org/apache/jackrabbit/oak/spi/blob/split/BlobIdSet.java
 
b/oak-blob/src/main/java/org/apache/jackrabbit/oak/spi/blob/split/BlobIdSet.java
index 49acb59232..e62084fcfe 100644
--- 
a/oak-blob/src/main/java/org/apache/jackrabbit/oak/spi/blob/split/BlobIdSet.java
+++ 
b/oak-blob/src/main/java/org/apache/jackrabbit/oak/spi/blob/split/BlobIdSet.java
@@ -25,6 +25,7 @@ import java.io.FileNotFoundException;
 import java.io.FileReader;
 import java.io.FileWriter;
 import java.io.IOException;
+import java.nio.charset.StandardCharsets;
 
 import org.apache.commons.io.IOUtils;
 import org.slf4j.Logger;
@@ -32,7 +33,8 @@ import org.slf4j.LoggerFactory;
 
 import org.apache.jackrabbit.guava.common.cache.Cache;
 import org.apache.jackrabbit.guava.common.cache.CacheBuilder;
-import org.apache.jackrabbit.oak.commons.collections.BloomFilter;
+import org.apache.jackrabbit.guava.common.hash.BloomFilter;
+import org.apache.jackrabbit.guava.common.hash.Funnels;
 
 class BlobIdSet {
 
@@ -40,19 +42,19 @@ class BlobIdSet {
 
     private final File store;
 
-    private final BloomFilter bloomFilter;
+    private final BloomFilter<CharSequence> bloomFilter;
 
     private final Cache<String, Boolean> cache;
 
     BlobIdSet(String repositoryDir, String filename) {
         store = new File(new File(repositoryDir), filename);
-        bloomFilter = BloomFilter.construct(9000000, 0.03); // 9M entries, 3% 
false positive rate
+        bloomFilter = 
BloomFilter.create(Funnels.stringFunnel(StandardCharsets.UTF_8), 9000000); // 
about 8MB
         cache = CacheBuilder.newBuilder().maximumSize(1000).build();
         fillBloomFilter();
     }
 
     synchronized boolean contains(String blobId) throws IOException {
-        if (!bloomFilter.mayContain(blobId)) {
+        if (!bloomFilter.apply(blobId)) {
             return false;
         }
         Boolean cached = cache.getIfPresent(blobId);
@@ -62,7 +64,7 @@ class BlobIdSet {
 
         if (isPresentInStore(blobId)) {
             cache.put(blobId, Boolean.TRUE);
-            bloomFilter.add(blobId);
+            bloomFilter.put(blobId);
             return true;
         } else {
             cache.put(blobId, Boolean.FALSE);
@@ -72,7 +74,7 @@ class BlobIdSet {
 
     synchronized void add(String blobId) throws IOException {
         addToStore(blobId);
-        bloomFilter.add(blobId);
+        bloomFilter.put(blobId);
         cache.put(blobId, Boolean.TRUE);
     }
 
@@ -112,7 +114,7 @@ class BlobIdSet {
             reader = new BufferedReader(new FileReader(store));
             String line;
             while ((line = reader.readLine()) != null) {
-                bloomFilter.add(line);
+                bloomFilter.put(line);
             }
         } catch (IOException e) {
             log.error("Can't fill bloom filter", e);
diff --git 
a/oak-blob/src/test/java/org/apache/jackrabbit/oak/spi/blob/split/BlobIdSetTest.java
 
b/oak-blob/src/test/java/org/apache/jackrabbit/oak/spi/blob/split/BlobIdSetTest.java
deleted file mode 100644
index d933bf29d4..0000000000
--- 
a/oak-blob/src/test/java/org/apache/jackrabbit/oak/spi/blob/split/BlobIdSetTest.java
+++ /dev/null
@@ -1,238 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.jackrabbit.oak.spi.blob.split;
-
-import java.io.File;
-import java.io.FileWriter;
-import java.io.IOException;
-import java.nio.file.Files;
-import java.util.List;
-
-import org.junit.After;
-import org.junit.Assert;
-import org.junit.Before;
-import org.junit.Test;
-
-public class BlobIdSetTest {
-
-    private static final String TEST_FILENAME = "test-blob-ids.txt";
-
-    private File tempDir;
-    private BlobIdSet blobIdSet;
-
-    @Before
-    public void setup() throws IOException {
-        tempDir = Files.createTempDirectory("blob-id-set-test").toFile();
-        blobIdSet = new BlobIdSet(tempDir.getAbsolutePath(), TEST_FILENAME);
-    }
-
-    @After
-    public void cleanup() {
-        File testFile = new File(tempDir, TEST_FILENAME);
-        if (testFile.exists()) {
-            testFile.delete();
-        }
-        tempDir.delete();
-    }
-
-    @Test
-    public void testAddAndContains() throws IOException {
-        String blobId = "testblob123";
-        Assert.assertFalse("New set should not contain blob ID", 
blobIdSet.contains(blobId));
-
-        blobIdSet.add(blobId);
-        Assert.assertTrue("Set should contain added blob ID", 
blobIdSet.contains(blobId));
-    }
-
-    @Test
-    public void testMultipleAddAndContains() throws IOException {
-        String[] blobIds = {"blob1", "blob2", "blob3", "blob4", "blob5"};
-
-        // Add all blob IDs
-        for (String blobId : blobIds) {
-            blobIdSet.add(blobId);
-        }
-
-        // Check all blob IDs are present
-        for (String blobId : blobIds) {
-            Assert.assertTrue("Set should contain: " + blobId, 
blobIdSet.contains(blobId));
-        }
-
-        // Check a non-existent blob ID
-        Assert.assertFalse("Set should not contain non-existent blob ID", 
blobIdSet.contains("nonexistentblob"));
-    }
-
-    @Test
-    public void testPersistenceAcrossInstances() throws IOException {
-        String blobId = "persistenceblob";
-
-        // Add to the first instance
-        blobIdSet.add(blobId);
-
-        // Create a new instance pointing to the same file
-        BlobIdSet newSet = new BlobIdSet(tempDir.getAbsolutePath(), 
TEST_FILENAME);
-
-        // Verify the new instance sees the previously added blob ID
-        Assert.assertTrue("New instance should see blob ID from file", 
newSet.contains(blobId));
-    }
-
-    @Test
-    public void testEmptyFileStore() throws IOException {
-        // Create with non-existent file
-        File nonExistentDir = 
Files.createTempDirectory("non-existent").toFile();
-        BlobIdSet emptySet = new BlobIdSet(nonExistentDir.getAbsolutePath(), 
"empty.txt");
-
-        // Should not contain any blob IDs
-        Assert.assertFalse(emptySet.contains("anyblob"));
-
-        // Clean up
-        nonExistentDir.delete();
-    }
-
-    @Test
-    public void testLargeNumberOfEntries() throws IOException {
-        // Add a moderate number of entries
-        int count = 1000;
-        for (int i = 0; i < count; i++) {
-            blobIdSet.add("blob-" + i);
-        }
-
-        // Verify all entries can be found
-        for (int i = 0; i < count; i++) {
-            Assert.assertTrue(blobIdSet.contains("blob-" + i));
-        }
-
-        // Non-existent entries should return false
-        for (int i = 0; i < 10; i++) {
-            Assert.assertFalse(blobIdSet.contains("nonexistent-blob-" + i));
-        }
-    }
-
-    @Test
-    public void testFileContainsAddedEntries() throws IOException {
-        // Add several blob IDs
-        String[] blobIds = {"a", "b", "c"};
-        for (String id : blobIds) {
-            blobIdSet.add(id);
-        }
-
-        // Verify the file contains the added blob IDs
-        File storeFile = new File(tempDir, TEST_FILENAME);
-        Assert.assertTrue("Store file should exist", storeFile.exists());
-
-        List<String> fileContent = Files.readAllLines(storeFile.toPath());
-        Assert.assertEquals("File should contain all added blob IDs", 
blobIds.length, fileContent.size());
-
-        for (int i = 0; i < blobIds.length; i++) {
-            Assert.assertEquals("File line should match blob ID", blobIds[i], 
fileContent.get(i));
-        }
-    }
-
-    @Test
-    public void testBloomFilterPreventsUnnecessaryFileReads() throws 
IOException {
-        // The bloom filter should prevent checking the file for non-existent 
IDs
-
-        // Add some entries to populate the bloom filter
-        for (int i = 0; i < 10; i++) {
-            blobIdSet.add("existing-" + i);
-        }
-
-        // Using a unique pattern for non-existent IDs to ensure they hash 
differently
-        // than the ones we added
-        for (int i = 0; i < 10; i++) {
-            Assert.assertFalse(blobIdSet.contains("definitely-not-there-" + 
i));
-        }
-    }
-
-    @Test
-    public void testCachingBehavior() throws IOException {
-        String blobId = "cachedblob";
-
-        // Add the blob ID
-        blobIdSet.add(blobId);
-
-        // First check should populate cache
-        Assert.assertTrue(blobIdSet.contains(blobId));
-
-        // Even if we delete the file, the cached result should be used
-        File storeFile = new File(tempDir, TEST_FILENAME);
-        storeFile.delete();
-
-        // Should still return true due to cache
-        Assert.assertTrue(blobIdSet.contains(blobId));
-    }
-
-    @Test
-    public void testContainsFindsExistingEntriesInFile() throws IOException {
-        // Create some blob IDs
-        String[] blobIds = {"fileblob1", "fileblob2", "fileblob3"};
-
-        // Write blob IDs directly to file (not using BlobIdSet.add())
-        File storeFile = new File(tempDir, TEST_FILENAME);
-        try (FileWriter writer = new FileWriter(storeFile)) {
-            for (String id : blobIds) {
-                writer.write(id + "\n");
-            }
-        }
-
-        // Create a new BlobIdSet instance (which should load from the file)
-        BlobIdSet newBlobIdSet = new BlobIdSet(tempDir.getAbsolutePath(), 
TEST_FILENAME);
-
-        // Verify that contains() finds all the IDs
-        for (String id : blobIds) {
-            Assert.assertTrue("Should contain blob ID written directly to 
file: " + id, newBlobIdSet.contains(id));
-        }
-
-        // Verify a non-existent ID still returns false
-        Assert.assertFalse(newBlobIdSet.contains("notinfile"));
-    }
-
-    @Test
-    public void testBloomFilterFalsePositiveProbabilityLessThanThreePercent() 
throws IOException {
-        // Load the bloom filter with a significant number of entries (about 
5% of configured capacity)
-        final int numToAdd = 5000;
-
-        // Add entries to the bloom filter
-        for (int i = 0; i < numToAdd; i++) {
-            blobIdSet.add("entry-" + i);
-        }
-
-        // Test with non-existent entries using carefully crafted IDs
-        int numTests = 1000;
-        int falsePositives = 0;
-
-        // Use a distinct prefix to ensure test IDs don't conflict with added 
entries
-        for (int i = 0; i < numTests; i++) {
-            String nonExistentId = "non-existent-" + i;
-
-            if (blobIdSet.contains(nonExistentId)) {
-                falsePositives++;
-            }
-        }
-
-        final double falsePositiveRate = (double) falsePositives / numTests;
-
-        // Verify the false positive rate is below the configured 3%
-        Assert.assertTrue(
-                "False positive rate should be less than 3%, was: " + 
(falsePositiveRate * 100) + "%",
-                falsePositiveRate < 0.03
-        );
-    }
-}
\ No newline at end of file
diff --git a/oak-commons/pom.xml b/oak-commons/pom.xml
index 8b774a03dc..5e8396cd65 100644
--- a/oak-commons/pom.xml
+++ b/oak-commons/pom.xml
@@ -102,10 +102,6 @@
       <groupId>org.apache.commons</groupId>
       <artifactId>commons-collections4</artifactId>
     </dependency>
-    <dependency>
-      <groupId>commons-codec</groupId>
-      <artifactId>commons-codec</artifactId>
-    </dependency>
     <dependency>
       <groupId>org.apache.jackrabbit</groupId>
       <artifactId>jackrabbit-jcr-commons</artifactId>
diff --git 
a/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/collections/package-info.java
 
b/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/collections/package-info.java
index 5d1490f210..81c20db5ca 100644
--- 
a/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/collections/package-info.java
+++ 
b/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/collections/package-info.java
@@ -21,7 +21,7 @@
  * Utilities for Java collections and streams.
  */
 @Internal(since = "1.0.0")
-@Version("2.3.0")
+@Version("2.2.0")
 package org.apache.jackrabbit.oak.commons.collections;
 import org.apache.jackrabbit.oak.commons.annotations.Internal;
 import org.osgi.annotation.versioning.Version;
diff --git a/oak-run-commons/pom.xml b/oak-run-commons/pom.xml
index efd3657e51..e1288dccc2 100644
--- a/oak-run-commons/pom.xml
+++ b/oak-run-commons/pom.xml
@@ -110,11 +110,6 @@
             <artifactId>oak-jcr</artifactId>
             <version>${project.version}</version>
         </dependency>
-        <dependency>
-            <groupId>org.apache.jackrabbit</groupId>
-            <artifactId>oak-commons</artifactId>
-            <version>${project.version}</version>
-        </dependency>
 
         <dependency>
             <groupId>org.jetbrains</groupId>
diff --git 
a/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/analysis/modules/BinaryId.java
 
b/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/analysis/modules/BinaryId.java
index 741c1cceae..3ec93f175d 100644
--- 
a/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/analysis/modules/BinaryId.java
+++ 
b/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/analysis/modules/BinaryId.java
@@ -20,7 +20,7 @@ package 
org.apache.jackrabbit.oak.index.indexer.document.flatfile.analysis.modul
 
 import java.util.Objects;
 
-import org.apache.jackrabbit.oak.commons.collections.HashUtils;
+import 
org.apache.jackrabbit.oak.index.indexer.document.flatfile.analysis.utils.Hash;
 
 /**
  * A binary id.
@@ -51,9 +51,9 @@ public class BinaryId {
         // we need to hash again because some of the bits are fixed
         // in case of UUIDs: always a "4" here: xxxxxxxx-xxxx-4xxx
         // (the hash64 is a reversible mapping, so there is no risk of 
conflicts)
-        this.v0 = HashUtils.hash64(Long.parseUnsignedLong(buff.substring(0, 
16), 16));
-        this.v1 = HashUtils.hash64(Long.parseUnsignedLong(buff.substring(16, 
32), 16));
-        this.v2 = HashUtils.hash64(Long.parseUnsignedLong(buff.substring(32, 
Math.min(48, buff.length())), 16));
+        this.v0 = Hash.hash64(Long.parseUnsignedLong(buff.substring(0, 16), 
16));
+        this.v1 = Hash.hash64(Long.parseUnsignedLong(buff.substring(16, 32), 
16));
+        this.v2 = Hash.hash64(Long.parseUnsignedLong(buff.substring(32, 
Math.min(48, buff.length())), 16));
     }
 
     @Override
diff --git 
a/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/analysis/modules/DistinctBinarySize.java
 
b/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/analysis/modules/DistinctBinarySize.java
index 878375f1f8..272c7f357b 100644
--- 
a/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/analysis/modules/DistinctBinarySize.java
+++ 
b/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/analysis/modules/DistinctBinarySize.java
@@ -28,8 +28,8 @@ import java.util.stream.Collectors;
 import 
org.apache.jackrabbit.oak.index.indexer.document.flatfile.analysis.stream.NodeData;
 import 
org.apache.jackrabbit.oak.index.indexer.document.flatfile.analysis.stream.NodeProperty;
 import 
org.apache.jackrabbit.oak.index.indexer.document.flatfile.analysis.stream.NodeProperty.ValueType;
-import org.apache.jackrabbit.oak.commons.collections.BloomFilter;
-import org.apache.jackrabbit.oak.commons.collections.HyperLogLog;
+import 
org.apache.jackrabbit.oak.index.indexer.document.flatfile.analysis.utils.BloomFilter;
+import 
org.apache.jackrabbit.oak.index.indexer.document.flatfile.analysis.utils.HyperLogLog;
 
 /**
  * Collects the number and size of distinct binaries.
diff --git 
a/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/analysis/modules/DistinctBinarySizeHistogram.java
 
b/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/analysis/modules/DistinctBinarySizeHistogram.java
index 061aa39d52..349a3fe35b 100644
--- 
a/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/analysis/modules/DistinctBinarySizeHistogram.java
+++ 
b/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/analysis/modules/DistinctBinarySizeHistogram.java
@@ -27,7 +27,7 @@ import java.util.stream.Collectors;
 import 
org.apache.jackrabbit.oak.index.indexer.document.flatfile.analysis.stream.NodeData;
 import 
org.apache.jackrabbit.oak.index.indexer.document.flatfile.analysis.stream.NodeProperty;
 import 
org.apache.jackrabbit.oak.index.indexer.document.flatfile.analysis.stream.NodeProperty.ValueType;
-import org.apache.jackrabbit.oak.commons.collections.HyperLogLog;
+import 
org.apache.jackrabbit.oak.index.indexer.document.flatfile.analysis.utils.HyperLogLog;
 
 /**
  * A histogram of distinct binaries. For each size range, we calculate the
diff --git 
a/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/analysis/modules/PropertyStats.java
 
b/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/analysis/modules/PropertyStats.java
index c749a1ab59..80227f5744 100644
--- 
a/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/analysis/modules/PropertyStats.java
+++ 
b/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/analysis/modules/PropertyStats.java
@@ -31,7 +31,7 @@ import java.util.stream.Collectors;
 
 import 
org.apache.jackrabbit.oak.index.indexer.document.flatfile.analysis.stream.NodeData;
 import 
org.apache.jackrabbit.oak.index.indexer.document.flatfile.analysis.stream.NodeProperty;
-import org.apache.jackrabbit.oak.commons.collections.HashUtils;
+import 
org.apache.jackrabbit.oak.index.indexer.document.flatfile.analysis.utils.Hash;
 import 
org.apache.jackrabbit.oak.index.indexer.document.flatfile.analysis.utils.HyperLogLog3Linear64;
 import 
org.apache.jackrabbit.oak.index.indexer.document.flatfile.analysis.utils.TopKValues;
 
@@ -123,7 +123,7 @@ public class PropertyStats implements StatsCollector {
         stats.values += p.getValues().length;
         if (stats.count > MIN_PROPERTY_COUNT) {
             for (String v : p.getValues()) {
-                long hash = HashUtils.hash64(v.hashCode(), seed);
+                long hash = Hash.hash64(v.hashCode(), seed);
                 stats.hll = HyperLogLog3Linear64.add(stats.hll, hash);
                 stats.size += v.length();
                 stats.maxSize = Math.max(stats.maxSize, v.length());
diff --git 
a/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/collections/BloomFilter.java
 
b/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/analysis/utils/BloomFilter.java
similarity index 83%
rename from 
oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/collections/BloomFilter.java
rename to 
oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/analysis/utils/BloomFilter.java
index 3c266ea1bb..3ad439a909 100644
--- 
a/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/collections/BloomFilter.java
+++ 
b/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/analysis/utils/BloomFilter.java
@@ -16,7 +16,7 @@
  * specific language governing permissions and limitations
  * under the License.
  */
-package org.apache.jackrabbit.oak.commons.collections;
+package 
org.apache.jackrabbit.oak.index.indexer.document.flatfile.analysis.utils;
 
 /**
  * A Bloom filter implementation.
@@ -44,12 +44,6 @@ public class BloomFilter {
      * @return the Bloom filter
      */
     public static BloomFilter construct(long n, double fpp) {
-        if (n <= 0) {
-            throw new IllegalArgumentException("n must be greater than 0");
-        }
-        if (fpp <= 0 || fpp >= 1) {
-            throw new IllegalArgumentException("fpp must be between 0 and 1");
-        }
         long m = calculateBits(n, fpp);
         int k = calculateK((double) m / n);
         return new BloomFilter(new long[(int) ((m + 63) / 64)], k);
@@ -102,15 +96,6 @@ public class BloomFilter {
         return Math.pow(1 - Math.exp(-k / ((double) bits / n)), k);
     }
 
-    /**
-     * Add an entry, using an int hash code.
-     *
-     * @param value the value to check
-     */
-    public void add(String value) {
-        add(HashUtils.hash64(value));
-    }
-
     /**
      * Add an entry.
      *
@@ -121,22 +106,11 @@ public class BloomFilter {
         long a = (hash >>> 32) | (hash << 32);
         long b = hash;
         for (int i = 0; i < k; i++) {
-            data[HashUtils.reduce((int) (a >>> 32), arraySize)] |= 1L << a;
+            data[Hash.reduce((int) (a >>> 32), arraySize)] |= 1L << a;
             a += b;
         }
     }
 
-    /**
-     * Whether the entry may be in the set.
-     *
-     * @param value the value to check
-     * @return true if the entry was added, or, with a certain false positive
-     *         probability, even if it was not added
-     */
-    public boolean mayContain(String value) {
-        return mayContain(HashUtils.hash64(value));
-    }
-
     /**
      * Whether the entry may be in the set.
      *
@@ -149,7 +123,7 @@ public class BloomFilter {
         long a = (hash >>> 32) | (hash << 32);
         long b = hash;
         for (int i = 0; i < k; i++) {
-            if ((data[HashUtils.reduce((int) (a >>> 32), arraySize)] & 1L << 
a) == 0) {
+            if ((data[Hash.reduce((int) (a >>> 32), arraySize)] & 1L << a) == 
0) {
                 return false;
             }
             a += b;
@@ -185,4 +159,4 @@ public class BloomFilter {
         return (long) (-(m / k) * Math.log(1 - (x / m)));
     }
 
-} 
\ No newline at end of file
+}
\ No newline at end of file
diff --git 
a/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/collections/HashUtils.java
 
b/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/analysis/utils/Hash.java
similarity index 83%
rename from 
oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/collections/HashUtils.java
rename to 
oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/analysis/utils/Hash.java
index 90d998664c..a2cd599e7b 100644
--- 
a/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/collections/HashUtils.java
+++ 
b/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/analysis/utils/Hash.java
@@ -16,30 +16,17 @@
  * specific language governing permissions and limitations
  * under the License.
  */
-package org.apache.jackrabbit.oak.commons.collections;
-
-import java.nio.charset.StandardCharsets;
-import org.apache.commons.codec.digest.MurmurHash3;
+package 
org.apache.jackrabbit.oak.index.indexer.document.flatfile.analysis.utils;
 
 /**
  * A hash function utility class.
  */
-public class HashUtils {
+public class Hash {
 
-    private HashUtils() {
+    private Hash() {
         // utility class
     }
 
-    /**
-     * Calculate a 64-bit hash value from a string.
-     *
-     * @param s the string
-     * @return the hash value
-     */
-    public static long hash64(String s) {
-        return MurmurHash3.hash128(s.getBytes(StandardCharsets.UTF_8))[0];
-    }
-
     /**
      * Calculate a 64-bit hash value from a value, using a seed.
      *
@@ -83,4 +70,4 @@ public class HashUtils {
         return (int) (((hash & 0xffffffffL) * (n & 0xffffffffL)) >>> 32);
     }
 
-} 
\ No newline at end of file
+}
diff --git 
a/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/collections/HyperLogLog.java
 
b/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/analysis/utils/HyperLogLog.java
similarity index 97%
rename from 
oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/collections/HyperLogLog.java
rename to 
oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/analysis/utils/HyperLogLog.java
index 2054decff0..dd20a1e777 100644
--- 
a/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/collections/HyperLogLog.java
+++ 
b/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/analysis/utils/HyperLogLog.java
@@ -16,7 +16,7 @@
  * specific language governing permissions and limitations
  * under the License.
  */
-package org.apache.jackrabbit.oak.commons.collections;
+package 
org.apache.jackrabbit.oak.index.indexer.document.flatfile.analysis.utils;
 
 import java.util.HashSet;
 
@@ -92,4 +92,4 @@ public class HyperLogLog {
         return est;
     }
 
-} 
\ No newline at end of file
+}
diff --git 
a/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/analysis/utils/TopKValues.java
 
b/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/analysis/utils/TopKValues.java
index 48b4a63b0b..2c235f2197 100644
--- 
a/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/analysis/utils/TopKValues.java
+++ 
b/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/analysis/utils/TopKValues.java
@@ -21,7 +21,6 @@ package 
org.apache.jackrabbit.oak.index.indexer.document.flatfile.analysis.utils
 import java.util.ArrayList;
 import java.util.HashMap;
 
-import org.apache.jackrabbit.oak.commons.collections.HashUtils;
 import org.apache.jackrabbit.oak.commons.json.JsopBuilder;
 
 /**
@@ -92,7 +91,7 @@ public class TopKValues {
         if (countedCount > 1000) {
             skipRemaining = SKIP;
         }
-        long hash = HashUtils.hash64(value);
+        long hash = Hash.hash64(value.hashCode());
         long est = sketch.addAndEstimate(hash);
         if (est < min) {
             return;
diff --git 
a/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/tree/Prefetcher.java
 
b/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/tree/Prefetcher.java
index 8f4e6d2cac..10992f5c06 100644
--- 
a/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/tree/Prefetcher.java
+++ 
b/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/tree/Prefetcher.java
@@ -34,8 +34,8 @@ import org.apache.jackrabbit.oak.api.Blob;
 import org.apache.jackrabbit.oak.api.PropertyState;
 import org.apache.jackrabbit.oak.api.Type;
 import org.apache.jackrabbit.oak.index.indexer.document.NodeStateEntry;
-import org.apache.jackrabbit.oak.commons.collections.HashUtils;
-import org.apache.jackrabbit.oak.commons.collections.HyperLogLog;
+import 
org.apache.jackrabbit.oak.index.indexer.document.flatfile.analysis.utils.Hash;
+import 
org.apache.jackrabbit.oak.index.indexer.document.flatfile.analysis.utils.HyperLogLog;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -240,7 +240,7 @@ public class Prefetcher {
         // and then we use a secondary hash
         // otherwise the estimation is way off
         int h = blob.getContentIdentity().hashCode();
-        return HashUtils.hash64(h | (blob.length() << 32));
+        return Hash.hash64(h | (blob.length() << 32));
     }
 
     static enum PrefetchType {
diff --git 
a/oak-run-commons/src/test/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/analysis/modules/PropertyStatsTest.java
 
b/oak-run-commons/src/test/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/analysis/modules/PropertyStatsTest.java
index 5fe819335d..7803a54400 100644
--- 
a/oak-run-commons/src/test/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/analysis/modules/PropertyStatsTest.java
+++ 
b/oak-run-commons/src/test/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/analysis/modules/PropertyStatsTest.java
@@ -67,7 +67,7 @@ public class PropertyStatsTest {
             pc.add(n);
         }
         assertEquals("PropertyStats\n"
-                + "skewed weight 3 count 1000000 distinct 394382 avgSize 7 
maxSize 11 top 
{\"skipped\":899091,\"counted\":90910,\"false\":25618,\"true\":25543,\"-411461567\":1,\"1483286044\":1,\"1310925467\":1,\"-1752252714\":1,\"-1433290908\":1,\"-1209544007\":1}\n"
+                + "skewed weight 3 count 1000000 distinct 394382 avgSize 7 
maxSize 11 top 
{\"skipped\":899091,\"counted\":90910,\"false\":25583,\"true\":25518,\"-411461567\":1,\"1483286044\":1,\"1310925467\":1,\"-1752252714\":1,\"-1433290908\":1,\"-1209544007\":1}\n"
                 + "", pc.toString());
     }
 
diff --git 
a/oak-commons/src/test/java/org/apache/jackrabbit/oak/commons/collections/BloomFilterTest.java
 
b/oak-run-commons/src/test/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/analysis/utils/BloomFilterTest.java
similarity index 61%
rename from 
oak-commons/src/test/java/org/apache/jackrabbit/oak/commons/collections/BloomFilterTest.java
rename to 
oak-run-commons/src/test/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/analysis/utils/BloomFilterTest.java
index 464de8c4ef..c3813f340d 100644
--- 
a/oak-commons/src/test/java/org/apache/jackrabbit/oak/commons/collections/BloomFilterTest.java
+++ 
b/oak-run-commons/src/test/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/analysis/utils/BloomFilterTest.java
@@ -16,12 +16,10 @@
  * specific language governing permissions and limitations
  * under the License.
  */
-package org.apache.jackrabbit.oak.commons.collections;
+package 
org.apache.jackrabbit.oak.index.indexer.document.flatfile.analysis.utils;
 
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertTrue;
-import static org.junit.Assert.assertFalse;
-import static org.junit.Assert.assertThrows;
 
 import org.junit.Test;
 
@@ -63,6 +61,9 @@ public class BloomFilterTest {
         BloomFilter f = BloomFilter.construct(100, 0.01);
         assertEquals(960, f.getBitCount());
         assertEquals(7, f.getK());
+        f = BloomFilter.construct(0, 0.01);
+        assertEquals(0, f.getBitCount());
+        assertEquals(1, f.getK());
     }
 
     @Test
@@ -71,14 +72,14 @@ public class BloomFilterTest {
             int size = 500_000;
             BloomFilter f = BloomFilter.construct(size, fpp);
             for (int i = 0; i < size; i++) {
-                f.add(HashUtils.hash64(i));
+                f.add(Hash.hash64(i));
             }
             for (int i = 0; i < size; i++) {
-                assertTrue(f.mayContain(HashUtils.hash64(i)));
+                assertTrue(f.mayContain(Hash.hash64(i)));
             }
             int falsePositives = 0;
             for (int i = 0; i < size; i++) {
-                if (f.mayContain(HashUtils.hash64(i + size))) {
+                if (f.mayContain(Hash.hash64(i + size))) {
                     falsePositives++;
                 }
             }
@@ -104,7 +105,7 @@ public class BloomFilterTest {
         HyperLogLog hll = new HyperLogLog(1024, 0);
         // now we calculate estimations with both the Bloom filter and 
HyperLogLog
         for(int i = 0; i < 20_000; i++) {
-            long x = HashUtils.hash64(i);
+            long x = Hash.hash64(i);
             bloom.add(x);
             hll.add(x);
             if (i > 0 && i % 1000 == 0) {
@@ -122,79 +123,4 @@ public class BloomFilterTest {
         }
     }
 
-    @Test
-    public void filterFunctionality() {
-        BloomFilter filter = BloomFilter.construct(100, 0.01);
-        String testValue = "test-value";
-
-        // Initially should not contain anything
-        assertFalse(filter.mayContain(testValue));
-
-        // Add the item and verify it's found
-        filter.add(testValue);
-        assertTrue(filter.mayContain(testValue));
-
-        // Verify another value is not found
-        assertFalse(filter.mayContain("different-value"));
-    }
-
-    @Test
-    public void filterWithMultipleEntries() {
-        BloomFilter filter = BloomFilter.construct(100, 0.01);
-
-        // Add multiple entries
-        for (int i = 0; i < 100; i++) {
-            filter.add("value-" + i);
-        }
-
-        // Verify all entries are found
-        for (int i = 0; i < 100; i++) {
-            assertTrue(filter.mayContain("value-" + i));
-        }
-    }
-
-    @Test
-    public void falsePositiveProbability() {
-        // Create a filter with high false positive probability for testing
-        double fpp = 0.3;
-        BloomFilter filter = BloomFilter.construct(100, fpp);
-
-        // Fill the filter to capacity
-        for (int i = 0; i < 100; i++) {
-            filter.add("existing-" + i);
-        }
-
-        // Test with values not in the filter
-        int falsePositives = 0;
-        int trials = 1000;
-
-        for (int i = 0; i < trials; i++) {
-            if (filter.mayContain("nonexistent-" + i)) {
-                falsePositives++;
-            }
-        }
-
-        // The false positive rate should be approximately fpp
-        double actualFpp = (double) falsePositives / trials;
-        assertTrue("False positive rate should be close to expected, got " + 
actualFpp + " expected " + fpp, Math.abs(actualFpp - fpp) < 0.15);
-    }
-
-    @Test
-    public void invalidEntries() {
-        // Should throw exception for entries < 1
-        assertThrows(IllegalArgumentException.class,() -> 
BloomFilter.construct(0, 0.01));
-    }
-
-    @Test
-    public void invalidFppZero() {
-        // Should throw exception for fpp <= 0
-        assertThrows(IllegalArgumentException.class,() -> 
BloomFilter.construct(100, 0.0));
-    }
-
-    @Test
-    public void invalidFppOne() {
-        // Should throw exception for fpp >= 1
-        assertThrows(IllegalArgumentException.class,() -> 
BloomFilter.construct(100, 1.0));
-    }    
-
-} 
\ No newline at end of file
+}
diff --git 
a/oak-run-commons/src/test/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/analysis/utils/CountMinSketchTest.java
 
b/oak-run-commons/src/test/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/analysis/utils/CountMinSketchTest.java
index 212c0fba22..92935ca286 100644
--- 
a/oak-run-commons/src/test/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/analysis/utils/CountMinSketchTest.java
+++ 
b/oak-run-commons/src/test/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/analysis/utils/CountMinSketchTest.java
@@ -24,7 +24,6 @@ import static org.junit.Assert.assertTrue;
 import java.util.Arrays;
 import java.util.Random;
 
-import org.apache.jackrabbit.oak.commons.collections.HashUtils;
 import org.junit.Test;
 
 public class CountMinSketchTest {
@@ -84,11 +83,11 @@ public class CountMinSketchTest {
                     }
                     CountMinSketch est = new CountMinSketch(5, 16);
                     for (int i = 0; i < size; i++) {
-                        est.add(HashUtils.hash64(x + data[i]));
+                        est.add(Hash.hash64(x + data[i]));
                     }
                     int[] counts = getCounts(data);
                     for (int i = 0; i < 10; i++) {
-                        long e = est.estimate(HashUtils.hash64(x + i));
+                        long e = est.estimate(Hash.hash64(x + i));
                         long expectedPercent = (int) (100. * counts[i] / size);
                         long estPercent = (int) (100. * e / size);
                         if (debug) {
diff --git 
a/oak-run-commons/src/test/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/analysis/utils/HyperLogLog3Linear64Test.java
 
b/oak-run-commons/src/test/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/analysis/utils/HyperLogLog3Linear64Test.java
deleted file mode 100644
index eb7ab9a692..0000000000
--- 
a/oak-run-commons/src/test/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/analysis/utils/HyperLogLog3Linear64Test.java
+++ /dev/null
@@ -1,165 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package 
org.apache.jackrabbit.oak.index.indexer.document.flatfile.analysis.utils;
-
-import static org.junit.Assert.assertTrue;
-
-import org.apache.jackrabbit.oak.commons.collections.HashUtils;
-import org.apache.jackrabbit.oak.commons.collections.HyperLogLog;
-import org.junit.Test;
-
-public class HyperLogLog3Linear64Test {
-
-    @Test
-    public void test() {
-        int testCount = 50;
-        for (int m = 8; m <= 128; m *= 2) {
-            double avg = Math.sqrt(averageOverRange(m, 30_000, testCount, 
false, 2));
-            int min, max;
-            switch (m) {
-            case 8:
-                min = 16;
-                max = 17;
-                break;
-            case 16:
-                min = 22;
-                max = 23;
-                break;
-            case 32:
-                min = 15;
-                max = 16;
-                break;
-            case 64:
-                min = 10;
-                max = 11;
-                break;
-            case 128:
-                min = 7;
-                max = 8;
-                break;
-            default:
-                min = 0;
-                max = 0;
-                break;
-            }
-            assertTrue("m " + m + " expected " + min + ".." + max + " got " + 
avg, min < avg && avg < max);
-        }
-    }
-
-    private static double averageOverRange(int m, long maxSize, int testCount, 
boolean debug, double exponent) {
-        double sum = 0;
-        int count = 0;
-        for (long size = 1; size <= 20; size++) {
-            sum += test(m, size, testCount, debug, exponent);
-            count++;
-        }
-        for (long size = 22; size <= 300; size += size / 5) {
-            sum += test(m, size, testCount, debug, exponent);
-            count++;
-        }
-        for (long size = 400; size <= maxSize; size *= 2) {
-            sum += test(m, size, testCount, debug, exponent);
-            count++;
-        }
-        return sum / count;
-    }
-
-    private static double test(int m, long size, int testCount, boolean debug, 
double exponent) {
-        long x = 0;
-        long min = Long.MAX_VALUE, max = Long.MIN_VALUE;
-        long ns = System.nanoTime();
-        double sumSquareError = 0;
-        double sum = 0;
-        double sumFirst = 0;
-        int repeat = 10;
-        int runs = 2;
-        for (int test = 0; test < testCount; test++) {
-            HyperLogLog hll;
-            if (m == 8) {
-                hll = new HyperLogLogUsingLong(16, 0);
-            } else {
-                hll = new HyperLogLog(m, 0);
-            }
-            long baseX = x;
-            for (int i = 0; i < size; i++) {
-                hll.add(HashUtils.hash64(x));
-                x++;
-            }
-            long e = hll.estimate();
-            sum += e;
-            min = Math.min(min, e);
-            max = Math.max(max, e);
-            long error = e - size;
-            sumSquareError += error * error;
-            sumFirst += e;
-            for (int add = 0; add < repeat; add++) {
-                long x2 = baseX;
-                for (int i = 0; i < size; i++) {
-                    hll.add(HashUtils.hash64(x2));
-                    x2++;
-                }
-            }
-            e = hll.estimate();
-            sum += e;
-            min = Math.min(min, e);
-            max = Math.max(max, e);
-            error = e - size;
-            sumSquareError += error * error;
-        }
-        ns = System.nanoTime() - ns;
-        long nsPerItem = ns / testCount / runs / (1 + repeat) / size;
-        double stdDev = Math.sqrt(sumSquareError / testCount / runs);
-        double relStdDevP = stdDev / size * 100;
-        int biasFirstP = (int) (100 * (sumFirst / testCount / size) - 100);
-        int biasP = (int) (100 * (sum / testCount / runs / size) - 100);
-        if (debug) {
-            System.out.println("m " + m + " size " + size + " relStdDev% " + 
(int) relStdDevP +
-                    " range " + min + ".." + max +
-                    " biasFirst% " + biasFirstP +
-                    " bias% " + biasP +
-                    " avg " + (sum / testCount / runs) +
-                    " time " + nsPerItem);
-        }
-        // we try to reduce the relStdDevP, make sure there are no large values
-        // (trying to reduce sumSquareError directly
-        // would mean we care more about larger sets, but we don't)
-        return Math.pow(relStdDevP, exponent);
-    }
-
-    static class HyperLogLogUsingLong extends HyperLogLog {
-
-        private long value;
-
-        public HyperLogLogUsingLong(int m, int maxSmallSetSize) {
-            super(m, maxSmallSetSize);
-        }
-
-        @Override
-        public void add(long hash) {
-            value = HyperLogLog3Linear64.add(value, hash);
-        }
-
-        @Override
-        public long estimate() {
-            return HyperLogLog3Linear64.estimate(value);
-        }
-
-    }
-
-} 
\ No newline at end of file
diff --git 
a/oak-commons/src/test/java/org/apache/jackrabbit/oak/commons/collections/HyperLogLogTest.java
 
b/oak-run-commons/src/test/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/analysis/utils/HyperLogLogTest.java
similarity index 84%
rename from 
oak-commons/src/test/java/org/apache/jackrabbit/oak/commons/collections/HyperLogLogTest.java
rename to 
oak-run-commons/src/test/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/analysis/utils/HyperLogLogTest.java
index fd3fb39bff..570af9f16f 100644
--- 
a/oak-commons/src/test/java/org/apache/jackrabbit/oak/commons/collections/HyperLogLogTest.java
+++ 
b/oak-run-commons/src/test/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/analysis/utils/HyperLogLogTest.java
@@ -16,7 +16,7 @@
  * specific language governing permissions and limitations
  * under the License.
  */
-package org.apache.jackrabbit.oak.commons.collections;
+package 
org.apache.jackrabbit.oak.index.indexer.document.flatfile.analysis.utils;
 
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertNotEquals;
@@ -53,10 +53,14 @@ public class HyperLogLogTest {
     @Test
     public void test() {
         int testCount = 50;
-        for (int m = 16; m <= 128; m *= 2) {
+        for (int m = 8; m <= 128; m *= 2) {
             double avg = Math.sqrt(averageOverRange(m, 30_000, testCount, 
false, 2));
             int min, max;
             switch (m) {
+            case 8:
+                min = 16;
+                max = 17;
+                break;
             case 16:
                 min = 22;
                 max = 23;
@@ -78,6 +82,7 @@ public class HyperLogLogTest {
                 max = 0;
                 break;
             }
+            // System.out.println(type + " expected " + min + ".." + max + " 
got " + avg);
             assertTrue("m " + m + " expected " + min + ".." + max + " got " + 
avg, min < avg && avg < max);
         }
     }
@@ -111,10 +116,14 @@ public class HyperLogLogTest {
         int runs = 2;
         for (int test = 0; test < testCount; test++) {
             HyperLogLog hll;
-            hll = new HyperLogLog(m, 0);
+            if (m == 8) {
+                hll = new HyperLogLogUsingLong(16, 0);
+            } else {
+                hll = new HyperLogLog(m, 0);
+            }
             long baseX = x;
             for (int i = 0; i < size; i++) {
-                hll.add(HashUtils.hash64(x));
+                hll.add(Hash.hash64(x));
                 x++;
             }
             long e = hll.estimate();
@@ -127,7 +136,7 @@ public class HyperLogLogTest {
             for (int add = 0; add < repeat; add++) {
                 long x2 = baseX;
                 for (int i = 0; i < size; i++) {
-                    hll.add(HashUtils.hash64(x2));
+                    hll.add(Hash.hash64(x2));
                     x2++;
                 }
             }
@@ -158,4 +167,22 @@ public class HyperLogLogTest {
         return Math.pow(relStdDevP, exponent);
     }
 
-} 
\ No newline at end of file
+    static class HyperLogLogUsingLong extends HyperLogLog {
+
+        private long value;
+
+        public HyperLogLogUsingLong(int m, int maxSmallSetSize) {
+            super(m, maxSmallSetSize);
+        }
+
+        public void add(long hash) {
+            value = HyperLogLog3Linear64.add(value, hash);
+        }
+
+        public long estimate() {
+            return HyperLogLog3Linear64.estimate(value);
+        }
+
+    }
+
+}
diff --git 
a/oak-run-commons/src/test/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/analysis/utils/TopKValuesTest.java
 
b/oak-run-commons/src/test/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/analysis/utils/TopKValuesTest.java
index dfd36a6eba..696e2e724f 100644
--- 
a/oak-run-commons/src/test/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/analysis/utils/TopKValuesTest.java
+++ 
b/oak-run-commons/src/test/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/analysis/utils/TopKValuesTest.java
@@ -30,17 +30,17 @@ public class TopKValuesTest {
     public void test() {
         TopKValues v = new TopKValues(3);
         Random r = new Random(1);
-        for (int i = 0; i < 1000000; i++) {
-            if (r.nextBoolean()) {
+        for(int i=0; i<1000000; i++) {
+            if(r.nextBoolean()) {
                 v.add("common" + r.nextInt(2));
             } else {
                 v.add("rare" + r.nextInt(100));
             }
         }
-        
assertEquals("{\"notSkewed\":5,\"skipped\":908191,\"counted\":91809,\"common0\":24231,\"common1\":23844,\"rare13\":2722}",
 v.toString());
+        
assertEquals("{\"notSkewed\":5,\"skipped\":908191,\"counted\":91809,\"common1\":24849,\"common0\":24652,\"rare13\":2374}",
 v.toString());
         assertEquals(91809, v.getCount());
-        assertEquals(24231, v.getTopCount());
-        assertEquals(23844, v.getSecondCount());
+        assertEquals(24849, v.getTopCount());
+        assertEquals(24652, v.getSecondCount());
     }
 
 }
diff --git 
a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/cache/CacheChangesTracker.java
 
b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/cache/CacheChangesTracker.java
index 196f6188d4..14e30cd673 100644
--- 
a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/cache/CacheChangesTracker.java
+++ 
b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/cache/CacheChangesTracker.java
@@ -16,13 +16,14 @@
  */
 package org.apache.jackrabbit.oak.plugins.document.cache;
 
-import org.apache.jackrabbit.oak.commons.collections.BloomFilter;
+import org.apache.jackrabbit.guava.common.hash.BloomFilter;
+import org.apache.jackrabbit.guava.common.hash.Funnel;
+import org.apache.jackrabbit.guava.common.hash.PrimitiveSink;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 import java.io.Closeable;
 import java.util.List;
-import java.util.concurrent.atomic.AtomicReference;
 import java.util.function.Predicate;
 
 public class CacheChangesTracker implements Closeable {
@@ -61,10 +62,10 @@ public class CacheChangesTracker implements Closeable {
         changeTrackers.remove(this);
 
         if (LOG.isDebugEnabled()) {
-            if (lazyBloomFilter.filterRef.get() == null) {
+            if (lazyBloomFilter.filter == null) {
                 LOG.debug("Disposing CacheChangesTracker for {}, no filter was 
needed", keyFilter);
             } else {
-                LOG.debug("Disposing CacheChangesTracker for {}, filter fpp 
was: {}", keyFilter, LazyBloomFilter.FPP);
+                LOG.debug("Disposing CacheChangesTracker for {}, filter fpp 
was: {}", keyFilter, lazyBloomFilter.filter.expectedFpp());
             }
         }
     }
@@ -75,33 +76,38 @@ public class CacheChangesTracker implements Closeable {
 
         private final int entries;
 
-        private final AtomicReference<BloomFilter> filterRef;
+        private volatile BloomFilter<String> filter;
 
         public LazyBloomFilter(int entries) {
             this.entries = entries;
-            this.filterRef = new AtomicReference<>();
         }
 
         public synchronized void put(String entry) {
-            getFilter().add(entry);
+            getFilter().put(entry);
         }
 
         public boolean mightContain(String entry) {
-            BloomFilter f = filterRef.get();
-            return f != null && f.mayContain(entry);
+            if (filter == null) {
+                return false;
+            } else {
+                synchronized (this) {
+                    return filter.mightContain(entry);
+                }
+            }
         }
 
-        private BloomFilter getFilter() {
-            BloomFilter result = filterRef.get();
-            if (result == null) {
-                BloomFilter newFilter = BloomFilter.construct(entries, FPP);
-                if (filterRef.compareAndSet(null, newFilter)) {
-                    result = newFilter;
-                } else {
-                    result = filterRef.get();
-                }
+        private BloomFilter<String> getFilter() {
+            if (filter == null) {
+                filter = BloomFilter.create(new Funnel<String>() {
+                    private static final long serialVersionUID = 
-7114267990225941161L;
+
+                    @Override
+                    public void funnel(String from, PrimitiveSink into) {
+                        into.putUnencodedChars(from);
+                    }
+                }, entries, FPP);
             }
-            return result;
+            return filter;
         }
     }
 }
diff --git 
a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/cache/CacheChangesTrackerConcurrencyTest.java
 
b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/cache/CacheChangesTrackerConcurrencyTest.java
deleted file mode 100644
index 5d593df76c..0000000000
--- 
a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/cache/CacheChangesTrackerConcurrencyTest.java
+++ /dev/null
@@ -1,215 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.jackrabbit.oak.plugins.document.cache;
-
-import org.junit.Test;
-
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.List;
-import java.util.concurrent.CountDownLatch;
-import java.util.concurrent.ExecutorService;
-import java.util.concurrent.Executors;
-import java.util.concurrent.TimeUnit;
-import java.util.concurrent.atomic.AtomicInteger;
-
-import static org.junit.Assert.*;
-
-/**
- * Tests for CacheChangesTracker concurrency scenarios, particularly
- * the LazyBloomFilter double-checked locking implementation.
- */
-public class CacheChangesTrackerConcurrencyTest {
-
-    /**
-     * Test concurrent initialization of LazyBloomFilter to ensure
-     * double-checked locking prevents race conditions.
-     */
-    @Test
-    public void testLazyBloomFilterConcurrentInitialization() throws 
InterruptedException {
-        final int threadCount = 20;
-        final int entriesPerThread = 50;
-        final CountDownLatch startLatch = new CountDownLatch(1);
-        final CountDownLatch doneLatch = new CountDownLatch(threadCount);
-        final ExecutorService executor = 
Executors.newFixedThreadPool(threadCount);
-        
-        // Create a LazyBloomFilter instance
-        final CacheChangesTracker.LazyBloomFilter lazyFilter = 
-            new CacheChangesTracker.LazyBloomFilter(1000);
-        
-        final AtomicInteger putOperations = new AtomicInteger(0);
-        final List<Exception> exceptions = Collections.synchronizedList(new 
ArrayList<>());
-        
-        try {
-            // Create multiple threads that will all try to initialize and use 
the filter simultaneously
-            for (int i = 0; i < threadCount; i++) {
-                final int threadId = i;
-                executor.submit(() -> {
-                    try {
-                        // Wait for all threads to be ready
-                        startLatch.await();
-                        
-                        // Each thread adds multiple entries
-                        for (int j = 0; j < entriesPerThread; j++) {
-                            String key = "thread-" + threadId + "-key-" + j;
-                            lazyFilter.put(key);
-                            putOperations.incrementAndGet();
-                            
-                            // Add a small random delay to increase chance of 
race condition
-                            if (j % 10 == 0) {
-                                Thread.sleep(1);
-                            }
-                        }
-                    } catch (Exception e) {
-                        exceptions.add(e);
-                    } finally {
-                        doneLatch.countDown();
-                    }
-                });
-            }
-            
-            // Start all threads simultaneously
-            startLatch.countDown();
-            
-            // Wait for all threads to complete
-            assertTrue("Test timed out", doneLatch.await(30, 
TimeUnit.SECONDS));
-            
-            // Verify no exceptions occurred
-            if (!exceptions.isEmpty()) {
-                fail("Exceptions occurred during concurrent access: " + 
exceptions.get(0));
-            }
-            
-            // Verify all put operations completed
-            assertEquals(threadCount * entriesPerThread, putOperations.get());
-            
-            // Verify the filter works correctly after concurrent 
initialization
-            for (int i = 0; i < threadCount; i++) {
-                for (int j = 0; j < entriesPerThread; j++) {
-                    String key = "thread-" + i + "-key-" + j;
-                    assertTrue("Filter should contain key: " + key, 
lazyFilter.mightContain(key));
-                }
-            }
-            
-            // Verify false positive behavior (some keys that weren't added 
should return false)
-            int falsePositives = 0;
-            int testKeys = 100;
-            for (int i = 0; i < testKeys; i++) {
-                String nonExistentKey = "non-existent-key-" + i;
-                if (lazyFilter.mightContain(nonExistentKey)) {
-                    falsePositives++;
-                }
-            }
-            
-            // With 1000 entries and 1% FPP, we expect roughly 1% false 
positives for non-existent keys
-            // Allow for some variance but it shouldn't be too high
-            assertTrue("False positive rate too high: " + falsePositives + "/" 
+ testKeys, 
-                       falsePositives < testKeys * 0.05); // Allow up to 5% to 
account for variance
-            
-        } finally {
-            executor.shutdown();
-            if (!executor.awaitTermination(5, TimeUnit.SECONDS)) {
-                executor.shutdownNow();
-            }
-        }
-    }
-    
-    /**
-     * Test concurrent put and mightContain operations to ensure thread safety.
-     */
-    @Test
-    public void testLazyBloomFilterConcurrentReadWrite() throws 
InterruptedException {
-        final int threadCount = 10;
-        final int operationsPerThread = 100;
-        final CountDownLatch startLatch = new CountDownLatch(1);
-        final CountDownLatch doneLatch = new CountDownLatch(threadCount);
-        final ExecutorService executor = 
Executors.newFixedThreadPool(threadCount);
-        
-        final CacheChangesTracker.LazyBloomFilter lazyFilter = 
-            new CacheChangesTracker.LazyBloomFilter(2000);
-        
-        final AtomicInteger readOperations = new AtomicInteger(0);
-        final AtomicInteger writeOperations = new AtomicInteger(0);
-        final List<Exception> exceptions = Collections.synchronizedList(new 
ArrayList<>());
-        
-        try {
-            // Create mixed read/write threads
-            for (int i = 0; i < threadCount; i++) {
-                final int threadId = i;
-                final boolean isWriter = (i % 2 == 0);
-                
-                executor.submit(() -> {
-                    try {
-                        startLatch.await();
-                        
-                        for (int j = 0; j < operationsPerThread; j++) {
-                            String key = "mixed-thread-" + threadId + "-key-" 
+ j;
-                            
-                            if (isWriter || j < 10) { // Writers, or first few 
operations of readers
-                                lazyFilter.put(key);
-                                writeOperations.incrementAndGet();
-                            }
-                            
-                            // All threads also do reads
-                            boolean result = lazyFilter.mightContain(key);
-                            readOperations.incrementAndGet();
-                            
-                            // If we just wrote the key, it should definitely 
be found
-                            if (isWriter || j < 10) {
-                                assertTrue("Key should be found after being 
added: " + key, result);
-                            }
-                        }
-                    } catch (Exception e) {
-                        exceptions.add(e);
-                    } finally {
-                        doneLatch.countDown();
-                    }
-                });
-            }
-            
-            startLatch.countDown();
-            assertTrue("Test timed out", doneLatch.await(30, 
TimeUnit.SECONDS));
-            
-            if (!exceptions.isEmpty()) {
-                fail("Exceptions occurred during concurrent read/write: " + 
exceptions.get(0));
-            }
-            
-            assertTrue("Should have performed read operations", 
readOperations.get() > 0);
-            assertTrue("Should have performed write operations", 
writeOperations.get() > 0);
-            
-        } finally {
-            executor.shutdown();
-            if (!executor.awaitTermination(5, TimeUnit.SECONDS)) {
-                executor.shutdownNow();
-            }
-        }
-    }
-    
-    /**
-     * Test that LazyBloomFilter behaves correctly when filter is never 
initialized
-     * (i.e., only mightContain is called, never put).
-     */
-    @Test
-    public void testLazyBloomFilterNoInitialization() {
-        CacheChangesTracker.LazyBloomFilter lazyFilter = 
-            new CacheChangesTracker.LazyBloomFilter(1000);
-        
-        // Should return false for any key when filter is not initialized
-        assertFalse(lazyFilter.mightContain("any-key"));
-        assertFalse(lazyFilter.mightContain("another-key"));
-        assertFalse(lazyFilter.mightContain(""));
-    }
-} 
\ No newline at end of file

Reply via email to