thomasmueller commented on code in PR #2412:
URL: https://github.com/apache/jackrabbit-oak/pull/2412#discussion_r2243055681


##########
oak-blob/src/main/java/org/apache/jackrabbit/oak/spi/blob/split/BlobIdSet.java:
##########
@@ -25,36 +25,34 @@
 import java.io.FileReader;
 import java.io.FileWriter;
 import java.io.IOException;
-import java.nio.charset.StandardCharsets;
 
 import org.apache.commons.io.IOUtils;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 import org.apache.jackrabbit.guava.common.cache.Cache;
 import org.apache.jackrabbit.guava.common.cache.CacheBuilder;
-import org.apache.jackrabbit.guava.common.hash.BloomFilter;
-import org.apache.jackrabbit.guava.common.hash.Funnels;
+import org.apache.jackrabbit.oak.commons.collections.BloomFilter;
 
 class BlobIdSet {
 
     private static final Logger log = LoggerFactory.getLogger(BlobIdSet.class);
 
     private final File store;
 
-    private final BloomFilter<CharSequence> bloomFilter;
+    private final BloomFilter bloomFilter;
 
     private final Cache<String, Boolean> cache;
 
     BlobIdSet(String repositoryDir, String filename) {
         store = new File(new File(repositoryDir), filename);
-        bloomFilter = BloomFilter.create(Funnels.stringFunnel(StandardCharsets.UTF_8), 9000000); // about 8MB
+        bloomFilter = BloomFilter.construct(9000000, 0.01); // 9M entries, 1% false positive rate

Review Comment:
   Oh! That is surprisingly high. OK, I'll also use 3%.



##########
oak-commons/pom.xml:
##########
@@ -102,6 +102,10 @@
       <groupId>org.apache.commons</groupId>
       <artifactId>commons-collections4</artifactId>
     </dependency>
+    <dependency>

Review Comment:
   It is needed for MurmurHash3



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to