This is an automated email from the ASF dual-hosted git repository.

jiangtian pushed a commit to branch mem_usage_refinement
in repository https://gitbox.apache.org/repos/asf/tsfile.git

commit c143d885f2bf0aa9cf639a5b8944b300e44d452d
Author: Tian Jiang <[email protected]>
AuthorDate: Wed Apr 23 11:26:05 2025 +0800

    revert bloom filter
---
 .../java/org/apache/tsfile/utils/BloomFilter.java  | 110 +--------------------
 .../apache/tsfile/write/writer/TsFileIOWriter.java |   2 +-
 2 files changed, 6 insertions(+), 106 deletions(-)

diff --git a/java/tsfile/src/main/java/org/apache/tsfile/utils/BloomFilter.java 
b/java/tsfile/src/main/java/org/apache/tsfile/utils/BloomFilter.java
index d4a0f9f5..51a01ce2 100644
--- a/java/tsfile/src/main/java/org/apache/tsfile/utils/BloomFilter.java
+++ b/java/tsfile/src/main/java/org/apache/tsfile/utils/BloomFilter.java
@@ -19,18 +19,12 @@
 package org.apache.tsfile.utils;
 
 import org.apache.tsfile.common.conf.TSFileConfig;
-import org.apache.tsfile.common.conf.TSFileDescriptor;
-import org.apache.tsfile.file.metadata.IDeviceID;
-import org.apache.tsfile.file.metadata.IDeviceID.Factory;
-import org.apache.tsfile.read.common.FullPath;
 import org.apache.tsfile.read.common.Path;
 
 import java.io.IOException;
 import java.io.OutputStream;
 import java.nio.ByteBuffer;
-import java.util.ArrayList;
 import java.util.BitSet;
-import java.util.List;
 import java.util.Objects;
 
 import static org.apache.tsfile.utils.RamUsageEstimator.sizeOfLongArray;
@@ -49,35 +43,18 @@ public class BloomFilter {
   private final int size;
   private final int hashFunctionSize;
   private final BitSet bits;
-  private final boolean useCheapHash;
 
   // do not try to initialize the filter by construction method
   private BloomFilter(byte[] bytes, int size, int hashFunctionSize) {
     this.size = size;
     this.hashFunctionSize = hashFunctionSize;
     bits = BitSet.valueOf(bytes);
-    useCheapHash = false;
   }
 
   private BloomFilter(int size, int hashFunctionSize) {
     this.size = size;
     this.hashFunctionSize = hashFunctionSize;
     bits = new BitSet(size);
-    useCheapHash = false;
-  }
-
-  private BloomFilter(byte[] bytes, int size, int hashFunctionSize, boolean 
useCheapHash) {
-    this.size = size;
-    this.hashFunctionSize = hashFunctionSize;
-    bits = BitSet.valueOf(bytes);
-    this.useCheapHash = useCheapHash;
-  }
-
-  private BloomFilter(int size, int hashFunctionSize, boolean useCheapHash) {
-    this.size = size;
-    this.hashFunctionSize = hashFunctionSize;
-    bits = new BitSet(size);
-    this.useCheapHash = useCheapHash;
   }
 
   /**
@@ -98,24 +75,6 @@ public class BloomFilter {
         Math.max(MINIMAL_SIZE, size), Math.min(MAXIMAL_HASH_FUNCTION_SIZE, 
hashFunctionSize));
   }
 
-  /**
-   * get empty bloom filter
-   *
-   * @param errorPercent the tolerant percent of error of the bloom filter
-   * @param numOfString the number of string want to store in the bloom filter
-   * @return empty bloom
-   */
-  public static BloomFilter getEmptyBloomFilterWithCheapHash(double 
errorPercent, int numOfString) {
-    errorPercent = Math.max(errorPercent, 
TSFileConfig.MIN_BLOOM_FILTER_ERROR_RATE);
-    errorPercent = Math.min(errorPercent, 
TSFileConfig.MAX_BLOOM_FILTER_ERROR_RATE);
-
-    double ln2 = Math.log(2);
-    int size = (int) (-numOfString * Math.log(errorPercent) / ln2 / ln2) + 1;
-    int hashFunctionSize = (int) (-Math.log(errorPercent) / ln2) + 1;
-    return new BloomFilter(
-        Math.max(MINIMAL_SIZE, size), Math.min(MAXIMAL_HASH_FUNCTION_SIZE, 
hashFunctionSize), true);
-  }
-
   /**
    * build bloom filter by bytes
    *
@@ -126,18 +85,6 @@ public class BloomFilter {
     return new BloomFilter(bytes, size, Math.min(MAXIMAL_HASH_FUNCTION_SIZE, 
hashFunctionSize));
   }
 
-  /**
-   * build bloom filter by bytes
-   *
-   * @param bytes bytes of bits
-   * @return bloom filter
-   */
-  public static BloomFilter buildBloomFilterWithCheapHash(
-      byte[] bytes, int size, int hashFunctionSize) {
-    return new BloomFilter(
-        bytes, size, Math.min(MAXIMAL_HASH_FUNCTION_SIZE, hashFunctionSize), 
true);
-  }
-
   public int getHashFunctionSize() {
     return hashFunctionSize;
   }
@@ -147,12 +94,8 @@ public class BloomFilter {
   }
 
   public void add(Path path) {
-    if (!useCheapHash) {
-      add(path.getFullPath());
-    } else {
-      for (int i = 0; i < hashFunctionSize; i++) {
-        bits.set(hash(path, size, SEEDS[i]), true);
-      }
+    for (int i = 0; i < hashFunctionSize; i++) {
+      bits.set(hash(path, size, SEEDS[i]), true);
     }
   }
 
@@ -169,11 +112,7 @@ public class BloomFilter {
     boolean ret = true;
     int index = 0;
     while (ret && index < hashFunctionSize) {
-      if (!useCheapHash) {
-        ret = bits.get(hash(value.getFullPath(), size, SEEDS[index++]));
-      } else {
-        ret = bits.get(hash(value, size, SEEDS[index++]));
-      }
+      ret = bits.get(hash(value.getFullPath(), size, SEEDS[index++]));
     }
 
     return ret;
@@ -247,14 +186,7 @@ public class BloomFilter {
       outputStream.write(bytes);
       byteLen += bytes.length;
       byteLen += ReadWriteForEncodingUtils.writeUnsignedVarInt(getSize(), 
outputStream);
-      if (!useCheapHash) {
-        byteLen +=
-            
ReadWriteForEncodingUtils.writeUnsignedVarInt(getHashFunctionSize(), 
outputStream);
-      } else {
-        byteLen += 
ReadWriteForEncodingUtils.writeUnsignedVarInt(Integer.MAX_VALUE, outputStream);
-        byteLen +=
-            
ReadWriteForEncodingUtils.writeUnsignedVarInt(getHashFunctionSize(), 
outputStream);
-      }
+      byteLen += 
ReadWriteForEncodingUtils.writeUnsignedVarInt(getHashFunctionSize(), 
outputStream);
     }
     return byteLen;
   }
@@ -264,40 +196,8 @@ public class BloomFilter {
     if (bytes.length != 0) {
       int filterSize = ReadWriteForEncodingUtils.readUnsignedVarInt(buffer);
       int hashFunctionSize = 
ReadWriteForEncodingUtils.readUnsignedVarInt(buffer);
-      if (hashFunctionSize != Integer.MIN_VALUE) {
-        return BloomFilter.buildBloomFilter(bytes, filterSize, 
hashFunctionSize);
-      } else {
-        hashFunctionSize = 
ReadWriteForEncodingUtils.readUnsignedVarInt(buffer);
-        return BloomFilter.buildBloomFilterWithCheapHash(bytes, filterSize, 
hashFunctionSize);
-      }
+      return BloomFilter.buildBloomFilter(bytes, filterSize, hashFunctionSize);
     }
     return null;
   }
-
-  public static void main(String[] args) {
-    int deviceNum = 100;
-    int measurementNum = 10000;
-    List<Path> pathList = new ArrayList<>();
-    for (int i = 0; i < deviceNum; i++) {
-      IDeviceID deviceID = Factory.DEFAULT_FACTORY.create("root.db1.d" + i);
-      for (int j = 0; j < measurementNum; j++) {
-        String measurement = "s" + j;
-        pathList.add(new FullPath(deviceID, measurement));
-      }
-    }
-
-    int repeat = 100;
-    long startTime = System.currentTimeMillis();
-    for (int i = 0; i < repeat; i++) {
-      //      BloomFilter bloomFilter = BloomFilter.getEmptyBloomFilter(
-      //          
TSFileDescriptor.getInstance().getConfig().getBloomFilterErrorRate(), deviceNum 
*
-      // measurementNum);
-      BloomFilter bloomFilter =
-          BloomFilter.getEmptyBloomFilterWithCheapHash(
-              
TSFileDescriptor.getInstance().getConfig().getBloomFilterErrorRate(),
-              deviceNum * measurementNum);
-      pathList.forEach(bloomFilter::add);
-    }
-    System.out.println(System.currentTimeMillis() - startTime);
-  }
 }
diff --git 
a/java/tsfile/src/main/java/org/apache/tsfile/write/writer/TsFileIOWriter.java 
b/java/tsfile/src/main/java/org/apache/tsfile/write/writer/TsFileIOWriter.java
index cf3c62ba..f1df08cd 100644
--- 
a/java/tsfile/src/main/java/org/apache/tsfile/write/writer/TsFileIOWriter.java
+++ 
b/java/tsfile/src/main/java/org/apache/tsfile/write/writer/TsFileIOWriter.java
@@ -450,7 +450,7 @@ public class TsFileIOWriter implements AutoCloseable {
         new MetadataIndexNode(MetadataIndexNodeType.LEAF_MEASUREMENT);
     int seriesIdxForCurrDevice = 0;
     BloomFilter filter =
-        BloomFilter.getEmptyBloomFilterWithCheapHash(
+        BloomFilter.getEmptyBloomFilter(
             
TSFileDescriptor.getInstance().getConfig().getBloomFilterErrorRate(), 
pathCount);
 
     while (tsmIterator.hasNext()) {

Reply via email to