This is an automated email from the ASF dual-hosted git repository.

jiangtian pushed a commit to branch mem_usage_refinement
in repository https://gitbox.apache.org/repos/asf/tsfile.git

commit c327a3173ad5ba25e69bf5ef1785ba9a337a4f0b
Author: Tian Jiang <[email protected]>
AuthorDate: Wed Apr 9 11:56:06 2025 +0800

    multiple optimizations
---
 .../tsfile/file/metadata/StringArrayDeviceID.java  |   3 +
 .../tsfile/file/metadata/TimeseriesMetadata.java   |  39 +++
 .../tsfile/file/metadata/TsFileMetadata.java       |  25 +-
 .../tsfile/fileSystem/fsFactory/FSFactory.java     |   8 +-
 .../fileSystem/fsFactory/HybridFSFactory.java      |   8 +-
 .../fileSystem/fsFactory/LocalFSFactory.java       |  15 +-
 .../org/apache/tsfile/read/common/FullPath.java    |  63 ++++
 .../java/org/apache/tsfile/read/common/Path.java   |   2 +-
 .../tsfile/read/query/executor/TsFileExecutor.java |   2 +-
 .../java/org/apache/tsfile/utils/BloomFilter.java  | 155 +++++++++
 .../java/org/apache/tsfile/utils/BytesUtils.java   |  12 +-
 .../tsfile/utils/NoSyncBufferedInputStream.java    | 354 +++++++++++++++++++++
 .../tsfile/utils/NoSyncBufferedOutputStream.java   | 132 ++++++++
 .../tsfile/write/writer/LocalTsFileOutput.java     |   7 +-
 .../apache/tsfile/write/writer/TsFileIOWriter.java |   6 +-
 .../write/writer/tsmiterator/TSMIterator.java      | 122 +++++--
 16 files changed, 885 insertions(+), 68 deletions(-)

diff --git 
a/java/tsfile/src/main/java/org/apache/tsfile/file/metadata/StringArrayDeviceID.java
 
b/java/tsfile/src/main/java/org/apache/tsfile/file/metadata/StringArrayDeviceID.java
index e012cdfb..685df51f 100644
--- 
a/java/tsfile/src/main/java/org/apache/tsfile/file/metadata/StringArrayDeviceID.java
+++ 
b/java/tsfile/src/main/java/org/apache/tsfile/file/metadata/StringArrayDeviceID.java
@@ -238,6 +238,9 @@ public class StringArrayDeviceID implements IDeviceID {
 
   @Override
   public int compareTo(IDeviceID o) {
+    if (this == o) {
+      return 0;
+    }
     int thisSegmentNum = segmentNum();
     int otherSegmentNum = o.segmentNum();
     for (int i = 0; i < thisSegmentNum; i++) {
diff --git 
a/java/tsfile/src/main/java/org/apache/tsfile/file/metadata/TimeseriesMetadata.java
 
b/java/tsfile/src/main/java/org/apache/tsfile/file/metadata/TimeseriesMetadata.java
index e59fd9dc..657f183c 100644
--- 
a/java/tsfile/src/main/java/org/apache/tsfile/file/metadata/TimeseriesMetadata.java
+++ 
b/java/tsfile/src/main/java/org/apache/tsfile/file/metadata/TimeseriesMetadata.java
@@ -21,6 +21,8 @@ package org.apache.tsfile.file.metadata;
 
 import org.apache.tsfile.common.conf.TSFileConfig;
 import org.apache.tsfile.enums.TSDataType;
+import org.apache.tsfile.file.metadata.enums.CompressionType;
+import org.apache.tsfile.file.metadata.enums.TSEncoding;
 import org.apache.tsfile.file.metadata.statistics.Statistics;
 import org.apache.tsfile.read.controller.IChunkMetadataLoader;
 import org.apache.tsfile.read.reader.TsFileInput;
@@ -28,13 +30,17 @@ import org.apache.tsfile.utils.PublicBAOS;
 import org.apache.tsfile.utils.RamUsageEstimator;
 import org.apache.tsfile.utils.ReadWriteForEncodingUtils;
 import org.apache.tsfile.utils.ReadWriteIOUtils;
+import org.apache.tsfile.write.writer.LocalTsFileOutput;
+import org.apache.tsfile.write.writer.tsmiterator.TSMIterator;
 
+import java.io.FileOutputStream;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.OutputStream;
 import java.io.Serializable;
 import java.nio.ByteBuffer;
 import java.util.ArrayList;
+import java.util.Collections;
 import java.util.List;
 import java.util.Optional;
 import java.util.Set;
@@ -394,4 +400,37 @@ public class TimeseriesMetadata implements 
ITimeSeriesMetadata {
         + chunkMetadataList
         + '}';
   }
+
+  public static void main(String[] args) throws IOException {
+    int deviceNum = 100;
+    int measurementNum = 10000;
+    List<TimeseriesMetadata> timeseriesMetadataList = new 
ArrayList<>(deviceNum * measurementNum);
+    for (int i = 0; i < deviceNum; i++) {
+      for (int j = 0; j < measurementNum; j++) {
+        timeseriesMetadataList.add(
+            TSMIterator.constructOneTimeseriesMetadata(
+                "s" + j,
+                Collections.singletonList(
+                    new ChunkMetadata(
+                        "s" + j,
+                        TSDataType.INT64,
+                        TSEncoding.PLAIN,
+                        CompressionType.UNCOMPRESSED,
+                        0,
+                        Statistics.getStatsByType(TSDataType.INT64)))));
+      }
+    }
+
+    long startTime = System.currentTimeMillis();
+    int repeat = 100;
+    for (int i = 0; i < repeat; i++) {
+      LocalTsFileOutput tsFileOutput = new LocalTsFileOutput(new 
FileOutputStream("test.tsfile"));
+      for (int j = 0; j < timeseriesMetadataList.size(); j++) {
+        TimeseriesMetadata timeseriesMetadata = timeseriesMetadataList.get(j);
+        timeseriesMetadata.serializeTo(tsFileOutput);
+      }
+      tsFileOutput.close();
+    }
+    System.out.println(System.currentTimeMillis() - startTime);
+  }
 }
diff --git 
a/java/tsfile/src/main/java/org/apache/tsfile/file/metadata/TsFileMetadata.java 
b/java/tsfile/src/main/java/org/apache/tsfile/file/metadata/TsFileMetadata.java
index d7ac19bb..e4303043 100644
--- 
a/java/tsfile/src/main/java/org/apache/tsfile/file/metadata/TsFileMetadata.java
+++ 
b/java/tsfile/src/main/java/org/apache/tsfile/file/metadata/TsFileMetadata.java
@@ -110,13 +110,7 @@ public class TsFileMetadata {
 
     // read bloom filter
     if (buffer.hasRemaining()) {
-      byte[] bytes = 
ReadWriteIOUtils.readByteBufferWithSelfDescriptionLength(buffer);
-      if (bytes.length != 0) {
-        int filterSize = ReadWriteForEncodingUtils.readUnsignedVarInt(buffer);
-        int hashFunctionSize = 
ReadWriteForEncodingUtils.readUnsignedVarInt(buffer);
-        fileMetaData.bloomFilter =
-            BloomFilter.buildBloomFilter(bytes, filterSize, hashFunctionSize);
-      }
+      fileMetaData.bloomFilter = BloomFilter.deserialize(buffer);
     }
 
     fileMetaData.propertiesOffset = buffer.position() - startPos;
@@ -234,7 +228,7 @@ public class TsFileMetadata {
     // metaOffset
     byteLen += ReadWriteIOUtils.write(metaOffset, outputStream);
     if (bloomFilter != null) {
-      byteLen += serializeBloomFilter(outputStream, bloomFilter);
+      byteLen += bloomFilter.serialize(outputStream);
     } else {
       byteLen += ReadWriteForEncodingUtils.writeUnsignedVarInt(0, 
outputStream);
     }
@@ -252,21 +246,6 @@ public class TsFileMetadata {
     return byteLen;
   }
 
-  public int serializeBloomFilter(OutputStream outputStream, BloomFilter 
filter)
-      throws IOException {
-    int byteLen = 0;
-    byte[] bytes = filter.serialize();
-    byteLen += ReadWriteForEncodingUtils.writeUnsignedVarInt(bytes.length, 
outputStream);
-    if (bytes.length > 0) {
-      outputStream.write(bytes);
-      byteLen += bytes.length;
-      byteLen += 
ReadWriteForEncodingUtils.writeUnsignedVarInt(filter.getSize(), outputStream);
-      byteLen +=
-          
ReadWriteForEncodingUtils.writeUnsignedVarInt(filter.getHashFunctionSize(), 
outputStream);
-    }
-    return byteLen;
-  }
-
   public long getMetaOffset() {
     return metaOffset;
   }
diff --git 
a/java/tsfile/src/main/java/org/apache/tsfile/fileSystem/fsFactory/FSFactory.java
 
b/java/tsfile/src/main/java/org/apache/tsfile/fileSystem/fsFactory/FSFactory.java
index 0a00ff30..4cc9403e 100644
--- 
a/java/tsfile/src/main/java/org/apache/tsfile/fileSystem/fsFactory/FSFactory.java
+++ 
b/java/tsfile/src/main/java/org/apache/tsfile/fileSystem/fsFactory/FSFactory.java
@@ -19,12 +19,12 @@
 
 package org.apache.tsfile.fileSystem.fsFactory;
 
-import java.io.BufferedInputStream;
-import java.io.BufferedOutputStream;
 import java.io.BufferedReader;
 import java.io.BufferedWriter;
 import java.io.File;
 import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
 import java.net.URI;
 
 public interface FSFactory {
@@ -94,7 +94,7 @@ public interface FSFactory {
    * @param filePath file path
    * @return input stream
    */
-  BufferedInputStream getBufferedInputStream(String filePath);
+  InputStream getBufferedInputStream(String filePath);
 
   /**
    * get output stream
@@ -102,7 +102,7 @@ public interface FSFactory {
    * @param filePath file path
    * @return output stream
    */
-  BufferedOutputStream getBufferedOutputStream(String filePath);
+  OutputStream getBufferedOutputStream(String filePath);
 
   /**
    * move file
diff --git 
a/java/tsfile/src/main/java/org/apache/tsfile/fileSystem/fsFactory/HybridFSFactory.java
 
b/java/tsfile/src/main/java/org/apache/tsfile/fileSystem/fsFactory/HybridFSFactory.java
index 395f5306..7e5700a3 100644
--- 
a/java/tsfile/src/main/java/org/apache/tsfile/fileSystem/fsFactory/HybridFSFactory.java
+++ 
b/java/tsfile/src/main/java/org/apache/tsfile/fileSystem/fsFactory/HybridFSFactory.java
@@ -25,12 +25,12 @@ import org.apache.tsfile.utils.FSUtils;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-import java.io.BufferedInputStream;
-import java.io.BufferedOutputStream;
 import java.io.BufferedReader;
 import java.io.BufferedWriter;
 import java.io.File;
 import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
 import java.net.URI;
 import java.util.Map;
 import java.util.concurrent.ConcurrentHashMap;
@@ -101,13 +101,13 @@ public class HybridFSFactory implements FSFactory {
   }
 
   @Override
-  public BufferedInputStream getBufferedInputStream(String filePath) {
+  public InputStream getBufferedInputStream(String filePath) {
     FSPath path = FSUtils.parse(filePath);
     return 
getFSFactory(path.getFsType()).getBufferedInputStream(path.getPath());
   }
 
   @Override
-  public BufferedOutputStream getBufferedOutputStream(String filePath) {
+  public OutputStream getBufferedOutputStream(String filePath) {
     FSPath path = FSUtils.parse(filePath);
     return 
getFSFactory(path.getFsType()).getBufferedOutputStream(path.getPath());
   }
diff --git 
a/java/tsfile/src/main/java/org/apache/tsfile/fileSystem/fsFactory/LocalFSFactory.java
 
b/java/tsfile/src/main/java/org/apache/tsfile/fileSystem/fsFactory/LocalFSFactory.java
index 2ae099d2..d64a8534 100644
--- 
a/java/tsfile/src/main/java/org/apache/tsfile/fileSystem/fsFactory/LocalFSFactory.java
+++ 
b/java/tsfile/src/main/java/org/apache/tsfile/fileSystem/fsFactory/LocalFSFactory.java
@@ -19,12 +19,13 @@
 
 package org.apache.tsfile.fileSystem.fsFactory;
 
+import org.apache.tsfile.utils.NoSyncBufferedInputStream;
+import org.apache.tsfile.utils.NoSyncBufferedOutputStream;
+
 import org.apache.commons.io.FileUtils;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-import java.io.BufferedInputStream;
-import java.io.BufferedOutputStream;
 import java.io.BufferedReader;
 import java.io.BufferedWriter;
 import java.io.File;
@@ -33,6 +34,8 @@ import java.io.FileOutputStream;
 import java.io.FileReader;
 import java.io.FileWriter;
 import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
 import java.net.URI;
 import java.nio.file.Files;
 
@@ -91,9 +94,9 @@ public class LocalFSFactory implements FSFactory {
   }
 
   @Override
-  public BufferedInputStream getBufferedInputStream(String filePath) {
+  public InputStream getBufferedInputStream(String filePath) {
     try {
-      return new BufferedInputStream(new FileInputStream(filePath));
+      return new NoSyncBufferedInputStream(new FileInputStream(filePath));
     } catch (IOException e) {
       logger.error("Failed to get buffered input stream for {}. ", filePath, 
e);
       return null;
@@ -101,9 +104,9 @@ public class LocalFSFactory implements FSFactory {
   }
 
   @Override
-  public BufferedOutputStream getBufferedOutputStream(String filePath) {
+  public OutputStream getBufferedOutputStream(String filePath) {
     try {
-      return new BufferedOutputStream(new FileOutputStream(filePath));
+      return new NoSyncBufferedOutputStream(new FileOutputStream(filePath));
     } catch (IOException e) {
       logger.error("Failed to get buffered output stream for {}. ", filePath, 
e);
       return null;
diff --git 
a/java/tsfile/src/main/java/org/apache/tsfile/read/common/FullPath.java 
b/java/tsfile/src/main/java/org/apache/tsfile/read/common/FullPath.java
new file mode 100644
index 00000000..b332241a
--- /dev/null
+++ b/java/tsfile/src/main/java/org/apache/tsfile/read/common/FullPath.java
@@ -0,0 +1,63 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.tsfile.read.common;
+
+import org.apache.tsfile.common.constant.TsFileConstant;
+import org.apache.tsfile.file.metadata.IDeviceID;
+
+import java.util.Objects;
+
+public class FullPath extends Path {
+  public FullPath(IDeviceID device, String measurement) {
+    this.device = device;
+    this.measurement = measurement;
+  }
+
+  @Override
+  public String getFullPath() {
+    if (fullPath == null) {
+      fullPath = device.toString() + TsFileConstant.PATH_SEPARATOR + 
measurement;
+    }
+    return fullPath;
+  }
+
+  @Override
+  public int hashCode() {
+    return device.hashCode() + measurement.hashCode();
+  }
+
+  @Override
+  public boolean equals(Object obj) {
+    if (!(obj instanceof Path)) {
+      return false;
+    }
+    return Objects.equals(this.device, ((Path) obj).getIDeviceID())
+        && Objects.equals(this.measurement, ((Path) obj).getMeasurement());
+  }
+
+  @Override
+  public int compareTo(Path path) {
+    int deviceCompare = device.compareTo(path.getIDeviceID());
+    if (deviceCompare != 0) {
+      return deviceCompare;
+    }
+    return measurement.compareTo(path.getMeasurement());
+  }
+}
diff --git a/java/tsfile/src/main/java/org/apache/tsfile/read/common/Path.java 
b/java/tsfile/src/main/java/org/apache/tsfile/read/common/Path.java
index 52968495..b7a0a144 100644
--- a/java/tsfile/src/main/java/org/apache/tsfile/read/common/Path.java
+++ b/java/tsfile/src/main/java/org/apache/tsfile/read/common/Path.java
@@ -45,7 +45,7 @@ import java.nio.ByteBuffer;
 public class Path implements Serializable, Comparable<Path> {
 
   private static final long serialVersionUID = 3405277066329298200L;
-  private String measurement;
+  protected String measurement;
   protected IDeviceID device;
   protected String fullPath;
   private static final String ILLEGAL_PATH_ARGUMENT = "Path parameter is null";
diff --git 
a/java/tsfile/src/main/java/org/apache/tsfile/read/query/executor/TsFileExecutor.java
 
b/java/tsfile/src/main/java/org/apache/tsfile/read/query/executor/TsFileExecutor.java
index afa34ba9..3dac3a77 100644
--- 
a/java/tsfile/src/main/java/org/apache/tsfile/read/query/executor/TsFileExecutor.java
+++ 
b/java/tsfile/src/main/java/org/apache/tsfile/read/query/executor/TsFileExecutor.java
@@ -62,7 +62,7 @@ public class TsFileExecutor implements QueryExecutor {
     List<Path> filteredSeriesPath = new ArrayList<>();
     if (bloomFilter != null) {
       for (Path path : queryExpression.getSelectedSeries()) {
-        if (bloomFilter.contains(path.getFullPath())) {
+        if (bloomFilter.contains(path)) {
           filteredSeriesPath.add(path);
         }
       }
diff --git a/java/tsfile/src/main/java/org/apache/tsfile/utils/BloomFilter.java 
b/java/tsfile/src/main/java/org/apache/tsfile/utils/BloomFilter.java
index e7bc3a93..d4a0f9f5 100644
--- a/java/tsfile/src/main/java/org/apache/tsfile/utils/BloomFilter.java
+++ b/java/tsfile/src/main/java/org/apache/tsfile/utils/BloomFilter.java
@@ -19,8 +19,18 @@
 package org.apache.tsfile.utils;
 
 import org.apache.tsfile.common.conf.TSFileConfig;
+import org.apache.tsfile.common.conf.TSFileDescriptor;
+import org.apache.tsfile.file.metadata.IDeviceID;
+import org.apache.tsfile.file.metadata.IDeviceID.Factory;
+import org.apache.tsfile.read.common.FullPath;
+import org.apache.tsfile.read.common.Path;
 
+import java.io.IOException;
+import java.io.OutputStream;
+import java.nio.ByteBuffer;
+import java.util.ArrayList;
 import java.util.BitSet;
+import java.util.List;
 import java.util.Objects;
 
 import static org.apache.tsfile.utils.RamUsageEstimator.sizeOfLongArray;
@@ -39,18 +49,35 @@ public class BloomFilter {
   private final int size;
   private final int hashFunctionSize;
   private final BitSet bits;
+  private final boolean useCheapHash;
 
   // do not try to initialize the filter by construction method
   private BloomFilter(byte[] bytes, int size, int hashFunctionSize) {
     this.size = size;
     this.hashFunctionSize = hashFunctionSize;
     bits = BitSet.valueOf(bytes);
+    useCheapHash = false;
   }
 
   private BloomFilter(int size, int hashFunctionSize) {
     this.size = size;
     this.hashFunctionSize = hashFunctionSize;
     bits = new BitSet(size);
+    useCheapHash = false;
+  }
+
+  private BloomFilter(byte[] bytes, int size, int hashFunctionSize, boolean 
useCheapHash) {
+    this.size = size;
+    this.hashFunctionSize = hashFunctionSize;
+    bits = BitSet.valueOf(bytes);
+    this.useCheapHash = useCheapHash;
+  }
+
+  private BloomFilter(int size, int hashFunctionSize, boolean useCheapHash) {
+    this.size = size;
+    this.hashFunctionSize = hashFunctionSize;
+    bits = new BitSet(size);
+    this.useCheapHash = useCheapHash;
   }
 
   /**
@@ -71,6 +98,24 @@ public class BloomFilter {
         Math.max(MINIMAL_SIZE, size), Math.min(MAXIMAL_HASH_FUNCTION_SIZE, 
hashFunctionSize));
   }
 
+  /**
+   * get empty bloom filter
+   *
+   * @param errorPercent the tolerant percent of error of the bloom filter
+   * @param numOfString the number of strings to store in the bloom filter
+   * @return an empty bloom filter that uses the cheap hash function
+   */
+  public static BloomFilter getEmptyBloomFilterWithCheapHash(double 
errorPercent, int numOfString) {
+    errorPercent = Math.max(errorPercent, 
TSFileConfig.MIN_BLOOM_FILTER_ERROR_RATE);
+    errorPercent = Math.min(errorPercent, 
TSFileConfig.MAX_BLOOM_FILTER_ERROR_RATE);
+
+    double ln2 = Math.log(2);
+    int size = (int) (-numOfString * Math.log(errorPercent) / ln2 / ln2) + 1;
+    int hashFunctionSize = (int) (-Math.log(errorPercent) / ln2) + 1;
+    return new BloomFilter(
+        Math.max(MINIMAL_SIZE, size), Math.min(MAXIMAL_HASH_FUNCTION_SIZE, 
hashFunctionSize), true);
+  }
+
   /**
    * build bloom filter by bytes
    *
@@ -81,6 +126,18 @@ public class BloomFilter {
     return new BloomFilter(bytes, size, Math.min(MAXIMAL_HASH_FUNCTION_SIZE, 
hashFunctionSize));
   }
 
+  /**
+   * build bloom filter by bytes
+   *
+   * @param bytes bytes of bits
+   * @return bloom filter
+   */
+  public static BloomFilter buildBloomFilterWithCheapHash(
+      byte[] bytes, int size, int hashFunctionSize) {
+    return new BloomFilter(
+        bytes, size, Math.min(MAXIMAL_HASH_FUNCTION_SIZE, hashFunctionSize), 
true);
+  }
+
   public int getHashFunctionSize() {
     return hashFunctionSize;
   }
@@ -89,12 +146,39 @@ public class BloomFilter {
     return size;
   }
 
+  public void add(Path path) {
+    if (!useCheapHash) {
+      add(path.getFullPath());
+    } else {
+      for (int i = 0; i < hashFunctionSize; i++) {
+        bits.set(hash(path, size, SEEDS[i]), true);
+      }
+    }
+  }
+
   public void add(String value) {
     for (int i = 0; i < hashFunctionSize; i++) {
       bits.set(hash(value, size, SEEDS[i]), true);
     }
   }
 
+  public boolean contains(Path value) {
+    if (value == null) {
+      return false;
+    }
+    boolean ret = true;
+    int index = 0;
+    while (ret && index < hashFunctionSize) {
+      if (!useCheapHash) {
+        ret = bits.get(hash(value.getFullPath(), size, SEEDS[index++]));
+      } else {
+        ret = bits.get(hash(value, size, SEEDS[index++]));
+      }
+    }
+
+    return ret;
+  }
+
   public boolean contains(String value) {
     if (value == null) {
       return false;
@@ -145,4 +229,75 @@ public class BloomFilter {
 
     return Math.abs(res) % cap;
   }
+
+  private static int hash(Path value, int cap, int seed) {
+    int res = Objects.hash(value, seed);
+    if (res == Integer.MIN_VALUE) {
+      res = 0;
+    }
+
+    return Math.abs(res) % cap;
+  }
+
+  public int serialize(OutputStream outputStream) throws IOException {
+    int byteLen = 0;
+    byte[] bytes = serialize();
+    byteLen += ReadWriteForEncodingUtils.writeUnsignedVarInt(bytes.length, 
outputStream);
+    if (bytes.length > 0) {
+      outputStream.write(bytes);
+      byteLen += bytes.length;
+      byteLen += ReadWriteForEncodingUtils.writeUnsignedVarInt(getSize(), 
outputStream);
+      if (!useCheapHash) {
+        byteLen +=
+            
ReadWriteForEncodingUtils.writeUnsignedVarInt(getHashFunctionSize(), 
outputStream);
+      } else {
+        byteLen += 
ReadWriteForEncodingUtils.writeUnsignedVarInt(Integer.MAX_VALUE, outputStream);
+        byteLen +=
+            
ReadWriteForEncodingUtils.writeUnsignedVarInt(getHashFunctionSize(), 
outputStream);
+      }
+    }
+    return byteLen;
+  }
+
+  public static BloomFilter deserialize(ByteBuffer buffer) {
+    byte[] bytes = 
ReadWriteIOUtils.readByteBufferWithSelfDescriptionLength(buffer);
+    if (bytes.length != 0) {
+      int filterSize = ReadWriteForEncodingUtils.readUnsignedVarInt(buffer);
+      int hashFunctionSize = 
ReadWriteForEncodingUtils.readUnsignedVarInt(buffer);
+      if (hashFunctionSize != Integer.MAX_VALUE) {
+        return BloomFilter.buildBloomFilter(bytes, filterSize, 
hashFunctionSize);
+      } else {
+        hashFunctionSize = 
ReadWriteForEncodingUtils.readUnsignedVarInt(buffer);
+        return BloomFilter.buildBloomFilterWithCheapHash(bytes, filterSize, 
hashFunctionSize);
+      }
+    }
+    return null;
+  }
+
+  public static void main(String[] args) {
+    int deviceNum = 100;
+    int measurementNum = 10000;
+    List<Path> pathList = new ArrayList<>();
+    for (int i = 0; i < deviceNum; i++) {
+      IDeviceID deviceID = Factory.DEFAULT_FACTORY.create("root.db1.d" + i);
+      for (int j = 0; j < measurementNum; j++) {
+        String measurement = "s" + j;
+        pathList.add(new FullPath(deviceID, measurement));
+      }
+    }
+
+    int repeat = 100;
+    long startTime = System.currentTimeMillis();
+    for (int i = 0; i < repeat; i++) {
+      //      BloomFilter bloomFilter = BloomFilter.getEmptyBloomFilter(
+      //          
TSFileDescriptor.getInstance().getConfig().getBloomFilterErrorRate(), deviceNum 
*
+      // measurementNum);
+      BloomFilter bloomFilter =
+          BloomFilter.getEmptyBloomFilterWithCheapHash(
+              
TSFileDescriptor.getInstance().getConfig().getBloomFilterErrorRate(),
+              deviceNum * measurementNum);
+      pathList.forEach(bloomFilter::add);
+    }
+    System.out.println(System.currentTimeMillis() - startTime);
+  }
 }
diff --git a/java/tsfile/src/main/java/org/apache/tsfile/utils/BytesUtils.java 
b/java/tsfile/src/main/java/org/apache/tsfile/utils/BytesUtils.java
index f38a0da1..82acc0b4 100644
--- a/java/tsfile/src/main/java/org/apache/tsfile/utils/BytesUtils.java
+++ b/java/tsfile/src/main/java/org/apache/tsfile/utils/BytesUtils.java
@@ -467,7 +467,17 @@ public class BytesUtils {
    * @return byte[8]
    */
   public static byte[] longToBytes(long num) {
-    return longToBytes(num, 8);
+    byte[] byteNum = new byte[8];
+    byteNum[0] = (byte) (num >>> 56);
+    byteNum[1] = (byte) (num >>> 48);
+    byteNum[2] = (byte) (num >>> 40);
+    byteNum[3] = (byte) (num >>> 32);
+    byteNum[4] = (byte) (num >>> 24);
+    byteNum[5] = (byte) (num >>> 16);
+    byteNum[6] = (byte) (num >>> 8);
+    byteNum[7] = (byte) (num >>> 0);
+
+    return byteNum;
   }
 
   /**
diff --git 
a/java/tsfile/src/main/java/org/apache/tsfile/utils/NoSyncBufferedInputStream.java
 
b/java/tsfile/src/main/java/org/apache/tsfile/utils/NoSyncBufferedInputStream.java
new file mode 100644
index 00000000..b798a75a
--- /dev/null
+++ 
b/java/tsfile/src/main/java/org/apache/tsfile/utils/NoSyncBufferedInputStream.java
@@ -0,0 +1,354 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.tsfile.utils;
+
+import java.io.FilterInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+
+/** A copy of java.io.BufferedInputStream, but without the thread safety. */
+public class NoSyncBufferedInputStream extends FilterInputStream {
+
+  private static final int DEFAULT_BUFFER_SIZE = 8192;
+
+  /**
+   * The internal buffer array where the data is stored. When necessary, it 
may be replaced by
+   * another array of a different size.
+   */
+  /*
+   * We null this out with a CAS on close(), which is necessary since
+   * closes can be asynchronous. We use nullness of buf[] as primary
+   * indicator that this stream is closed. (The "in" field is also
+   * nulled out on close.)
+   */
+  protected volatile byte[] buf;
+
+  /**
+   * The index one greater than the index of the last valid byte in the 
buffer. This value is always
+   * in the range {@code 0} through {@code buf.length}; elements {@code 
buf[0]} through {@code
+   * buf[count-1]} contain buffered input data obtained from the underlying 
input stream.
+   */
+  protected int count;
+
+  /**
+   * The current position in the buffer. This is the index of the next 
character to be read from the
+   * {@code buf} array.
+   *
+   * <p>This value is always in the range {@code 0} through {@code count}. If 
it is less than {@code
+   * count}, then {@code buf[pos]} is the next byte to be supplied as input; 
if it is equal to
+   * {@code count}, then the next {@code read} or {@code skip} operation will 
require more bytes to
+   * be read from the contained input stream.
+   *
+   * @see NoSyncBufferedInputStream#buf
+   */
+  protected int pos;
+
+  /**
+   * The value of the {@code pos} field at the time the last {@code mark} 
method was called.
+   *
+   * <p>This value is always in the range {@code -1} through {@code pos}. If 
there is no marked
+   * position in the input stream, this field is {@code -1}. If there is a 
marked position in the
+   * input stream, then {@code buf[markpos]} is the first byte to be supplied 
as input after a
+   * {@code reset} operation. If {@code markpos} is not {@code -1}, then all 
bytes from positions
+   * {@code buf[markpos]} through {@code buf[pos-1]} must remain in the buffer 
array (though they
+   * may be moved to another place in the buffer array, with suitable 
adjustments to the values of
+   * {@code count}, {@code pos}, and {@code markpos}); they may not be 
discarded unless and until
+   * the difference between {@code pos} and {@code markpos} exceeds {@code 
marklimit}.
+   *
+   * @see NoSyncBufferedInputStream#mark(int)
+   * @see NoSyncBufferedInputStream#pos
+   */
+  protected int markpos = -1;
+
+  /**
+   * The maximum read ahead allowed after a call to the {@code mark} method 
before subsequent calls
+   * to the {@code reset} method fail. Whenever the difference between {@code 
pos} and {@code
+   * markpos} exceeds {@code marklimit}, then the mark may be dropped by 
setting {@code markpos} to
+   * {@code -1}.
+   *
+   * @see NoSyncBufferedInputStream#mark(int)
+   * @see NoSyncBufferedInputStream#reset()
+   */
+  protected int marklimit;
+
+  /**
+   * Check to make sure that underlying input stream has not been nulled out 
due to close; if not
+   * return it;
+   */
+  private InputStream getInIfOpen() throws IOException {
+    InputStream input = in;
+    if (input == null) throw new IOException("Stream closed");
+    return input;
+  }
+
+  /** Check to make sure that buffer has not been nulled out due to close; if 
not return it; */
+  private byte[] getBufIfOpen() throws IOException {
+    byte[] buffer = buf;
+    if (buffer == null) throw new IOException("Stream closed");
+    return buffer;
+  }
+
+  /**
+   * Creates a {@code BufferedInputStream} and saves its argument, the input 
stream {@code in}, for
+   * later use. An internal buffer array is created and stored in {@code buf}.
+   *
+   * @param in the underlying input stream.
+   */
+  public NoSyncBufferedInputStream(InputStream in) {
+    this(in, DEFAULT_BUFFER_SIZE);
+  }
+
+  /**
+   * Creates a {@code BufferedInputStream} with the specified buffer size, and 
saves its argument,
+   * the input stream {@code in}, for later use. An internal buffer array of 
length {@code size} is
+   * created and stored in {@code buf}.
+   *
+   * @param in the underlying input stream.
+   * @param size the buffer size.
+   * @throws IllegalArgumentException if {@code size <= 0}.
+   */
+  public NoSyncBufferedInputStream(InputStream in, int size) {
+    super(in);
+    if (size <= 0) {
+      throw new IllegalArgumentException("Buffer size <= 0");
+    }
+    buf = new byte[size];
+  }
+
+  /**
+   * Fills the buffer with more data, taking into account shuffling and other 
tricks for dealing
+   * with marks. This class is not synchronized, so the caller must guarantee
+   * single-threaded access. This method also assumes
+   * that all data has already been read in, hence pos > count.
+   */
+  private void fill() throws IOException {
+    byte[] buffer = getBufIfOpen();
+    if (markpos < 0) pos = 0; /* no mark: throw away the buffer */
+    else if (pos >= buffer.length) {
+      /* no room left in buffer */
+      if (markpos > 0) {
+        /* can throw away early part of the buffer */
+        int sz = pos - markpos;
+        System.arraycopy(buffer, markpos, buffer, 0, sz);
+        pos = sz;
+        markpos = 0;
+      } else if (buffer.length >= marklimit) {
+        markpos = -1; /* buffer got too big, invalidate mark */
+        pos = 0; /* drop buffer contents */
+      }
+    }
+    count = pos;
+    int n = getInIfOpen().read(buffer, pos, buffer.length - pos);
+    if (n > 0) count = n + pos;
+  }
+
+  /**
+   * See the general contract of the {@code read} method of {@code 
InputStream}.
+   *
+   * @return the next byte of data, or {@code -1} if the end of the stream is 
reached.
+   * @throws IOException if this input stream has been closed by invoking its 
{@link #close()}
+   *     method, or an I/O error occurs.
+   * @see FilterInputStream#in
+   */
+  public int read() throws IOException {
+    if (pos >= count) {
+      fill();
+      if (pos >= count) return -1;
+    }
+    return getBufIfOpen()[pos++] & 0xff;
+  }
+
+  /**
+   * Read characters into a portion of an array, reading from the underlying 
stream at most once if
+   * necessary.
+   */
+  private int read1(byte[] b, int off, int len) throws IOException {
+    int avail = count - pos;
+    if (avail <= 0) {
+      /* If the requested length is at least as large as the buffer, and
+      if there is no mark/reset activity, do not bother to copy the
+      bytes into the local buffer.  In this way buffered streams will
+      cascade harmlessly. */
+      if (len >= getBufIfOpen().length && markpos < 0) {
+        return getInIfOpen().read(b, off, len);
+      }
+      fill();
+      avail = count - pos;
+      if (avail <= 0) return -1;
+    }
+    int cnt = (avail < len) ? avail : len;
+    System.arraycopy(getBufIfOpen(), pos, b, off, cnt);
+    pos += cnt;
+    return cnt;
+  }
+
+  /**
+   * Reads bytes from this byte-input stream into the specified byte array, 
starting at the given
+   * offset.
+   *
+   * <p>This method implements the general contract of the corresponding {@link
+   * InputStream#read(byte[], int, int) read} method of the {@link 
InputStream} class. As an
+   * additional convenience, it attempts to read as many bytes as possible by 
repeatedly invoking
+   * the {@code read} method of the underlying stream. This iterated {@code 
read} continues until
+   * one of the following conditions becomes true:
+   *
+   * <ul>
+   *   <li>The specified number of bytes have been read,
+   *   <li>The {@code read} method of the underlying stream returns {@code 
-1}, indicating
+   *       end-of-file, or
+   *   <li>The {@code available} method of the underlying stream returns zero, 
indicating that
+   *       further input requests would block.
+   * </ul>
+   *
+   * If the first {@code read} on the underlying stream returns {@code -1} to 
indicate end-of-file
+   * then this method returns {@code -1}. Otherwise this method returns the 
number of bytes actually
+   * read.
+   *
+   * <p>Subclasses of this class are encouraged, but not required, to attempt 
to read as many bytes
+   * as possible in the same fashion.
+   *
+   * @param b destination buffer.
+   * @param off offset at which to start storing bytes.
+   * @param len maximum number of bytes to read.
+   * @return the number of bytes read, or {@code -1} if the end of the stream 
has been reached.
+   * @throws IOException if this input stream has been closed by invoking its 
{@link #close()}
+   *     method, or an I/O error occurs.
+   */
+  public int read(byte[] b, int off, int len) throws IOException {
+    getBufIfOpen(); // Check for closed stream
+    if ((off | len | (off + len) | (b.length - (off + len))) < 0) {
+      throw new IndexOutOfBoundsException();
+    } else if (len == 0) {
+      return 0;
+    }
+
+    int n = 0;
+    for (; ; ) {
+      int nread = read1(b, off + n, len - n);
+      if (nread <= 0) return (n == 0) ? nread : n;
+      n += nread;
+      if (n >= len) return n;
+      // if not closed but no bytes available, return
+      InputStream input = in;
+      if (input != null && input.available() <= 0) return n;
+    }
+  }
+
+  /**
+   * See the general contract of the {@code skip} method of {@code 
InputStream}.
+   *
+   * @throws IOException if this input stream has been closed by invoking its 
{@link #close()}
+   *     method, {@code in.skip(n)} throws an IOException, or an I/O error 
occurs.
+   */
+  public long skip(long n) throws IOException {
+    getBufIfOpen(); // Check for closed stream
+    if (n <= 0) {
+      return 0;
+    }
+    long avail = count - pos;
+
+    if (avail <= 0) {
+      // If no mark position set then don't keep in buffer
+      if (markpos < 0) return getInIfOpen().skip(n);
+
+      // Fill in buffer to save bytes for reset
+      fill();
+      avail = count - pos;
+      if (avail <= 0) return 0;
+    }
+
+    long skipped = (avail < n) ? avail : n;
+    pos += skipped;
+    return skipped;
+  }
+
+  /**
+   * Returns an estimate of the number of bytes that can be read (or skipped 
over) from this input
+   * stream without blocking by the next invocation of a method for this input 
stream. The next
+   * invocation might be the same thread or another thread. A single read or 
skip of this many bytes
+   * will not block, but may read or skip fewer bytes.
+   *
+   * <p>This method returns the sum of the number of bytes remaining to be 
read in the buffer
+   * ({@code count - pos}) and the result of calling the {@link 
FilterInputStream#in in}{@code
+   * .available()}.
+   *
+   * @return an estimate of the number of bytes that can be read (or skipped 
over) from this input
+   *     stream without blocking.
+   * @throws IOException if this input stream has been closed by invoking its 
{@link #close()}
+   *     method, or an I/O error occurs.
+   */
+  public int available() throws IOException {
+    int n = count - pos;
+    int avail = getInIfOpen().available();
+    return n > (Integer.MAX_VALUE - avail) ? Integer.MAX_VALUE : n + avail;
+  }
+
+  /**
+   * See the general contract of the {@code mark} method of {@code 
InputStream}.
+   *
+   * @param readlimit the maximum limit of bytes that can be read before the 
mark position becomes
+   *     invalid.
+   * @see NoSyncBufferedInputStream#reset()
+   */
+  public void mark(int readlimit) {
+    marklimit = readlimit;
+    markpos = pos;
+  }
+
+  /**
+   * See the general contract of the {@code reset} method of {@code 
InputStream}.
+   *
+   * <p>If {@code markpos} is {@code -1} (no mark has been set or the mark has 
been invalidated), an
+   * {@code IOException} is thrown. Otherwise, {@code pos} is set equal to 
{@code markpos}.
+   *
+   * @throws IOException if this stream has not been marked or, if the mark 
has been invalidated, or
+   *     the stream has been closed by invoking its {@link #close()} method, 
or an I/O error occurs.
+   * @see NoSyncBufferedInputStream#mark(int)
+   */
+  public void reset() throws IOException {
+    getBufIfOpen(); // Cause exception if closed
+    if (markpos < 0) throw new IOException("Resetting to invalid mark");
+    pos = markpos;
+  }
+
/**
 * Tests if this input stream supports the {@code mark} and {@code reset} methods.
 *
 * <p>Unlike {@code java.io.BufferedInputStream} (whose {@code markSupported} returns
 * {@code true}), this copy reports {@code false}, so callers that honor this flag will never
 * use the mark facility even though {@link #mark(int)} and {@link #reset()} are implemented.
 * NOTE(review): confirm this is intentional — the original javadoc claimed {@code true},
 * contradicting the code.
 *
 * @return {@code false}, indicating callers should not rely on {@code mark}/{@code reset}.
 * @see InputStream#mark(int)
 * @see InputStream#reset()
 */
public boolean markSupported() {
  return false;
}
+
+  /**
+   * Closes this input stream and releases any system resources associated 
with the stream. Once the
+   * stream has been closed, further read(), available(), reset(), or skip() 
invocations will throw
+   * an IOException. Closing a previously closed stream has no effect.
+   *
+   * @throws IOException if an I/O error occurs.
+   */
+  public void close() throws IOException {
+    InputStream input = in;
+    in = null;
+    if (input != null) input.close();
+  }
+}
diff --git 
a/java/tsfile/src/main/java/org/apache/tsfile/utils/NoSyncBufferedOutputStream.java
 
b/java/tsfile/src/main/java/org/apache/tsfile/utils/NoSyncBufferedOutputStream.java
new file mode 100644
index 00000000..5a7cd216
--- /dev/null
+++ 
b/java/tsfile/src/main/java/org/apache/tsfile/utils/NoSyncBufferedOutputStream.java
@@ -0,0 +1,132 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.tsfile.utils;
+
+import java.io.FilterOutputStream;
+import java.io.IOException;
+import java.io.OutputStream;
+
/**
 * A copy of {@code java.io.BufferedOutputStream} with the {@code synchronized} method
 * modifiers removed; the caller must guarantee any required concurrency safety.
 */
public class NoSyncBufferedOutputStream extends FilterOutputStream {

  /** The internal buffer where pending data accumulates before being written out. */
  protected byte[] buf;

  /**
   * The number of valid bytes in the buffer, always in the range {@code 0} through
   * {@code buf.length}; elements {@code buf[0]} through {@code buf[count-1]} hold valid data.
   */
  protected int count;

  /**
   * Creates a buffered output stream over {@code out} with the default 8192-byte buffer.
   *
   * @param out the underlying output stream.
   */
  public NoSyncBufferedOutputStream(OutputStream out) {
    this(out, 8192);
  }

  /**
   * Creates a buffered output stream over {@code out} with an explicit buffer size.
   *
   * @param out the underlying output stream.
   * @param size the buffer size.
   * @throws IllegalArgumentException if size &lt;= 0.
   */
  public NoSyncBufferedOutputStream(OutputStream out, int size) {
    super(out);
    if (size < 1) {
      throw new IllegalArgumentException("Buffer size <= 0");
    }
    buf = new byte[size];
  }

  /** Drains any buffered bytes to the underlying stream. */
  private void flushBuffer() throws IOException {
    if (count > 0) {
      out.write(buf, 0, count);
      count = 0;
    }
  }

  /**
   * Writes a single byte into the buffer, draining it first if it is full.
   *
   * @param b the byte to be written.
   * @throws IOException if an I/O error occurs.
   */
  @Override
  public void write(int b) throws IOException {
    if (count == buf.length) {
      flushBuffer();
    }
    buf[count++] = (byte) b;
  }

  /**
   * Writes {@code len} bytes of {@code b} starting at {@code off}.
   *
   * <p>Ordinarily the bytes are staged in this stream's buffer, which is drained to the
   * underlying stream as needed. A request at least as large as the buffer bypasses it
   * entirely (after draining), so stacked buffered streams never copy data redundantly.
   *
   * @param b the data.
   * @param off the start offset in the data.
   * @param len the number of bytes to write.
   * @throws IOException if an I/O error occurs.
   */
  @Override
  public void write(byte[] b, int off, int len) throws IOException {
    if (len >= buf.length) {
      // oversized request: drain pending bytes, then write straight through
      flushBuffer();
      out.write(b, off, len);
    } else {
      if (len > buf.length - count) {
        flushBuffer();
      }
      System.arraycopy(b, off, buf, count, len);
      count += len;
    }
  }

  /**
   * Flushes this buffered output stream, forcing any buffered bytes out to the underlying
   * stream and flushing it in turn.
   *
   * @throws IOException if an I/O error occurs.
   * @see FilterOutputStream#out
   */
  @Override
  public void flush() throws IOException {
    flushBuffer();
    out.flush();
  }
}
diff --git 
a/java/tsfile/src/main/java/org/apache/tsfile/write/writer/LocalTsFileOutput.java
 
b/java/tsfile/src/main/java/org/apache/tsfile/write/writer/LocalTsFileOutput.java
index 9b7a18a0..e6a3d9c8 100644
--- 
a/java/tsfile/src/main/java/org/apache/tsfile/write/writer/LocalTsFileOutput.java
+++ 
b/java/tsfile/src/main/java/org/apache/tsfile/write/writer/LocalTsFileOutput.java
@@ -18,7 +18,8 @@
  */
 package org.apache.tsfile.write.writer;
 
-import java.io.BufferedOutputStream;
+import org.apache.tsfile.utils.NoSyncBufferedOutputStream;
+
 import java.io.FileOutputStream;
 import java.io.IOException;
 import java.io.OutputStream;
@@ -31,12 +32,12 @@ import java.nio.ByteBuffer;
 public class LocalTsFileOutput extends OutputStream implements TsFileOutput {
 
   private FileOutputStream outputStream;
-  private BufferedOutputStream bufferedStream;
+  private OutputStream bufferedStream;
   private long position;
 
   public LocalTsFileOutput(FileOutputStream outputStream) {
     this.outputStream = outputStream;
-    this.bufferedStream = new BufferedOutputStream(outputStream);
+    this.bufferedStream = new NoSyncBufferedOutputStream(outputStream);
     position = 0;
   }
 
diff --git 
a/java/tsfile/src/main/java/org/apache/tsfile/write/writer/TsFileIOWriter.java 
b/java/tsfile/src/main/java/org/apache/tsfile/write/writer/TsFileIOWriter.java
index e5d465f8..cf3c62ba 100644
--- 
a/java/tsfile/src/main/java/org/apache/tsfile/write/writer/TsFileIOWriter.java
+++ 
b/java/tsfile/src/main/java/org/apache/tsfile/write/writer/TsFileIOWriter.java
@@ -450,7 +450,7 @@ public class TsFileIOWriter implements AutoCloseable {
         new MetadataIndexNode(MetadataIndexNodeType.LEAF_MEASUREMENT);
     int seriesIdxForCurrDevice = 0;
     BloomFilter filter =
-        BloomFilter.getEmptyBloomFilter(
+        BloomFilter.getEmptyBloomFilterWithCheapHash(
             
TSFileDescriptor.getInstance().getConfig().getBloomFilterErrorRate(), 
pathCount);
 
     while (tsmIterator.hasNext()) {
@@ -461,7 +461,7 @@ public class TsFileIOWriter implements AutoCloseable {
       currentPath = timeseriesMetadataPair.left;
 
       // build bloom filter
-      filter.add(currentPath.getFullPath());
+      filter.add(currentPath);
       // construct the index tree node for the series
       currentDevice = currentPath.getIDeviceID();
       if (!currentDevice.equals(prevDevice)) {
@@ -726,7 +726,7 @@ public class TsFileIOWriter implements AutoCloseable {
   protected int sortAndFlushChunkMetadata() throws IOException {
     int writtenSize = 0;
     // group by series
-    final List<Pair<Path, List<IChunkMetadata>>> sortedChunkMetadataList =
+    final Iterable<Pair<Path, List<IChunkMetadata>>> sortedChunkMetadataList =
         TSMIterator.sortChunkMetadata(
             chunkGroupMetadataList, currentChunkGroupDeviceId, 
chunkMetadataList);
     if (tempOutput == null) {
diff --git 
a/java/tsfile/src/main/java/org/apache/tsfile/write/writer/tsmiterator/TSMIterator.java
 
b/java/tsfile/src/main/java/org/apache/tsfile/write/writer/tsmiterator/TSMIterator.java
index 620cc2da..3be8b3ac 100644
--- 
a/java/tsfile/src/main/java/org/apache/tsfile/write/writer/tsmiterator/TSMIterator.java
+++ 
b/java/tsfile/src/main/java/org/apache/tsfile/write/writer/tsmiterator/TSMIterator.java
@@ -23,8 +23,12 @@ import org.apache.tsfile.file.metadata.ChunkGroupMetadata;
 import org.apache.tsfile.file.metadata.ChunkMetadata;
 import org.apache.tsfile.file.metadata.IChunkMetadata;
 import org.apache.tsfile.file.metadata.IDeviceID;
+import org.apache.tsfile.file.metadata.IDeviceID.Factory;
 import org.apache.tsfile.file.metadata.TimeseriesMetadata;
+import org.apache.tsfile.file.metadata.enums.CompressionType;
+import org.apache.tsfile.file.metadata.enums.TSEncoding;
 import org.apache.tsfile.file.metadata.statistics.Statistics;
+import org.apache.tsfile.read.common.FullPath;
 import org.apache.tsfile.read.common.Path;
 import org.apache.tsfile.utils.Pair;
 import org.apache.tsfile.utils.PublicBAOS;
@@ -35,10 +39,11 @@ import org.slf4j.LoggerFactory;
 import java.io.File;
 import java.io.IOException;
 import java.util.ArrayList;
+import java.util.Comparator;
 import java.util.Iterator;
 import java.util.LinkedList;
 import java.util.List;
-import java.util.Map;
+import java.util.SortedMap;
 import java.util.TreeMap;
 
 /**
@@ -48,7 +53,7 @@ import java.util.TreeMap;
  */
 public class TSMIterator {
   private static final Logger LOG = LoggerFactory.getLogger(TSMIterator.class);
-  protected List<Pair<Path, List<IChunkMetadata>>> sortedChunkMetadataList;
+  protected Iterable<Pair<Path, List<IChunkMetadata>>> sortedChunkMetadataList;
   protected Iterator<Pair<Path, List<IChunkMetadata>>> iterator;
 
   protected TSMIterator(List<ChunkGroupMetadata> chunkGroupMetadataList) {
@@ -108,43 +113,116 @@ public class TSMIterator {
     return timeseriesMetadata;
   }
 
-  public static List<Pair<Path, List<IChunkMetadata>>> sortChunkMetadata(
+  // entries in a device map have the same device, so only compare measurement
+  private static final Comparator<Path> deviceMapComparator =
+      Comparator.comparing(Path::getMeasurement);
+
+  public static Iterable<Pair<Path, List<IChunkMetadata>>> sortChunkMetadata(
       List<ChunkGroupMetadata> chunkGroupMetadataList,
       IDeviceID currentDevice,
       List<ChunkMetadata> chunkMetadataList) {
-    Map<IDeviceID, Map<Path, List<IChunkMetadata>>> chunkMetadataMap = new 
TreeMap<>();
-    List<Pair<Path, List<IChunkMetadata>>> sortedChunkMetadataList = new 
LinkedList<>();
+
+    SortedMap<IDeviceID, SortedMap<Path, List<IChunkMetadata>>> 
chunkMetadataMap = new TreeMap<>();
     for (ChunkGroupMetadata chunkGroupMetadata : chunkGroupMetadataList) {
-      chunkMetadataMap.computeIfAbsent(chunkGroupMetadata.getDevice(), x -> 
new TreeMap<>());
+      SortedMap<Path, List<IChunkMetadata>> deviceMap =
+          chunkMetadataMap.computeIfAbsent(
+              chunkGroupMetadata.getDevice(), x -> new 
TreeMap<>(deviceMapComparator));
       for (IChunkMetadata chunkMetadata : 
chunkGroupMetadata.getChunkMetadataList()) {
-        chunkMetadataMap
-            .get(chunkGroupMetadata.getDevice())
+        deviceMap
             .computeIfAbsent(
-                new Path(chunkGroupMetadata.getDevice(), 
chunkMetadata.getMeasurementUid(), false),
-                x -> new ArrayList<>())
+                new FullPath(chunkGroupMetadata.getDevice(), 
chunkMetadata.getMeasurementUid()),
+                x -> new ArrayList<>(1))
             .add(chunkMetadata);
       }
     }
+
     if (currentDevice != null) {
+      SortedMap<Path, List<IChunkMetadata>> deviceMap =
+          chunkMetadataMap.computeIfAbsent(currentDevice, x -> new 
TreeMap<>());
       for (IChunkMetadata chunkMetadata : chunkMetadataList) {
-        chunkMetadataMap
-            .computeIfAbsent(currentDevice, x -> new TreeMap<>())
+        deviceMap
             .computeIfAbsent(
-                new Path(currentDevice, chunkMetadata.getMeasurementUid(), 
false),
-                x -> new ArrayList<>())
+                new FullPath(currentDevice, chunkMetadata.getMeasurementUid()),
+                x -> new ArrayList<>(1))
             .add(chunkMetadata);
       }
     }
 
-    for (Map.Entry<IDeviceID, Map<Path, List<IChunkMetadata>>> entry :
-        chunkMetadataMap.entrySet()) {
-      Map<Path, List<IChunkMetadata>> seriesChunkMetadataMap = 
entry.getValue();
-      for (Map.Entry<Path, List<IChunkMetadata>> seriesChunkMetadataEntry :
-          seriesChunkMetadataMap.entrySet()) {
-        sortedChunkMetadataList.add(
-            new Pair<>(seriesChunkMetadataEntry.getKey(), 
seriesChunkMetadataEntry.getValue()));
+    //      SortedMap<Path, List<IChunkMetadata>>
+    return () ->
+        chunkMetadataMap.values().stream()
+            //  Pair<Path, List<IChunkMetadata>>
+            .flatMap(deviceMap -> deviceMap.entrySet().stream())
+            .map(e -> new Pair<>(e.getKey(), e.getValue()))
+            .iterator();
+  }
+
+  private static void testSortChunkMetadata() {
+    int deviceNum = 100;
+    int measurementNum = 10000;
+
+    List<ChunkGroupMetadata> chunkGroupMetadataList = new ArrayList<>();
+    for (int i = 0; i < deviceNum; i++) {
+      List<ChunkMetadata> chunkMetadataList = new ArrayList<>();
+      for (int j = 0; j < measurementNum; j++) {
+        chunkMetadataList.add(
+            new ChunkMetadata(
+                "s" + j,
+                TSDataType.INT32,
+                TSEncoding.PLAIN,
+                CompressionType.UNCOMPRESSED,
+                0,
+                null));
+      }
+      IDeviceID deviceID = Factory.DEFAULT_FACTORY.create("root.db1.d" + i);
+      chunkGroupMetadataList.add(new ChunkGroupMetadata(deviceID, 
chunkMetadataList));
+    }
+
+    int repeat = 100;
+    long start = System.currentTimeMillis();
+    for (int i = 0; i < repeat; i++) {
+      long sortStart = System.nanoTime();
+      Iterable<Pair<Path, List<IChunkMetadata>>> pairs =
+          sortChunkMetadata(chunkGroupMetadataList, null, null);
+      long sortEnd = System.nanoTime();
+      for (Pair<Path, List<IChunkMetadata>> pair : pairs) {}
+
+      long iterationEnd = System.nanoTime();
+      System.out.println(
+          "Sort " + (sortEnd - sortStart) + ", iteration" + (iterationEnd - 
sortEnd));
+    }
+    System.out.println(System.currentTimeMillis() - start);
+  }
+
+  public static void main(String[] args) throws IOException {
+    int deviceNum = 100;
+    int measurementNum = 10000;
+
+    List<ChunkGroupMetadata> chunkGroupMetadataList = new ArrayList<>();
+    for (int i = 0; i < deviceNum; i++) {
+      List<ChunkMetadata> chunkMetadataList = new ArrayList<>();
+      for (int j = 0; j < measurementNum; j++) {
+        chunkMetadataList.add(
+            new ChunkMetadata(
+                "s" + j,
+                TSDataType.INT64,
+                TSEncoding.PLAIN,
+                CompressionType.UNCOMPRESSED,
+                0,
+                Statistics.getStatsByType(TSDataType.INT64)));
+      }
+      IDeviceID deviceID = Factory.DEFAULT_FACTORY.create("root.db1.d" + i);
+      chunkGroupMetadataList.add(new ChunkGroupMetadata(deviceID, 
chunkMetadataList));
+    }
+
+    int repeat = 100;
+    long start = System.currentTimeMillis();
+    for (int i = 0; i < repeat; i++) {
+      TSMIterator tsmIterator = new TSMIterator(chunkGroupMetadataList);
+      while (tsmIterator.hasNext()) {
+        tsmIterator.next();
       }
     }
-    return sortedChunkMetadataList;
+    System.out.println(System.currentTimeMillis() - start);
   }
 }

Reply via email to