This is an automated email from the ASF dual-hosted git repository.
lzljs3620320 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/paimon.git
The following commit(s) were added to refs/heads/master by this push:
new 8157be98e3 [common] Using a faster deserialization method in RoaringBitmap32 (#4765)
8157be98e3 is described below
commit 8157be98e3e43e7fa95b3f52c3645823b7b3a569
Author: Tan-JiaLiang <[email protected]>
AuthorDate: Sat Dec 28 17:38:25 2024 +0800
[common] Using a faster deserialization method in RoaringBitmap32 (#4765)
---
.../benchmark/bitmap/RoaringBitmapBenchmark.java | 111 +++++++++++++++++++++
.../org/apache/paimon/utils/RoaringBitmap32.java | 24 ++---
.../deletionvectors/BitmapDeletionVector.java | 10 +-
.../paimon/deletionvectors/DeletionVector.java | 27 +++--
.../compact/aggregate/FieldRoaringBitmap32Agg.java | 5 +-
5 files changed, 144 insertions(+), 33 deletions(-)
diff --git a/paimon-benchmark/paimon-micro-benchmarks/src/test/java/org/apache/paimon/benchmark/bitmap/RoaringBitmapBenchmark.java b/paimon-benchmark/paimon-micro-benchmarks/src/test/java/org/apache/paimon/benchmark/bitmap/RoaringBitmapBenchmark.java
new file mode 100644
index 0000000000..4b989e96e5
--- /dev/null
+++ b/paimon-benchmark/paimon-micro-benchmarks/src/test/java/org/apache/paimon/benchmark/bitmap/RoaringBitmapBenchmark.java
@@ -0,0 +1,111 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.paimon.benchmark.bitmap;
+
+import org.apache.paimon.benchmark.Benchmark;
+import org.apache.paimon.fs.local.LocalFileIO;
+
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.io.TempDir;
+import org.roaringbitmap.RoaringBitmap;
+
+import java.io.DataInputStream;
+import java.io.DataOutputStream;
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.nio.file.Path;
+import java.util.Random;
+
+import static org.assertj.core.api.Assertions.assertThat;
+
+/** Benchmark for {@link RoaringBitmap}. */
+public class RoaringBitmapBenchmark {
+
+ public static final int ROW_COUNT = 10000000;
+
+ @TempDir Path tempDir;
+
+ @Test
+ public void testDeserialize() throws Exception {
+ Random random = new Random();
+ RoaringBitmap bitmap = new RoaringBitmap();
+ for (int i = 0; i < ROW_COUNT; i++) {
+ if (random.nextBoolean()) {
+ bitmap.add(i);
+ }
+ }
+
+ File file = new File(tempDir.toFile(),
"bitmap32-deserialize-benchmark");
+ assertThat(file.createNewFile()).isTrue();
+ try (FileOutputStream output = new FileOutputStream(file);
+ DataOutputStream dos = new DataOutputStream(output)) {
+ bitmap.serialize(dos);
+ }
+
+ Benchmark benchmark =
+ new Benchmark("bitmap32-deserialize-benchmark", 100)
+ .setNumWarmupIters(1)
+ .setOutputPerIteration(true);
+
+ benchmark.addCase(
+ "deserialize(DataInput)",
+ 10,
+ () -> {
+ try (LocalFileIO.LocalSeekableInputStream seekableStream =
+ new
LocalFileIO.LocalSeekableInputStream(file);
+ DataInputStream input = new
DataInputStream(seekableStream)) {
+ new RoaringBitmap().deserialize(input);
+ } catch (IOException e) {
+ throw new RuntimeException(e);
+ }
+ });
+
+ benchmark.addCase(
+ "deserialize(DataInput, byte[])",
+ 10,
+ () -> {
+ try (LocalFileIO.LocalSeekableInputStream seekableStream =
+ new
LocalFileIO.LocalSeekableInputStream(file);
+ DataInputStream input = new
DataInputStream(seekableStream)) {
+ new RoaringBitmap().deserialize(input, null);
+ } catch (IOException e) {
+ throw new RuntimeException(e);
+ }
+ });
+
+ benchmark.addCase(
+ "deserialize(ByteBuffer)",
+ 10,
+ () -> {
+ try (LocalFileIO.LocalSeekableInputStream seekableStream =
+ new
LocalFileIO.LocalSeekableInputStream(file);
+ DataInputStream input = new
DataInputStream(seekableStream)) {
+ byte[] bytes = new byte[(int) file.length()];
+ input.readFully(bytes);
+ new
RoaringBitmap().deserialize(ByteBuffer.wrap(bytes));
+ } catch (IOException e) {
+ throw new RuntimeException(e);
+ }
+ });
+
+ benchmark.run();
+ }
+}
diff --git a/paimon-common/src/main/java/org/apache/paimon/utils/RoaringBitmap32.java b/paimon-common/src/main/java/org/apache/paimon/utils/RoaringBitmap32.java
index 5f352f61cd..6496b7003e 100644
--- a/paimon-common/src/main/java/org/apache/paimon/utils/RoaringBitmap32.java
+++ b/paimon-common/src/main/java/org/apache/paimon/utils/RoaringBitmap32.java
@@ -85,8 +85,19 @@ public class RoaringBitmap32 {
roaringBitmap.serialize(out);
}
+ public byte[] serialize() {
+ roaringBitmap.runOptimize();
+ ByteBuffer buffer =
ByteBuffer.allocate(roaringBitmap.serializedSizeInBytes());
+ roaringBitmap.serialize(buffer);
+ return buffer.array();
+ }
+
public void deserialize(DataInput in) throws IOException {
- roaringBitmap.deserialize(in);
+ roaringBitmap.deserialize(in, null);
+ }
+
+ public void deserialize(ByteBuffer buffer) throws IOException {
+ roaringBitmap.deserialize(buffer);
}
@Override
@@ -105,17 +116,6 @@ public class RoaringBitmap32 {
roaringBitmap.clear();
}
- public byte[] serialize() {
- roaringBitmap.runOptimize();
- ByteBuffer buffer =
ByteBuffer.allocate(roaringBitmap.serializedSizeInBytes());
- roaringBitmap.serialize(buffer);
- return buffer.array();
- }
-
- public void deserialize(byte[] rbmBytes) throws IOException {
- roaringBitmap.deserialize(ByteBuffer.wrap(rbmBytes));
- }
-
public void flip(final long rangeStart, final long rangeEnd) {
roaringBitmap.flip(rangeStart, rangeEnd);
}
diff --git a/paimon-core/src/main/java/org/apache/paimon/deletionvectors/BitmapDeletionVector.java b/paimon-core/src/main/java/org/apache/paimon/deletionvectors/BitmapDeletionVector.java
index 51ae729c21..55e0a975e3 100644
--- a/paimon-core/src/main/java/org/apache/paimon/deletionvectors/BitmapDeletionVector.java
+++ b/paimon-core/src/main/java/org/apache/paimon/deletionvectors/BitmapDeletionVector.java
@@ -21,9 +21,9 @@ package org.apache.paimon.deletionvectors;
import org.apache.paimon.utils.RoaringBitmap32;
import java.io.ByteArrayOutputStream;
-import java.io.DataInput;
import java.io.DataOutputStream;
import java.io.IOException;
+import java.nio.ByteBuffer;
import java.util.Objects;
/**
@@ -93,10 +93,10 @@ public class BitmapDeletionVector implements DeletionVector
{
}
}
- public static DeletionVector deserializeFromDataInput(DataInput bis)
throws IOException {
- RoaringBitmap32 roaringBitmap = new RoaringBitmap32();
- roaringBitmap.deserialize(bis);
- return new BitmapDeletionVector(roaringBitmap);
+ public static DeletionVector deserializeFromByteBuffer(ByteBuffer buffer)
throws IOException {
+ RoaringBitmap32 bitmap = new RoaringBitmap32();
+ bitmap.deserialize(buffer);
+ return new BitmapDeletionVector(bitmap);
}
private void checkPosition(long position) {
diff --git a/paimon-core/src/main/java/org/apache/paimon/deletionvectors/DeletionVector.java b/paimon-core/src/main/java/org/apache/paimon/deletionvectors/DeletionVector.java
index 8feeac63f7..967e80b0ba 100644
--- a/paimon-core/src/main/java/org/apache/paimon/deletionvectors/DeletionVector.java
+++ b/paimon-core/src/main/java/org/apache/paimon/deletionvectors/DeletionVector.java
@@ -26,9 +26,9 @@ import org.apache.paimon.table.source.DeletionFile;
import javax.annotation.Nullable;
-import java.io.ByteArrayInputStream;
import java.io.DataInputStream;
import java.io.IOException;
+import java.nio.ByteBuffer;
import java.util.List;
import java.util.Optional;
@@ -99,11 +99,11 @@ public interface DeletionVector {
* @return A DeletionVector instance that represents the deserialized data.
*/
static DeletionVector deserializeFromBytes(byte[] bytes) {
- try (ByteArrayInputStream bis = new ByteArrayInputStream(bytes);
- DataInputStream dis = new DataInputStream(bis)) {
- int magicNum = dis.readInt();
+ try {
+ ByteBuffer buffer = ByteBuffer.wrap(bytes);
+ int magicNum = buffer.getInt();
if (magicNum == BitmapDeletionVector.MAGIC_NUMBER) {
- return BitmapDeletionVector.deserializeFromDataInput(dis);
+ return BitmapDeletionVector.deserializeFromByteBuffer(buffer);
} else {
throw new RuntimeException("Invalid magic number: " +
magicNum);
}
@@ -117,22 +117,21 @@ public interface DeletionVector {
try (SeekableInputStream input = fileIO.newInputStream(path)) {
input.seek(deletionFile.offset());
DataInputStream dis = new DataInputStream(input);
- int actualLength = dis.readInt();
- if (actualLength != deletionFile.length()) {
+ int actualSize = dis.readInt();
+ if (actualSize != deletionFile.length()) {
throw new RuntimeException(
"Size not match, actual size: "
- + actualLength
+ + actualSize
+ ", expert size: "
+ deletionFile.length()
+ ", file path: "
+ path);
}
- int magicNum = dis.readInt();
- if (magicNum == BitmapDeletionVector.MAGIC_NUMBER) {
- return BitmapDeletionVector.deserializeFromDataInput(dis);
- } else {
- throw new RuntimeException("Invalid magic number: " +
magicNum);
- }
+
+ // read DeletionVector bytes
+ byte[] bytes = new byte[actualSize];
+ dis.readFully(bytes);
+ return deserializeFromBytes(bytes);
}
}
diff --git a/paimon-core/src/main/java/org/apache/paimon/mergetree/compact/aggregate/FieldRoaringBitmap32Agg.java b/paimon-core/src/main/java/org/apache/paimon/mergetree/compact/aggregate/FieldRoaringBitmap32Agg.java
index ef7ac20e83..436a88a3cc 100644
--- a/paimon-core/src/main/java/org/apache/paimon/mergetree/compact/aggregate/FieldRoaringBitmap32Agg.java
+++ b/paimon-core/src/main/java/org/apache/paimon/mergetree/compact/aggregate/FieldRoaringBitmap32Agg.java
@@ -22,6 +22,7 @@ import org.apache.paimon.types.VarBinaryType;
import org.apache.paimon.utils.RoaringBitmap32;
import java.io.IOException;
+import java.nio.ByteBuffer;
/** roaring bitmap aggregate a field of a row. */
public class FieldRoaringBitmap32Agg extends FieldAggregator {
@@ -43,8 +44,8 @@ public class FieldRoaringBitmap32Agg extends FieldAggregator {
}
try {
- roaringBitmapAcc.deserialize((byte[]) accumulator);
- roaringBitmapInput.deserialize((byte[]) inputField);
+ roaringBitmapAcc.deserialize(ByteBuffer.wrap((byte[])
accumulator));
+ roaringBitmapInput.deserialize(ByteBuffer.wrap((byte[])
inputField));
roaringBitmapAcc.or(roaringBitmapInput);
return roaringBitmapAcc.serialize();
} catch (IOException e) {