This is an automated email from the ASF dual-hosted git repository.
lzljs3620320 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/paimon.git
The following commit(s) were added to refs/heads/master by this push:
new 8a8a9532cc [core] Correct bitmap types support in writer (#5309)
8a8a9532cc is described below
commit 8a8a9532ccf25c4690fed64e6e489d1a97b35d8a
Author: Jingsong Lee <[email protected]>
AuthorDate: Tue Mar 18 20:25:52 2025 +0800
[core] Correct bitmap types support in writer (#5309)
---
docs/content/concepts/spec/fileindex.md | 60 ----
.../apache/paimon/fileindex/FileIndexWriter.java | 1 +
.../paimon/fileindex/bitmap/BitmapFileIndex.java | 55 +++-
.../fileindex/bitmap/BitmapFileIndexMeta.java | 331 ++++++---------------
.../fileindex/bitmap/BitmapFileIndexMetaV2.java | 2 +-
.../paimon/fileindex/bitmap/BitmapTypeVisitor.java | 167 +++++++++++
.../fileindex/bitmapindex/BitmapFileIndexTest.java | 37 +++
.../org/apache/paimon/io/DataFileIndexWriter.java | 10 +-
8 files changed, 348 insertions(+), 315 deletions(-)
diff --git a/docs/content/concepts/spec/fileindex.md b/docs/content/concepts/spec/fileindex.md
index 230c01939c..117d90b206 100644
--- a/docs/content/concepts/spec/fileindex.md
+++ b/docs/content/concepts/spec/fileindex.md
@@ -261,34 +261,6 @@ Bitmap only support the following data type:
<td><code>BooleanType</code></td>
<td>true</td>
</tr>
- <tr>
- <td><code>DecimalType(precision, scale)</code></td>
- <td>false</td>
- </tr>
- <tr>
- <td><code>FloatType</code></td>
- <td>Not recommended</td>
- </tr>
- <tr>
- <td><code>DoubleType</code></td>
- <td>Not recommended</td>
- </tr>
- <tr>
- <td><code>VarBinaryType</code>, <code>BinaryType</code></td>
- <td>false</td>
- </tr>
- <tr>
- <td><code>RowType</code></td>
- <td>false</td>
- </tr>
- <tr>
- <td><code>MapType</code></td>
- <td>false</td>
- </tr>
- <tr>
- <td><code>ArrayType</code></td>
- <td>false</td>
- </tr>
</tbody>
</table>
@@ -384,37 +356,5 @@ BSI only support the following data type:
<td><code>DecimalType(precision, scale)</code></td>
<td>true</td>
</tr>
- <tr>
- <td><code>FloatType</code></td>
- <td>false</td>
- </tr>
- <tr>
- <td><code>DoubleType</code></td>
- <td>false</td>
- </tr>
- <tr>
- <td><code>String</code></td>
- <td>false</td>
- </tr>
- <tr>
- <td><code>VarBinaryType</code>, <code>BinaryType</code></td>
- <td>false</td>
- </tr>
- <tr>
- <td><code>RowType</code></td>
- <td>false</td>
- </tr>
- <tr>
- <td><code>MapType</code></td>
- <td>false</td>
- </tr>
- <tr>
- <td><code>ArrayType</code></td>
- <td>false</td>
- </tr>
- <tr>
- <td><code>BooleanType</code></td>
- <td>false</td>
- </tr>
</tbody>
</table>
diff --git a/paimon-common/src/main/java/org/apache/paimon/fileindex/FileIndexWriter.java b/paimon-common/src/main/java/org/apache/paimon/fileindex/FileIndexWriter.java
index cfb05d20d1..ede678353a 100644
--- a/paimon-common/src/main/java/org/apache/paimon/fileindex/FileIndexWriter.java
+++ b/paimon-common/src/main/java/org/apache/paimon/fileindex/FileIndexWriter.java
@@ -28,6 +28,7 @@ public abstract class FileIndexWriter {
write(key);
}
+ /** The key object may be reused, if saved in memory, please be sure to manually copy it. */
public abstract void write(Object key);
public abstract byte[] serializedBytes();
diff --git a/paimon-common/src/main/java/org/apache/paimon/fileindex/bitmap/BitmapFileIndex.java b/paimon-common/src/main/java/org/apache/paimon/fileindex/bitmap/BitmapFileIndex.java
index 38b9e13054..3678f05ded 100644
--- a/paimon-common/src/main/java/org/apache/paimon/fileindex/bitmap/BitmapFileIndex.java
+++ b/paimon-common/src/main/java/org/apache/paimon/fileindex/bitmap/BitmapFileIndex.java
@@ -18,6 +18,7 @@
package org.apache.paimon.fileindex.bitmap;
+import org.apache.paimon.data.BinaryString;
import org.apache.paimon.data.Timestamp;
import org.apache.paimon.fileindex.FileIndexReader;
import org.apache.paimon.fileindex.FileIndexResult;
@@ -27,7 +28,6 @@ import org.apache.paimon.fs.SeekableInputStream;
import org.apache.paimon.options.Options;
import org.apache.paimon.predicate.FieldRef;
import org.apache.paimon.types.DataType;
-import org.apache.paimon.types.DataTypeDefaultVisitor;
import org.apache.paimon.types.LocalZonedTimestampType;
import org.apache.paimon.types.TimestampType;
import org.apache.paimon.utils.RoaringBitmap32;
@@ -315,7 +315,53 @@ public class BitmapFileIndex implements FileIndexer {
// Currently, it is mainly used to convert timestamps to long
public static Function<Object, Object> getValueMapper(DataType dataType) {
return dataType.accept(
- new DataTypeDefaultVisitor<Function<Object, Object>>() {
+ new BitmapTypeVisitor<Function<Object, Object>>() {
+
+ @Override
+ public Function<Object, Object> visitBinaryString() {
+ return o -> {
+ if (o instanceof BinaryString) {
+ return ((BinaryString) o).copy();
+ }
+ return o;
+ };
+ }
+
+ @Override
+ public Function<Object, Object> visitByte() {
+ return Function.identity();
+ }
+
+ @Override
+ public Function<Object, Object> visitShort() {
+ return Function.identity();
+ }
+
+ @Override
+ public Function<Object, Object> visitInt() {
+ return Function.identity();
+ }
+
+ @Override
+ public Function<Object, Object> visitLong() {
+ return Function.identity();
+ }
+
+ @Override
+ public Function<Object, Object> visitFloat() {
+ return Function.identity();
+ }
+
+ @Override
+ public Function<Object, Object> visitDouble() {
+ return Function.identity();
+ }
+
+ @Override
+ public Function<Object, Object> visitBoolean() {
+ return Function.identity();
+ }
+
@Override
public Function<Object, Object> visit(TimestampType
timestampType) {
return
getTimeStampMapper(timestampType.getPrecision());
@@ -327,11 +373,6 @@ public class BitmapFileIndex implements FileIndexer {
return
getTimeStampMapper(localZonedTimestampType.getPrecision());
}
- @Override
- protected Function<Object, Object> defaultMethod(DataType
dataType) {
- return Function.identity();
- }
-
private Function<Object, Object> getTimeStampMapper(int
precision) {
return o -> {
if (o == null) {
diff --git a/paimon-common/src/main/java/org/apache/paimon/fileindex/bitmap/BitmapFileIndexMeta.java b/paimon-common/src/main/java/org/apache/paimon/fileindex/bitmap/BitmapFileIndexMeta.java
index c631911885..0a947324c2 100644
--- a/paimon-common/src/main/java/org/apache/paimon/fileindex/bitmap/BitmapFileIndexMeta.java
+++ b/paimon-common/src/main/java/org/apache/paimon/fileindex/bitmap/BitmapFileIndexMeta.java
@@ -21,29 +21,7 @@ package org.apache.paimon.fileindex.bitmap;
import org.apache.paimon.data.BinaryString;
import org.apache.paimon.fs.SeekableInputStream;
import org.apache.paimon.options.Options;
-import org.apache.paimon.types.ArrayType;
-import org.apache.paimon.types.BigIntType;
-import org.apache.paimon.types.BinaryType;
-import org.apache.paimon.types.BooleanType;
-import org.apache.paimon.types.CharType;
import org.apache.paimon.types.DataType;
-import org.apache.paimon.types.DataTypeVisitor;
-import org.apache.paimon.types.DateType;
-import org.apache.paimon.types.DecimalType;
-import org.apache.paimon.types.DoubleType;
-import org.apache.paimon.types.FloatType;
-import org.apache.paimon.types.IntType;
-import org.apache.paimon.types.LocalZonedTimestampType;
-import org.apache.paimon.types.MapType;
-import org.apache.paimon.types.MultisetType;
-import org.apache.paimon.types.RowType;
-import org.apache.paimon.types.SmallIntType;
-import org.apache.paimon.types.TimeType;
-import org.apache.paimon.types.TimestampType;
-import org.apache.paimon.types.TinyIntType;
-import org.apache.paimon.types.VarBinaryType;
-import org.apache.paimon.types.VarCharType;
-import org.apache.paimon.types.VariantType;
import java.io.BufferedInputStream;
import java.io.DataInput;
@@ -214,7 +192,7 @@ public class BitmapFileIndexMeta {
protected Function<Object, Integer> getSerializeSizeMeasure() {
return dataType.accept(
- new DataTypeVisitorAdapter<Function<Object, Integer>>() {
+ new BitmapTypeVisitor<Function<Object, Integer>>() {
@Override
public Function<Object, Integer> visitBinaryString() {
return o -> Integer.BYTES + ((BinaryString)
o).getSizeInBytes();
@@ -258,241 +236,112 @@ public class BitmapFileIndexMeta {
}
protected ThrowableConsumer getValueWriter(DataOutput out) {
- ThrowableConsumer valueWriter =
- dataType.accept(
- new DataTypeVisitorAdapter<ThrowableConsumer>() {
- @Override
- public ThrowableConsumer visitBinaryString() {
- return o -> {
- byte[] bytes = ((BinaryString)
o).toBytes();
- out.writeInt(bytes.length);
- out.write(bytes);
- };
- }
-
- @Override
- public ThrowableConsumer visitByte() {
- return o -> out.writeByte((byte) o);
- }
-
- @Override
- public ThrowableConsumer visitShort() {
- return o -> out.writeShort((short) o);
- }
-
- @Override
- public ThrowableConsumer visitInt() {
- return o -> out.writeInt((int) o);
- }
-
- @Override
- public ThrowableConsumer visitLong() {
- return o -> out.writeLong((long) o);
- }
-
- @Override
- public ThrowableConsumer visitFloat() {
- return o -> out.writeFloat((float) o);
- }
-
- @Override
- public ThrowableConsumer visitDouble() {
- return o -> out.writeDouble((double) o);
- }
-
- @Override
- public ThrowableConsumer visitBoolean() {
- return o -> out.writeBoolean((Boolean) o);
- }
- });
- return valueWriter;
- }
-
- protected ThrowableSupplier getValueReader(DataInput in) {
- ThrowableSupplier valueReader =
- dataType.accept(
- new DataTypeVisitorAdapter<ThrowableSupplier>() {
- @Override
- public ThrowableSupplier visitBinaryString() {
- return () -> {
- int length = in.readInt();
- byte[] bytes = new byte[length];
- in.readFully(bytes);
- return BinaryString.fromBytes(bytes);
- };
- }
-
- @Override
- public ThrowableSupplier visitByte() {
- return in::readByte;
- }
-
- @Override
- public ThrowableSupplier visitShort() {
- return in::readShort;
- }
-
- @Override
- public ThrowableSupplier visitInt() {
- return in::readInt;
- }
-
- @Override
- public ThrowableSupplier visitLong() {
- return in::readLong;
- }
-
- @Override
- public ThrowableSupplier visitFloat() {
- return in::readFloat;
- }
-
- @Override
- public ThrowableSupplier visitDouble() {
- return in::readDouble;
- }
-
- @Override
- public ThrowableSupplier visitBoolean() {
- return in::readBoolean;
- }
- });
- return valueReader;
- }
-
- /** functional interface. */
- public interface ThrowableConsumer {
- void accept(Object o) throws Exception;
- }
-
- /** functional interface. */
- public interface ThrowableSupplier {
- Object get() throws Exception;
- }
-
- /** simplified visitor. */
- public abstract static class DataTypeVisitorAdapter<R> implements
DataTypeVisitor<R> {
-
- public abstract R visitBinaryString();
-
- public abstract R visitByte();
-
- public abstract R visitShort();
-
- public abstract R visitInt();
-
- public abstract R visitLong();
-
- public abstract R visitFloat();
-
- public abstract R visitDouble();
-
- public abstract R visitBoolean();
-
- @Override
- public final R visit(CharType charType) {
- return visitBinaryString();
- }
-
- @Override
- public final R visit(VarCharType varCharType) {
- return visitBinaryString();
- }
-
- @Override
- public final R visit(BooleanType booleanType) {
- return visitBoolean();
- }
-
- @Override
- public final R visit(BinaryType binaryType) {
- throw new UnsupportedOperationException("Does not support type
binary");
- }
+ return dataType.accept(
+ new BitmapTypeVisitor<ThrowableConsumer>() {
+ @Override
+ public ThrowableConsumer visitBinaryString() {
+ return o -> {
+ byte[] bytes = ((BinaryString) o).toBytes();
+ out.writeInt(bytes.length);
+ out.write(bytes);
+ };
+ }
- @Override
- public final R visit(VarBinaryType varBinaryType) {
- throw new UnsupportedOperationException("Does not support type
binary");
- }
+ @Override
+ public ThrowableConsumer visitByte() {
+ return o -> out.writeByte((byte) o);
+ }
- @Override
- public final R visit(DecimalType decimalType) {
- throw new UnsupportedOperationException("Does not support
decimal");
- }
+ @Override
+ public ThrowableConsumer visitShort() {
+ return o -> out.writeShort((short) o);
+ }
- @Override
- public final R visit(TinyIntType tinyIntType) {
- return visitByte();
- }
+ @Override
+ public ThrowableConsumer visitInt() {
+ return o -> out.writeInt((int) o);
+ }
- @Override
- public final R visit(SmallIntType smallIntType) {
- return visitShort();
- }
+ @Override
+ public ThrowableConsumer visitLong() {
+ return o -> out.writeLong((long) o);
+ }
- @Override
- public final R visit(IntType intType) {
- return visitInt();
- }
+ @Override
+ public ThrowableConsumer visitFloat() {
+ return o -> out.writeFloat((float) o);
+ }
- @Override
- public final R visit(BigIntType bigIntType) {
- return visitLong();
- }
+ @Override
+ public ThrowableConsumer visitDouble() {
+ return o -> out.writeDouble((double) o);
+ }
- @Override
- public final R visit(FloatType floatType) {
- return visitFloat();
- }
+ @Override
+ public ThrowableConsumer visitBoolean() {
+ return o -> out.writeBoolean((Boolean) o);
+ }
+ });
+ }
- @Override
- public final R visit(DoubleType doubleType) {
- return visitDouble();
- }
+ protected ThrowableSupplier getValueReader(DataInput in) {
+ return dataType.accept(
+ new BitmapTypeVisitor<ThrowableSupplier>() {
+ @Override
+ public ThrowableSupplier visitBinaryString() {
+ return () -> {
+ int length = in.readInt();
+ byte[] bytes = new byte[length];
+ in.readFully(bytes);
+ return BinaryString.fromBytes(bytes);
+ };
+ }
- @Override
- public final R visit(DateType dateType) {
- return visitInt();
- }
+ @Override
+ public ThrowableSupplier visitByte() {
+ return in::readByte;
+ }
- @Override
- public final R visit(TimeType timeType) {
- return visitInt();
- }
+ @Override
+ public ThrowableSupplier visitShort() {
+ return in::readShort;
+ }
- @Override
- public final R visit(ArrayType arrayType) {
- throw new UnsupportedOperationException("Does not support type
array");
- }
+ @Override
+ public ThrowableSupplier visitInt() {
+ return in::readInt;
+ }
- @Override
- public final R visit(MultisetType multisetType) {
- throw new UnsupportedOperationException("Does not support type
mutiset");
- }
+ @Override
+ public ThrowableSupplier visitLong() {
+ return in::readLong;
+ }
- @Override
- public final R visit(TimestampType timestampType) {
- return visitLong();
- }
+ @Override
+ public ThrowableSupplier visitFloat() {
+ return in::readFloat;
+ }
- @Override
- public final R visit(LocalZonedTimestampType localZonedTimestampType) {
- return visitLong();
- }
+ @Override
+ public ThrowableSupplier visitDouble() {
+ return in::readDouble;
+ }
- @Override
- public final R visit(MapType mapType) {
- throw new UnsupportedOperationException("Does not support type
map");
- }
+ @Override
+ public ThrowableSupplier visitBoolean() {
+ return in::readBoolean;
+ }
+ });
+ }
- @Override
- public final R visit(RowType rowType) {
- throw new UnsupportedOperationException("Does not support type
row");
- }
+ /** functional interface. */
+ public interface ThrowableConsumer {
+ void accept(Object o) throws Exception;
+ }
- @Override
- public final R visit(VariantType rowType) {
- throw new UnsupportedOperationException("Does not support type
variant");
- }
+ /** functional interface. */
+ public interface ThrowableSupplier {
+ Object get() throws Exception;
}
/** Bitmap entry. */
diff --git a/paimon-common/src/main/java/org/apache/paimon/fileindex/bitmap/BitmapFileIndexMetaV2.java b/paimon-common/src/main/java/org/apache/paimon/fileindex/bitmap/BitmapFileIndexMetaV2.java
index 41e0fb42a7..1b367adada 100644
--- a/paimon-common/src/main/java/org/apache/paimon/fileindex/bitmap/BitmapFileIndexMetaV2.java
+++ b/paimon-common/src/main/java/org/apache/paimon/fileindex/bitmap/BitmapFileIndexMetaV2.java
@@ -151,7 +151,7 @@ public class BitmapFileIndexMetaV2 extends BitmapFileIndexMeta {
public static Comparator<Object> getComparator(DataType dataType) {
return dataType.accept(
- new DataTypeVisitorAdapter<Comparator<Object>>() {
+ new BitmapTypeVisitor<Comparator<Object>>() {
@Override
public Comparator<Object> visitBinaryString() {
return Comparator.comparing(o -> ((BinaryString) o));
diff --git a/paimon-common/src/main/java/org/apache/paimon/fileindex/bitmap/BitmapTypeVisitor.java b/paimon-common/src/main/java/org/apache/paimon/fileindex/bitmap/BitmapTypeVisitor.java
new file mode 100644
index 0000000000..fd3ea653d7
--- /dev/null
+++ b/paimon-common/src/main/java/org/apache/paimon/fileindex/bitmap/BitmapTypeVisitor.java
@@ -0,0 +1,167 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.paimon.fileindex.bitmap;
+
+import org.apache.paimon.types.ArrayType;
+import org.apache.paimon.types.BigIntType;
+import org.apache.paimon.types.BinaryType;
+import org.apache.paimon.types.BooleanType;
+import org.apache.paimon.types.CharType;
+import org.apache.paimon.types.DataTypeVisitor;
+import org.apache.paimon.types.DateType;
+import org.apache.paimon.types.DecimalType;
+import org.apache.paimon.types.DoubleType;
+import org.apache.paimon.types.FloatType;
+import org.apache.paimon.types.IntType;
+import org.apache.paimon.types.LocalZonedTimestampType;
+import org.apache.paimon.types.MapType;
+import org.apache.paimon.types.MultisetType;
+import org.apache.paimon.types.RowType;
+import org.apache.paimon.types.SmallIntType;
+import org.apache.paimon.types.TimeType;
+import org.apache.paimon.types.TimestampType;
+import org.apache.paimon.types.TinyIntType;
+import org.apache.paimon.types.VarBinaryType;
+import org.apache.paimon.types.VarCharType;
+import org.apache.paimon.types.VariantType;
+
+/** Simplified visitor for bitmap index. */
+public abstract class BitmapTypeVisitor<R> implements DataTypeVisitor<R> {
+
+ public abstract R visitBinaryString();
+
+ public abstract R visitByte();
+
+ public abstract R visitShort();
+
+ public abstract R visitInt();
+
+ public abstract R visitLong();
+
+ public abstract R visitFloat();
+
+ public abstract R visitDouble();
+
+ public abstract R visitBoolean();
+
+ @Override
+ public final R visit(CharType charType) {
+ return visitBinaryString();
+ }
+
+ @Override
+ public final R visit(VarCharType varCharType) {
+ return visitBinaryString();
+ }
+
+ @Override
+ public final R visit(BooleanType booleanType) {
+ return visitBoolean();
+ }
+
+ @Override
+ public final R visit(BinaryType binaryType) {
+ throw new UnsupportedOperationException("Does not support type binary");
+ }
+
+ @Override
+ public final R visit(VarBinaryType varBinaryType) {
+ throw new UnsupportedOperationException("Does not support type binary");
+ }
+
+ @Override
+ public final R visit(DecimalType decimalType) {
+ throw new UnsupportedOperationException("Does not support decimal");
+ }
+
+ @Override
+ public final R visit(TinyIntType tinyIntType) {
+ return visitByte();
+ }
+
+ @Override
+ public final R visit(SmallIntType smallIntType) {
+ return visitShort();
+ }
+
+ @Override
+ public final R visit(IntType intType) {
+ return visitInt();
+ }
+
+ @Override
+ public final R visit(BigIntType bigIntType) {
+ return visitLong();
+ }
+
+ @Override
+ public final R visit(FloatType floatType) {
+ return visitFloat();
+ }
+
+ @Override
+ public final R visit(DoubleType doubleType) {
+ return visitDouble();
+ }
+
+ @Override
+ public final R visit(DateType dateType) {
+ return visitInt();
+ }
+
+ @Override
+ public final R visit(TimeType timeType) {
+ return visitInt();
+ }
+
+ @Override
+ public R visit(TimestampType timestampType) {
+ return visitLong();
+ }
+
+ @Override
+ public R visit(LocalZonedTimestampType localZonedTimestampType) {
+ return visitLong();
+ }
+
+ @Override
+ public final R visit(ArrayType arrayType) {
+ throw new UnsupportedOperationException("Does not support type array");
+ }
+
+ @Override
+ public final R visit(MultisetType multisetType) {
+ throw new UnsupportedOperationException("Does not support type mutiset");
+ }
+
+ @Override
+ public final R visit(MapType mapType) {
+ throw new UnsupportedOperationException("Does not support type map");
+ }
+
+ @Override
+ public final R visit(RowType rowType) {
+ throw new UnsupportedOperationException("Does not support type row");
+ }
+
+ @Override
+ public final R visit(VariantType rowType) {
+ throw new UnsupportedOperationException("Does not support type variant");
+ }
+}
diff --git a/paimon-common/src/test/java/org/apache/paimon/fileindex/bitmapindex/BitmapFileIndexTest.java b/paimon-common/src/test/java/org/apache/paimon/fileindex/bitmapindex/BitmapFileIndexTest.java
index 991c6cc885..a13e876c08 100644
--- a/paimon-common/src/test/java/org/apache/paimon/fileindex/bitmapindex/BitmapFileIndexTest.java
+++ b/paimon-common/src/test/java/org/apache/paimon/fileindex/bitmapindex/BitmapFileIndexTest.java
@@ -86,6 +86,7 @@ public class BitmapFileIndexTest {
testStringType(BitmapFileIndex.VERSION_1);
testBooleanType(BitmapFileIndex.VERSION_1);
testHighCardinality(BitmapFileIndex.VERSION_1, 1000000, 100000, null);
+ testStringTypeWithReusing(BitmapFileIndex.VERSION_1);
}
@Test
@@ -94,6 +95,7 @@ public class BitmapFileIndexTest {
testStringType(BitmapFileIndex.VERSION_2);
testBooleanType(BitmapFileIndex.VERSION_2);
testHighCardinality(BitmapFileIndex.VERSION_2, 1000000, 100000, null);
+ testStringTypeWithReusing(BitmapFileIndex.VERSION_2);
}
private FileIndexReader createTestReaderOnWriter(
@@ -240,4 +242,39 @@ public class BitmapFileIndexTest {
System.out.println("read null bitmap time: " +
(System.currentTimeMillis() - time3));
assert resultNullBm.equals(nullBm);
}
+
+ private void testStringTypeWithReusing(int version) throws Exception {
+ FieldRef fieldRef = new FieldRef(0, "", DataTypes.STRING());
+ BinaryString a = BinaryString.fromString("a");
+ BinaryString b = BinaryString.fromString("b");
+ BinaryString c = BinaryString.fromString("a");
+ FileIndexReader reader =
+ createTestReaderOnWriter(
+ version,
+ null,
+ DataTypes.STRING(),
+ writer -> {
+ writer.writeRecord(a);
+ writer.writeRecord(null);
+ a.pointTo(b.getSegments(), b.getOffset(), b.getSizeInBytes());
+ writer.writeRecord(null);
+ writer.writeRecord(a);
+ writer.writeRecord(null);
+ a.pointTo(c.getSegments(), c.getOffset(), c.getSizeInBytes());
+ writer.writeRecord(null);
+ });
+ assert ((BitmapIndexResult) reader.visitEqual(fieldRef, a))
+ .get()
+ .equals(RoaringBitmap32.bitmapOf(0));
+ assert ((BitmapIndexResult) reader.visitEqual(fieldRef, b))
+ .get()
+ .equals(RoaringBitmap32.bitmapOf(3));
+ assert ((BitmapIndexResult) reader.visitIsNull(fieldRef))
+ .get()
+ .equals(RoaringBitmap32.bitmapOf(1, 2, 4, 5));
+ assert ((BitmapIndexResult) reader.visitIn(fieldRef, Arrays.asList(a, b)))
+ .get()
+ .equals(RoaringBitmap32.bitmapOf(0, 3));
+ assert !reader.visitEqual(fieldRef, BinaryString.fromString("c")).remain();
+ }
}
diff --git a/paimon-core/src/main/java/org/apache/paimon/io/DataFileIndexWriter.java b/paimon-core/src/main/java/org/apache/paimon/io/DataFileIndexWriter.java
index 71ff6b7ba5..2ba0283b7b 100644
--- a/paimon-core/src/main/java/org/apache/paimon/io/DataFileIndexWriter.java
+++ b/paimon-core/src/main/java/org/apache/paimon/io/DataFileIndexWriter.java
@@ -146,9 +146,7 @@ public final class DataFileIndexWriter implements Closeable {
}
public void write(InternalRow row) {
- indexMaintainers
- .values()
- .forEach(mapFileIndexMaintainer ->
mapFileIndexMaintainer.write(row));
+ indexMaintainers.values().forEach(index -> index.write(row));
}
@Override
@@ -313,19 +311,19 @@ public final class DataFileIndexWriter implements Closeable {
InternalArray keyArray = internalMap.keyArray();
InternalArray valueArray = internalMap.valueArray();
- Set<String> writedKeys = new HashSet<>();
+ Set<String> writtenKeys = new HashSet<>();
for (int i = 0; i < keyArray.size(); i++) {
String key = keyArray.getString(i).toString();
org.apache.paimon.fileindex.FileIndexWriter writer =
indexWritersMap.getOrDefault(key, null);
if (writer != null) {
- writedKeys.add(key);
+ writtenKeys.add(key);
writer.writeRecord(valueElementGetter.getElementOrNull(valueArray, i));
}
}
for (Map.Entry<String, FileIndexWriter> writerEntry :
indexWritersMap.entrySet()) {
- if (!writedKeys.contains(writerEntry.getKey())) {
+ if (!writtenKeys.contains(writerEntry.getKey())) {
writerEntry.getValue().writeRecord(null);
}
}