This is an automated email from the ASF dual-hosted git repository.
lzljs3620320 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/paimon.git
The following commit(s) were added to refs/heads/master by this push:
new 11f0c1d712 [format] Introduce null blob (#7125)
11f0c1d712 is described below
commit 11f0c1d712ea73bf887015d987a9ffaa5cd7e9c2
Author: YeJunHao <[email protected]>
AuthorDate: Tue Jan 27 19:12:37 2026 +0800
[format] Introduce null blob (#7125)
---
.../apache/paimon/format/blob/BlobFileMeta.java | 12 ++++++++--
.../paimon/format/blob/BlobFormatReader.java | 14 +++++++----
.../paimon/format/blob/BlobFormatWriter.java | 5 +++-
.../paimon/format/blob/BlobFileFormatTest.java | 28 +++++++++++++++-------
4 files changed, 42 insertions(+), 17 deletions(-)
diff --git a/paimon-format/src/main/java/org/apache/paimon/format/blob/BlobFileMeta.java b/paimon-format/src/main/java/org/apache/paimon/format/blob/BlobFileMeta.java
index 6fde55d485..02579b9597 100644
--- a/paimon-format/src/main/java/org/apache/paimon/format/blob/BlobFileMeta.java
+++ b/paimon-format/src/main/java/org/apache/paimon/format/blob/BlobFileMeta.java
@@ -55,8 +55,12 @@ public class BlobFileMeta {
long[] blobOffsets = new long[blobLengths.length];
long offset = 0;
for (int i = 0; i < blobLengths.length; i++) {
- blobOffsets[i] = offset;
- offset += blobLengths[i];
+ if (blobLengths[i] == -1) {
+ blobOffsets[i] = -1;
+ } else {
+ blobOffsets[i] = offset;
+ offset += blobLengths[i];
+ }
}
int[] returnedPositions = null;
@@ -81,6 +85,10 @@ public class BlobFileMeta {
this.blobOffsets = blobOffsets;
}
+ public boolean isNull(int i) {
+ return blobLengths[i] == -1;
+ }
+
public long blobLength(int i) {
return blobLengths[i];
}
diff --git a/paimon-format/src/main/java/org/apache/paimon/format/blob/BlobFormatReader.java b/paimon-format/src/main/java/org/apache/paimon/format/blob/BlobFormatReader.java
index b06fb05d96..2ba0169cdc 100644
--- a/paimon-format/src/main/java/org/apache/paimon/format/blob/BlobFormatReader.java
+++ b/paimon-format/src/main/java/org/apache/paimon/format/blob/BlobFormatReader.java
@@ -83,12 +83,16 @@ public class BlobFormatReader implements FileRecordReader<InternalRow> {
}
Blob blob;
- long offset = fileMeta.blobOffset(currentPosition) + 4;
- long length = fileMeta.blobLength(currentPosition) - 16;
- if (in != null) {
- blob = Blob.fromData(readInlineBlob(in, offset, length));
+ if (fileMeta.isNull(currentPosition)) {
+ blob = null;
} else {
- blob = Blob.fromFile(fileIO, filePathString, offset,
length);
+ long offset = fileMeta.blobOffset(currentPosition) + 4;
+ long length = fileMeta.blobLength(currentPosition) - 16;
+ if (in != null) {
+ blob = Blob.fromData(readInlineBlob(in, offset,
length));
+ } else {
+ blob = Blob.fromFile(fileIO, filePathString, offset,
length);
+ }
}
currentPosition++;
return GenericRow.of(blob);
diff --git a/paimon-format/src/main/java/org/apache/paimon/format/blob/BlobFormatWriter.java b/paimon-format/src/main/java/org/apache/paimon/format/blob/BlobFormatWriter.java
index e7c84741f7..aa93708ff8 100644
--- a/paimon-format/src/main/java/org/apache/paimon/format/blob/BlobFormatWriter.java
+++ b/paimon-format/src/main/java/org/apache/paimon/format/blob/BlobFormatWriter.java
@@ -80,7 +80,10 @@ public class BlobFormatWriter implements FileAwareFormatWriter {
@Override
public void addElement(InternalRow element) throws IOException {
checkArgument(element.getFieldCount() == 1, "BlobFormatWriter only
support one field.");
- checkArgument(!element.isNullAt(0), "BlobFormatWriter only support
non-null blob.");
+ if (element.isNullAt(0)) {
+ lengths.add(-1L);
+ return;
+ }
Blob blob = element.getBlob(0);
long previousPos = out.getPos();
diff --git a/paimon-format/src/test/java/org/apache/paimon/format/blob/BlobFileFormatTest.java b/paimon-format/src/test/java/org/apache/paimon/format/blob/BlobFileFormatTest.java
index feaaacc5d1..0132359c01 100644
--- a/paimon-format/src/test/java/org/apache/paimon/format/blob/BlobFileFormatTest.java
+++ b/paimon-format/src/test/java/org/apache/paimon/format/blob/BlobFileFormatTest.java
@@ -78,11 +78,17 @@ public class BlobFileFormatTest {
// write
FormatWriterFactory writerFactory =
format.createWriterFactory(rowType);
- List<byte[]> blobs = Arrays.asList("hello".getBytes(),
"world".getBytes());
+ List<byte[]> blobs =
+ Arrays.asList("hello".getBytes(), null, "world".getBytes(),
new byte[0]);
try (PositionOutputStream out = fileIO.newOutputStream(file, false)) {
FormatWriter formatWriter = writerFactory.create(out, null);
for (byte[] bytes : blobs) {
- formatWriter.addElement(GenericRow.of(new BlobData(bytes)));
+ if (bytes == null) {
+ formatWriter.addElement(GenericRow.of((Object) null));
+ continue;
+ } else {
+ formatWriter.addElement(GenericRow.of(new
BlobData(bytes)));
+ }
}
formatWriter.close();
}
@@ -96,13 +102,17 @@ public class BlobFileFormatTest {
.createReader(context)
.forEachRemaining(
row -> {
- Blob blob = row.getBlob(0);
- if (blobAsDescriptor) {
- assertThat(blob).isInstanceOf(BlobRef.class);
+ if (row.isNullAt(0)) {
+ result.add(null);
} else {
- assertThat(blob).isInstanceOf(BlobData.class);
+ Blob blob = row.getBlob(0);
+ if (blobAsDescriptor) {
+
assertThat(blob).isInstanceOf(BlobRef.class);
+ } else {
+
assertThat(blob).isInstanceOf(BlobData.class);
+ }
+ result.add(blob.toData());
}
- result.add(blob.toData());
});
// assert
@@ -110,7 +120,7 @@ public class BlobFileFormatTest {
// read with selection
RoaringBitmap32 selection = new RoaringBitmap32();
- selection.add(1);
+ selection.add(2);
context = new FormatReaderContext(fileIO, file,
fileIO.getFileSize(file), selection);
result.clear();
readerFactory
@@ -118,6 +128,6 @@ public class BlobFileFormatTest {
.forEachRemaining(row -> result.add(row.getBlob(0).toData()));
// assert
- assertThat(result).containsOnly(blobs.get(1));
+ assertThat(result).containsOnly(blobs.get(2));
}
}