This is an automated email from the ASF dual-hosted git repository.
etudenhoefner pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg.git
The following commit(s) were added to refs/heads/main by this push:
new 9c1dd3b3a5 Core: Wrong reported length of encrypted Puffin files
(#14645)
9c1dd3b3a5 is described below
commit 9c1dd3b3a51aded4a6a079dfd49d0f94ec88f3cd
Author: Adam Szita <[email protected]>
AuthorDate: Mon Nov 24 15:36:13 2025 +0100
Core: Wrong reported length of encrypted Puffin files (#14645)
Manifest files created for Puffin files record the
length of each new file as PuffinWriter#length().
PuffinWriter took this value from getPos() of the
underlying PositionOutputStream, but for encrypted
files getPos() is not the true file length; it is
the length of the unencrypted content.
PuffinWriter#finish() now closes the stream and
reports its storedLength() instead.
---
.../org/apache/iceberg/puffin/PuffinWriter.java | 7 ++--
.../apache/iceberg/puffin/TestPuffinWriter.java | 43 ++++++++++++++++++++++
2 files changed, 47 insertions(+), 3 deletions(-)
diff --git a/core/src/main/java/org/apache/iceberg/puffin/PuffinWriter.java
b/core/src/main/java/org/apache/iceberg/puffin/PuffinWriter.java
index cd44dab03b..eb76ec8548 100644
--- a/core/src/main/java/org/apache/iceberg/puffin/PuffinWriter.java
+++ b/core/src/main/java/org/apache/iceberg/puffin/PuffinWriter.java
@@ -124,8 +124,6 @@ public class PuffinWriter implements FileAppender<Blob> {
if (!finished) {
finish();
}
-
- outputStream.close();
}
private void writeHeaderIfNeeded() throws IOException {
@@ -144,7 +142,10 @@ public class PuffinWriter implements FileAppender<Blob> {
long footerOffset = outputStream.getPos();
writeFooter();
this.footerSize = Optional.of(Math.toIntExact(outputStream.getPos() -
footerOffset));
- this.fileSize = Optional.of(outputStream.getPos());
+ outputStream.close();
+ // some streams (e.g. AesGcmOutputStream) may only write the last bytes upon
+ // having close() invoked
+ this.fileSize = Optional.of(outputStream.storedLength());
this.finished = true;
}
diff --git a/core/src/test/java/org/apache/iceberg/puffin/TestPuffinWriter.java
b/core/src/test/java/org/apache/iceberg/puffin/TestPuffinWriter.java
index 2a11849871..337fff817a 100644
--- a/core/src/test/java/org/apache/iceberg/puffin/TestPuffinWriter.java
+++ b/core/src/test/java/org/apache/iceberg/puffin/TestPuffinWriter.java
@@ -26,13 +26,25 @@ import static
org.apache.iceberg.puffin.PuffinFormatTestUtil.readTestResource;
import static org.assertj.core.api.Assertions.assertThat;
import static org.assertj.core.api.Assertions.assertThatThrownBy;
+import java.io.File;
import java.nio.ByteBuffer;
+import java.nio.file.Path;
+import java.util.Random;
+import org.apache.iceberg.Files;
+import org.apache.iceberg.encryption.AesGcmOutputFile;
import org.apache.iceberg.inmemory.InMemoryOutputFile;
+import org.apache.iceberg.io.OutputFile;
import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList;
import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap;
import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.io.TempDir;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.CsvSource;
public class TestPuffinWriter {
+
+ @TempDir private Path temp;
+
@Test
public void testEmptyFooterCompressed() {
InMemoryOutputFile outputFile = new InMemoryOutputFile();
@@ -86,6 +98,37 @@ public class TestPuffinWriter {
testWriteMetric(ZSTD, "v1/sample-metric-data-compressed-zstd.bin");
}
+ @ParameterizedTest // executed once per @CsvSource row
+ @CsvSource({"true, 158", "false, 122"}) // columns: isEncrypted, expectedSize
+ public void testFileSizeCalculation(boolean isEncrypted, long expectedSize)
throws Exception { // checks PuffinWriter#length() after close() for an encrypted and a plain output file
+ final OutputFile outputFile;
+
+ if (isEncrypted) {
+ File testFile = temp.resolve("test" + System.nanoTime()).toFile(); // nanoTime suffix keeps the name distinct inside the @TempDir directory
+ Random random = new Random(); // unseeded: key and AAD prefix bytes differ on every run
+ byte[] key = new byte[16];
+ random.nextBytes(key);
+ byte[] aadPrefix = new byte[16];
+ random.nextBytes(aadPrefix);
+ outputFile = new AesGcmOutputFile(Files.localOutput(testFile), key,
aadPrefix); // NOTE(review): the 36-byte gap between the two expected sizes is presumably the encryption overhead added by this wrapper - confirm
+ } else {
+ outputFile = new InMemoryOutputFile(); // unencrypted case writes to memory instead of a temp file
+ }
+
+ PuffinWriter writer = Puffin.write(outputFile).build();
+ writer.write(
+ new Blob(
+ "blob",
+ ImmutableList.of(1),
+ 2,
+ 1,
+ ByteBuffer.wrap("blob".getBytes()), // NOTE(review): getBytes() uses the platform default charset; the payload is ASCII, so presumably harmless - confirm
+ null,
+ ImmutableMap.of()));
+ writer.close(); // close() calls finish() when the writer has not been finished yet
+ assertThat(writer.length()).isEqualTo(expectedSize); // length() must equal the expected size from the CSV row
+ }
+
private void testWriteMetric(PuffinCompressionCodec compression, String
expectedResource)
throws Exception {
InMemoryOutputFile outputFile = new InMemoryOutputFile();