This is an automated email from the ASF dual-hosted git repository.

lzljs3620320 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/paimon.git


The following commit(s) were added to refs/heads/master by this push:
     new cf2a39117e [core] File size should be extract from path if it is zero (#6541)
cf2a39117e is described below

commit cf2a39117e22670f031f057869ecf94c5d88f5ea
Author: YeJunHao <[email protected]>
AuthorDate: Fri Nov 7 15:49:20 2025 +0800

    [core] File size should be extract from path if it is zero (#6541)
---
 .../org/apache/paimon/iceberg/manifest/IcebergManifestFile.java   | 4 ++--
 .../main/java/org/apache/paimon/io/KeyValueDataFileWriter.java    | 2 +-
 .../src/main/java/org/apache/paimon/io/RowDataFileWriter.java     | 2 +-
 .../src/main/java/org/apache/paimon/io/SingleFileWriter.java      | 8 +++++++-
 .../src/main/java/org/apache/paimon/manifest/ManifestFile.java    | 2 +-
 .../test/scala/org/apache/paimon/spark/sql/LanceFormatTest.scala  | 6 ++++++
 6 files changed, 18 insertions(+), 6 deletions(-)

diff --git a/paimon-core/src/main/java/org/apache/paimon/iceberg/manifest/IcebergManifestFile.java b/paimon-core/src/main/java/org/apache/paimon/iceberg/manifest/IcebergManifestFile.java
index 2e05b25a40..a717ca0da0 100644
--- a/paimon-core/src/main/java/org/apache/paimon/iceberg/manifest/IcebergManifestFile.java
+++ b/paimon-core/src/main/java/org/apache/paimon/iceberg/manifest/IcebergManifestFile.java
@@ -249,7 +249,7 @@ public class IcebergManifestFile extends ObjectsFile<IcebergManifestEntry> {
         }
 
         @Override
-        public IcebergManifestFileMeta result() {
+        public IcebergManifestFileMeta result() throws IOException {
             SimpleColStats[] stats = partitionStatsCollector.extract();
             List<IcebergPartitionSummary> partitionSummaries = new ArrayList<>();
             for (int i = 0; i < stats.length; i++) {
@@ -264,7 +264,7 @@ public class IcebergManifestFile extends ObjectsFile<IcebergManifestEntry> {
             }
             return new IcebergManifestFileMeta(
                     path.toString(),
-                    outputBytes,
+                    outputBytes(),
                     IcebergPartitionSpec.SPEC_ID,
                     content,
                     sequenceNumber,
diff --git a/paimon-core/src/main/java/org/apache/paimon/io/KeyValueDataFileWriter.java b/paimon-core/src/main/java/org/apache/paimon/io/KeyValueDataFileWriter.java
index 0710e28cea..111f3dcf7e 100644
--- a/paimon-core/src/main/java/org/apache/paimon/io/KeyValueDataFileWriter.java
+++ b/paimon-core/src/main/java/org/apache/paimon/io/KeyValueDataFileWriter.java
@@ -145,7 +145,7 @@ public abstract class KeyValueDataFileWriter
             return null;
         }
 
-        long fileSize = outputBytes;
+        long fileSize = outputBytes();
         Pair<SimpleColStats[], SimpleColStats[]> keyValueStats =
                 fetchKeyValueStats(fieldStats(fileSize));
 
diff --git a/paimon-core/src/main/java/org/apache/paimon/io/RowDataFileWriter.java b/paimon-core/src/main/java/org/apache/paimon/io/RowDataFileWriter.java
index c361de0c2c..2f2982ba85 100644
--- a/paimon-core/src/main/java/org/apache/paimon/io/RowDataFileWriter.java
+++ b/paimon-core/src/main/java/org/apache/paimon/io/RowDataFileWriter.java
@@ -97,7 +97,7 @@ public class RowDataFileWriter extends StatsCollectingSingleFileWriter<InternalR
 
     @Override
     public DataFileMeta result() throws IOException {
-        long fileSize = outputBytes;
+        long fileSize = outputBytes();
         Pair<List<String>, SimpleStats> statsPair =
                 statsArraySerializer.toBinary(fieldStats(fileSize));
         DataFileIndexWriter.FileIndexResult indexResult =
diff --git a/paimon-core/src/main/java/org/apache/paimon/io/SingleFileWriter.java b/paimon-core/src/main/java/org/apache/paimon/io/SingleFileWriter.java
index 3c3b11d8f2..936f2409af 100644
--- a/paimon-core/src/main/java/org/apache/paimon/io/SingleFileWriter.java
+++ b/paimon-core/src/main/java/org/apache/paimon/io/SingleFileWriter.java
@@ -32,6 +32,8 @@ import org.apache.paimon.utils.IOUtils;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import javax.annotation.Nullable;
+
 import java.io.IOException;
 import java.io.UncheckedIOException;
 import java.util.function.Function;
@@ -53,7 +55,7 @@ public abstract class SingleFileWriter<T, R> implements FileWriter<T, R> {
     private FormatWriter writer;
     private PositionOutputStream out;
 
-    protected long outputBytes;
+    @Nullable private Long outputBytes;
     private long recordCount;
     protected boolean closed;
 
@@ -198,4 +200,8 @@ public abstract class SingleFileWriter<T, R> implements FileWriter<T, R> {
             closed = true;
         }
     }
+
+    protected long outputBytes() throws IOException {
+        return outputBytes == null ? fileIO.getFileSize(path) : outputBytes;
+    }
 }
diff --git a/paimon-core/src/main/java/org/apache/paimon/manifest/ManifestFile.java b/paimon-core/src/main/java/org/apache/paimon/manifest/ManifestFile.java
index 1a68b15dd5..e90a38bf6a 100644
--- a/paimon-core/src/main/java/org/apache/paimon/manifest/ManifestFile.java
+++ b/paimon-core/src/main/java/org/apache/paimon/manifest/ManifestFile.java
@@ -216,7 +216,7 @@ public class ManifestFile extends ObjectsFile<ManifestEntry> {
         public ManifestFileMeta result() throws IOException {
             return new ManifestFileMeta(
                     path.getName(),
-                    outputBytes,
+                    outputBytes(),
                     numAddedFiles,
                     numDeletedFiles,
                     partitionStatsSerializer.toBinaryAllMode(partitionStatsCollector.extract()),
diff --git a/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/sql/LanceFormatTest.scala b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/sql/LanceFormatTest.scala
index 820f173901..667c2c85b9 100644
--- a/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/sql/LanceFormatTest.scala
+++ b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/sql/LanceFormatTest.scala
@@ -34,6 +34,12 @@ class LanceFormatTest extends PaimonSparkTestBase {
       checkAnswer(
         sql("SELECT * FROM t LIMIT 10"),
         Seq(Row(1, "a", Array(90.5, 88.0)), Row(2, "b", Array(90.6, 88.1))))
+
+      assert(
+        sql("SELECT file_size_in_bytes FROM `t$files`")
+          .collect()
+          .map(s => s.get(0).asInstanceOf[Long])
+          .apply(0) > 0L)
     }
   }
 }

Reply via email to