This is an automated email from the ASF dual-hosted git repository.

jinsongzhou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/amoro.git


The following commit(s) were added to refs/heads/master by this push:
     new 08b5b0edb [AMORO-3645][Improvement]: Optimize target file size after self-optimizing (#3646)
08b5b0edb is described below

commit 08b5b0edbe91f6e0a466e2890921aa7dd35fb9fe
Author: cxxiii <[email protected]>
AuthorDate: Thu Jul 10 11:27:57 2025 +0800

    [AMORO-3645][Improvement]: Optimize target file size after self-optimizing (#3646)
    
    * fix the target file size after self-optimizing
    
    * correct the format
    
    * revise the implement of improvement
    
    * override targetSize method
    
    ---------
    
    Co-authored-by: Xu Bai <[email protected]>
---
 .../org/apache/amoro/optimizing/IcebergRewriteExecutor.java | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/amoro-format-iceberg/src/main/java/org/apache/amoro/optimizing/IcebergRewriteExecutor.java b/amoro-format-iceberg/src/main/java/org/apache/amoro/optimizing/IcebergRewriteExecutor.java
index d17222172..10cbe1a26 100644
--- a/amoro-format-iceberg/src/main/java/org/apache/amoro/optimizing/IcebergRewriteExecutor.java
+++ b/amoro-format-iceberg/src/main/java/org/apache/amoro/optimizing/IcebergRewriteExecutor.java
@@ -23,6 +23,7 @@ import org.apache.amoro.io.writer.GenericIcebergPartitionedFanoutWriter;
 import org.apache.amoro.io.writer.IcebergFanoutPosDeleteWriter;
 import org.apache.amoro.table.MixedTable;
 import org.apache.amoro.utils.map.StructLikeCollections;
+import org.apache.iceberg.DataFile;
 import org.apache.iceberg.PartitionSpec;
 import org.apache.iceberg.TableProperties;
 import org.apache.iceberg.data.GenericAppenderFactory;
@@ -35,6 +36,7 @@ import org.apache.iceberg.io.OutputFileFactory;
 import org.apache.iceberg.io.TaskWriter;
 import org.apache.iceberg.io.UnpartitionedWriter;
 
+import java.util.Arrays;
 import java.util.UUID;
 
 /** OptimizingExecutor for iceberg format. */
@@ -95,6 +97,17 @@ public class IcebergRewriteExecutor extends AbstractRewriteFilesExecutor {
     }
   }
 
+  @Override
+  protected long targetSize() {
+    long targetSize = super.targetSize();
+    long inputSize =
+        Arrays.stream(input.rewrittenDataFiles()).mapToLong(DataFile::fileSizeInBytes).sum();
+    // When the input files’ total size is below targetSize, remove the output file size limit to
+    // avoid outputting multiple files.
+    // For more details, please refer to: https://github.com/apache/amoro/issues/3645
+    return inputSize < targetSize ? Long.MAX_VALUE : targetSize;
+  }
+
   private PartitionSpec fileSpec() {
     return table.asUnkeyedTable().specs().get(input.allFiles()[0].specId());
   }

Reply via email to