This is an automated email from the ASF dual-hosted git repository.
jinsongzhou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/amoro.git
The following commit(s) were added to refs/heads/master by this push:
new 08b5b0edb [AMORO-3645][Improvement]: Optimize target file size after
self-optimizing (#3646)
08b5b0edb is described below
commit 08b5b0edbe91f6e0a466e2890921aa7dd35fb9fe
Author: cxxiii <[email protected]>
AuthorDate: Thu Jul 10 11:27:57 2025 +0800
[AMORO-3645][Improvement]: Optimize target file size after self-optimizing
(#3646)
* fix the target file size after self-optimizing
* correct the format
* revise the implement of improvement
* override targetSize method
---------
Co-authored-by: Xu Bai <[email protected]>
---
.../org/apache/amoro/optimizing/IcebergRewriteExecutor.java | 13 +++++++++++++
1 file changed, 13 insertions(+)
diff --git a/amoro-format-iceberg/src/main/java/org/apache/amoro/optimizing/IcebergRewriteExecutor.java b/amoro-format-iceberg/src/main/java/org/apache/amoro/optimizing/IcebergRewriteExecutor.java
index d17222172..10cbe1a26 100644
--- a/amoro-format-iceberg/src/main/java/org/apache/amoro/optimizing/IcebergRewriteExecutor.java
+++ b/amoro-format-iceberg/src/main/java/org/apache/amoro/optimizing/IcebergRewriteExecutor.java
@@ -23,6 +23,7 @@ import org.apache.amoro.io.writer.GenericIcebergPartitionedFanoutWriter;
import org.apache.amoro.io.writer.IcebergFanoutPosDeleteWriter;
import org.apache.amoro.table.MixedTable;
import org.apache.amoro.utils.map.StructLikeCollections;
+import org.apache.iceberg.DataFile;
import org.apache.iceberg.PartitionSpec;
import org.apache.iceberg.TableProperties;
import org.apache.iceberg.data.GenericAppenderFactory;
@@ -35,6 +36,7 @@ import org.apache.iceberg.io.OutputFileFactory;
import org.apache.iceberg.io.TaskWriter;
import org.apache.iceberg.io.UnpartitionedWriter;
+import java.util.Arrays;
import java.util.UUID;
/** OptimizingExecutor for iceberg format. */
@@ -95,6 +97,17 @@ public class IcebergRewriteExecutor extends AbstractRewriteFilesExecutor {
}
}
+ @Override
+ protected long targetSize() {
+ long targetSize = super.targetSize();
+ long inputSize =
+ Arrays.stream(input.rewrittenDataFiles()).mapToLong(DataFile::fileSizeInBytes).sum();
+ // When the input files’ total size is below targetSize, remove the output file size limit to
+ // avoid outputting multiple files.
+ // For more details, please refer to: https://github.com/apache/amoro/issues/3645
+ return inputSize < targetSize ? Long.MAX_VALUE : targetSize;
+ }
+
private PartitionSpec fileSpec() {
return table.asUnkeyedTable().specs().get(input.allFiles()[0].specId());
}