This is an automated email from the ASF dual-hosted git repository. forwardxu pushed a commit to branch release-0.12.1 in repository https://gitbox.apache.org/repos/asf/hudi.git
commit 700717c7344128f60a42795006b12daa063fc6d1 Author: zhuanshenbsj1 <34104400+zhuanshenb...@users.noreply.github.com> AuthorDate: Tue Jan 24 13:14:45 2023 +0800 [HUDI-5235] Clustering target size should be larger than small file limit (#7232) * target size should be larger than small file limit (cherry picked from commit 4f6b831ea11d8a99e828b4cb37dd770f8075dd43) --- .../java/org/apache/hudi/sink/clustering/ClusteringOperator.java | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/clustering/ClusteringOperator.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/clustering/ClusteringOperator.java index 308679e78a6..a414729724e 100644 --- a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/clustering/ClusteringOperator.java +++ b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/clustering/ClusteringOperator.java @@ -132,6 +132,12 @@ public class ClusteringOperator extends TableStreamOperator<ClusteringCommitEven // override max parquet file size in conf this.conf.setLong(HoodieStorageConfig.PARQUET_MAX_FILE_SIZE.key(), this.conf.getLong(FlinkOptions.CLUSTERING_PLAN_STRATEGY_TARGET_FILE_MAX_BYTES)); + + // target size should larger than small file limit + this.conf.setLong(FlinkOptions.CLUSTERING_PLAN_STRATEGY_SMALL_FILE_LIMIT.key(), + this.conf.getLong(FlinkOptions.CLUSTERING_PLAN_STRATEGY_TARGET_FILE_MAX_BYTES) > this.conf.getLong(FlinkOptions.CLUSTERING_PLAN_STRATEGY_SMALL_FILE_LIMIT) + ? this.conf.getLong(FlinkOptions.CLUSTERING_PLAN_STRATEGY_SMALL_FILE_LIMIT) + : this.conf.getLong(FlinkOptions.CLUSTERING_PLAN_STRATEGY_TARGET_FILE_MAX_BYTES)); } @Override