This is an automated email from the ASF dual-hosted git repository.

forwardxu pushed a commit to branch release-0.12.1
in repository https://gitbox.apache.org/repos/asf/hudi.git

commit 700717c7344128f60a42795006b12daa063fc6d1
Author: zhuanshenbsj1 <34104400+zhuanshenb...@users.noreply.github.com>
AuthorDate: Tue Jan 24 13:14:45 2023 +0800

    [HUDI-5235] Clustering target size should larger than small file limit 
(#7232)
    
    * target size should larger than small file limit
    
    (cherry picked from commit 4f6b831ea11d8a99e828b4cb37dd770f8075dd43)
---
 .../java/org/apache/hudi/sink/clustering/ClusteringOperator.java    | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git 
a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/clustering/ClusteringOperator.java
 
b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/clustering/ClusteringOperator.java
index 308679e78a6..a414729724e 100644
--- 
a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/clustering/ClusteringOperator.java
+++ 
b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/clustering/ClusteringOperator.java
@@ -132,6 +132,12 @@ public class ClusteringOperator extends 
TableStreamOperator<ClusteringCommitEven
     // override max parquet file size in conf
     this.conf.setLong(HoodieStorageConfig.PARQUET_MAX_FILE_SIZE.key(),
         
this.conf.getLong(FlinkOptions.CLUSTERING_PLAN_STRATEGY_TARGET_FILE_MAX_BYTES));
+
+    // target size should larger than small file limit
+    
this.conf.setLong(FlinkOptions.CLUSTERING_PLAN_STRATEGY_SMALL_FILE_LIMIT.key(),
+        
this.conf.getLong(FlinkOptions.CLUSTERING_PLAN_STRATEGY_TARGET_FILE_MAX_BYTES) 
> this.conf.getLong(FlinkOptions.CLUSTERING_PLAN_STRATEGY_SMALL_FILE_LIMIT)
+          ? 
this.conf.getLong(FlinkOptions.CLUSTERING_PLAN_STRATEGY_SMALL_FILE_LIMIT)
+            : 
this.conf.getLong(FlinkOptions.CLUSTERING_PLAN_STRATEGY_TARGET_FILE_MAX_BYTES));
   }
 
   @Override

Reply via email to