This is an automated email from the ASF dual-hosted git repository.

vinoth pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hudi.git


The following commit(s) were added to refs/heads/master by this push:
     new 650c445  [HUDI-2122] Improvement in packaging insert into smallfiles (#3213)
650c445 is described below

commit 650c4455c600b0346fed8b5b6aa4cc0bf3452e8c
Author: wangxianghu <wangxian...@apache.org>
AuthorDate: Tue Jul 6 00:30:57 2021 +0800

    [HUDI-2122] Improvement in packaging insert into smallfiles (#3213)
---
 .../java/org/apache/hudi/table/action/commit/UpsertPartitioner.java | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/commit/UpsertPartitioner.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/commit/UpsertPartitioner.java
index 9d60cde..3ac8151 100644
--- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/commit/UpsertPartitioner.java
+++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/commit/UpsertPartitioner.java
@@ -187,7 +187,7 @@ public class UpsertPartitioner<T extends HoodieRecordPayload<T>> extends Partiti
         for (SmallFile smallFile : smallFiles) {
           long recordsToAppend = Math.min((config.getParquetMaxFileSize() - smallFile.sizeBytes) / averageRecordSize,
               totalUnassignedInserts);
-          if (recordsToAppend > 0 && totalUnassignedInserts > 0) {
+          if (recordsToAppend > 0) {
             // create a new bucket or re-use an existing bucket
             int bucket;
             if (updateLocationToBucket.containsKey(smallFile.location.getFileId())) {
@@ -200,6 +200,10 @@ public class UpsertPartitioner<T extends HoodieRecordPayload<T>> extends Partiti
             bucketNumbers.add(bucket);
             recordsPerBucket.add(recordsToAppend);
             totalUnassignedInserts -= recordsToAppend;
+            if (totalUnassignedInserts <= 0) {
+              // stop the loop when all the inserts are assigned
+              break;
+            }
           }
         }
 

Reply via email to