This is an automated email from the ASF dual-hosted git repository. vinoth pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hudi.git
The following commit(s) were added to refs/heads/master by this push:
     new 650c445  [HUDI-2122] Improvement in packaging insert into smallfiles (#3213)
650c445 is described below

commit 650c4455c600b0346fed8b5b6aa4cc0bf3452e8c
Author: wangxianghu <wangxian...@apache.org>
AuthorDate: Tue Jul 6 00:30:57 2021 +0800

    [HUDI-2122] Improvement in packaging insert into smallfiles (#3213)
---
 .../java/org/apache/hudi/table/action/commit/UpsertPartitioner.java | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/commit/UpsertPartitioner.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/commit/UpsertPartitioner.java
index 9d60cde..3ac8151 100644
--- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/commit/UpsertPartitioner.java
+++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/commit/UpsertPartitioner.java
@@ -187,7 +187,7 @@ public class UpsertPartitioner<T extends HoodieRecordPayload<T>> extends Partiti
         for (SmallFile smallFile : smallFiles) {
           long recordsToAppend = Math.min((config.getParquetMaxFileSize() - smallFile.sizeBytes) / averageRecordSize,
               totalUnassignedInserts);
-          if (recordsToAppend > 0 && totalUnassignedInserts > 0) {
+          if (recordsToAppend > 0) {
             // create a new bucket or re-use an existing bucket
             int bucket;
             if (updateLocationToBucket.containsKey(smallFile.location.getFileId())) {
@@ -200,6 +200,10 @@ public class UpsertPartitioner<T extends HoodieRecordPayload<T>> extends Partiti
             bucketNumbers.add(bucket);
             recordsPerBucket.add(recordsToAppend);
             totalUnassignedInserts -= recordsToAppend;
+            if (totalUnassignedInserts <= 0) {
+              // stop the loop when all the inserts are assigned
+              break;
+            }
           }
         }