yihua commented on code in PR #7359: URL: https://github.com/apache/hudi/pull/7359#discussion_r1275296193
########## hudi-client/hudi-java-client/src/main/java/org/apache/hudi/table/action/commit/JavaWriteHelper.java: ########## @@ -69,17 +70,26 @@ public List<HoodieRecord<T>> deduplicateRecords( }).collect(Collectors.groupingBy(Pair::getLeft)); final Schema schema = new Schema.Parser().parse(schemaStr); - return keyedRecords.values().stream().map(x -> x.stream().map(Pair::getRight).reduce((rec1, rec2) -> { - HoodieRecord<T> reducedRecord; - try { - reducedRecord = merger.merge(rec1, schema, rec2, schema, props).get().getLeft(); - } catch (IOException e) { - throw new HoodieException(String.format("Error to merge two records, %s, %s", rec1, rec2), e); + boolean sortBeforePrecombine = merger.useSortedMerge(props); + return keyedRecords.values().stream().map(hoodieRecords -> { + Stream<HoodieRecord<T>> recordStream; + if (sortBeforePrecombine) { + recordStream = hoodieRecords.stream().map(Pair::getRight).sorted(new SortedPrecombineComparator(schema, props)); Review Comment: Same question as on the FlinkWriteHelper change regarding the sorting here: why is it required, given that the `reduce` happens afterwards regardless? 
########## hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/table/action/commit/FlinkWriteHelper.java: ########## @@ -99,24 +100,34 @@ public List<HoodieRecord<T>> deduplicateRecords( // caution that the avro schema is not serializable final Schema schema = new Schema.Parser().parse(schemaStr); - return keyedRecords.values().stream().map(x -> x.stream().reduce((rec1, rec2) -> { - HoodieRecord<T> reducedRecord; - try { - // Precombine do not need schema and do not return null - reducedRecord = merger.merge(rec1, schema, rec2, schema, props).get().getLeft(); - } catch (IOException e) { - throw new HoodieException(String.format("Error to merge two records, %s, %s", rec1, rec2), e); + boolean sortBeforePrecombine = merger.useSortedMerge(props); + return keyedRecords.values().stream().map(hoodieRecords -> { + Stream<HoodieRecord<T>> recordStream; + if (sortBeforePrecombine) { + recordStream = hoodieRecords.stream().sorted(new SortedPrecombineComparator(schema, props)); Review Comment: Why is the sorting required here? The `reduce` happens afterwards regardless. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@hudi.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org