This is an automated email from the ASF dual-hosted git repository. forwardxu pushed a commit to branch release-0.12.1 in repository https://gitbox.apache.org/repos/asf/hudi.git
commit 3c364bdf721651ed20980c30ee9b521e3535286e Author: xiarixiaoyao <mengtao0...@qq.com> AuthorDate: Wed Sep 28 15:05:26 2022 +0800 [MINOR] add integrity check of merged parquet file for HoodieMergeHandle. --- .../src/main/java/org/apache/hudi/io/HoodieMergeHandle.java | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieMergeHandle.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieMergeHandle.java index e629c6a51e..88db25bac4 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieMergeHandle.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieMergeHandle.java @@ -34,6 +34,7 @@ import org.apache.hudi.common.model.IOType; import org.apache.hudi.common.util.DefaultSizeEstimator; import org.apache.hudi.common.util.HoodieRecordSizeEstimator; import org.apache.hudi.common.util.Option; +import org.apache.hudi.common.util.ParquetUtils; import org.apache.hudi.common.util.ValidationUtils; import org.apache.hudi.common.util.collection.ExternalSpillableMap; import org.apache.hudi.config.HoodieWriteConfig; @@ -65,6 +66,8 @@ import java.util.NoSuchElementException; import java.util.Map; import java.util.Set; +import static org.apache.hudi.common.model.HoodieFileFormat.PARQUET; + @SuppressWarnings("Duplicates") /** * Handle to merge incoming records to those in storage. @@ -447,6 +450,13 @@ public class HoodieMergeHandle<T extends HoodieRecordPayload, I, K, O> extends H return; } + // Fast integrity check of the merged parquet file: + // only verifies that the parquet footer metadata is readable. 
+ final String extension = FSUtils.getFileExtension(newFilePath.toString()); + if (PARQUET.getFileExtension().equals(extension)) { + new ParquetUtils().readMetadata(hoodieTable.getHadoopConf(), newFilePath); + } + long oldNumWrites = 0; try { HoodieFileReader reader = HoodieFileReaderFactory.getFileReader(hoodieTable.getHadoopConf(), oldFilePath);