This is an automated email from the ASF dual-hosted git repository.

forwardxu pushed a commit to branch release-0.12.1
in repository https://gitbox.apache.org/repos/asf/hudi.git

commit 3c364bdf721651ed20980c30ee9b521e3535286e
Author: xiarixiaoyao <mengtao0...@qq.com>
AuthorDate: Wed Sep 28 15:05:26 2022 +0800

    [MINOR] add integrity check of merged parquet file for HoodieMergeHandle.
---
 .../src/main/java/org/apache/hudi/io/HoodieMergeHandle.java    | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieMergeHandle.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieMergeHandle.java
index e629c6a51e..88db25bac4 100644
--- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieMergeHandle.java
+++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieMergeHandle.java
@@ -34,6 +34,7 @@ import org.apache.hudi.common.model.IOType;
 import org.apache.hudi.common.util.DefaultSizeEstimator;
 import org.apache.hudi.common.util.HoodieRecordSizeEstimator;
 import org.apache.hudi.common.util.Option;
+import org.apache.hudi.common.util.ParquetUtils;
 import org.apache.hudi.common.util.ValidationUtils;
 import org.apache.hudi.common.util.collection.ExternalSpillableMap;
 import org.apache.hudi.config.HoodieWriteConfig;
@@ -65,6 +66,8 @@ import java.util.NoSuchElementException;
 import java.util.Map;
 import java.util.Set;
 
+import static org.apache.hudi.common.model.HoodieFileFormat.PARQUET;
+
 @SuppressWarnings("Duplicates")
 /**
  * Handle to merge incoming records to those in storage.
@@ -447,6 +450,13 @@ public class HoodieMergeHandle<T extends HoodieRecordPayload, I, K, O> extends H
       return;
     }
 
+    // Fast verify the integrity of the parquet file.
+    // Only checks that the parquet metadata is readable.
+    final String extension = FSUtils.getFileExtension(newFilePath.toString());
+    if (PARQUET.getFileExtension().equals(extension)) {
+      new ParquetUtils().readMetadata(hoodieTable.getHadoopConf(), newFilePath);
+    }
+
     long oldNumWrites = 0;
     try {
       HoodieFileReader reader = HoodieFileReaderFactory.getFileReader(hoodieTable.getHadoopConf(), oldFilePath);

Reply via email to