This is an automated email from the ASF dual-hosted git repository.

satish pushed a commit to branch release-0.12.2
in repository https://gitbox.apache.org/repos/asf/hudi.git

commit b975295ca26360eb942b90ee9127082807059df4
Author: RexAn <bonean...@gmail.com>
AuthorDate: Tue Nov 29 20:51:07 2022 +0800

    [HUDI-5253] HoodieMergeOnReadTableInputFormat could have duplicate records issue if it contains delta files while still splittable (#7264)
---
 .../org/apache/hudi/hadoop/realtime/HoodieRealtimePath.java |  2 +-
 .../realtime/TestHoodieMergeOnReadTableInputFormat.java     | 13 +++++++++++++
 2 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/HoodieRealtimePath.java b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/HoodieRealtimePath.java
index 1f1dd1b9274..e88a1eecfe3 100644
--- a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/HoodieRealtimePath.java
+++ b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/HoodieRealtimePath.java
@@ -89,7 +89,7 @@ public class HoodieRealtimePath extends Path {
   }
 
   public boolean isSplitable() {
-    return !toString().isEmpty() && !includeBootstrapFilePath();
+    return !toString().isEmpty() && !toString().contains(".log") && deltaLogFiles.isEmpty() && !includeBootstrapFilePath();
   }
 
   public PathWithBootstrapFileStatus getPathWithBootstrapFileStatus() {
diff --git a/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/realtime/TestHoodieMergeOnReadTableInputFormat.java b/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/realtime/TestHoodieMergeOnReadTableInputFormat.java
index d44f5fbf635..6a5404762a9 100644
--- a/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/realtime/TestHoodieMergeOnReadTableInputFormat.java
+++ b/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/realtime/TestHoodieMergeOnReadTableInputFormat.java
@@ -19,6 +19,7 @@
 
 package org.apache.hudi.hadoop.realtime;
 
+import org.apache.hudi.common.model.HoodieLogFile;
 import org.apache.hudi.common.util.Option;
 import org.apache.hudi.hadoop.PathWithBootstrapFileStatus;
 
@@ -65,4 +66,16 @@ public class TestHoodieMergeOnReadTableInputFormat {
     rtPath.setPathWithBootstrapFileStatus(path);
     assertFalse(new HoodieMergeOnReadTableInputFormat().isSplitable(fs, rtPath), "Path for bootstrap should not be splitable.");
   }
+
+  @Test
+  void pathNotSplitableIfContainsDeltaFiles() throws IOException {
+    URI basePath = Files.createTempFile(tempDir, "target", ".parquet").toUri();
+    HoodieRealtimePath rtPath = new HoodieRealtimePath(new Path("foo"), "bar", basePath.toString(), Collections.emptyList(), "000", false, Option.empty());
+    assertTrue(new HoodieMergeOnReadTableInputFormat().isSplitable(fs, rtPath), "Path only contains the base file should be splittable");
+
+    URI logPath = Files.createTempFile(tempDir, ".test", ".log.4_1-149-180").toUri();
+    HoodieLogFile logFile = new HoodieLogFile(fs.getFileStatus(new Path(logPath)));
+    rtPath = new HoodieRealtimePath(new Path("foo"), "bar", basePath.toString(), Collections.singletonList(logFile), "000", false, Option.empty());
+    assertFalse(new HoodieMergeOnReadTableInputFormat().isSplitable(fs, rtPath), "Path contains log files should not be splittable.");
+  }
 }

Reply via email to