This is an automated email from the ASF dual-hosted git repository.

satish pushed a commit to branch release-0.12.2 in repository https://gitbox.apache.org/repos/asf/hudi.git
commit b975295ca26360eb942b90ee9127082807059df4 Author: RexAn <bonean...@gmail.com> AuthorDate: Tue Nov 29 20:51:07 2022 +0800 [HUDI-5253] HoodieMergeOnReadTableInputFormat could have duplicate records issue if it contains delta files while still splittable (#7264) --- .../org/apache/hudi/hadoop/realtime/HoodieRealtimePath.java | 2 +- .../realtime/TestHoodieMergeOnReadTableInputFormat.java | 13 +++++++++++++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/HoodieRealtimePath.java b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/HoodieRealtimePath.java index 1f1dd1b9274..e88a1eecfe3 100644 --- a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/HoodieRealtimePath.java +++ b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/HoodieRealtimePath.java @@ -89,7 +89,7 @@ public class HoodieRealtimePath extends Path { } public boolean isSplitable() { - return !toString().isEmpty() && !includeBootstrapFilePath(); + return !toString().isEmpty() && !toString().contains(".log") && deltaLogFiles.isEmpty() && !includeBootstrapFilePath(); } public PathWithBootstrapFileStatus getPathWithBootstrapFileStatus() { diff --git a/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/realtime/TestHoodieMergeOnReadTableInputFormat.java b/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/realtime/TestHoodieMergeOnReadTableInputFormat.java index d44f5fbf635..6a5404762a9 100644 --- a/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/realtime/TestHoodieMergeOnReadTableInputFormat.java +++ b/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/realtime/TestHoodieMergeOnReadTableInputFormat.java @@ -19,6 +19,7 @@ package org.apache.hudi.hadoop.realtime; +import org.apache.hudi.common.model.HoodieLogFile; import org.apache.hudi.common.util.Option; import org.apache.hudi.hadoop.PathWithBootstrapFileStatus; @@ -65,4 +66,16 @@ public class TestHoodieMergeOnReadTableInputFormat { 
rtPath.setPathWithBootstrapFileStatus(path); assertFalse(new HoodieMergeOnReadTableInputFormat().isSplitable(fs, rtPath), "Path for bootstrap should not be splitable."); } + + @Test + void pathNotSplitableIfContainsDeltaFiles() throws IOException { + URI basePath = Files.createTempFile(tempDir, "target", ".parquet").toUri(); + HoodieRealtimePath rtPath = new HoodieRealtimePath(new Path("foo"), "bar", basePath.toString(), Collections.emptyList(), "000", false, Option.empty()); + assertTrue(new HoodieMergeOnReadTableInputFormat().isSplitable(fs, rtPath), "Path only contains the base file should be splittable"); + + URI logPath = Files.createTempFile(tempDir, ".test", ".log.4_1-149-180").toUri(); + HoodieLogFile logFile = new HoodieLogFile(fs.getFileStatus(new Path(logPath))); + rtPath = new HoodieRealtimePath(new Path("foo"), "bar", basePath.toString(), Collections.singletonList(logFile), "000", false, Option.empty()); + assertFalse(new HoodieMergeOnReadTableInputFormat().isSplitable(fs, rtPath), "Path contains log files should not be splittable."); + } }