This is an automated email from the ASF dual-hosted git repository.
nsivabalan pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hudi.git
The following commit(s) were added to refs/heads/master by this push:
new fedf24ffc933 Fix NPE in getInputFileSlices when RO path filter returns
empty partition (#18639)
fedf24ffc933 is described below
commit fedf24ffc933f7f2c962ae091420fddbb543b53b
Author: Prashant Wason <[email protected]>
AuthorDate: Fri Jun 12 16:34:13 2026 -0700
Fix NPE in getInputFileSlices when RO path filter returns empty partition (#18639)
generatePartitionFileSlicesPostROTablePathFilter built its result map by
iterating over the file list, so a partition with no files produced no
entry. The caller, getInputFileSlices, then ran
Collectors.toMap(identity, cache::get), where cache::get returned null
for the missing partition; Collectors.toMap rejects null values via
Objects.requireNonNull, hence the NPE.
Pre-populate the result map with an empty file-slice list for every
input partition before processing files. This restores the contract
already honored by filterFiles (the non-RO path), which iterates over
partitions and therefore naturally returns an entry per partition.
Co-authored-by: Claude Opus 4.7 <[email protected]>
---
.../org/apache/hudi/BaseHoodieTableFileIndex.java | 11 +++-
.../apache/hudi/BaseHoodieTableFileIndexTest.java | 69 ++++++++++++++++++++++
2 files changed, 79 insertions(+), 1 deletion(-)
diff --git
a/hudi-common/src/main/java/org/apache/hudi/BaseHoodieTableFileIndex.java
b/hudi-common/src/main/java/org/apache/hudi/BaseHoodieTableFileIndex.java
index da30e0dd923a..a6653ab88c36 100644
--- a/hudi-common/src/main/java/org/apache/hudi/BaseHoodieTableFileIndex.java
+++ b/hudi-common/src/main/java/org/apache/hudi/BaseHoodieTableFileIndex.java
@@ -314,6 +314,11 @@ public abstract class BaseHoodieTableFileIndex implements
AutoCloseable {
Map<String, PartitionPath> partitionsMap = new HashMap<>();
partitions.forEach(p -> partitionsMap.put(p.path, p));
Map<PartitionPath, List<FileSlice>> partitionToFileSlices = new
HashMap<>();
+ // Pre-populate so partitions with no files still appear in the result map.
+ // Without this, the caller's Collectors.toMap(identity, cache::get) NPEs
on empty partitions
+ // because cache.get returns null and toMap rejects null values. This
matches the contract
+ // already honored by filterFiles, which iterates over partitions rather
than over files.
+ partitions.forEach(p -> partitionToFileSlices.put(p,
Collections.emptyList()));
for (StoragePathInfo pathInfo : allFiles) {
// Create FileSlice obj from StoragePathInfo.
@@ -326,7 +331,11 @@ public abstract class BaseHoodieTableFileIndex implements
AutoCloseable {
// Add the FileSlice to partitionToFileSlices
PartitionPath partitionPathObj = partitionsMap.get(relPartitionPath);
if (partitionPathObj != null) {
- List<FileSlice> fileSlices =
partitionToFileSlices.computeIfAbsent(partitionPathObj, k -> new ArrayList<>());
+ List<FileSlice> fileSlices =
partitionToFileSlices.get(partitionPathObj);
+ if (fileSlices.isEmpty()) {
+ fileSlices = new ArrayList<>();
+ partitionToFileSlices.put(partitionPathObj, fileSlices);
+ }
fileSlices.add(fileSlice);
} else {
log.warn("Could not find partition path object for relative path: {}.
Skipping file: {}",
diff --git
a/hudi-common/src/test/java/org/apache/hudi/BaseHoodieTableFileIndexTest.java
b/hudi-common/src/test/java/org/apache/hudi/BaseHoodieTableFileIndexTest.java
index a6edb8c64a88..285e7698c5f5 100644
---
a/hudi-common/src/test/java/org/apache/hudi/BaseHoodieTableFileIndexTest.java
+++
b/hudi-common/src/test/java/org/apache/hudi/BaseHoodieTableFileIndexTest.java
@@ -18,15 +18,25 @@
package org.apache.hudi;
+import org.apache.hudi.BaseHoodieTableFileIndex.PartitionPath;
import org.apache.hudi.common.config.HoodieMetadataConfig;
+import org.apache.hudi.common.model.FileSlice;
+import org.apache.hudi.storage.StoragePath;
+import org.apache.hudi.storage.StoragePathInfo;
import org.junit.jupiter.api.Test;
import java.lang.reflect.Field;
+import java.lang.reflect.Method;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertSame;
+import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.mockito.Mockito.mock;
public class BaseHoodieTableFileIndexTest {
@@ -58,4 +68,63 @@ public class BaseHoodieTableFileIndexTest {
assertEquals(true, result.isBloomFilterIndexEnabled(), "Bloom filter index
should be enabled");
assertEquals(true, result.isColumnStatsIndexEnabled(), "Column stats index
should be enabled");
}
+
+ /**
+ * Regression test for the empty-partition NPE that surfaces in {@code
getInputFileSlices}
+ * when the {@code
hoodie.datasource.read.file.index.list.file.statuses.using.ro.path.filter}
+ * code path is exercised on a COW (or READ_OPTIMIZED) table that contains a
partition
+ * holding zero base files.
+ *
+ * <p>Before the fix, {@link
BaseHoodieTableFileIndex#generatePartitionFileSlicesPostROTablePathFilter}
+ * built its result map by iterating over the file list, so a partition with
no files received
+ * no entry. The downstream {@code Collectors.toMap(identity, p ->
cache.get(p))} in
+ * {@code getInputFileSlices} then dereferenced a null value and threw NPE
inside
+ * {@code Collectors.uniqKeysMapAccumulator}.
+ *
+ * <p>After the fix, every input partition appears in the returned map (with
an empty list
+ * for empty partitions), preserving the contract already honored by the
non-RO path
+ * ({@code filterFiles}).
+ */
+ @Test
+ public void
testGeneratePartitionFileSlicesPostROTablePathFilterIncludesEmptyPartitions()
throws Exception {
+ BaseHoodieTableFileIndex fileIndex = mock(BaseHoodieTableFileIndex.class,
+ org.mockito.Mockito.CALLS_REAL_METHODS);
+
+ StoragePath basePath = new StoragePath("/tmp/hudi_empty_partition_test");
+ Field basePathField =
BaseHoodieTableFileIndex.class.getDeclaredField("basePath");
+ basePathField.setAccessible(true);
+ basePathField.set(fileIndex, basePath);
+
+ PartitionPath partitionWithFiles = new PartitionPath("dt=2026-01-01", new
Object[]{"2026-01-01"});
+ PartitionPath emptyPartition = new PartitionPath("dt=2026-01-02", new
Object[]{"2026-01-02"});
+ PartitionPath anotherEmpty = new PartitionPath("dt=2026-01-03", new
Object[]{"2026-01-03"});
+ List<PartitionPath> partitions = Arrays.asList(partitionWithFiles,
emptyPartition, anotherEmpty);
+
+ StoragePathInfo file = new StoragePathInfo(
+ new StoragePath(basePath,
"dt=2026-01-01/file-0_0-0-0_20260101000000001.parquet"),
+ 100L, false, (short) 1, 1024L, 0L);
+ List<StoragePathInfo> allFiles = Collections.singletonList(file);
+
+ Method generateMethod = BaseHoodieTableFileIndex.class.getDeclaredMethod(
+ "generatePartitionFileSlicesPostROTablePathFilter", List.class,
List.class);
+ generateMethod.setAccessible(true);
+ @SuppressWarnings("unchecked")
+ Map<PartitionPath, List<FileSlice>> result =
+ (Map<PartitionPath, List<FileSlice>>) generateMethod.invoke(fileIndex,
partitions, allFiles);
+
+ assertNotNull(result, "Result map must not be null");
+ assertEquals(3, result.size(),
+ "Result map must contain an entry for every input partition, including
empty ones");
+ assertTrue(result.containsKey(partitionWithFiles));
+ assertTrue(result.containsKey(emptyPartition),
+ "Empty partition must appear in the result so getInputFileSlices does
not NPE");
+ assertTrue(result.containsKey(anotherEmpty),
+ "Empty partition must appear in the result so getInputFileSlices does
not NPE");
+ assertEquals(1, result.get(partitionWithFiles).size(),
+ "Partition with files should retain its file slice");
+ assertTrue(result.get(emptyPartition).isEmpty(),
+ "Empty partition's file slice list must be present and empty (not
null, not missing)");
+ assertTrue(result.get(anotherEmpty).isEmpty(),
+ "Empty partition's file slice list must be present and empty (not
null, not missing)");
+ }
}
\ No newline at end of file