This is an automated email from the ASF dual-hosted git repository.

nsivabalan pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hudi.git


The following commit(s) were added to refs/heads/master by this push:
     new fedf24ffc933 Fix NPE in getInputFileSlices when RO path filter returns empty partition (#18639)
fedf24ffc933 is described below

commit fedf24ffc933f7f2c962ae091420fddbb543b53b
Author: Prashant Wason <[email protected]>
AuthorDate: Fri Jun 12 16:34:13 2026 -0700

    Fix NPE in getInputFileSlices when RO path filter returns empty partition (#18639)
    
    generatePartitionFileSlicesPostROTablePathFilter built its result map by
    iterating over the file list, so a partition with no files produced no entry.
    The caller getInputFileSlices then did Collectors.toMap(identity, cache::get)
    where cache::get returned null for the missing partition, and Collectors.toMap
    rejects null values via Objects.requireNonNull.
    
    Pre-populate the result map with empty file-slice lists for every input
    partition before processing files. This restores the contract already honored
    by filterFiles (the non-RO path), which iterates over partitions and naturally
    returns an entry per partition.
    
    Co-authored-by: Claude Opus 4.7 <[email protected]>
---
 .../org/apache/hudi/BaseHoodieTableFileIndex.java  | 11 +++-
 .../apache/hudi/BaseHoodieTableFileIndexTest.java  | 69 ++++++++++++++++++++++
 2 files changed, 79 insertions(+), 1 deletion(-)

diff --git a/hudi-common/src/main/java/org/apache/hudi/BaseHoodieTableFileIndex.java b/hudi-common/src/main/java/org/apache/hudi/BaseHoodieTableFileIndex.java
index da30e0dd923a..a6653ab88c36 100644
--- a/hudi-common/src/main/java/org/apache/hudi/BaseHoodieTableFileIndex.java
+++ b/hudi-common/src/main/java/org/apache/hudi/BaseHoodieTableFileIndex.java
@@ -314,6 +314,11 @@ public abstract class BaseHoodieTableFileIndex implements AutoCloseable {
     Map<String, PartitionPath> partitionsMap = new HashMap<>();
     partitions.forEach(p -> partitionsMap.put(p.path, p));
     Map<PartitionPath, List<FileSlice>> partitionToFileSlices = new 
HashMap<>();
+    // Pre-populate so partitions with no files still appear in the result map.
+    // Without this, the caller's Collectors.toMap(identity, cache::get) NPEs on empty partitions
+    // because cache.get returns null and toMap rejects null values. This matches the contract
+    // already honored by filterFiles, which iterates over partitions rather than over files.
+    partitions.forEach(p -> partitionToFileSlices.put(p, 
Collections.emptyList()));
 
     for (StoragePathInfo pathInfo : allFiles) {
       // Create FileSlice obj from StoragePathInfo.
@@ -326,7 +331,11 @@ public abstract class BaseHoodieTableFileIndex implements AutoCloseable {
       // Add the FileSlice to partitionToFileSlices
       PartitionPath partitionPathObj = partitionsMap.get(relPartitionPath);
       if (partitionPathObj != null) {
-        List<FileSlice> fileSlices = 
partitionToFileSlices.computeIfAbsent(partitionPathObj, k -> new ArrayList<>());
+        List<FileSlice> fileSlices = 
partitionToFileSlices.get(partitionPathObj);
+        if (fileSlices.isEmpty()) {
+          fileSlices = new ArrayList<>();
+          partitionToFileSlices.put(partitionPathObj, fileSlices);
+        }
         fileSlices.add(fileSlice);
       } else {
         log.warn("Could not find partition path object for relative path: {}. 
Skipping file: {}",
diff --git a/hudi-common/src/test/java/org/apache/hudi/BaseHoodieTableFileIndexTest.java b/hudi-common/src/test/java/org/apache/hudi/BaseHoodieTableFileIndexTest.java
index a6edb8c64a88..285e7698c5f5 100644
--- a/hudi-common/src/test/java/org/apache/hudi/BaseHoodieTableFileIndexTest.java
+++ b/hudi-common/src/test/java/org/apache/hudi/BaseHoodieTableFileIndexTest.java
@@ -18,15 +18,25 @@
 
 package org.apache.hudi;
 
+import org.apache.hudi.BaseHoodieTableFileIndex.PartitionPath;
 import org.apache.hudi.common.config.HoodieMetadataConfig;
+import org.apache.hudi.common.model.FileSlice;
+import org.apache.hudi.storage.StoragePath;
+import org.apache.hudi.storage.StoragePathInfo;
 
 import org.junit.jupiter.api.Test;
 
 import java.lang.reflect.Field;
+import java.lang.reflect.Method;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
 
 import static org.junit.jupiter.api.Assertions.assertEquals;
 import static org.junit.jupiter.api.Assertions.assertNotNull;
 import static org.junit.jupiter.api.Assertions.assertSame;
+import static org.junit.jupiter.api.Assertions.assertTrue;
 import static org.mockito.Mockito.mock;
 
 public class BaseHoodieTableFileIndexTest {
@@ -58,4 +68,63 @@ public class BaseHoodieTableFileIndexTest {
     assertEquals(true, result.isBloomFilterIndexEnabled(), "Bloom filter index 
should be enabled");
     assertEquals(true, result.isColumnStatsIndexEnabled(), "Column stats index 
should be enabled");
   }
+
+  /**
+   * Regression test for the empty-partition NPE that surfaces in {@code getInputFileSlices}
+   * when the {@code hoodie.datasource.read.file.index.list.file.statuses.using.ro.path.filter}
+   * code path is exercised on a COW (or READ_OPTIMIZED) table that contains a partition
+   * holding zero base files.
+   *
+   * <p>Before the fix, {@link BaseHoodieTableFileIndex#generatePartitionFileSlicesPostROTablePathFilter}
+   * built its result map by iterating over the file list, so a partition with no files received
+   * no entry. The downstream {@code Collectors.toMap(identity, p -> cache.get(p))} in
+   * {@code getInputFileSlices} then dereferenced a null value and threw NPE inside
+   * {@code Collectors.uniqKeysMapAccumulator}.
+   *
+   * <p>After the fix, every input partition appears in the returned map (with an empty list
+   * for empty partitions), preserving the contract already honored by the non-RO path
+   * ({@code filterFiles}).
+   */
+  @Test
+  public void 
testGeneratePartitionFileSlicesPostROTablePathFilterIncludesEmptyPartitions() 
throws Exception {
+    BaseHoodieTableFileIndex fileIndex = mock(BaseHoodieTableFileIndex.class,
+        org.mockito.Mockito.CALLS_REAL_METHODS);
+
+    StoragePath basePath = new StoragePath("/tmp/hudi_empty_partition_test");
+    Field basePathField = 
BaseHoodieTableFileIndex.class.getDeclaredField("basePath");
+    basePathField.setAccessible(true);
+    basePathField.set(fileIndex, basePath);
+
+    PartitionPath partitionWithFiles = new PartitionPath("dt=2026-01-01", new 
Object[]{"2026-01-01"});
+    PartitionPath emptyPartition = new PartitionPath("dt=2026-01-02", new 
Object[]{"2026-01-02"});
+    PartitionPath anotherEmpty = new PartitionPath("dt=2026-01-03", new 
Object[]{"2026-01-03"});
+    List<PartitionPath> partitions = Arrays.asList(partitionWithFiles, 
emptyPartition, anotherEmpty);
+
+    StoragePathInfo file = new StoragePathInfo(
+        new StoragePath(basePath, 
"dt=2026-01-01/file-0_0-0-0_20260101000000001.parquet"),
+        100L, false, (short) 1, 1024L, 0L);
+    List<StoragePathInfo> allFiles = Collections.singletonList(file);
+
+    Method generateMethod = BaseHoodieTableFileIndex.class.getDeclaredMethod(
+        "generatePartitionFileSlicesPostROTablePathFilter", List.class, 
List.class);
+    generateMethod.setAccessible(true);
+    @SuppressWarnings("unchecked")
+    Map<PartitionPath, List<FileSlice>> result =
+        (Map<PartitionPath, List<FileSlice>>) generateMethod.invoke(fileIndex, 
partitions, allFiles);
+
+    assertNotNull(result, "Result map must not be null");
+    assertEquals(3, result.size(),
+        "Result map must contain an entry for every input partition, including 
empty ones");
+    assertTrue(result.containsKey(partitionWithFiles));
+    assertTrue(result.containsKey(emptyPartition),
+        "Empty partition must appear in the result so getInputFileSlices does 
not NPE");
+    assertTrue(result.containsKey(anotherEmpty),
+        "Empty partition must appear in the result so getInputFileSlices does 
not NPE");
+    assertEquals(1, result.get(partitionWithFiles).size(),
+        "Partition with files should retain its file slice");
+    assertTrue(result.get(emptyPartition).isEmpty(),
+        "Empty partition's file slice list must be present and empty (not 
null, not missing)");
+    assertTrue(result.get(anotherEmpty).isEmpty(),
+        "Empty partition's file slice list must be present and empty (not 
null, not missing)");
+  }
 }
\ No newline at end of file

Reply via email to