This is an automated email from the ASF dual-hosted git repository.

nsivabalan pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hudi.git


The following commit(s) were added to refs/heads/master by this push:
     new 652d95253648 fix: RLI bootstrap fails due to NPE with cleaner table service (#18836)
652d95253648 is described below

commit 652d95253648f1d7b5437e803a405cfa9d1f6f86
Author: Lokesh Jain <[email protected]>
AuthorDate: Tue May 26 11:27:06 2026 +0530

    fix: RLI bootstrap fails due to NPE with cleaner table service (#18836)
    
    Co-authored-by: Lokesh Jain <[email protected]>
---
 .../metadata/HoodieBackedTableMetadataWriter.java  | 24 +++++++++++-
 .../TestHoodieBackedTableMetadataWriter.java       | 45 ++++++++++++++++++++++
 2 files changed, 68 insertions(+), 1 deletion(-)

diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadataWriter.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadataWriter.java
index 5d09762909be..7035c40c9b42 100644
--- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadataWriter.java
+++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadataWriter.java
@@ -58,6 +58,7 @@ import org.apache.hudi.common.schema.HoodieSchema;
 import org.apache.hudi.common.schema.HoodieSchemaCache;
 import org.apache.hudi.common.schema.HoodieSchemaUtils;
 import org.apache.hudi.common.table.HoodieTableMetaClient;
+import org.apache.hudi.common.table.TableSchemaResolver;
 import org.apache.hudi.common.table.log.HoodieLogFormat;
 import org.apache.hudi.common.table.log.block.HoodieDeleteBlock;
 import org.apache.hudi.common.table.log.block.HoodieLogBlock.HeaderMetadataType;
@@ -934,6 +935,27 @@ public abstract class HoodieBackedTableMetadataWriter<I, O> implements HoodieTab
         .sum();
   }
 
+  /**
+   * Resolves the data schema (with metadata fields added) for use during record index bootstrap.
+   * When the write config does not carry a schema (e.g. table-service operations such as clean),
+   * falls back to resolving the schema from the table's commit history / data files.
+   */
+  static HoodieSchema resolveDataSchemaForRLIBootstrap(HoodieTableMetaClient metaClient, HoodieWriteConfig dataWriteConfig) {
+    String writeSchemaStr = dataWriteConfig.getWriteSchema();
+    HoodieSchema rawSchema;
+    if (writeSchemaStr != null) {
+      rawSchema = HoodieSchema.parse(writeSchemaStr);
+    } else {
+      try {
+        rawSchema = new TableSchemaResolver(metaClient).getTableSchema(false);
+      } catch (Exception e) {
+        throw new HoodieException(
+            String.format("Could not resolve schema for table %s for record index bootstrap", metaClient.getBasePath()), e);
+      }
+    }
+    return HoodieSchemaCache.intern(HoodieSchemaUtils.addMetadataFields(rawSchema, dataWriteConfig.allowOperationMetadataField()));
+  }
+
   /**
    * Fetch record locations from FileSlice snapshot.
    *
@@ -971,7 +993,7 @@ public abstract class HoodieBackedTableMetadataWriter<I, O> implements HoodieTab
       final FileSlice fileSlice = partitionAndFileSlice.getValue();
       final String fileId = fileSlice.getFileId();
       HoodieReaderContext<T> readerContext = readerContextFactory.getContext();
-      HoodieSchema dataSchema = HoodieSchemaCache.intern(HoodieSchemaUtils.addMetadataFields(HoodieSchema.parse(dataWriteConfig.getWriteSchema()), dataWriteConfig.allowOperationMetadataField()));
+      HoodieSchema dataSchema = resolveDataSchemaForRLIBootstrap(metaClient, dataWriteConfig);
       HoodieSchema requestedSchema = metaClient.getTableConfig().populateMetaFields() ? getRecordKeySchema()
           : HoodieSchemaUtils.projectSchema(dataSchema, Arrays.asList(metaClient.getTableConfig().getRecordKeyFields().orElse(new String[0])));
       Option<InternalSchema> internalSchemaOption = SerDeHelper.fromJson(dataWriteConfig.getInternalSchema());
diff --git a/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/metadata/TestHoodieBackedTableMetadataWriter.java b/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/metadata/TestHoodieBackedTableMetadataWriter.java
index 4065f113d518..7fcc2ce9a5d8 100644
--- a/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/metadata/TestHoodieBackedTableMetadataWriter.java
+++ b/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/metadata/TestHoodieBackedTableMetadataWriter.java
@@ -25,7 +25,9 @@ import org.apache.hudi.common.data.HoodieData;
 import org.apache.hudi.common.engine.HoodieEngineContext;
 import org.apache.hudi.common.model.HoodieFailedWritesCleaningPolicy;
 import org.apache.hudi.common.model.HoodieRecord;
+import org.apache.hudi.common.schema.HoodieSchema;
 import org.apache.hudi.common.table.HoodieTableMetaClient;
+import org.apache.hudi.common.table.TableSchemaResolver;
 import org.apache.hudi.common.table.timeline.HoodieActiveTimeline;
 import org.apache.hudi.common.table.timeline.HoodieInstant;
 import org.apache.hudi.common.table.timeline.HoodieTimeline;
@@ -54,6 +56,7 @@ import java.util.stream.Stream;
 import static org.apache.hudi.common.testutils.HoodieTestUtils.INSTANT_GENERATOR;
 import static org.junit.jupiter.api.Assertions.assertDoesNotThrow;
 import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertNotNull;
 import static org.junit.jupiter.api.Assertions.assertSame;
 import static org.junit.jupiter.api.Assertions.assertThrows;
 import static org.junit.jupiter.api.Assertions.assertTrue;
@@ -64,6 +67,7 @@ import static org.mockito.Mockito.RETURNS_DEEP_STUBS;
 import static org.mockito.Mockito.doCallRealMethod;
 import static org.mockito.Mockito.doThrow;
 import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.mockConstruction;
 import static org.mockito.Mockito.mockStatic;
 import static org.mockito.Mockito.times;
 import static org.mockito.Mockito.verify;
@@ -377,6 +381,47 @@ class TestHoodieBackedTableMetadataWriter {
     assertDoesNotThrow(() -> validateRollbackMethod.invoke(writer, instantToRollback));
   }
 
+  // ---- resolveDataSchemaForRLIBootstrap tests ----
+
+  private static final String SIMPLE_SCHEMA_JSON =
+      "{\"type\":\"record\",\"name\":\"Test\",\"namespace\":\"test\","
+          + "\"fields\":[{\"name\":\"id\",\"type\":\"string\"}]}";
+
+  @Test
+  void resolveDataSchemaForRLIBootstrap_usesConfigSchemaWhenPresent() {
+    HoodieTableMetaClient metaClient = mock(HoodieTableMetaClient.class);
+    HoodieWriteConfig writeConfig = mock(HoodieWriteConfig.class);
+    when(writeConfig.getWriteSchema()).thenReturn(SIMPLE_SCHEMA_JSON);
+    when(writeConfig.allowOperationMetadataField()).thenReturn(false);
+
+    HoodieSchema result = HoodieBackedTableMetadataWriter.resolveDataSchemaForRLIBootstrap(metaClient, writeConfig);
+
+    assertNotNull(result);
+    // metadata fields (_hoodie_*) should have been prepended
+    assertTrue(result.getFields().stream().anyMatch(f -> f.name().startsWith("_hoodie_")));
+  }
+
+  @Test
+  void resolveDataSchemaForRLIBootstrap_fallsBackToTableSchemaResolverWhenNull() {
+    HoodieTableMetaClient metaClient = mock(HoodieTableMetaClient.class);
+    HoodieWriteConfig writeConfig = mock(HoodieWriteConfig.class);
+    when(writeConfig.getWriteSchema()).thenReturn(null);
+    when(writeConfig.allowOperationMetadataField()).thenReturn(false);
+
+    HoodieSchema tableSchema = HoodieSchema.parse(SIMPLE_SCHEMA_JSON);
+    try (org.mockito.MockedConstruction<TableSchemaResolver> mockedResolver =
+        mockConstruction(TableSchemaResolver.class,
+            (resolver, ctx) -> when(resolver.getTableSchema(false)).thenReturn(tableSchema))) {
+
+      HoodieSchema result = HoodieBackedTableMetadataWriter.resolveDataSchemaForRLIBootstrap(metaClient, writeConfig);
+
+      assertNotNull(result);
+      assertTrue(result.getFields().stream().anyMatch(f -> f.name().startsWith("_hoodie_")));
+      // exactly one TableSchemaResolver was constructed (with metaClient)
+      assertEquals(1, mockedResolver.constructed().size());
+    }
+  }
+
   @SuppressWarnings("deprecation")
   private HoodieActiveTimeline createMockTimeline(List<HoodieInstant> instants) {
     ActiveTimelineV2 timeline = new ActiveTimelineV2();

Reply via email to