This is an automated email from the ASF dual-hosted git repository.
nsivabalan pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hudi.git
The following commit(s) were added to refs/heads/master by this push:
new 652d95253648 fix: RLI bootstrap fails due to NPE with cleaner table
service (#18836)
652d95253648 is described below
commit 652d95253648f1d7b5437e803a405cfa9d1f6f86
Author: Lokesh Jain <[email protected]>
AuthorDate: Tue May 26 11:27:06 2026 +0530
fix: RLI bootstrap fails due to NPE with cleaner table service (#18836)
Co-authored-by: Lokesh Jain <[email protected]>
---
.../metadata/HoodieBackedTableMetadataWriter.java | 24 +++++++++++-
.../TestHoodieBackedTableMetadataWriter.java | 45 ++++++++++++++++++++++
2 files changed, 68 insertions(+), 1 deletion(-)
diff --git
a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadataWriter.java
b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadataWriter.java
index 5d09762909be..7035c40c9b42 100644
---
a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadataWriter.java
+++
b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadataWriter.java
@@ -58,6 +58,7 @@ import org.apache.hudi.common.schema.HoodieSchema;
import org.apache.hudi.common.schema.HoodieSchemaCache;
import org.apache.hudi.common.schema.HoodieSchemaUtils;
import org.apache.hudi.common.table.HoodieTableMetaClient;
+import org.apache.hudi.common.table.TableSchemaResolver;
import org.apache.hudi.common.table.log.HoodieLogFormat;
import org.apache.hudi.common.table.log.block.HoodieDeleteBlock;
import
org.apache.hudi.common.table.log.block.HoodieLogBlock.HeaderMetadataType;
@@ -934,6 +935,27 @@ public abstract class HoodieBackedTableMetadataWriter<I,
O> implements HoodieTab
.sum();
}
+  /**
+   * Resolves the data schema (with metadata fields added) for record index bootstrap. A null or blank write
+   * schema in the config (e.g. table-service operations such as clean) triggers a fallback to {@link TableSchemaResolver};
+   * a failure of that fallback is rethrown as {@link HoodieException} naming the table base path.
+   */
+  static HoodieSchema resolveDataSchemaForRLIBootstrap(HoodieTableMetaClient metaClient, HoodieWriteConfig dataWriteConfig) {
+    String writeSchemaStr = dataWriteConfig.getWriteSchema();
+    HoodieSchema rawSchema;
+    if (writeSchemaStr != null && !writeSchemaStr.trim().isEmpty()) {
+      rawSchema = HoodieSchema.parse(writeSchemaStr);
+    } else {
+      try {
+        rawSchema = new TableSchemaResolver(metaClient).getTableSchema(false);
+      } catch (Exception e) {
+        throw new HoodieException(
+            String.format("Could not resolve schema for table %s for record index bootstrap", metaClient.getBasePath()), e);
+      }
+    }
+    return HoodieSchemaCache.intern(HoodieSchemaUtils.addMetadataFields(rawSchema, dataWriteConfig.allowOperationMetadataField()));
+  }
+
/**
* Fetch record locations from FileSlice snapshot.
*
@@ -971,7 +993,7 @@ public abstract class HoodieBackedTableMetadataWriter<I, O>
implements HoodieTab
final FileSlice fileSlice = partitionAndFileSlice.getValue();
final String fileId = fileSlice.getFileId();
HoodieReaderContext<T> readerContext = readerContextFactory.getContext();
- HoodieSchema dataSchema =
HoodieSchemaCache.intern(HoodieSchemaUtils.addMetadataFields(HoodieSchema.parse(dataWriteConfig.getWriteSchema()),
dataWriteConfig.allowOperationMetadataField()));
+ HoodieSchema dataSchema = resolveDataSchemaForRLIBootstrap(metaClient,
dataWriteConfig);
HoodieSchema requestedSchema =
metaClient.getTableConfig().populateMetaFields() ? getRecordKeySchema()
: HoodieSchemaUtils.projectSchema(dataSchema,
Arrays.asList(metaClient.getTableConfig().getRecordKeyFields().orElse(new
String[0])));
Option<InternalSchema> internalSchemaOption =
SerDeHelper.fromJson(dataWriteConfig.getInternalSchema());
diff --git
a/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/metadata/TestHoodieBackedTableMetadataWriter.java
b/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/metadata/TestHoodieBackedTableMetadataWriter.java
index 4065f113d518..7fcc2ce9a5d8 100644
---
a/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/metadata/TestHoodieBackedTableMetadataWriter.java
+++
b/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/metadata/TestHoodieBackedTableMetadataWriter.java
@@ -25,7 +25,9 @@ import org.apache.hudi.common.data.HoodieData;
import org.apache.hudi.common.engine.HoodieEngineContext;
import org.apache.hudi.common.model.HoodieFailedWritesCleaningPolicy;
import org.apache.hudi.common.model.HoodieRecord;
+import org.apache.hudi.common.schema.HoodieSchema;
import org.apache.hudi.common.table.HoodieTableMetaClient;
+import org.apache.hudi.common.table.TableSchemaResolver;
import org.apache.hudi.common.table.timeline.HoodieActiveTimeline;
import org.apache.hudi.common.table.timeline.HoodieInstant;
import org.apache.hudi.common.table.timeline.HoodieTimeline;
@@ -54,6 +56,7 @@ import java.util.stream.Stream;
import static
org.apache.hudi.common.testutils.HoodieTestUtils.INSTANT_GENERATOR;
import static org.junit.jupiter.api.Assertions.assertDoesNotThrow;
import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertSame;
import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.junit.jupiter.api.Assertions.assertTrue;
@@ -64,6 +67,7 @@ import static org.mockito.Mockito.RETURNS_DEEP_STUBS;
import static org.mockito.Mockito.doCallRealMethod;
import static org.mockito.Mockito.doThrow;
import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.mockConstruction;
import static org.mockito.Mockito.mockStatic;
import static org.mockito.Mockito.times;
import static org.mockito.Mockito.verify;
@@ -377,6 +381,47 @@ class TestHoodieBackedTableMetadataWriter {
assertDoesNotThrow(() -> validateRollbackMethod.invoke(writer,
instantToRollback));
}
+ // ---- resolveDataSchemaForRLIBootstrap tests ----
+
+ /** Minimal record schema JSON with a single string field {@code id}; input for the resolveDataSchemaForRLIBootstrap tests. */
+ private static final String SIMPLE_SCHEMA_JSON =
+ "{\"type\":\"record\",\"name\":\"Test\",\"namespace\":\"test\","
+ + "\"fields\":[{\"name\":\"id\",\"type\":\"string\"}]}";
+
+  /**
+   * When the write config carries a schema, that schema (plus Hudi metadata fields) must be returned
+   * and no {@link TableSchemaResolver} may be created.
+   */
+  @Test
+  void resolveDataSchemaForRLIBootstrap_usesConfigSchemaWhenPresent() {
+    HoodieTableMetaClient metaClient = mock(HoodieTableMetaClient.class);
+    HoodieWriteConfig writeConfig = mock(HoodieWriteConfig.class);
+    when(writeConfig.getWriteSchema()).thenReturn(SIMPLE_SCHEMA_JSON);
+    when(writeConfig.allowOperationMetadataField()).thenReturn(false);
+
+    // intercept resolver construction so an accidental fallback is detected instead of hitting the mocked meta client
+    try (org.mockito.MockedConstruction<TableSchemaResolver> mockedResolver =
+             mockConstruction(TableSchemaResolver.class)) {
+
+      HoodieSchema result = HoodieBackedTableMetadataWriter.resolveDataSchemaForRLIBootstrap(metaClient, writeConfig);
+
+      assertNotNull(result);
+      // metadata fields (_hoodie_*) should have been added
+      assertTrue(result.getFields().stream().anyMatch(f -> f.name().startsWith("_hoodie_")));
+      // the field declared in the config schema must survive, proving the config schema was the one used
+      assertTrue(result.getFields().stream().anyMatch(f -> "id".equals(f.name())));
+      // the fallback path must not have been taken
+      assertTrue(mockedResolver.constructed().isEmpty());
+    }
+  }
+
+  /**
+   * When the write config returns a null schema, the schema must come from a {@link TableSchemaResolver}
+   * that was constructed with the meta client passed by the caller.
+   */
+  @Test
+  void resolveDataSchemaForRLIBootstrap_fallsBackToTableSchemaResolverWhenNull() {
+    HoodieTableMetaClient metaClient = mock(HoodieTableMetaClient.class);
+    HoodieWriteConfig writeConfig = mock(HoodieWriteConfig.class);
+    when(writeConfig.getWriteSchema()).thenReturn(null);
+    when(writeConfig.allowOperationMetadataField()).thenReturn(false);
+
+    HoodieSchema tableSchema = HoodieSchema.parse(SIMPLE_SCHEMA_JSON);
+    // single-element holder so the construction callback can hand the constructor argument back to the test body
+    Object[] resolverCtorArg = new Object[1];
+    try (org.mockito.MockedConstruction<TableSchemaResolver> mockedResolver =
+             mockConstruction(TableSchemaResolver.class,
+                 (resolver, ctx) -> {
+                   resolverCtorArg[0] = ctx.arguments().get(0);
+                   when(resolver.getTableSchema(false)).thenReturn(tableSchema);
+                 })) {
+
+      HoodieSchema result = HoodieBackedTableMetadataWriter.resolveDataSchemaForRLIBootstrap(metaClient, writeConfig);
+
+      assertNotNull(result);
+      assertTrue(result.getFields().stream().anyMatch(f -> f.name().startsWith("_hoodie_")));
+      // exactly one TableSchemaResolver was constructed, and it was given the caller's metaClient
+      assertEquals(1, mockedResolver.constructed().size());
+      assertSame(metaClient, resolverCtorArg[0]);
+    }
+  }
+
@SuppressWarnings("deprecation")
private HoodieActiveTimeline createMockTimeline(List<HoodieInstant>
instants) {
ActiveTimelineV2 timeline = new ActiveTimelineV2();