This is an automated email from the ASF dual-hosted git repository.
nsivabalan pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hudi.git
The following commit(s) were added to refs/heads/master by this push:
new cde86ffd64a4 feat: Add HUDI version and engine properties to commit metadata (#18183)
cde86ffd64a4 is described below
commit cde86ffd64a4aadb55ec049940713d199d92eb03
Author: Prashant Wason <[email protected]>
AuthorDate: Fri Jun 12 16:14:30 2026 -0700
feat: Add HUDI version and engine properties to commit metadata (#18183)
Add hudi.version and engine (SPARK/FLINK/JAVA) to every commit's extraMetadata
so post-hoc debugging can answer "which writer wrote this commit?".
Engine-specific properties (spark.application.id, java.version, os.name, etc.)
and a configurable allowlist of HoodieWriteConfig values are also embeddable
under two new configs (only the first is opt-in; the second ships with a
default allowlist):
- hoodie.commit.metadata.engine.properties.embed.enable (default false)
- hoodie.write.config.keys.to.serialize.to.commit.metadata
(default: small allowlist of operationally useful keys)
Enrichment is centralized in CommitMetadataProperties.enrich() and invoked
once via BaseHoodieClient.updateExtraMetadata() from the write-client commit
path and the table-service schedule path (after early-return checks).
Also enrich the hoodie.properties file comment line with hostname and
hudi version. Fires only on properties (re)writes — table creation, schema
updates, upgrades — not on every commit. Hostname is resolved once per JVM
with an "unknown" fallback for restricted environments. HoodieVersion.get()
memoizes the manifest read.
No storage-format changes; no public-API breakage
(HoodieEngineContext.getEngineProperties() ships with a default empty-map
implementation).
Co-Authored-By: Claude Opus 4.7 <[email protected]>
---------
Co-authored-by: Claude Opus 4.6 <[email protected]>
Co-authored-by: sivabalan <[email protected]>
---
.../org/apache/hudi/client/BaseHoodieClient.java | 4 +
.../hudi/client/BaseHoodieTableServiceClient.java | 2 +
.../apache/hudi/client/BaseHoodieWriteClient.java | 1 +
.../hudi/client/CommitMetadataProperties.java | 145 ++++++++++++++++++
.../hudi/client/TestCommitMetadataProperties.java | 170 +++++++++++++++++++++
.../client/common/HoodieJavaEngineContext.java | 25 +++
.../client/common/HoodieSparkEngineContext.java | 14 ++
.../main/java/org/apache/hudi/HoodieVersion.java | 43 ++++--
.../hudi/common/engine/HoodieEngineContext.java | 16 ++
.../hudi/common/table/HoodieTableConfig.java | 34 ++++-
.../streamer/TestHoodieIncrSourceE2E.java | 6 +-
.../TestHoodieIncrSourceE2EAutoUpgrade.java | 6 +-
12 files changed, 452 insertions(+), 14 deletions(-)
diff --git
a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/BaseHoodieClient.java
b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/BaseHoodieClient.java
index de110a905176..30aa9f770b5f 100644
---
a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/BaseHoodieClient.java
+++
b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/BaseHoodieClient.java
@@ -458,4 +458,8 @@ public abstract class BaseHoodieClient implements
Serializable, AutoCloseable {
return foundRollingMetadata;
}
+
+ protected Option<Map<String, String>> updateExtraMetadata(Option<Map<String,
String>> extraMetadata) {
+ return CommitMetadataProperties.enrich(extraMetadata, config, context);
+ }
}
diff --git
a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/BaseHoodieTableServiceClient.java
b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/BaseHoodieTableServiceClient.java
index eb418627d8aa..d29fc0bbd32d 100644
---
a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/BaseHoodieTableServiceClient.java
+++
b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/BaseHoodieTableServiceClient.java
@@ -725,6 +725,8 @@ public abstract class BaseHoodieTableServiceClient<I, T, O>
extends BaseHoodieCl
// so it is handled differently to avoid locking for planning.
return scheduleCleaning(createTable(config, storageConf),
providedInstantTime);
}
+ // Only enrich metadata after early-return checks, when we're actually
going to use it
+ extraMetadata = updateExtraMetadata(extraMetadata);
Option<HoodieInstant> lastCompletedInstant =
lastCompletedTxnAndMetadata.isPresent()
? Option.of(lastCompletedTxnAndMetadata.get().getLeft())
: Option.empty();
diff --git
a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/BaseHoodieWriteClient.java
b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/BaseHoodieWriteClient.java
index 7a7ec2caf949..14789a4c667d 100644
---
a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/BaseHoodieWriteClient.java
+++
b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/BaseHoodieWriteClient.java
@@ -254,6 +254,7 @@ public abstract class BaseHoodieWriteClient<T, I, K, O>
extends BaseHoodieClient
if (!config.allowEmptyCommit() &&
tableWriteStats.isEmptyDataTableWriteStats()) {
return true;
}
+ extraMetadata = updateExtraMetadata(extraMetadata);
log.info("Committing {} action {}", instantTime, commitActionType);
// Create a Hoodie table which encapsulated the commits and files visible
HoodieTable table = hoodieTableOpt.orElse(createTable(config));
diff --git
a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/CommitMetadataProperties.java
b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/CommitMetadataProperties.java
new file mode 100644
index 000000000000..3a7b79812c8e
--- /dev/null
+++
b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/CommitMetadataProperties.java
@@ -0,0 +1,145 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hudi.client;
+
+import org.apache.hudi.HoodieVersion;
+import org.apache.hudi.common.config.ConfigProperty;
+import org.apache.hudi.common.engine.HoodieEngineContext;
+import org.apache.hudi.common.util.Option;
+import org.apache.hudi.common.util.StringUtils;
+import org.apache.hudi.config.HoodieWriteConfig;
+
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.stream.Collectors;
+
+/**
+ * Enriches the {@code extraMetadata} map persisted with every commit, with
version, engine, and
+ * (optionally) engine-specific properties and a configurable subset of {@link
HoodieWriteConfig}
+ * values.
+ *
+ * <p>Key namespacing:
+ * <ul>
+ * <li>{@code hudi.version} — writer version. Always emitted.</li>
+ * <li>{@code engine} — engine type (SPARK/FLINK/JAVA). Always emitted.</li>
+ * <li>Engine-supplied keys (Spark: {@code spark.*}, Java: {@code
java.*}/{@code os.*}, etc.)
+ * — gated by {@link #EMBED_ENGINE_PROPERTIES_IN_COMMIT_METADATA}.</li>
+ * <li>{@code config.<key>} — values of {@link HoodieWriteConfig} entries
whose keys are listed
+ * in {@link #WRITE_CONFIG_KEYS_TO_SERIALIZE_TO_COMMIT_METADATA}.</li>
+ * </ul>
+ */
+public class CommitMetadataProperties {
+
+ static final String HUDI_VERSION_KEY = "hudi.version";
+ static final String ENGINE_KEY = "engine";
+ static final String CONFIG_KEY_PREFIX = "config.";
+
+ /**
+ * Default allowlist of write-config keys serialized into commit metadata.
These are values that
+ * change across jobs/runs but aren't already captured in {@code
hoodie.properties}, so they're
+ * useful for after-the-fact debugging. Intentionally excludes immutable
table identity
+ * (already in {@code hoodie.properties}) and per-record/sensitive values.
+ */
+ private static final String DEFAULT_WRITE_CONFIG_KEYS = String.join(",",
+ Arrays.asList(
+ "hoodie.datasource.write.operation",
+ "hoodie.insert.shuffle.parallelism",
+ "hoodie.upsert.shuffle.parallelism",
+ "hoodie.bulkinsert.shuffle.parallelism",
+ "hoodie.delete.shuffle.parallelism",
+ "hoodie.write.concurrency.mode",
+ "hoodie.metadata.enable"));
+
+ /**
+ * When enabled, engine-specific properties supplied by
+ * {@link HoodieEngineContext#getEngineProperties()} are embedded into
commit metadata for
+ * debugging (e.g. {@code spark.application.id}, {@code spark.user}). {@code
hudi.version} and
+ * {@code engine} are always embedded regardless of this flag.
+ *
+ * <p>Default is {@code false} since these add per-commit growth to the
timeline. Long-running
+ * ingestion workloads writing many commits should leave this off unless
debugging.
+ */
+ public static final ConfigProperty<Boolean>
EMBED_ENGINE_PROPERTIES_IN_COMMIT_METADATA =
+ ConfigProperty
+ .key("hoodie.commit.metadata.engine.properties.embed.enable")
+ .defaultValue(false)
+ .markAdvanced()
+ .sinceVersion("1.3.0")
+ .withDocumentation("When enabled, engine-specific properties (e.g.
spark.application.id, "
+ + "spark.user, java.version) are embedded into commit metadata
for debugging. "
+ + "hudi.version and engine name are always embedded regardless
of this flag.");
+
+ /**
+ * Comma-separated list of {@link HoodieWriteConfig} keys whose values
should be serialized into
+ * commit metadata under the {@code config.<key>} prefix. Use with care:
every key listed here
+ * adds an entry to every commit, which lives forever in the active and
archived timeline.
+ *
+ * <p>Empty value disables config-key serialization entirely (only {@code
hudi.version} and
+ * {@code engine} are emitted).
+ */
+ public static final ConfigProperty<String>
WRITE_CONFIG_KEYS_TO_SERIALIZE_TO_COMMIT_METADATA =
+ ConfigProperty
+ .key("hoodie.write.config.keys.to.serialize.to.commit.metadata")
+ .defaultValue(DEFAULT_WRITE_CONFIG_KEYS)
+ .markAdvanced()
+ .sinceVersion("1.3.0")
+ .withDocumentation("Comma-separated list of write-config keys whose
values are "
+ + "serialized into the extraMetadata map of every commit (under
the 'config.' "
+ + "prefix). Set to empty to skip config-key serialization
entirely. Avoid adding "
+ + "keys whose values may contain credentials or large payloads,
since commit "
+ + "metadata is persisted in the timeline.");
+
+ public static Option<Map<String, String>> enrich(Option<Map<String, String>>
extraMetadata,
+ HoodieWriteConfig config,
+ HoodieEngineContext
context) {
+ Map<String, String> newMetadata = new HashMap<>();
+ if (extraMetadata.isPresent()) {
+ newMetadata.putAll(extraMetadata.get());
+ }
+
+ newMetadata.put(HUDI_VERSION_KEY, HoodieVersion.get());
+ newMetadata.put(ENGINE_KEY, config.getEngineType().name());
+
+ if (config.getBoolean(EMBED_ENGINE_PROPERTIES_IN_COMMIT_METADATA)) {
+ newMetadata.putAll(context.getEngineProperties());
+ }
+
+ for (String key :
parseConfigKeys(config.getString(WRITE_CONFIG_KEYS_TO_SERIALIZE_TO_COMMIT_METADATA)))
{
+ String value = config.getString(key);
+ if (!StringUtils.isNullOrEmpty(value)) {
+ newMetadata.put(CONFIG_KEY_PREFIX + key, value);
+ }
+ }
+
+ return Option.of(newMetadata);
+ }
+
+ private static List<String> parseConfigKeys(String csv) {
+ if (StringUtils.isNullOrEmpty(csv)) {
+ return Collections.emptyList();
+ }
+ return Arrays.stream(csv.split(","))
+ .map(String::trim)
+ .filter(s -> !s.isEmpty())
+ .collect(Collectors.toList());
+ }
+}
diff --git
a/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/client/TestCommitMetadataProperties.java
b/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/client/TestCommitMetadataProperties.java
new file mode 100644
index 000000000000..acd83424ff3e
--- /dev/null
+++
b/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/client/TestCommitMetadataProperties.java
@@ -0,0 +1,170 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hudi.client;
+
+import org.apache.hudi.common.engine.EngineType;
+import org.apache.hudi.common.engine.HoodieEngineContext;
+import org.apache.hudi.common.util.Option;
+import org.apache.hudi.config.HoodieWriteConfig;
+
+import org.junit.jupiter.api.Test;
+
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Properties;
+
+import static
org.apache.hudi.client.CommitMetadataProperties.CONFIG_KEY_PREFIX;
+import static
org.apache.hudi.client.CommitMetadataProperties.EMBED_ENGINE_PROPERTIES_IN_COMMIT_METADATA;
+import static org.apache.hudi.client.CommitMetadataProperties.ENGINE_KEY;
+import static org.apache.hudi.client.CommitMetadataProperties.HUDI_VERSION_KEY;
+import static
org.apache.hudi.client.CommitMetadataProperties.WRITE_CONFIG_KEYS_TO_SERIALIZE_TO_COMMIT_METADATA;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertNotNull;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.when;
+
+class TestCommitMetadataProperties {
+
+ /** Always-emitted keys (hudi.version, engine) are present even when input
is empty. */
+ @Test
+ void enrich_emptyInput_emitsVersionAndEngine() {
+ HoodieWriteConfig config = newConfig(new Properties());
+ HoodieEngineContext context = newContext(Collections.emptyMap());
+
+ Map<String, String> result =
CommitMetadataProperties.enrich(Option.empty(), config, context).get();
+
+ assertNotNull(result.get(HUDI_VERSION_KEY));
+ assertEquals(EngineType.SPARK.name(), result.get(ENGINE_KEY));
+ }
+
+ /** Passing an immutable map must not throw — yihua's defensive-copy fix. */
+ @Test
+ void enrich_immutableInputMap_doesNotThrow() {
+ HoodieWriteConfig config = newConfig(new Properties());
+ HoodieEngineContext context = newContext(Collections.emptyMap());
+ Map<String, String> input = Collections.unmodifiableMap(
+ Collections.singletonMap("caller.key", "caller.value"));
+
+ Map<String, String> result =
CommitMetadataProperties.enrich(Option.of(input), config, context).get();
+
+ assertEquals("caller.value", result.get("caller.key"));
+ assertTrue(input.equals(Collections.singletonMap("caller.key",
"caller.value")),
+ "Input map must not be mutated");
+ }
+
+ /** Default (opt-in flag = false) suppresses engine-supplied keys. */
+ @Test
+ void enrich_engineEmbedFlagOff_omitsEngineSuppliedKeys() {
+ HoodieWriteConfig config = newConfig(new Properties());
+ Map<String, String> engineProps = new HashMap<>();
+ engineProps.put("spark.application.id", "app-123");
+ HoodieEngineContext context = newContext(engineProps);
+
+ Map<String, String> result =
CommitMetadataProperties.enrich(Option.empty(), config, context).get();
+
+ assertFalse(result.containsKey("spark.application.id"),
+ "Engine-supplied keys must be omitted when embed flag is off");
+ assertNotNull(result.get(HUDI_VERSION_KEY));
+ assertEquals(EngineType.SPARK.name(), result.get(ENGINE_KEY));
+ }
+
+ /** Opt-in flag = true emits engine-supplied keys. */
+ @Test
+ void enrich_engineEmbedFlagOn_emitsEngineSuppliedKeys() {
+ Properties props = new Properties();
+ props.put(EMBED_ENGINE_PROPERTIES_IN_COMMIT_METADATA.key(), "true");
+ HoodieWriteConfig config = newConfig(props);
+ Map<String, String> engineProps = new HashMap<>();
+ engineProps.put("spark.application.id", "app-123");
+ engineProps.put("spark.user", "sivabalan");
+ HoodieEngineContext context = newContext(engineProps);
+
+ Map<String, String> result =
CommitMetadataProperties.enrich(Option.empty(), config, context).get();
+
+ assertEquals("app-123", result.get("spark.application.id"));
+ assertEquals("sivabalan", result.get("spark.user"));
+ }
+
+ /** Config-key allowlist serializes present values; skips truly absent keys
(no default). */
+ @Test
+ void enrich_writeConfigKeyAllowlist_emitsPresentNonEmptyValues() {
+ Properties props = new Properties();
+ props.put(WRITE_CONFIG_KEYS_TO_SERIALIZE_TO_COMMIT_METADATA.key(),
+ "hoodie.metadata.enable,hoodie.does.not.exist");
+ props.put("hoodie.metadata.enable", "true");
+ HoodieWriteConfig config = newConfig(props);
+ HoodieEngineContext context = newContext(Collections.emptyMap());
+
+ Map<String, String> result =
CommitMetadataProperties.enrich(Option.empty(), config, context).get();
+
+ assertEquals("true", result.get(CONFIG_KEY_PREFIX +
"hoodie.metadata.enable"));
+ assertFalse(result.containsKey(CONFIG_KEY_PREFIX +
"hoodie.does.not.exist"),
+ "Keys absent from the config must not be serialized");
+ }
+
+ /** Empty allowlist disables config-key serialization entirely. */
+ @Test
+ void enrich_emptyConfigKeyList_emitsNoConfigKeys() {
+ Properties props = new Properties();
+ props.put(WRITE_CONFIG_KEYS_TO_SERIALIZE_TO_COMMIT_METADATA.key(), "");
+ props.put("hoodie.metadata.enable", "true");
+ HoodieWriteConfig config = newConfig(props);
+ HoodieEngineContext context = newContext(Collections.emptyMap());
+
+ Map<String, String> result =
CommitMetadataProperties.enrich(Option.empty(), config, context).get();
+
+ long configKeyCount = result.keySet().stream().filter(k ->
k.startsWith(CONFIG_KEY_PREFIX)).count();
+ assertEquals(0L, configKeyCount, "No config.* keys when allowlist is
empty");
+ // hudi.version and engine still always emitted
+ assertNotNull(result.get(HUDI_VERSION_KEY));
+ assertEquals(EngineType.SPARK.name(), result.get(ENGINE_KEY));
+ }
+
+ /** Caller-provided extra metadata is preserved alongside enrichment. */
+ @Test
+ void enrich_preservesCallerProvidedKeys() {
+ HoodieWriteConfig config = newConfig(new Properties());
+ HoodieEngineContext context = newContext(Collections.emptyMap());
+ Map<String, String> input = new HashMap<>();
+ input.put("caller.key1", "caller.value1");
+ input.put("caller.key2", "caller.value2");
+
+ Map<String, String> result =
CommitMetadataProperties.enrich(Option.of(input), config, context).get();
+
+ assertEquals("caller.value1", result.get("caller.key1"));
+ assertEquals("caller.value2", result.get("caller.key2"));
+ assertNotNull(result.get(HUDI_VERSION_KEY));
+ }
+
+ private static HoodieWriteConfig newConfig(Properties overrides) {
+ Properties props = new Properties();
+ props.put(HoodieWriteConfig.BASE_PATH.key(),
"/tmp/test-commit-metadata-properties");
+ props.putAll(overrides);
+ return HoodieWriteConfig.newBuilder().withProperties(props).build();
+ }
+
+ private static HoodieEngineContext newContext(Map<String, String>
engineProperties) {
+ HoodieEngineContext context = mock(HoodieEngineContext.class);
+ when(context.getEngineProperties()).thenReturn(engineProperties);
+ return context;
+ }
+}
diff --git
a/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/client/common/HoodieJavaEngineContext.java
b/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/client/common/HoodieJavaEngineContext.java
index 8da9e2aca0e1..a24d6fb4c25d 100644
---
a/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/client/common/HoodieJavaEngineContext.java
+++
b/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/client/common/HoodieJavaEngineContext.java
@@ -49,6 +49,7 @@ import org.apache.avro.generic.IndexedRecord;
import java.io.IOException;
import java.util.Collections;
+import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
@@ -192,6 +193,30 @@ public class HoodieJavaEngineContext extends
HoodieEngineContext {
// no operation for now
}
+ // Allowlist of safe system properties to include in commit metadata. Avoid
wildcarding system
+ // properties since callers may pass credentials via -D flags (e.g.
-Ddb.password=...).
+ private static final String[] SAFE_SYSTEM_PROPERTIES = {
+ "java.version",
+ "java.vendor",
+ "java.vm.name",
+ "java.vm.version",
+ "os.name",
+ "os.version",
+ "os.arch"
+ };
+
+ @Override
+ public Map<String, String> getEngineProperties() {
+ Map<String, String> info = new HashMap<>();
+ for (String property : SAFE_SYSTEM_PROPERTIES) {
+ String value = System.getProperty(property);
+ if (value != null) {
+ info.put(property, value);
+ }
+ }
+ return info;
+ }
+
@Override
public <I, O> O aggregate(HoodieData<I> data, O zeroValue,
Functions.Function2<O, I, O> seqOp, Functions.Function2<O, O, O> combOp) {
return data.collectAsList().stream().reduce(zeroValue, seqOp::apply,
combOp::apply);
diff --git
a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/common/HoodieSparkEngineContext.java
b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/common/HoodieSparkEngineContext.java
index 3c5707d3358f..091961bb4ca7 100644
---
a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/common/HoodieSparkEngineContext.java
+++
b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/common/HoodieSparkEngineContext.java
@@ -408,4 +408,18 @@ public class HoodieSparkEngineContext extends
HoodieEngineContext {
new ConditionalRangePartitioner.CompositeKeyComparator<>())
.mapToPair(e -> e._1));
}
+
+ @Override
+ public Map<String, String> getEngineProperties() {
+ Map<String, String> info = new HashMap<>();
+ info.put("spark.application.id", javaSparkContext.sc().applicationId());
+ info.put("spark.user", javaSparkContext.sparkUser());
+ info.put("spark.master", javaSparkContext.master());
+ info.put("spark.application", javaSparkContext.appName());
+ info.put("spark.version", javaSparkContext.version());
+ info.put("spark.defaultParallelism",
String.valueOf(javaSparkContext.defaultParallelism()));
+ info.put("spark.defaultMinPartitions",
String.valueOf(javaSparkContext.defaultMinPartitions()));
+ info.put("spark.executor.instances",
javaSparkContext.getConf().get("spark.executor.instances", ""));
+ return info;
+ }
}
diff --git a/hudi-common/src/main/java/org/apache/hudi/HoodieVersion.java
b/hudi-common/src/main/java/org/apache/hudi/HoodieVersion.java
index dd88836468c9..23cee1015841 100644
--- a/hudi-common/src/main/java/org/apache/hudi/HoodieVersion.java
+++ b/hudi-common/src/main/java/org/apache/hudi/HoodieVersion.java
@@ -33,23 +33,46 @@ public final class HoodieVersion {
public static final String HOODIE_WRITER_VERSION = "hudi_writer_version";
+ // Cached result of reading version from the manifest. Null means "not
loaded yet". An empty
+ // string means "manifest absent or unreadable" — fall back to
HOODIE_DEFAULT_VERSION so tests
+ // that swap the default via setVersionOverride continue to work.
+ private static volatile String cachedManifestVersion = null;
+
/**
* Returns the complete version of HUDI code
* Example: 0.12.2 or 0.12.3-snapshot
*/
public static String get() {
- String hudiPropertiesFilePath =
"META-INF/maven/org.apache.hudi/hudi-common/pom.properties";
- try (InputStream inputStream =
HoodieVersion.class.getClassLoader().getResourceAsStream(hudiPropertiesFilePath))
{
- Properties properties = new Properties();
- if (inputStream != null) {
- properties.load(inputStream);
- // Access properties
- return properties.getProperty("version");
+ String fromManifest = loadManifestVersion();
+ return fromManifest.isEmpty() ? HOODIE_DEFAULT_VERSION : fromManifest;
+ }
+
+ private static String loadManifestVersion() {
+ String local = cachedManifestVersion;
+ if (local != null) {
+ return local;
+ }
+ synchronized (HoodieVersion.class) {
+ if (cachedManifestVersion != null) {
+ return cachedManifestVersion;
+ }
+ String hudiPropertiesFilePath =
"META-INF/maven/org.apache.hudi/hudi-common/pom.properties";
+ String resolved = "";
+ try (InputStream inputStream =
HoodieVersion.class.getClassLoader().getResourceAsStream(hudiPropertiesFilePath))
{
+ if (inputStream != null) {
+ Properties properties = new Properties();
+ properties.load(inputStream);
+ String version = properties.getProperty("version");
+ if (version != null) {
+ resolved = version;
+ }
+ }
+ } catch (Exception ignored) {
+ // Ignoring the exception as there is a fallback to default version
}
- } catch (Exception ignored) {
- // Ignoring the exception as there is as fallback to default version
+ cachedManifestVersion = resolved;
+ return resolved;
}
- return HOODIE_DEFAULT_VERSION;
}
/**
diff --git
a/hudi-common/src/main/java/org/apache/hudi/common/engine/HoodieEngineContext.java
b/hudi-common/src/main/java/org/apache/hudi/common/engine/HoodieEngineContext.java
index f56921797283..43bc9cc78b35 100644
---
a/hudi-common/src/main/java/org/apache/hudi/common/engine/HoodieEngineContext.java
+++
b/hudi-common/src/main/java/org/apache/hudi/common/engine/HoodieEngineContext.java
@@ -45,6 +45,7 @@ import lombok.AllArgsConstructor;
import lombok.Getter;
import java.io.IOException;
+import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
@@ -117,6 +118,21 @@ public abstract class HoodieEngineContext {
public abstract void cancelAllJobs();
+ /**
+ * Returns engine-specific properties to be included in commit metadata for
debugging.
+ * <p>Contract:
+ * <ul>
+ * <li>Implementations must only return safe, non-sensitive values (no
credentials, no PII).</li>
+ * <li>This is invoked on the driver, on every commit. It must be cheap
and free of side effects.</li>
+ * <li>Must not reach into checkpoint / runtime state (e.g. for streaming
engines like Flink,
+ * per-checkpoint metadata is set up via the coordinator, not
here).</li>
+ * </ul>
+ * Default returns an empty map so external subclasses are not forced to
implement this.
+ */
+ public Map<String, String> getEngineProperties() {
+ return Collections.emptyMap();
+ }
+
/**
* Returns the application id of the engine (e.g. Spark application id).
* Used to populate lock metadata so lock holders can be identified.
diff --git
a/hudi-common/src/main/java/org/apache/hudi/common/table/HoodieTableConfig.java
b/hudi-common/src/main/java/org/apache/hudi/common/table/HoodieTableConfig.java
index 73703d70d85a..aef84c9f8adc 100644
---
a/hudi-common/src/main/java/org/apache/hudi/common/table/HoodieTableConfig.java
+++
b/hudi-common/src/main/java/org/apache/hudi/common/table/HoodieTableConfig.java
@@ -18,6 +18,7 @@
package org.apache.hudi.common.table;
+import org.apache.hudi.HoodieVersion;
import org.apache.hudi.common.HoodieTableFormat;
import org.apache.hudi.common.NativeTableFormat;
import org.apache.hudi.common.bootstrap.index.hfile.HFileBootstrapIndex;
@@ -51,6 +52,7 @@ import
org.apache.hudi.common.table.timeline.versioning.TimelineLayoutVersion;
import org.apache.hudi.common.util.BinaryUtil;
import org.apache.hudi.common.util.ConfigUtils;
import org.apache.hudi.common.util.HoodieTableConfigUtils;
+import org.apache.hudi.common.util.NetworkUtils;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.common.util.ReflectionUtils;
import org.apache.hudi.common.util.StringUtils;
@@ -126,6 +128,9 @@ import static
org.apache.hudi.common.util.ValidationUtils.checkArgument;
+ " initializing a path as hoodie base path and never changes during
the lifetime of a hoodie table.")
public class HoodieTableConfig extends HoodieConfig {
+ // Cached hostname to avoid repeated synchronized network calls
+ private static volatile String cachedHostname = null;
+
public static final String HOODIE_PROPERTIES_FILE = "hoodie.properties";
public static final String HOODIE_PROPERTIES_FILE_BACKUP =
"hoodie.properties.backup";
public static final String HOODIE_WRITE_TABLE_NAME_KEY =
"hoodie.datasource.write.table.name";
@@ -500,10 +505,10 @@ public class HoodieTableConfig extends HoodieConfig {
final String checksum;
if (isValidChecksum(props)) {
checksum = props.getProperty(TABLE_CHECKSUM.key());
- props.store(outputStream, "Updated at " + Instant.now());
+ props.store(outputStream, getFileComment());
} else {
Properties propsWithChecksum =
getOrderedPropertiesWithTableChecksum(props);
- propsWithChecksum.store(outputStream, "Properties saved on " +
Instant.now());
+ propsWithChecksum.store(outputStream, getFileComment());
checksum = propsWithChecksum.getProperty(TABLE_CHECKSUM.key());
props.setProperty(TABLE_CHECKSUM.key(), checksum);
}
@@ -1385,6 +1390,31 @@ public class HoodieTableConfig extends HoodieConfig {
.collect(Collectors.toMap(e -> String.valueOf(e.getKey()), e ->
String.valueOf(e.getValue())));
}
+ /**
+ * Returns the cached hostname, fetching it lazily on first call.
+ * Falls back to "unknown" if network resolution fails.
+ */
+ private static String getHostnameSafe() {
+ if (cachedHostname == null) {
+ synchronized (HoodieTableConfig.class) {
+ if (cachedHostname == null) {
+ try {
+ cachedHostname = NetworkUtils.getHostname();
+ } catch (Exception e) {
+ log.warn("Failed to resolve hostname, using 'unknown'", e);
+ cachedHostname = "unknown";
+ }
+ }
+ }
+ }
+ return cachedHostname;
+ }
+
+ public static String getFileComment() {
+ return String.format("Updated at %s, host=%s, hudi_version=%s",
+ Instant.now(), getHostnameSafe(), HoodieVersion.get());
+ }
+
/**
* @deprecated Use {@link #BASE_FILE_FORMAT} and its methods.
*/
diff --git
a/hudi-utilities/src/test/java/org/apache/hudi/utilities/streamer/TestHoodieIncrSourceE2E.java
b/hudi-utilities/src/test/java/org/apache/hudi/utilities/streamer/TestHoodieIncrSourceE2E.java
index 1b010efab7cb..64c432474953 100644
---
a/hudi-utilities/src/test/java/org/apache/hudi/utilities/streamer/TestHoodieIncrSourceE2E.java
+++
b/hudi-utilities/src/test/java/org/apache/hudi/utilities/streamer/TestHoodieIncrSourceE2E.java
@@ -108,7 +108,11 @@ public class TestHoodieIncrSourceE2E extends
S3EventsHoodieIncrSourceHarness {
Option<HoodieCommitMetadata> metadata =
HoodieClientTestUtils.getCommitMetadataForInstant(
metaClient, metaClient.getActiveTimeline().lastInstant().get());
assertFalse(metadata.isEmpty());
- assertEquals(metadata.get().getExtraMetadata(), expectedMetadata);
+ // Assert expected entries are a subset of the actual extra metadata.
CommitMetadataProperties
+ // also enriches commit metadata with hudi.version, engine, and config.*
entries on every write,
+ // so the actual map is a superset.
+ Map<String, String> actual = metadata.get().getExtraMetadata();
+ expectedMetadata.forEach((k, v) -> assertEquals(v, actual.get(k),
"extraMetadata[" + k + "]"));
}
diff --git
a/hudi-utilities/src/test/java/org/apache/hudi/utilities/streamer/TestHoodieIncrSourceE2EAutoUpgrade.java
b/hudi-utilities/src/test/java/org/apache/hudi/utilities/streamer/TestHoodieIncrSourceE2EAutoUpgrade.java
index 3fd2068c9100..52a6734ec331 100644
---
a/hudi-utilities/src/test/java/org/apache/hudi/utilities/streamer/TestHoodieIncrSourceE2EAutoUpgrade.java
+++
b/hudi-utilities/src/test/java/org/apache/hudi/utilities/streamer/TestHoodieIncrSourceE2EAutoUpgrade.java
@@ -137,7 +137,11 @@ public class TestHoodieIncrSourceE2EAutoUpgrade extends
S3EventsHoodieIncrSource
Option<HoodieCommitMetadata> metadata =
HoodieClientTestUtils.getCommitMetadataForInstant(
metaClient, metaClient.getActiveTimeline().lastInstant().get());
assertFalse(metadata.isEmpty());
- assertEquals(expectedMetadata, metadata.get().getExtraMetadata());
+ // Assert expected entries are a subset of the actual extra metadata.
CommitMetadataProperties
+ // also enriches commit metadata with hudi.version, engine, and config.*
entries on every write,
+ // so the actual map is a superset.
+ Map<String, String> actual = metadata.get().getExtraMetadata();
+ expectedMetadata.forEach((k, v) -> assertEquals(v, actual.get(k),
"extraMetadata[" + k + "]"));
}
/**