This is an automated email from the ASF dual-hosted git repository.

nsivabalan pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hudi.git


The following commit(s) were added to refs/heads/master by this push:
     new cde86ffd64a4 feat: Add HUDI version and engine properties to commit metadata (#18183)
cde86ffd64a4 is described below

commit cde86ffd64a4aadb55ec049940713d199d92eb03
Author: Prashant Wason <[email protected]>
AuthorDate: Fri Jun 12 16:14:30 2026 -0700

    feat: Add HUDI version and engine properties to commit metadata (#18183)
    
    Add hudi.version and engine (SPARK/FLINK/JAVA) to every commit's
      extraMetadata so post-hoc debugging can answer "which writer wrote
      this commit?". Engine-specific properties (spark.application.id,
      java.version, os.name, etc.) and a configurable allowlist of
      HoodieWriteConfig values are also embeddable under new opt-in configs:
    
        - hoodie.commit.metadata.engine.properties.embed.enable (default false)
        - hoodie.write.config.keys.to.serialize.to.commit.metadata
            (default: small allowlist of operationally useful keys)
    
      Enrichment is centralized in CommitMetadataProperties.enrich() and invoked
      once via BaseHoodieClient.updateExtraMetadata() from the write-client
      commit path and the table-service schedule path (after early-return
      checks).
    
      Also enrich the hoodie.properties file comment line with hostname and
      hudi version. Fires only on properties (re)writes — table creation, schema
      updates, upgrades — not on every commit. Hostname is resolved once per JVM
      with an "unknown" fallback for restricted environments. HoodieVersion.get()
      memoizes the manifest read.
    
      No storage-format changes; no public-API breakage (HoodieEngineContext
      .getEngineProperties() ships with a default empty-map implementation).
    
    Co-Authored-By: Claude Opus 4.7 <[email protected]>
    
    ---------
    
    Co-authored-by: Claude Opus 4.6 <[email protected]>
    Co-authored-by: sivabalan <[email protected]>
---
 .../org/apache/hudi/client/BaseHoodieClient.java   |   4 +
 .../hudi/client/BaseHoodieTableServiceClient.java  |   2 +
 .../apache/hudi/client/BaseHoodieWriteClient.java  |   1 +
 .../hudi/client/CommitMetadataProperties.java      | 145 ++++++++++++++++++
 .../hudi/client/TestCommitMetadataProperties.java  | 170 +++++++++++++++++++++
 .../client/common/HoodieJavaEngineContext.java     |  25 +++
 .../client/common/HoodieSparkEngineContext.java    |  14 ++
 .../main/java/org/apache/hudi/HoodieVersion.java   |  43 ++++--
 .../hudi/common/engine/HoodieEngineContext.java    |  16 ++
 .../hudi/common/table/HoodieTableConfig.java       |  34 ++++-
 .../streamer/TestHoodieIncrSourceE2E.java          |   6 +-
 .../TestHoodieIncrSourceE2EAutoUpgrade.java        |   6 +-
 12 files changed, 452 insertions(+), 14 deletions(-)

diff --git 
a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/BaseHoodieClient.java
 
b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/BaseHoodieClient.java
index de110a905176..30aa9f770b5f 100644
--- 
a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/BaseHoodieClient.java
+++ 
b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/BaseHoodieClient.java
@@ -458,4 +458,8 @@ public abstract class BaseHoodieClient implements 
Serializable, AutoCloseable {
 
     return foundRollingMetadata;
   }
+
+  protected Option<Map<String, String>> updateExtraMetadata(Option<Map<String, 
String>> extraMetadata) {
+    return CommitMetadataProperties.enrich(extraMetadata, config, context);
+  }
 }
diff --git 
a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/BaseHoodieTableServiceClient.java
 
b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/BaseHoodieTableServiceClient.java
index eb418627d8aa..d29fc0bbd32d 100644
--- 
a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/BaseHoodieTableServiceClient.java
+++ 
b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/BaseHoodieTableServiceClient.java
@@ -725,6 +725,8 @@ public abstract class BaseHoodieTableServiceClient<I, T, O> 
extends BaseHoodieCl
       // so it is handled differently to avoid locking for planning.
       return scheduleCleaning(createTable(config, storageConf), 
providedInstantTime);
     }
+    // Only enrich metadata after early-return checks, when we're actually 
going to use it
+    extraMetadata = updateExtraMetadata(extraMetadata);
     Option<HoodieInstant> lastCompletedInstant = 
lastCompletedTxnAndMetadata.isPresent()
         ? Option.of(lastCompletedTxnAndMetadata.get().getLeft())
         : Option.empty();
diff --git 
a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/BaseHoodieWriteClient.java
 
b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/BaseHoodieWriteClient.java
index 7a7ec2caf949..14789a4c667d 100644
--- 
a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/BaseHoodieWriteClient.java
+++ 
b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/BaseHoodieWriteClient.java
@@ -254,6 +254,7 @@ public abstract class BaseHoodieWriteClient<T, I, K, O> 
extends BaseHoodieClient
     if (!config.allowEmptyCommit() && 
tableWriteStats.isEmptyDataTableWriteStats()) {
       return true;
     }
+    extraMetadata = updateExtraMetadata(extraMetadata);
     log.info("Committing {} action {}", instantTime, commitActionType);
     // Create a Hoodie table which encapsulated the commits and files visible
     HoodieTable table = hoodieTableOpt.orElse(createTable(config));
diff --git 
a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/CommitMetadataProperties.java
 
b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/CommitMetadataProperties.java
new file mode 100644
index 000000000000..3a7b79812c8e
--- /dev/null
+++ 
b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/CommitMetadataProperties.java
@@ -0,0 +1,145 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hudi.client;
+
+import org.apache.hudi.HoodieVersion;
+import org.apache.hudi.common.config.ConfigProperty;
+import org.apache.hudi.common.engine.HoodieEngineContext;
+import org.apache.hudi.common.util.Option;
+import org.apache.hudi.common.util.StringUtils;
+import org.apache.hudi.config.HoodieWriteConfig;
+
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.stream.Collectors;
+
+/**
+ * Enriches the {@code extraMetadata} map persisted with every commit, with 
version, engine, and
+ * (optionally) engine-specific properties and a configurable subset of {@link 
HoodieWriteConfig}
+ * values.
+ *
+ * <p>Key namespacing:
+ * <ul>
+ *   <li>{@code hudi.version} — writer version. Always emitted.</li>
+ *   <li>{@code engine} — engine type (SPARK/FLINK/JAVA). Always emitted.</li>
+ *   <li>Engine-supplied keys (Spark: {@code spark.*}, Java: {@code 
java.*}/{@code os.*}, etc.)
+ *       — gated by {@link #EMBED_ENGINE_PROPERTIES_IN_COMMIT_METADATA}.</li>
+ *   <li>{@code config.<key>} — values of {@link HoodieWriteConfig} entries 
whose keys are listed
+ *       in {@link #WRITE_CONFIG_KEYS_TO_SERIALIZE_TO_COMMIT_METADATA}.</li>
+ * </ul>
+ */
+public class CommitMetadataProperties {
+
+  static final String HUDI_VERSION_KEY = "hudi.version";
+  static final String ENGINE_KEY = "engine";
+  static final String CONFIG_KEY_PREFIX = "config.";
+
+  /**
+   * Default allowlist of write-config keys serialized into commit metadata. 
These are values that
+   * change across jobs/runs but aren't already captured in {@code 
hoodie.properties}, so they're
+   * useful for after-the-fact debugging. Intentionally excludes immutable 
table identity
+   * (already in {@code hoodie.properties}) and per-record/sensitive values.
+   */
+  private static final String DEFAULT_WRITE_CONFIG_KEYS = String.join(",",
+      Arrays.asList(
+          "hoodie.datasource.write.operation",
+          "hoodie.insert.shuffle.parallelism",
+          "hoodie.upsert.shuffle.parallelism",
+          "hoodie.bulkinsert.shuffle.parallelism",
+          "hoodie.delete.shuffle.parallelism",
+          "hoodie.write.concurrency.mode",
+          "hoodie.metadata.enable"));
+
+  /**
+   * When enabled, engine-specific properties supplied by
+   * {@link HoodieEngineContext#getEngineProperties()} are embedded into 
commit metadata for
+   * debugging (e.g. {@code spark.application.id}, {@code spark.user}). {@code 
hudi.version} and
+   * {@code engine} are always embedded regardless of this flag.
+   *
+   * <p>Default is {@code false} since these add per-commit growth to the 
timeline. Long-running
+   * ingestion workloads writing many commits should leave this off unless 
debugging.
+   */
+  public static final ConfigProperty<Boolean> 
EMBED_ENGINE_PROPERTIES_IN_COMMIT_METADATA =
+      ConfigProperty
+          .key("hoodie.commit.metadata.engine.properties.embed.enable")
+          .defaultValue(false)
+          .markAdvanced()
+          .sinceVersion("1.3.0")
+          .withDocumentation("When enabled, engine-specific properties (e.g. 
spark.application.id, "
+              + "spark.user, java.version) are embedded into commit metadata 
for debugging. "
+              + "hudi.version and engine name are always embedded regardless 
of this flag.");
+
+  /**
+   * Comma-separated list of {@link HoodieWriteConfig} keys whose values 
should be serialized into
+   * commit metadata under the {@code config.<key>} prefix. Use with care: 
every key listed here
+   * adds an entry to every commit, which lives forever in the active and 
archived timeline.
+   *
+   * <p>Empty value disables config-key serialization entirely (only {@code 
hudi.version} and
+   * {@code engine} are emitted).
+   */
+  public static final ConfigProperty<String> 
WRITE_CONFIG_KEYS_TO_SERIALIZE_TO_COMMIT_METADATA =
+      ConfigProperty
+          .key("hoodie.write.config.keys.to.serialize.to.commit.metadata")
+          .defaultValue(DEFAULT_WRITE_CONFIG_KEYS)
+          .markAdvanced()
+          .sinceVersion("1.3.0")
+          .withDocumentation("Comma-separated list of write-config keys whose 
values are "
+              + "serialized into the extraMetadata map of every commit (under 
the 'config.' "
+              + "prefix). Set to empty to skip config-key serialization 
entirely. Avoid adding "
+              + "keys whose values may contain credentials or large payloads, 
since commit "
+              + "metadata is persisted in the timeline.");
+
+  public static Option<Map<String, String>> enrich(Option<Map<String, String>> 
extraMetadata,
+                                                   HoodieWriteConfig config,
+                                                   HoodieEngineContext 
context) {
+    Map<String, String> newMetadata = new HashMap<>();
+    if (extraMetadata.isPresent()) {
+      newMetadata.putAll(extraMetadata.get());
+    }
+
+    newMetadata.put(HUDI_VERSION_KEY, HoodieVersion.get());
+    newMetadata.put(ENGINE_KEY, config.getEngineType().name());
+
+    if (config.getBoolean(EMBED_ENGINE_PROPERTIES_IN_COMMIT_METADATA)) {
+      newMetadata.putAll(context.getEngineProperties());
+    }
+
+    for (String key : 
parseConfigKeys(config.getString(WRITE_CONFIG_KEYS_TO_SERIALIZE_TO_COMMIT_METADATA)))
 {
+      String value = config.getString(key);
+      if (!StringUtils.isNullOrEmpty(value)) {
+        newMetadata.put(CONFIG_KEY_PREFIX + key, value);
+      }
+    }
+
+    return Option.of(newMetadata);
+  }
+
+  private static List<String> parseConfigKeys(String csv) {
+    if (StringUtils.isNullOrEmpty(csv)) {
+      return Collections.emptyList();
+    }
+    return Arrays.stream(csv.split(","))
+        .map(String::trim)
+        .filter(s -> !s.isEmpty())
+        .collect(Collectors.toList());
+  }
+}
diff --git 
a/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/client/TestCommitMetadataProperties.java
 
b/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/client/TestCommitMetadataProperties.java
new file mode 100644
index 000000000000..acd83424ff3e
--- /dev/null
+++ 
b/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/client/TestCommitMetadataProperties.java
@@ -0,0 +1,170 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hudi.client;
+
+import org.apache.hudi.common.engine.EngineType;
+import org.apache.hudi.common.engine.HoodieEngineContext;
+import org.apache.hudi.common.util.Option;
+import org.apache.hudi.config.HoodieWriteConfig;
+
+import org.junit.jupiter.api.Test;
+
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Properties;
+
+import static 
org.apache.hudi.client.CommitMetadataProperties.CONFIG_KEY_PREFIX;
+import static 
org.apache.hudi.client.CommitMetadataProperties.EMBED_ENGINE_PROPERTIES_IN_COMMIT_METADATA;
+import static org.apache.hudi.client.CommitMetadataProperties.ENGINE_KEY;
+import static org.apache.hudi.client.CommitMetadataProperties.HUDI_VERSION_KEY;
+import static 
org.apache.hudi.client.CommitMetadataProperties.WRITE_CONFIG_KEYS_TO_SERIALIZE_TO_COMMIT_METADATA;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertNotNull;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.when;
+
+class TestCommitMetadataProperties {
+
+  /** Always-emitted keys (hudi.version, engine) are present even when input 
is empty. */
+  @Test
+  void enrich_emptyInput_emitsVersionAndEngine() {
+    HoodieWriteConfig config = newConfig(new Properties());
+    HoodieEngineContext context = newContext(Collections.emptyMap());
+
+    Map<String, String> result = 
CommitMetadataProperties.enrich(Option.empty(), config, context).get();
+
+    assertNotNull(result.get(HUDI_VERSION_KEY));
+    assertEquals(EngineType.SPARK.name(), result.get(ENGINE_KEY));
+  }
+
+  /** Passing an immutable map must not throw — yihua's defensive-copy fix. */
+  @Test
+  void enrich_immutableInputMap_doesNotThrow() {
+    HoodieWriteConfig config = newConfig(new Properties());
+    HoodieEngineContext context = newContext(Collections.emptyMap());
+    Map<String, String> input = Collections.unmodifiableMap(
+        Collections.singletonMap("caller.key", "caller.value"));
+
+    Map<String, String> result = 
CommitMetadataProperties.enrich(Option.of(input), config, context).get();
+
+    assertEquals("caller.value", result.get("caller.key"));
+    assertTrue(input.equals(Collections.singletonMap("caller.key", 
"caller.value")),
+        "Input map must not be mutated");
+  }
+
+  /** Default (opt-in flag = false) suppresses engine-supplied keys. */
+  @Test
+  void enrich_engineEmbedFlagOff_omitsEngineSuppliedKeys() {
+    HoodieWriteConfig config = newConfig(new Properties());
+    Map<String, String> engineProps = new HashMap<>();
+    engineProps.put("spark.application.id", "app-123");
+    HoodieEngineContext context = newContext(engineProps);
+
+    Map<String, String> result = 
CommitMetadataProperties.enrich(Option.empty(), config, context).get();
+
+    assertFalse(result.containsKey("spark.application.id"),
+        "Engine-supplied keys must be omitted when embed flag is off");
+    assertNotNull(result.get(HUDI_VERSION_KEY));
+    assertEquals(EngineType.SPARK.name(), result.get(ENGINE_KEY));
+  }
+
+  /** Opt-in flag = true emits engine-supplied keys. */
+  @Test
+  void enrich_engineEmbedFlagOn_emitsEngineSuppliedKeys() {
+    Properties props = new Properties();
+    props.put(EMBED_ENGINE_PROPERTIES_IN_COMMIT_METADATA.key(), "true");
+    HoodieWriteConfig config = newConfig(props);
+    Map<String, String> engineProps = new HashMap<>();
+    engineProps.put("spark.application.id", "app-123");
+    engineProps.put("spark.user", "sivabalan");
+    HoodieEngineContext context = newContext(engineProps);
+
+    Map<String, String> result = 
CommitMetadataProperties.enrich(Option.empty(), config, context).get();
+
+    assertEquals("app-123", result.get("spark.application.id"));
+    assertEquals("sivabalan", result.get("spark.user"));
+  }
+
+  /** Config-key allowlist serializes present values; skips truly absent keys 
(no default). */
+  @Test
+  void enrich_writeConfigKeyAllowlist_emitsPresentNonEmptyValues() {
+    Properties props = new Properties();
+    props.put(WRITE_CONFIG_KEYS_TO_SERIALIZE_TO_COMMIT_METADATA.key(),
+        "hoodie.metadata.enable,hoodie.does.not.exist");
+    props.put("hoodie.metadata.enable", "true");
+    HoodieWriteConfig config = newConfig(props);
+    HoodieEngineContext context = newContext(Collections.emptyMap());
+
+    Map<String, String> result = 
CommitMetadataProperties.enrich(Option.empty(), config, context).get();
+
+    assertEquals("true", result.get(CONFIG_KEY_PREFIX + 
"hoodie.metadata.enable"));
+    assertFalse(result.containsKey(CONFIG_KEY_PREFIX + 
"hoodie.does.not.exist"),
+        "Keys absent from the config must not be serialized");
+  }
+
+  /** Empty allowlist disables config-key serialization entirely. */
+  @Test
+  void enrich_emptyConfigKeyList_emitsNoConfigKeys() {
+    Properties props = new Properties();
+    props.put(WRITE_CONFIG_KEYS_TO_SERIALIZE_TO_COMMIT_METADATA.key(), "");
+    props.put("hoodie.metadata.enable", "true");
+    HoodieWriteConfig config = newConfig(props);
+    HoodieEngineContext context = newContext(Collections.emptyMap());
+
+    Map<String, String> result = 
CommitMetadataProperties.enrich(Option.empty(), config, context).get();
+
+    long configKeyCount = result.keySet().stream().filter(k -> 
k.startsWith(CONFIG_KEY_PREFIX)).count();
+    assertEquals(0L, configKeyCount, "No config.* keys when allowlist is 
empty");
+    // hudi.version and engine still always emitted
+    assertNotNull(result.get(HUDI_VERSION_KEY));
+    assertEquals(EngineType.SPARK.name(), result.get(ENGINE_KEY));
+  }
+
+  /** Caller-provided extra metadata is preserved alongside enrichment. */
+  @Test
+  void enrich_preservesCallerProvidedKeys() {
+    HoodieWriteConfig config = newConfig(new Properties());
+    HoodieEngineContext context = newContext(Collections.emptyMap());
+    Map<String, String> input = new HashMap<>();
+    input.put("caller.key1", "caller.value1");
+    input.put("caller.key2", "caller.value2");
+
+    Map<String, String> result = 
CommitMetadataProperties.enrich(Option.of(input), config, context).get();
+
+    assertEquals("caller.value1", result.get("caller.key1"));
+    assertEquals("caller.value2", result.get("caller.key2"));
+    assertNotNull(result.get(HUDI_VERSION_KEY));
+  }
+
+  private static HoodieWriteConfig newConfig(Properties overrides) {
+    Properties props = new Properties();
+    props.put(HoodieWriteConfig.BASE_PATH.key(), 
"/tmp/test-commit-metadata-properties");
+    props.putAll(overrides);
+    return HoodieWriteConfig.newBuilder().withProperties(props).build();
+  }
+
+  private static HoodieEngineContext newContext(Map<String, String> 
engineProperties) {
+    HoodieEngineContext context = mock(HoodieEngineContext.class);
+    when(context.getEngineProperties()).thenReturn(engineProperties);
+    return context;
+  }
+}
diff --git 
a/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/client/common/HoodieJavaEngineContext.java
 
b/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/client/common/HoodieJavaEngineContext.java
index 8da9e2aca0e1..a24d6fb4c25d 100644
--- 
a/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/client/common/HoodieJavaEngineContext.java
+++ 
b/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/client/common/HoodieJavaEngineContext.java
@@ -49,6 +49,7 @@ import org.apache.avro.generic.IndexedRecord;
 
 import java.io.IOException;
 import java.util.Collections;
+import java.util.HashMap;
 import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
@@ -192,6 +193,30 @@ public class HoodieJavaEngineContext extends 
HoodieEngineContext {
     // no operation for now
   }
 
+  // Allowlist of safe system properties to include in commit metadata. Avoid 
wildcarding system
+  // properties since callers may pass credentials via -D flags (e.g. 
-Ddb.password=...).
+  private static final String[] SAFE_SYSTEM_PROPERTIES = {
+      "java.version",
+      "java.vendor",
+      "java.vm.name",
+      "java.vm.version",
+      "os.name",
+      "os.version",
+      "os.arch"
+  };
+
+  @Override
+  public Map<String, String> getEngineProperties() {
+    Map<String, String> info = new HashMap<>();
+    for (String property : SAFE_SYSTEM_PROPERTIES) {
+      String value = System.getProperty(property);
+      if (value != null) {
+        info.put(property, value);
+      }
+    }
+    return info;
+  }
+
   @Override
   public <I, O> O aggregate(HoodieData<I> data, O zeroValue, 
Functions.Function2<O, I, O> seqOp, Functions.Function2<O, O, O> combOp) {
     return data.collectAsList().stream().reduce(zeroValue, seqOp::apply, 
combOp::apply);
diff --git 
a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/common/HoodieSparkEngineContext.java
 
b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/common/HoodieSparkEngineContext.java
index 3c5707d3358f..091961bb4ca7 100644
--- 
a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/common/HoodieSparkEngineContext.java
+++ 
b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/common/HoodieSparkEngineContext.java
@@ -408,4 +408,18 @@ public class HoodieSparkEngineContext extends 
HoodieEngineContext {
                 new ConditionalRangePartitioner.CompositeKeyComparator<>())
             .mapToPair(e -> e._1));
   }
+
+  @Override
+  public Map<String, String> getEngineProperties() {
+    Map<String, String> info = new HashMap<>();
+    info.put("spark.application.id", javaSparkContext.sc().applicationId());
+    info.put("spark.user", javaSparkContext.sparkUser());
+    info.put("spark.master", javaSparkContext.master());
+    info.put("spark.application", javaSparkContext.appName());
+    info.put("spark.version", javaSparkContext.version());
+    info.put("spark.defaultParallelism", 
String.valueOf(javaSparkContext.defaultParallelism()));
+    info.put("spark.defaultMinPartitions", 
String.valueOf(javaSparkContext.defaultMinPartitions()));
+    info.put("spark.executor.instances", 
javaSparkContext.getConf().get("spark.executor.instances", ""));
+    return info;
+  }
 }
diff --git a/hudi-common/src/main/java/org/apache/hudi/HoodieVersion.java 
b/hudi-common/src/main/java/org/apache/hudi/HoodieVersion.java
index dd88836468c9..23cee1015841 100644
--- a/hudi-common/src/main/java/org/apache/hudi/HoodieVersion.java
+++ b/hudi-common/src/main/java/org/apache/hudi/HoodieVersion.java
@@ -33,23 +33,46 @@ public final class HoodieVersion {
 
   public static final String HOODIE_WRITER_VERSION = "hudi_writer_version";
 
+  // Cached result of reading version from the manifest. Null means "not 
loaded yet". An empty
+  // string means "manifest absent or unreadable" — fall back to 
HOODIE_DEFAULT_VERSION so tests
+  // that swap the default via setVersionOverride continue to work.
+  private static volatile String cachedManifestVersion = null;
+
   /**
    * Returns the complete version of HUDI code
    * Example: 0.12.2 or 0.12.3-snapshot
    */
   public static String get() {
-    String hudiPropertiesFilePath = 
"META-INF/maven/org.apache.hudi/hudi-common/pom.properties";
-    try (InputStream inputStream = 
HoodieVersion.class.getClassLoader().getResourceAsStream(hudiPropertiesFilePath))
 {
-      Properties properties = new Properties();
-      if (inputStream != null) {
-        properties.load(inputStream);
-        // Access properties
-        return properties.getProperty("version");
+    String fromManifest = loadManifestVersion();
+    return fromManifest.isEmpty() ? HOODIE_DEFAULT_VERSION : fromManifest;
+  }
+
+  private static String loadManifestVersion() {
+    String local = cachedManifestVersion;
+    if (local != null) {
+      return local;
+    }
+    synchronized (HoodieVersion.class) {
+      if (cachedManifestVersion != null) {
+        return cachedManifestVersion;
+      }
+      String hudiPropertiesFilePath = 
"META-INF/maven/org.apache.hudi/hudi-common/pom.properties";
+      String resolved = "";
+      try (InputStream inputStream = 
HoodieVersion.class.getClassLoader().getResourceAsStream(hudiPropertiesFilePath))
 {
+        if (inputStream != null) {
+          Properties properties = new Properties();
+          properties.load(inputStream);
+          String version = properties.getProperty("version");
+          if (version != null) {
+            resolved = version;
+          }
+        }
+      } catch (Exception ignored) {
+        // Ignoring the exception as there is a fallback to default version
       }
-    } catch (Exception ignored) {
-      // Ignoring the exception as there is as fallback to default version
+      cachedManifestVersion = resolved;
+      return resolved;
     }
-    return HOODIE_DEFAULT_VERSION;
   }
 
   /**
diff --git 
a/hudi-common/src/main/java/org/apache/hudi/common/engine/HoodieEngineContext.java
 
b/hudi-common/src/main/java/org/apache/hudi/common/engine/HoodieEngineContext.java
index f56921797283..43bc9cc78b35 100644
--- 
a/hudi-common/src/main/java/org/apache/hudi/common/engine/HoodieEngineContext.java
+++ 
b/hudi-common/src/main/java/org/apache/hudi/common/engine/HoodieEngineContext.java
@@ -45,6 +45,7 @@ import lombok.AllArgsConstructor;
 import lombok.Getter;
 
 import java.io.IOException;
+import java.util.Collections;
 import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
@@ -117,6 +118,21 @@ public abstract class HoodieEngineContext {
 
   public abstract void cancelAllJobs();
 
+  /**
+   * Returns engine-specific properties to be included in commit metadata for 
debugging.
+   * <p>Contract:
+   * <ul>
+   *   <li>Implementations must only return safe, non-sensitive values (no 
credentials, no PII).</li>
+   *   <li>This is invoked on the driver, on every commit. It must be cheap 
and free of side effects.</li>
+   *   <li>Must not reach into checkpoint / runtime state (e.g. for streaming 
engines like Flink,
+   *       per-checkpoint metadata is set up via the coordinator, not 
here).</li>
+   * </ul>
+   * Default returns an empty map so external subclasses are not forced to 
implement this.
+   */
+  public Map<String, String> getEngineProperties() {
+    return Collections.emptyMap();
+  }
+
   /**
    * Returns the application id of the engine (e.g. Spark application id).
    * Used to populate lock metadata so lock holders can be identified.
diff --git 
a/hudi-common/src/main/java/org/apache/hudi/common/table/HoodieTableConfig.java 
b/hudi-common/src/main/java/org/apache/hudi/common/table/HoodieTableConfig.java
index 73703d70d85a..aef84c9f8adc 100644
--- 
a/hudi-common/src/main/java/org/apache/hudi/common/table/HoodieTableConfig.java
+++ 
b/hudi-common/src/main/java/org/apache/hudi/common/table/HoodieTableConfig.java
@@ -18,6 +18,7 @@
 
 package org.apache.hudi.common.table;
 
+import org.apache.hudi.HoodieVersion;
 import org.apache.hudi.common.HoodieTableFormat;
 import org.apache.hudi.common.NativeTableFormat;
 import org.apache.hudi.common.bootstrap.index.hfile.HFileBootstrapIndex;
@@ -51,6 +52,7 @@ import 
org.apache.hudi.common.table.timeline.versioning.TimelineLayoutVersion;
 import org.apache.hudi.common.util.BinaryUtil;
 import org.apache.hudi.common.util.ConfigUtils;
 import org.apache.hudi.common.util.HoodieTableConfigUtils;
+import org.apache.hudi.common.util.NetworkUtils;
 import org.apache.hudi.common.util.Option;
 import org.apache.hudi.common.util.ReflectionUtils;
 import org.apache.hudi.common.util.StringUtils;
@@ -126,6 +128,9 @@ import static 
org.apache.hudi.common.util.ValidationUtils.checkArgument;
         + " initializing a path as hoodie base path and never changes during 
the lifetime of a hoodie table.")
 public class HoodieTableConfig extends HoodieConfig {
 
+  // Cached hostname to avoid repeated synchronized network calls
+  private static volatile String cachedHostname = null;
+
   public static final String HOODIE_PROPERTIES_FILE = "hoodie.properties";
   public static final String HOODIE_PROPERTIES_FILE_BACKUP = 
"hoodie.properties.backup";
   public static final String HOODIE_WRITE_TABLE_NAME_KEY = 
"hoodie.datasource.write.table.name";
@@ -500,10 +505,10 @@ public class HoodieTableConfig extends HoodieConfig {
     final String checksum;
     if (isValidChecksum(props)) {
       checksum = props.getProperty(TABLE_CHECKSUM.key());
-      props.store(outputStream, "Updated at " + Instant.now());
+      props.store(outputStream, getFileComment());
     } else {
       Properties propsWithChecksum = 
getOrderedPropertiesWithTableChecksum(props);
-      propsWithChecksum.store(outputStream, "Properties saved on " + 
Instant.now());
+      propsWithChecksum.store(outputStream, getFileComment());
       checksum = propsWithChecksum.getProperty(TABLE_CHECKSUM.key());
       props.setProperty(TABLE_CHECKSUM.key(), checksum);
     }
@@ -1385,6 +1390,31 @@ public class HoodieTableConfig extends HoodieConfig {
         .collect(Collectors.toMap(e -> String.valueOf(e.getKey()), e -> 
String.valueOf(e.getValue())));
   }
 
+  /**
+   * Returns the cached hostname, fetching it lazily on first call.
+   * Falls back to "unknown" if network resolution fails.
+   */
+  private static String getHostnameSafe() {
+    if (cachedHostname == null) {
+      synchronized (HoodieTableConfig.class) {
+        if (cachedHostname == null) {
+          try {
+            cachedHostname = NetworkUtils.getHostname();
+          } catch (Exception e) {
+            log.warn("Failed to resolve hostname, using 'unknown'", e);
+            cachedHostname = "unknown";
+          }
+        }
+      }
+    }
+    return cachedHostname;
+  }
+
+  public static String getFileComment() {
+    return String.format("Updated at %s, host=%s, hudi_version=%s",
+        Instant.now(), getHostnameSafe(), HoodieVersion.get());
+  }
+
   /**
    * @deprecated Use {@link #BASE_FILE_FORMAT} and its methods.
    */
diff --git 
a/hudi-utilities/src/test/java/org/apache/hudi/utilities/streamer/TestHoodieIncrSourceE2E.java
 
b/hudi-utilities/src/test/java/org/apache/hudi/utilities/streamer/TestHoodieIncrSourceE2E.java
index 1b010efab7cb..64c432474953 100644
--- 
a/hudi-utilities/src/test/java/org/apache/hudi/utilities/streamer/TestHoodieIncrSourceE2E.java
+++ 
b/hudi-utilities/src/test/java/org/apache/hudi/utilities/streamer/TestHoodieIncrSourceE2E.java
@@ -108,7 +108,11 @@ public class TestHoodieIncrSourceE2E extends 
S3EventsHoodieIncrSourceHarness {
     Option<HoodieCommitMetadata> metadata = 
HoodieClientTestUtils.getCommitMetadataForInstant(
         metaClient, metaClient.getActiveTimeline().lastInstant().get());
     assertFalse(metadata.isEmpty());
-    assertEquals(metadata.get().getExtraMetadata(), expectedMetadata);
+    // Assert expected entries are a subset of the actual extra metadata. 
CommitMetadataProperties
+    // also enriches commit metadata with hudi.version, engine, and config.* 
entries on every write,
+    // so the actual map is a superset.
+    Map<String, String> actual = metadata.get().getExtraMetadata();
+    expectedMetadata.forEach((k, v) -> assertEquals(v, actual.get(k), 
"extraMetadata[" + k + "]"));
   }
 
 
diff --git 
a/hudi-utilities/src/test/java/org/apache/hudi/utilities/streamer/TestHoodieIncrSourceE2EAutoUpgrade.java
 
b/hudi-utilities/src/test/java/org/apache/hudi/utilities/streamer/TestHoodieIncrSourceE2EAutoUpgrade.java
index 3fd2068c9100..52a6734ec331 100644
--- 
a/hudi-utilities/src/test/java/org/apache/hudi/utilities/streamer/TestHoodieIncrSourceE2EAutoUpgrade.java
+++ 
b/hudi-utilities/src/test/java/org/apache/hudi/utilities/streamer/TestHoodieIncrSourceE2EAutoUpgrade.java
@@ -137,7 +137,11 @@ public class TestHoodieIncrSourceE2EAutoUpgrade extends 
S3EventsHoodieIncrSource
     Option<HoodieCommitMetadata> metadata = 
HoodieClientTestUtils.getCommitMetadataForInstant(
         metaClient, metaClient.getActiveTimeline().lastInstant().get());
     assertFalse(metadata.isEmpty());
-    assertEquals(expectedMetadata, metadata.get().getExtraMetadata());
+    // Assert expected entries are a subset of the actual extra metadata. 
CommitMetadataProperties
+    // also enriches commit metadata with hudi.version, engine, and config.* 
entries on every write,
+    // so the actual map is a superset.
+    Map<String, String> actual = metadata.get().getExtraMetadata();
+    expectedMetadata.forEach((k, v) -> assertEquals(v, actual.get(k), 
"extraMetadata[" + k + "]"));
   }
 
   /**

Reply via email to