This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
     new 84c5b919d998 [SPARK-48131][CORE][FOLLOWUP] Add a new configuration for the MDC key of Task Name
84c5b919d998 is described below

commit 84c5b919d99872858d2f98db21fd3482f27dcbfc
Author: Gengliang Wang <gengli...@apache.org>
AuthorDate: Tue May 7 19:18:50 2024 -0700

    [SPARK-48131][CORE][FOLLOWUP] Add a new configuration for the MDC key of Task Name

    ### What changes were proposed in this pull request?

    Introduce a new Spark config `spark.log.legacyTaskNameMdc.enabled`:

    - When true, the MDC key `mdc.taskName` is set in the logs, consistent with the behavior of the Spark 3.1 through Spark 3.5 releases.
    - When false, the logging framework uses `task_name` as the MDC key, for consistency with the other new MDC keys.

    ### Why are the changes needed?

    As discussed in https://github.com/apache/spark/pull/46386#issuecomment-2098985001, we should add a configuration and a migration guide entry for the change in the MDC key of Task Name.

    ### Does this PR introduce _any_ user-facing change?

    No

    ### How was this patch tested?

    Manual test

    ### Was this patch authored or co-authored using generative AI tooling?

    No

    Closes #46446 from gengliangwang/addConfig.

    Authored-by: Gengliang Wang <gengli...@apache.org>
    Signed-off-by: Dongjoon Hyun <dh...@apple.com>
---
 core/src/main/scala/org/apache/spark/executor/Executor.scala  | 11 +++++++++--
 .../main/scala/org/apache/spark/internal/config/package.scala | 10 ++++++++++
 docs/core-migration-guide.md                                  |  2 ++
 3 files changed, 21 insertions(+), 2 deletions(-)
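[Editor's illustration, not part of the commit: a minimal sketch of opting back into the legacy MDC key via the new configuration. The app name and master are placeholder values; the same setting can also be passed as `spark-submit --conf spark.log.legacyTaskNameMdc.enabled=true`. The full diff follows.]

```scala
import org.apache.spark.{SparkConf, SparkContext}

// Illustrative sketch: restore the pre-4.0 MDC key `mdc.taskName`.
// App name and master are placeholders for a local test run.
val conf = new SparkConf()
  .setAppName("legacy-mdc-demo")
  .setMaster("local[2]")
  .set("spark.log.legacyTaskNameMdc.enabled", "true")
val sc = new SparkContext(conf)
// Task log lines from this application now carry the MDC key
// `mdc.taskName` instead of the new default key `task_name`.
sc.stop()
```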
diff --git a/core/src/main/scala/org/apache/spark/executor/Executor.scala b/core/src/main/scala/org/apache/spark/executor/Executor.scala
index 3edba45ef89f..68c38fb6179f 100644
--- a/core/src/main/scala/org/apache/spark/executor/Executor.scala
+++ b/core/src/main/scala/org/apache/spark/executor/Executor.scala
@@ -95,6 +95,13 @@ private[spark] class Executor(
 
   private[executor] val conf = env.conf
 
+  // SPARK-48131: Unify MDC key mdc.taskName and task_name in Spark 4.0 release.
+  private[executor] val taskNameMDCKey = if (conf.get(LEGACY_TASK_NAME_MDC_ENABLED)) {
+    "mdc.taskName"
+  } else {
+    LogKeys.TASK_NAME.name
+  }
+
   // SPARK-40235: updateDependencies() uses a ReentrantLock instead of the `synchronized` keyword
   // so that tasks can exit quickly if they are interrupted while waiting on another task to
   // finish downloading dependencies.
@@ -914,7 +921,7 @@ private[spark] class Executor(
     try {
       mdc.foreach { case (key, value) => MDC.put(key, value) }
       // avoid overriding the taskName by the user
-      MDC.put(LogKeys.TASK_NAME.name, taskName)
+      MDC.put(taskNameMDCKey, taskName)
     } catch {
       case _: NoSuchFieldError => logInfo("MDC is not supported.")
     }
@@ -923,7 +930,7 @@ private[spark] class Executor(
   private def cleanMDCForTask(taskName: String, mdc: Seq[(String, String)]): Unit = {
     try {
       mdc.foreach { case (key, _) => MDC.remove(key) }
-      MDC.remove(LogKeys.TASK_NAME.name)
+      MDC.remove(taskNameMDCKey)
     } catch {
       case _: NoSuchFieldError => logInfo("MDC is not supported.")
     }
diff --git a/core/src/main/scala/org/apache/spark/internal/config/package.scala b/core/src/main/scala/org/apache/spark/internal/config/package.scala
index a5be6084de36..87402d2cc17e 100644
--- a/core/src/main/scala/org/apache/spark/internal/config/package.scala
+++ b/core/src/main/scala/org/apache/spark/internal/config/package.scala
@@ -152,6 +152,16 @@ package object config {
       .booleanConf
       .createWithDefault(true)
 
+  private[spark] val LEGACY_TASK_NAME_MDC_ENABLED =
+    ConfigBuilder("spark.log.legacyTaskNameMdc.enabled")
+      .doc("When true, the MDC (Mapped Diagnostic Context) key `mdc.taskName` will be set in the " +
+        "log output, which is the behavior of Spark version 3.1 through Spark 3.5 releases. " +
+        "When false, the logging framework will use `task_name` as the MDC key, " +
+        "aligning it with the naming convention of newer MDC keys introduced in Spark 4.0 release.")
+      .version("4.0.0")
+      .booleanConf
+      .createWithDefault(false)
+
   private[spark] val DRIVER_LOG_LOCAL_DIR =
     ConfigBuilder("spark.driver.log.localDir")
       .doc("Specifies a local directory to write driver logs and enable Driver Log UI Tab.")
diff --git a/docs/core-migration-guide.md b/docs/core-migration-guide.md
index 95c7929a6241..28a9dd0f4371 100644
--- a/docs/core-migration-guide.md
+++ b/docs/core-migration-guide.md
@@ -46,6 +46,8 @@ license: |
   - Set the Spark configuration `spark.log.structuredLogging.enabled` to `false`.
   - Use a custom log4j configuration file, such as renaming the template file `conf/log4j2.properties.pattern-layout-template` to `conf/log4j2.properties`.
 
+- Since Spark 4.0, the MDC (Mapped Diagnostic Context) key for Spark task names in Spark logs has been changed from `mdc.taskName` to `task_name`. To use the key `mdc.taskName`, you can set `spark.log.legacyTaskNameMdc.enabled` to `true`.
+
 - Since Spark 4.0, Spark performs speculative executions less aggressively with `spark.speculation.multiplier=3` and `spark.speculation.quantile=0.9`. To restore the legacy behavior, you can set `spark.speculation.multiplier=1.5` and `spark.speculation.quantile=0.75`.
 
 ## Upgrading from Core 3.4 to 3.5
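[Editor's usage note, an assumption rather than anything this commit states: with a classic log4j2 PatternLayout, an MDC value only appears in the output where the pattern references it via `%X{...}`. A hypothetical `conf/log4j2.properties` fragment:]

```properties
# Hypothetical fragment; adapt to your existing logging setup.
appender.console.type = Console
appender.console.name = console
appender.console.layout.type = PatternLayout
# `%X{task_name}` reads the new default MDC key; use `%X{mdc.taskName}`
# instead when spark.log.legacyTaskNameMdc.enabled=true.
appender.console.layout.pattern = %d{yy/MM/dd HH:mm:ss} %p %c: %X{task_name} %m%n

rootLogger.level = info
rootLogger.appenderRef.console.ref = console
```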