This is an automated email from the ASF dual-hosted git repository.
dongjoon pushed a commit to branch branch-4.1
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-4.1 by this push:
new 9d3e28f19458 [SPARK-53944][K8S][CORE][FOLLOWUP] Override
DRIVER_HOST_ADDRESS in SparkContext instead of SparkEnv when useDriverPodIP is
true
9d3e28f19458 is described below
commit 9d3e28f1945888540078d37fe0501805660a5633
Author: Cheng Pan <[email protected]>
AuthorDate: Sat Nov 8 19:47:51 2025 -0800
[SPARK-53944][K8S][CORE][FOLLOWUP] Override DRIVER_HOST_ADDRESS in
SparkContext instead of SparkEnv when useDriverPodIP is true
### What changes were proposed in this pull request?
This is an alternative to #52923.
In addition, this ensures `spark.kubernetes.executor.useDriverPodIP` only
takes effect when `spark.master` starts with `k8s`.
### Why are the changes needed?
This is a simpler way than https://github.com/apache/spark/pull/52923, see
more details at the discussion
https://github.com/apache/spark/pull/52923#discussion_r2501500911
### Does this PR introduce _any_ user-facing change?
No.
### How was this patch tested?
Pass the K8s IT case provided by #52923
```
[info] KubernetesSuite:
[info] - SPARK-53944: Run SparkPi without driver service (11 seconds, 914
milliseconds)
[info] YuniKornSuite:
[info] Run completed in 21 seconds, 737 milliseconds.
[info] Total number of tests run: 1
[info] Suites: completed 2, aborted 0
[info] Tests: succeeded 1, failed 0, canceled 0, ignored 0, pending 0
[info] All tests passed.
[success] Total time: 44 s, completed Nov 9, 2025, 1:19:07 AM
```
Also manually tested in the internal cluster.
Checklist:
- `spark.kubernetes.executor.useDriverPodIP` only takes effect on K8s mode
- when `s.k.e.useDriverPodIP` is true, check UI `Executors` and
`Environment` tabs, driver address displays correctly in IP, not svc endpoint
<img width="574" height="189" alt="image"
src="https://github.com/user-attachments/assets/9e538448-b66f-482b-b97a-ae1ee00601b3"
/>
<img width="969" height="125" alt="image"
src="https://github.com/user-attachments/assets/27f77899-1e04-40de-9f23-a66e5e6e3ce3"
/>
- run some queries; shuffle works as expected. (We use Apache Celeborn as a
Remote Shuffle Service.)
- all built-in functionalities that we used work with the driver svc
disabled.
### Was this patch authored or co-authored using generative AI tooling?
No.
Closes #52954 from pan3793/SPARK-53944-refactor.
Authored-by: Cheng Pan <[email protected]>
Signed-off-by: Dongjoon Hyun <[email protected]>
(cherry picked from commit cc57743a229a70e2060230e3c9da49cbe24ad257)
Signed-off-by: Dongjoon Hyun <[email protected]>
---
core/src/main/scala/org/apache/spark/SparkContext.scala | 9 ++++++++-
core/src/main/scala/org/apache/spark/SparkEnv.scala | 11 +----------
2 files changed, 9 insertions(+), 11 deletions(-)
diff --git a/core/src/main/scala/org/apache/spark/SparkContext.scala
b/core/src/main/scala/org/apache/spark/SparkContext.scala
index 898bbad26b7e..2393851c6635 100644
--- a/core/src/main/scala/org/apache/spark/SparkContext.scala
+++ b/core/src/main/scala/org/apache/spark/SparkContext.scala
@@ -454,7 +454,14 @@ class SparkContext(config: SparkConf) extends Logging {
// Set Spark driver host and port system properties. This explicitly sets
the configuration
// instead of relying on the default value of the config constant.
- _conf.set(DRIVER_HOST_ADDRESS, _conf.get(DRIVER_HOST_ADDRESS))
+ if (master.startsWith("k8s") &&
+ _conf.getBoolean("spark.kubernetes.executor.useDriverPodIP", false)) {
+ logInfo("Use DRIVER_BIND_ADDRESS instead of DRIVER_HOST_ADDRESS as
driver address " +
+ "because spark.kubernetes.executor.useDriverPodIP is true in K8s
mode.")
+ _conf.set(DRIVER_HOST_ADDRESS, _conf.get(DRIVER_BIND_ADDRESS))
+ } else {
+ _conf.set(DRIVER_HOST_ADDRESS, _conf.get(DRIVER_HOST_ADDRESS))
+ }
_conf.setIfMissing(DRIVER_PORT, 0)
_conf.set(EXECUTOR_ID, SparkContext.DRIVER_IDENTIFIER)
diff --git a/core/src/main/scala/org/apache/spark/SparkEnv.scala
b/core/src/main/scala/org/apache/spark/SparkEnv.scala
index 48d4faafb514..796dbf4b6d5f 100644
--- a/core/src/main/scala/org/apache/spark/SparkEnv.scala
+++ b/core/src/main/scala/org/apache/spark/SparkEnv.scala
@@ -263,9 +263,7 @@ object SparkEnv extends Logging {
s"${DRIVER_HOST_ADDRESS.key} is not set on the driver!")
assert(conf.contains(DRIVER_PORT), s"${DRIVER_PORT.key} is not set on the
driver!")
val bindAddress = conf.get(DRIVER_BIND_ADDRESS)
- val useDriverPodIP =
- conf.get("spark.kubernetes.executor.useDriverPodIP",
"false").equalsIgnoreCase("true")
- val advertiseAddress = if (useDriverPodIP) bindAddress else
conf.get(DRIVER_HOST_ADDRESS)
+ val advertiseAddress = conf.get(DRIVER_HOST_ADDRESS)
val port = conf.get(DRIVER_PORT)
val ioEncryptionKey = if (conf.get(IO_ENCRYPTION_ENABLED)) {
Some(CryptoStreamUtils.createKey(conf))
@@ -371,13 +369,6 @@ object SparkEnv extends Logging {
logInfo(log"Registering ${MDC(LogKeys.ENDPOINT_NAME, name)}")
rpcEnv.setupEndpoint(name, endpointCreator)
} else {
- val useDriverPodIP =
- conf.get("spark.kubernetes.executor.useDriverPodIP",
"false").equalsIgnoreCase("true")
- if (useDriverPodIP) {
- logInfo(log"Use DRIVER_BIND_ADDRESS instead of DRIVER_HOST_ADDRESS
because " +
- log"spark.kubernetes.executor.useDriverPodIP is true")
- conf.set(config.DRIVER_HOST_ADDRESS.key,
conf.get(config.DRIVER_BIND_ADDRESS.key))
- }
RpcUtils.makeDriverRef(name, conf, rpcEnv)
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]