This is an automated email from the ASF dual-hosted git repository.
feiwang pushed a commit to branch branch-1.10
in repository https://gitbox.apache.org/repos/asf/kyuubi.git
The following commit(s) were added to refs/heads/branch-1.10 by this push:
new 6de66220c8 [KYUUBI #7214] Fix kubernetes container state
6de66220c8 is described below
commit 6de66220c8dfc4cab1979827cf5289d8bef913af
Author: MElHoussein <[email protected]>
AuthorDate: Sun Sep 28 20:40:47 2025 -0700
[KYUUBI #7214] Fix kubernetes container state
### Why are the changes needed?
This PR fixes #7195: when `kyuubi.kubernetes.application.state.source`
is `CONTAINER` and Kubernetes fails to pull the image, or the image name is not
valid, or any other failure occurs, Kyuubi marks the application as pending forever.
### How was this patch tested?
- Added unit tests in KubernetesApplicationOperationSuite:
To run the targeted suite:
`./build/mvn -pl kyuubi-server -DskipITs
-Dtest=org.apache.kyuubi.engine.KubernetesApplicationOperationSuite test`
### Was this patch authored or co-authored using generative AI tooling?
No
Closes #7214 from moelhoussein/fix-kubernetes-container-state.
Closes #7214
4f836678e [Wang, Fei] Revert "reformatted"
e869a468b [MElHoussein] reformatted
8ede9aac3 [Wang, Fei] code style
d3b9cd2c0 [MElHoussein] revert unrelated test/url overload changes; keep
only container waiting-state logic change
7d346fc9e [MElHoussein] engine(k8s): treat only specific waiting reasons as
PENDING; others FAILED; handle empty reason as PENDING; unify constant as
PENDING_WAITING_REASONS; add buildSparkAppUrl overload; restore POD IP URL
test; add tests for failure waiting reasons; revert .idea/vcs.xml
ca94d645a [MElHoussein] Fixing container state
Lead-authored-by: MElHoussein <[email protected]>
Co-authored-by: Wang, Fei <[email protected]>
Signed-off-by: Wang, Fei <[email protected]>
(cherry picked from commit b5d7f5800d1baf9b7117edd224e97e5716235144)
Signed-off-by: Wang, Fei <[email protected]>
---
.../engine/KubernetesApplicationOperation.scala | 4 +-
.../KubernetesApplicationOperationSuite.scala | 45 ++++++++++++++++++++++
2 files changed, 48 insertions(+), 1 deletion(-)
diff --git
a/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/KubernetesApplicationOperation.scala
b/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/KubernetesApplicationOperation.scala
index b349ef160c..5c98c3f2e5 100644
---
a/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/KubernetesApplicationOperation.scala
+++
b/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/KubernetesApplicationOperation.scala
@@ -561,6 +561,7 @@ object KubernetesApplicationOperation extends Logging {
val KUBERNETES_SERVICE_HOST = "KUBERNETES_SERVICE_HOST"
val KUBERNETES_SERVICE_PORT = "KUBERNETES_SERVICE_PORT"
val SPARK_UI_PORT_NAME = "spark-ui"
+ private val PENDING_WAITING_REASONS: Set[String] = Set("ContainerCreating",
"PodInitializing")
def toLabel(tag: String): String = s"label: $LABEL_KYUUBI_UNIQUE_KEY=$tag"
@@ -638,7 +639,8 @@ object KubernetesApplicationOperation extends Logging {
def containerStateToApplicationState(containerState: ContainerState):
ApplicationState = {
//
https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle/#container-states
if (containerState.getWaiting != null) {
- PENDING
+ val reasonOpt =
Option(containerState.getWaiting.getReason).map(_.trim).filter(_.nonEmpty)
+ if (reasonOpt.isEmpty ||
PENDING_WAITING_REASONS.contains(reasonOpt.get)) PENDING else FAILED
} else if (containerState.getRunning != null) {
RUNNING
} else if (containerState.getTerminated == null) {
diff --git
a/kyuubi-server/src/test/scala/org/apache/kyuubi/engine/KubernetesApplicationOperationSuite.scala
b/kyuubi-server/src/test/scala/org/apache/kyuubi/engine/KubernetesApplicationOperationSuite.scala
index de40438524..9f5cdae663 100644
---
a/kyuubi-server/src/test/scala/org/apache/kyuubi/engine/KubernetesApplicationOperationSuite.scala
+++
b/kyuubi-server/src/test/scala/org/apache/kyuubi/engine/KubernetesApplicationOperationSuite.scala
@@ -17,8 +17,11 @@
package org.apache.kyuubi.engine
+import io.fabric8.kubernetes.api.model.{ContainerState, ContainerStateWaiting}
+
import org.apache.kyuubi.{KyuubiException, KyuubiFunSuite}
import org.apache.kyuubi.config.KyuubiConf
+import org.apache.kyuubi.engine.ApplicationState.{FAILED, PENDING}
class KubernetesApplicationOperationSuite extends KyuubiFunSuite {
@@ -113,4 +116,46 @@ class KubernetesApplicationOperationSuite extends
KyuubiFunSuite {
KubernetesInfo(Some("c1"), None),
KubernetesInfo(None, Some("ns1"))))
}
+
+ test("containerStateToApplicationState waiting reasons") {
+ // Only valid pending reasons: ContainerCreating and PodInitializing
+ val pendingWaitingReasons = Set("ContainerCreating", "PodInitializing")
+
+ pendingWaitingReasons.foreach { reason =>
+ val containerState = new ContainerState()
+ val waiting = new ContainerStateWaiting()
+ waiting.setReason(reason)
+ containerState.setWaiting(waiting)
+
+ val result =
KubernetesApplicationOperation.containerStateToApplicationState(containerState)
+ assert(result === PENDING)
+ }
+ }
+
+ test("containerStateToApplicationState failure reasons and empty reason") {
+ val failureReasons = Set(
+ "ErrImagePull",
+ "ImagePullBackOff",
+ "CrashLoopBackOff",
+ "CreateContainerConfigError")
+
+ failureReasons.foreach { reason =>
+ val containerState = new ContainerState()
+ val waiting = new ContainerStateWaiting()
+ waiting.setReason(reason)
+ containerState.setWaiting(waiting)
+
+ val result =
KubernetesApplicationOperation.containerStateToApplicationState(containerState)
+ assert(result === FAILED)
+ }
+
+ // Empty/null reason should be treated as PENDING (still initializing)
+ val containerStateEmpty = new ContainerState()
+ val waitingEmpty = new ContainerStateWaiting()
+ waitingEmpty.setReason(null)
+ containerStateEmpty.setWaiting(waitingEmpty)
+ val resultEmpty =
+
KubernetesApplicationOperation.containerStateToApplicationState(containerStateEmpty)
+ assert(resultEmpty === PENDING)
+ }
}