This is an automated email from the ASF dual-hosted git repository.
pingsutw pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/submarine.git
The following commit(s) were added to refs/heads/master by this push:
new 84599b3 SUBMARINE-1016. submarine-mlflow deployment takes a long time
to be ready
84599b3 is described below
commit 84599b36775181acb0a8fcffb39a61d31368ee26
Author: MortalHappiness <[email protected]>
AuthorDate: Sun Sep 12 23:11:31 2021 +0800
SUBMARINE-1016. submarine-mlflow deployment takes a long time to be ready
### What is this PR for?
The submarine-mlflow and submarine-database deployments are created at the
same time, and mlflow needs to wait for the database. Since mlflow uses
exponential back-off while waiting, it takes a long time to reach the ready
state.
### What type of PR is it?
[Bug Fix]
### Todos
* [x] - Add ReadinessProbe for submarine-database
* [x] - Add InitContainers for submarine-mlflow
Reference:
https://kubernetes.io/docs/concepts/workloads/pods/init-containers/
### What is the Jira issue?
https://issues.apache.org/jira/projects/SUBMARINE/issues/SUBMARINE-1016
### How should this be tested?
### Screenshots (if appropriate)
Before:

After:

### Questions:
* Do the license files need updating? No
* Are there breaking changes for older versions? No
* Does this need new documentation? No
Author: MortalHappiness <[email protected]>
Signed-off-by: Kevin <[email protected]>
Closes #745 from MortalHappiness/SUBMARINE-1016 and squashes the following
commits:
ae2b9f34 [MortalHappiness] SUBMARINE-1016. submarine-mlflow deployment
takes a long time to be ready
---
submarine-cloud-v2/pkg/controller/controller.go | 1 +
submarine-cloud-v2/pkg/controller/submarine_database.go | 13 ++++++++++---
submarine-cloud-v2/pkg/controller/submarine_mlflow.go | 11 +++++++++++
3 files changed, 22 insertions(+), 3 deletions(-)
diff --git a/submarine-cloud-v2/pkg/controller/controller.go
b/submarine-cloud-v2/pkg/controller/controller.go
index 28c26d3..a55d9ba 100644
--- a/submarine-cloud-v2/pkg/controller/controller.go
+++ b/submarine-cloud-v2/pkg/controller/controller.go
@@ -67,6 +67,7 @@ const storageClassName = "submarine-storageclass"
const (
serverName = "submarine-server"
databaseName = "submarine-database"
+ databasePort = 3306
tensorboardName = "submarine-tensorboard"
mlflowName = "submarine-mlflow"
minioName = "submarine-minio"
diff --git a/submarine-cloud-v2/pkg/controller/submarine_database.go
b/submarine-cloud-v2/pkg/controller/submarine_database.go
index 493e19c..275959d 100644
--- a/submarine-cloud-v2/pkg/controller/submarine_database.go
+++ b/submarine-cloud-v2/pkg/controller/submarine_database.go
@@ -89,7 +89,7 @@ func newSubmarineDatabaseDeployment(submarine
*v1alpha1.Submarine) *appsv1.Deplo
ImagePullPolicy:
"IfNotPresent",
Ports:
[]corev1.ContainerPort{
{
-
ContainerPort: 3306,
+
ContainerPort: databasePort,
},
},
Env: []corev1.EnvVar{
@@ -105,6 +105,13 @@ func newSubmarineDatabaseDeployment(submarine
*v1alpha1.Submarine) *appsv1.Deplo
SubPath: databaseName,
},
},
+ ReadinessProbe:
&corev1.Probe{
+ Handler:
corev1.Handler{
+
TCPSocket: &corev1.TCPSocketAction{
+
Port: intstr.FromInt(databasePort),
+ },
+ },
+ },
},
},
Volumes: []corev1.Volume{
@@ -134,8 +141,8 @@ func newSubmarineDatabaseService(submarine
*v1alpha1.Submarine) *corev1.Service
Spec: corev1.ServiceSpec{
Ports: []corev1.ServicePort{
{
- Port: 3306,
- TargetPort: intstr.FromInt(3306),
+ Port: databasePort,
+ TargetPort:
intstr.FromInt(databasePort),
Name: databaseName,
},
},
diff --git a/submarine-cloud-v2/pkg/controller/submarine_mlflow.go
b/submarine-cloud-v2/pkg/controller/submarine_mlflow.go
index 4192013..ee4ab7f 100644
--- a/submarine-cloud-v2/pkg/controller/submarine_mlflow.go
+++ b/submarine-cloud-v2/pkg/controller/submarine_mlflow.go
@@ -76,6 +76,17 @@ func newSubmarineMlflowDeployment(submarine
*v1alpha1.Submarine) *appsv1.Deploym
},
},
Spec: corev1.PodSpec{
+ InitContainers: []corev1.Container{
+ {
+ Name:
"check-database-connection",
+ Image: "busybox:1.28",
+ Command: []string{
+ "sh",
+ "-c",
+
fmt.Sprintf("until nc -z %s %d; do echo waiting for database connection; sleep
20; done", databaseName, databasePort),
+ },
+ },
+ },
Containers: []corev1.Container{
{
Name:
mlflowName + "-container",
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]