This is an automated email from the ASF dual-hosted git repository.

pingsutw pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/submarine.git


The following commit(s) were added to refs/heads/master by this push:
     new 84599b3  SUBMARINE-1016. submarine-mlflow deployment takes a long time 
to be ready
84599b3 is described below

commit 84599b36775181acb0a8fcffb39a61d31368ee26
Author: MortalHappiness <[email protected]>
AuthorDate: Sun Sep 12 23:11:31 2021 +0800

    SUBMARINE-1016. submarine-mlflow deployment takes a long time to be ready
    
    ### What is this PR for?
    The submarine-mlflow and submarine-database deployments are created at the 
same time, and mlflow needs to wait for the database. Since mlflow uses 
exponential back-off while waiting, it takes a long time to reach the ready state.
    
    ### What type of PR is it?
    [Bug Fix]
    
    ### Todos
    * [x] - Add ReadinessProbe for submarine-database
    * [x] - Add InitContainers for submarine-mlflow
    
    Reference: 
https://kubernetes.io/docs/concepts/workloads/pods/init-containers/
    
    ### What is the Jira issue?
    https://issues.apache.org/jira/projects/SUBMARINE/issues/SUBMARINE-1016
    
    ### How should this be tested?
    
    ### Screenshots (if appropriate)
    
    Before:
    ![Screenshot from 2021-09-07 
22-22-36](https://user-images.githubusercontent.com/47914085/132993005-518bcd45-8662-4317-83f4-9d8628066893.png)
    
    After:
    ![Screenshot from 2021-09-12 
23-10-09](https://user-images.githubusercontent.com/47914085/132993023-22fcb635-6123-40e8-ac42-75c5fcf8daa7.png)
    
    ### Questions:
    * Do the license files need updating? No
    * Are there breaking changes for older versions? No
    * Does this need new documentation? No
    
    Author: MortalHappiness <[email protected]>
    
    Signed-off-by: Kevin <[email protected]>
    
    Closes #745 from MortalHappiness/SUBMARINE-1016 and squashes the following 
commits:
    
    ae2b9f34 [MortalHappiness] SUBMARINE-1016. submarine-mlflow deployment 
takes a long time to be ready
---
 submarine-cloud-v2/pkg/controller/controller.go         |  1 +
 submarine-cloud-v2/pkg/controller/submarine_database.go | 13 ++++++++++---
 submarine-cloud-v2/pkg/controller/submarine_mlflow.go   | 11 +++++++++++
 3 files changed, 22 insertions(+), 3 deletions(-)

diff --git a/submarine-cloud-v2/pkg/controller/controller.go 
b/submarine-cloud-v2/pkg/controller/controller.go
index 28c26d3..a55d9ba 100644
--- a/submarine-cloud-v2/pkg/controller/controller.go
+++ b/submarine-cloud-v2/pkg/controller/controller.go
@@ -67,6 +67,7 @@ const storageClassName = "submarine-storageclass"
 const (
        serverName                  = "submarine-server"
        databaseName                = "submarine-database"
+       databasePort                = 3306
        tensorboardName             = "submarine-tensorboard"
        mlflowName                  = "submarine-mlflow"
        minioName                   = "submarine-minio"
diff --git a/submarine-cloud-v2/pkg/controller/submarine_database.go 
b/submarine-cloud-v2/pkg/controller/submarine_database.go
index 493e19c..275959d 100644
--- a/submarine-cloud-v2/pkg/controller/submarine_database.go
+++ b/submarine-cloud-v2/pkg/controller/submarine_database.go
@@ -89,7 +89,7 @@ func newSubmarineDatabaseDeployment(submarine 
*v1alpha1.Submarine) *appsv1.Deplo
                                                        ImagePullPolicy: 
"IfNotPresent",
                                                        Ports: 
[]corev1.ContainerPort{
                                                                {
-                                                                       
ContainerPort: 3306,
+                                                                       
ContainerPort: databasePort,
                                                                },
                                                        },
                                                        Env: []corev1.EnvVar{
@@ -105,6 +105,13 @@ func newSubmarineDatabaseDeployment(submarine 
*v1alpha1.Submarine) *appsv1.Deplo
                                                                        
SubPath:   databaseName,
                                                                },
                                                        },
+                                                       ReadinessProbe: 
&corev1.Probe{
+                                                               Handler: 
corev1.Handler{
+                                                                       
TCPSocket: &corev1.TCPSocketAction{
+                                                                               
Port: intstr.FromInt(databasePort),
+                                                                       },
+                                                               },
+                                                       },
                                                },
                                        },
                                        Volumes: []corev1.Volume{
@@ -134,8 +141,8 @@ func newSubmarineDatabaseService(submarine 
*v1alpha1.Submarine) *corev1.Service
                Spec: corev1.ServiceSpec{
                        Ports: []corev1.ServicePort{
                                {
-                                       Port:       3306,
-                                       TargetPort: intstr.FromInt(3306),
+                                       Port:       databasePort,
+                                       TargetPort: 
intstr.FromInt(databasePort),
                                        Name:       databaseName,
                                },
                        },
diff --git a/submarine-cloud-v2/pkg/controller/submarine_mlflow.go 
b/submarine-cloud-v2/pkg/controller/submarine_mlflow.go
index 4192013..ee4ab7f 100644
--- a/submarine-cloud-v2/pkg/controller/submarine_mlflow.go
+++ b/submarine-cloud-v2/pkg/controller/submarine_mlflow.go
@@ -76,6 +76,17 @@ func newSubmarineMlflowDeployment(submarine 
*v1alpha1.Submarine) *appsv1.Deploym
                                        },
                                },
                                Spec: corev1.PodSpec{
+                                       InitContainers: []corev1.Container{
+                                               {
+                                                       Name:  
"check-database-connection",
+                                                       Image: "busybox:1.28",
+                                                       Command: []string{
+                                                               "sh",
+                                                               "-c",
+                                                               
fmt.Sprintf("until nc -z %s %d; do echo waiting for database connection; sleep 
20; done", databaseName, databasePort),
+                                                       },
+                                               },
+                                       },
                                        Containers: []corev1.Container{
                                                {
                                                        Name:            
mlflowName + "-container",

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to