This is an automated email from the ASF dual-hosted git repository. liuxun pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/zeppelin.git
The following commit(s) were added to refs/heads/master by this push: new a530496 [ZEPPELIN-4263] Fixed cluster docker mode cannot exit the container of the remote interpreter a530496 is described below commit a5304964a8dbef3a3d0e0eb04cee94e045443bd2 Author: Xun Liu <liu...@apache.org> AuthorDate: Tue Sep 17 15:57:55 2019 +0800 [ZEPPELIN-4263] Fixed cluster docker mode cannot exit the container of the remote interpreter ### What is this PR for? In docker mode, the container is first started with the `while :; do sleep 1; done` script, and then `bin/interpreter.sh` is executed inside the container to start the interpreter. When Zeppelin closes or restarts the interpreter in docker, the interpreter process in the container exits normally, but the container's startup shell does not exit, so the container keeps running. The container's startup script therefore needs to detect when the interpreter process has exited and then exit itself. ### What type of PR is it? Bug Fix ### What is the Jira issue? https://issues.apache.org/jira/browse/ZEPPELIN-4263 ### How should this be tested? [CI Pass](https://travis-ci.org/liuxunorg/zeppelin/builds/585091106) ### Screenshots (if appropriate) ### Questions: * Do the license files need an update? No * Are there breaking changes for older versions? No * Does this need documentation? No Author: Xun Liu <liu...@apache.org> Closes #3445 from liuxunorg/ZEPPELIN-4263 and squashes the following commits: 017e98b01 [Xun Liu] 1. Add comment. 2. Add a check for whether the container already exists. 
93e41e5fb [Xun Liu] [ZEPPELIN-4263] Fixed cluster docker mode cannot exit the container of the remote interpreter --- .../launcher/DockerInterpreterProcess.java | 57 ++++++++++++++++++++-- 1 file changed, 52 insertions(+), 5 deletions(-) diff --git a/zeppelin-plugins/launcher/docker/src/main/java/org/apache/zeppelin/interpreter/launcher/DockerInterpreterProcess.java b/zeppelin-plugins/launcher/docker/src/main/java/org/apache/zeppelin/interpreter/launcher/DockerInterpreterProcess.java index 17bb093..9802f81 100644 --- a/zeppelin-plugins/launcher/docker/src/main/java/org/apache/zeppelin/interpreter/launcher/DockerInterpreterProcess.java +++ b/zeppelin-plugins/launcher/docker/src/main/java/org/apache/zeppelin/interpreter/launcher/DockerInterpreterProcess.java @@ -45,6 +45,7 @@ import com.spotify.docker.client.DockerClient; import com.spotify.docker.client.LogStream; import com.spotify.docker.client.ProgressHandler; import com.spotify.docker.client.exceptions.DockerException; +import com.spotify.docker.client.messages.Container; import com.spotify.docker.client.messages.ContainerConfig; import com.spotify.docker.client.messages.ContainerCreation; import com.spotify.docker.client.messages.ExecCreation; @@ -162,6 +163,8 @@ public class DockerInterpreterProcess extends RemoteInterpreterProcess { public void start(String userName) throws IOException { docker = DefaultDockerClient.builder().uri(URI.create(DOCKER_HOST)).build(); + removeExistContainer(containerName); + final Map<String, List<PortBinding>> portBindings = new HashMap<>(); // Bind container ports to host ports @@ -191,13 +194,24 @@ public class DockerInterpreterProcess extends RemoteInterpreterProcess { List<String> listEnv = getListEnvs(); LOGGER.info("docker listEnv = {}", listEnv); + // check if the interpreter process exit script + // if interpreter process exit, then container need exit + StringBuilder sbStartCmd = new StringBuilder(); + sbStartCmd.append("sleep 10; "); + 
sbStartCmd.append("process=RemoteInterpreterServer; "); + sbStartCmd.append("RUNNING_PIDS=$(ps x | grep $process | grep -v grep | awk '{print $1}'); "); + sbStartCmd.append("while [ ! -z \"$RUNNING_PIDS\" ]; "); + sbStartCmd.append("do sleep 1; "); + sbStartCmd.append("RUNNING_PIDS=$(ps x | grep $process | grep -v grep | awk '{print $1}'); "); + sbStartCmd.append("done"); + // Create container with exposed ports final ContainerConfig containerConfig = ContainerConfig.builder() .hostConfig(hostConfig) .image(containerImage) .workingDir("/") .env(listEnv) - .cmd("sh", "-c", "while :; do sleep 1; done") + .cmd("sh", "-c", sbStartCmd.toString()) .build(); try { @@ -340,16 +354,49 @@ public class DockerInterpreterProcess extends RemoteInterpreterProcess { // Remove container docker.removeContainer(containerName); - } catch (DockerException e) { - e.printStackTrace(); - } catch (InterruptedException e) { - e.printStackTrace(); + } catch (DockerException | InterruptedException e) { + LOGGER.error(e.getMessage(), e); } // Close the docker client docker.close(); } + // Because docker can't create a container with the same name, it will cause the creation to fail. + // If the zeppelin service is abnormal and the container that was created is not closed properly, + // the container will not be created again. 
+ private void removeExistContainer(String containerName) { + boolean isExist = false; + try { + final List<Container> containers + = docker.listContainers(DockerClient.ListContainersParam.allContainers()); + for (Container container : containers) { + for (String name : container.names()) { + // because container name like '/md-shared', so need add '/' + if (StringUtils.equals(name, "/" + containerName)) { + isExist = true; + break; + } + } + } + + if (isExist == true) { + LOGGER.info("kill exist container {}", containerName); + docker.killContainer(containerName); + } + } catch (DockerException | InterruptedException e) { + LOGGER.error(e.getMessage(), e); + } finally { + try { + if (isExist == true) { + docker.removeContainer(containerName); + } + } catch (DockerException | InterruptedException e) { + LOGGER.error(e.getMessage(), e); + } + } + } + @Override public String getHost() { return containerHost;