This is an automated email from the ASF dual-hosted git repository. pdallig pushed a commit to branch recover_interpretergroup in repository https://gitbox.apache.org/repos/asf/zeppelin.git
commit 4c3f8a1cbc685541d334b299bf3642b334948561
Author: Philipp Dallig <philipp.dal...@gmail.com>
AuthorDate: Fri Oct 22 14:52:55 2021 +0200

    Restart a crashed interpreterprocess
---
 .../org/apache/zeppelin/interpreter/InterpreterSetting.java |  8 ++++++++
 .../zeppelin/interpreter/ManagedInterpreterGroup.java       | 13 +++++++------
 .../zeppelin/interpreter/remote/RemoteInterpreter.java      |  2 +-
 3 files changed, 16 insertions(+), 7 deletions(-)

diff --git a/zeppelin-zengine/src/main/java/org/apache/zeppelin/interpreter/InterpreterSetting.java b/zeppelin-zengine/src/main/java/org/apache/zeppelin/interpreter/InterpreterSetting.java
index bff9273..06ec121 100644
--- a/zeppelin-zengine/src/main/java/org/apache/zeppelin/interpreter/InterpreterSetting.java
+++ b/zeppelin-zengine/src/main/java/org/apache/zeppelin/interpreter/InterpreterSetting.java
@@ -455,6 +455,14 @@ public class InterpreterSetting {
             groupId, executionContext);
         ManagedInterpreterGroup intpGroup = createInterpreterGroup(groupId);
         interpreterGroups.put(groupId, intpGroup);
+      } else {
+        // Check for a crashed interpreter process and restart interpreterGroup in this case
+        ManagedInterpreterGroup interpreterGroup = interpreterGroups.get(groupId);
+        if (interpreterGroup.isInterpreterProcessCrashed()) {
+          interpreterGroup.close();
+          interpreterGroups.remove(interpreterGroup.getId());
+          return getOrCreateInterpreterGroup(executionContext);
+        }
       }
       return interpreterGroups.get(groupId);
     } finally {
diff --git a/zeppelin-zengine/src/main/java/org/apache/zeppelin/interpreter/ManagedInterpreterGroup.java b/zeppelin-zengine/src/main/java/org/apache/zeppelin/interpreter/ManagedInterpreterGroup.java
index af7e7e8..1140ec9 100644
--- a/zeppelin-zengine/src/main/java/org/apache/zeppelin/interpreter/ManagedInterpreterGroup.java
+++ b/zeppelin-zengine/src/main/java/org/apache/zeppelin/interpreter/ManagedInterpreterGroup.java
@@ -57,16 +57,17 @@ public class ManagedInterpreterGroup extends InterpreterGroup {
     return interpreterSetting;
   }
 
+  public boolean isInterpreterProcessCrashed() {
+    if (remoteInterpreterProcess == null) {
+      return false;
+    }
+    return !remoteInterpreterProcess.isRunning();
+  }
+
   public RemoteInterpreterProcess getOrCreateInterpreterProcess(String userName,
                                                                 Properties properties)
       throws IOException {
     synchronized (interpreterProcessCreationLock) {
-      // Stop the interpreterProcess to cleanup the state if not running
-      if (remoteInterpreterProcess != null && !remoteInterpreterProcess.isRunning()) {
-        LOGGER.info("InterpreterProcess for InterpreterGroup {} is not running. Stop the interpreter process to clean up the state. Error message: {}", getId(), remoteInterpreterProcess.getErrorMessage());
-        remoteInterpreterProcess.stop();
-        remoteInterpreterProcess = null;
-      }
       if (remoteInterpreterProcess == null) {
         LOGGER.info("Create InterpreterProcess for InterpreterGroup: {}", getId());
         remoteInterpreterProcess = interpreterSetting.createInterpreterProcess(id, userName,
diff --git a/zeppelin-zengine/src/main/java/org/apache/zeppelin/interpreter/remote/RemoteInterpreter.java b/zeppelin-zengine/src/main/java/org/apache/zeppelin/interpreter/remote/RemoteInterpreter.java
index 439bf0b..967f4fa 100644
--- a/zeppelin-zengine/src/main/java/org/apache/zeppelin/interpreter/remote/RemoteInterpreter.java
+++ b/zeppelin-zengine/src/main/java/org/apache/zeppelin/interpreter/remote/RemoteInterpreter.java
@@ -96,7 +96,7 @@ public class RemoteInterpreter extends Interpreter {
   }
 
   public synchronized RemoteInterpreterProcess getOrCreateInterpreterProcess() throws IOException {
-    if (this.interpreterProcess != null && this.interpreterProcess.isRunning()) {
+    if (this.interpreterProcess != null) {
       return this.interpreterProcess;
     }
     ManagedInterpreterGroup intpGroup = getInterpreterGroup();