This is an automated email from the ASF dual-hosted git repository.

snemeth pushed a commit to branch branch-3.2
in repository https://gitbox.apache.org/repos/asf/hadoop.git
The following commit(s) were added to refs/heads/branch-3.2 by this push:
     new 7a8b626  YARN-10789. RM HA startup can fail due to race conditions in ZKConfigurationStore. Contributed by Tarun Parimi
7a8b626 is described below

commit 7a8b6265c626c45d3af099723f0b1b9b76dd5cb4
Author: Szilard Nemeth <snem...@apache.org>
AuthorDate: Thu Jul 29 19:22:57 2021 +0200

    YARN-10789. RM HA startup can fail due to race conditions in ZKConfigurationStore. Contributed by Tarun Parimi
---
 .../capacity/conf/ZKConfigurationStore.java | 25 +++++++++++++++++-----
 1 file changed, 20 insertions(+), 5 deletions(-)

diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/conf/ZKConfigurationStore.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/conf/ZKConfigurationStore.java
index 3742c36..6f3612c 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/conf/ZKConfigurationStore.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/conf/ZKConfigurationStore.java
@@ -19,6 +19,7 @@
 package org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.conf;
 
 import com.google.common.annotations.VisibleForTesting;
+import org.apache.zookeeper.KeeperException.NodeExistsException;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.apache.hadoop.conf.Configuration;
@@ -62,7 +63,8 @@ public class ZKConfigurationStore extends YarnConfigurationStore {
   private static final String CONF_STORE_PATH = "CONF_STORE";
   private static final String FENCING_PATH = "FENCING";
   private static final String CONF_VERSION_PATH = "CONF_VERSION";
-
+  private static final String NODEEXISTS_MSG = "Encountered NodeExists error."
+      + " Skipping znode creation since another RM has already created it";
   private String zkVersionPath;
   private String logsPath;
   private String confStorePath;
@@ -93,7 +95,11 @@ public class ZKConfigurationStore extends YarnConfigurationStore {
     this.fencingNodePath = getNodePath(znodeParentPath, FENCING_PATH);
     this.confVersionPath = getNodePath(znodeParentPath, CONF_VERSION_PATH);
 
-    zkManager.createRootDirRecursively(znodeParentPath, zkAcl);
+    try {
+      zkManager.createRootDirRecursively(znodeParentPath, zkAcl);
+    } catch(NodeExistsException e) {
+      LOG.warn(NODEEXISTS_MSG, e);
+    }
     zkManager.delete(fencingNodePath);
 
     if (createNewZkPath(logsPath)) {
@@ -247,7 +253,12 @@ public class ZKConfigurationStore extends YarnConfigurationStore {
    */
   private boolean createNewZkPath(String path) throws Exception {
     if (!zkManager.exists(path)) {
-      zkManager.create(path);
+      try {
+        zkManager.create(path);
+      } catch(NodeExistsException e) {
+        LOG.warn(NODEEXISTS_MSG, e);
+        return false;
+      }
       return true;
     } else {
       return false;
@@ -282,8 +293,12 @@ public class ZKConfigurationStore extends YarnConfigurationStore {
 
   @VisibleForTesting
   protected void safeCreateZkData(String path, byte[] data) throws Exception {
-    zkManager.safeCreate(path, data, zkAcl, CreateMode.PERSISTENT,
-        zkAcl, fencingNodePath);
+    try {
+      zkManager.safeCreate(path, data, zkAcl, CreateMode.PERSISTENT,
+          zkAcl, fencingNodePath);
+    } catch(NodeExistsException e) {
+      LOG.warn(NODEEXISTS_MSG, e);
+    }
   }
 
   private static String getNodePath(String root, String nodeName) {

---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscr...@hadoop.apache.org
For additional commands, e-mail: common-commits-h...@hadoop.apache.org