This is an automated email from the ASF dual-hosted git repository.

bteke pushed a commit to branch trunk in repository https://gitbox.apache.org/repos/asf/hadoop.git
The following commit(s) were added to refs/heads/trunk by this push: new 964c1902c80 YARN-11463. Node Labels root directory creation doesn't have a retry logic (#5562) 964c1902c80 is described below commit 964c1902c8054dfe13c787222a12fb0daf1aaab9 Author: Ashutosh Gupta <ashutosh.gu...@st.niituniversity.in> AuthorDate: Fri Apr 21 13:03:22 2023 +0100 YARN-11463. Node Labels root directory creation doesn't have a retry logic (#5562) Co-authored-by: Ashutosh Gupta <ashu...@amazon.com> --- .../apache/hadoop/yarn/conf/YarnConfiguration.java | 10 ++++++++ .../yarn/nodelabels/store/AbstractFSNodeStore.java | 28 ++++++++++++++++++++-- .../src/main/resources/yarn-default.xml | 16 +++++++++++++ .../nodelabels/TestFileSystemNodeLabelsStore.java | 3 --- 4 files changed, 52 insertions(+), 5 deletions(-) diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java index 6d77eb492dc..a3faec7171b 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java @@ -217,6 +217,16 @@ public class YarnConfiguration extends Configuration { public static final int DEFAULT_RM_APPLICATION_MAX_TAG_LENGTH = 100; + public static final String NODE_STORE_ROOT_DIR_NUM_RETRIES = + RM_PREFIX + "nodestore-rootdir.num-retries"; + + public static final int NODE_STORE_ROOT_DIR_NUM_DEFAULT_RETRIES = 1000; + + public static final String NODE_STORE_ROOT_DIR_RETRY_INTERVAL = + RM_PREFIX + "nodestore-rootdir.retry-interval-ms"; + + public static final int NODE_STORE_ROOT_DIR_RETRY_DEFAULT_INTERVAL = 1000; + public static final String RM_APPLICATION_MASTER_SERVICE_PROCESSORS = RM_PREFIX + "application-master-service.processors"; diff --git 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/nodelabels/store/AbstractFSNodeStore.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/nodelabels/store/AbstractFSNodeStore.java index 81514942af0..a697be19512 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/nodelabels/store/AbstractFSNodeStore.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/nodelabels/store/AbstractFSNodeStore.java @@ -65,8 +65,32 @@ public abstract class AbstractFSNodeStore<M> { this.fsWorkingPath = fsStorePath; this.manager = mgr; initFileSystem(conf); - // mkdir of root dir path - fs.mkdirs(fsWorkingPath); + // mkdir of root dir path with retry logic + int maxRetries = conf.getInt(YarnConfiguration.NODE_STORE_ROOT_DIR_NUM_RETRIES, + YarnConfiguration.NODE_STORE_ROOT_DIR_NUM_DEFAULT_RETRIES); + int retryCount = 0; + boolean success = fs.mkdirs(fsWorkingPath); + + while (!success && retryCount < maxRetries) { + try { + if (!fs.exists(fsWorkingPath)) { + success = fs.mkdirs(fsWorkingPath); + } else { + success = true; + } + } catch (IOException e) { + retryCount++; + if (retryCount >= maxRetries) { + throw e; + } + try { + Thread.sleep(conf.getInt(YarnConfiguration.NODE_STORE_ROOT_DIR_RETRY_INTERVAL, + YarnConfiguration.NODE_STORE_ROOT_DIR_RETRY_DEFAULT_INTERVAL)); + } catch (InterruptedException ie) { + throw new RuntimeException(ie); + } + } + } this.replication = conf.getInt(YarnConfiguration.FS_STORE_FILE_REPLICATION, YarnConfiguration.DEFAULT_FS_STORE_FILE_REPLICATION); LOG.info("Created store directory :" + fsWorkingPath); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml index b9385d1c276..4fc414f0e01 100644 --- 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml @@ -5177,4 +5177,20 @@ <value>1</value> </property> + <property> + <description> + Number of Retries while trying to make root directory for node store. + </description> + <name>yarn.resourcemanager.nodestore-rootdir.num-retries</name> + <value>1000</value> + </property> + + <property> + <description> + Interval in ms between retries while trying to make root directory for node store. + </description> + <name>yarn.resourcemanager.nodestore-rootdir.retry-interval-ms</name> + <value>1000</value> + </property> + </configuration> diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/nodelabels/TestFileSystemNodeLabelsStore.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/nodelabels/TestFileSystemNodeLabelsStore.java index 099684318f4..a861b0654ea 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/nodelabels/TestFileSystemNodeLabelsStore.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/nodelabels/TestFileSystemNodeLabelsStore.java @@ -359,9 +359,6 @@ public class TestFileSystemNodeLabelsStore extends NodeLabelTestBase { mockStore.setFs(mockFs); verifyMkdirsCount(mockStore, true, 1); - verifyMkdirsCount(mockStore, false, 2); - verifyMkdirsCount(mockStore, true, 3); - verifyMkdirsCount(mockStore, false, 4); } private void verifyMkdirsCount(FileSystemNodeLabelsStore store, --------------------------------------------------------------------- To unsubscribe, e-mail: common-commits-unsubscr...@hadoop.apache.org For additional commands, e-mail: common-commits-h...@hadoop.apache.org