This is an automated email from the ASF dual-hosted git repository.
bteke pushed a commit to branch trunk
in repository https://gitbox.apache.org/repos/asf/hadoop.git
The following commit(s) were added to refs/heads/trunk by this push:
new 964c1902c80 YARN-11463. Node Labels root directory creation doesn't
have a retry logic (#5562)
964c1902c80 is described below
commit 964c1902c8054dfe13c787222a12fb0daf1aaab9
Author: Ashutosh Gupta <[email protected]>
AuthorDate: Fri Apr 21 13:03:22 2023 +0100
YARN-11463. Node Labels root directory creation doesn't have a retry logic
(#5562)
Co-authored-by: Ashutosh Gupta <[email protected]>
---
.../apache/hadoop/yarn/conf/YarnConfiguration.java | 10 ++++++++
.../yarn/nodelabels/store/AbstractFSNodeStore.java | 28 ++++++++++++++++++++--
.../src/main/resources/yarn-default.xml | 16 +++++++++++++
.../nodelabels/TestFileSystemNodeLabelsStore.java | 3 ---
4 files changed, 52 insertions(+), 5 deletions(-)
diff --git
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java
index 6d77eb492dc..a3faec7171b 100644
---
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java
+++
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java
@@ -217,6 +217,16 @@ public class YarnConfiguration extends Configuration {
public static final int DEFAULT_RM_APPLICATION_MAX_TAG_LENGTH = 100;
+ public static final String NODE_STORE_ROOT_DIR_NUM_RETRIES =
+ RM_PREFIX + "nodestore-rootdir.num-retries";
+
+ public static final int NODE_STORE_ROOT_DIR_NUM_DEFAULT_RETRIES = 1000;
+
+ public static final String NODE_STORE_ROOT_DIR_RETRY_INTERVAL =
+ RM_PREFIX + "nodestore-rootdir.retry-interval-ms";
+
+ public static final int NODE_STORE_ROOT_DIR_RETRY_DEFAULT_INTERVAL = 1000;
+
public static final String RM_APPLICATION_MASTER_SERVICE_PROCESSORS =
RM_PREFIX + "application-master-service.processors";
diff --git
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/nodelabels/store/AbstractFSNodeStore.java
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/nodelabels/store/AbstractFSNodeStore.java
index 81514942af0..a697be19512 100644
---
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/nodelabels/store/AbstractFSNodeStore.java
+++
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/nodelabels/store/AbstractFSNodeStore.java
@@ -65,8 +65,32 @@ public abstract class AbstractFSNodeStore<M> {
this.fsWorkingPath = fsStorePath;
this.manager = mgr;
initFileSystem(conf);
- // mkdir of root dir path
- fs.mkdirs(fsWorkingPath);
+ // mkdir of root dir path with retry logic
+ int maxRetries = conf.getInt(YarnConfiguration.NODE_STORE_ROOT_DIR_NUM_RETRIES,
+ YarnConfiguration.NODE_STORE_ROOT_DIR_NUM_DEFAULT_RETRIES);
+ int retryCount = 0;
+ boolean success = fs.mkdirs(fsWorkingPath);
+
+ while (!success && retryCount < maxRetries) {
+ try {
+ if (!fs.exists(fsWorkingPath)) {
+ success = fs.mkdirs(fsWorkingPath);
+ } else {
+ success = true;
+ }
+ } catch (IOException e) {
+ retryCount++;
+ if (retryCount >= maxRetries) {
+ throw e;
+ }
+ try {
+ Thread.sleep(conf.getInt(YarnConfiguration.NODE_STORE_ROOT_DIR_RETRY_INTERVAL,
+ YarnConfiguration.NODE_STORE_ROOT_DIR_RETRY_DEFAULT_INTERVAL));
+ } catch (InterruptedException ie) {
+ throw new RuntimeException(ie);
+ }
+ }
+ }
this.replication = conf.getInt(YarnConfiguration.FS_STORE_FILE_REPLICATION,
YarnConfiguration.DEFAULT_FS_STORE_FILE_REPLICATION);
LOG.info("Created store directory :" + fsWorkingPath);
diff --git
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml
index b9385d1c276..4fc414f0e01 100644
---
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml
+++
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml
@@ -5177,4 +5177,20 @@
<value>1</value>
</property>
+ <property>
+ <description>
+ Number of Retries while trying to make root directory for node store.
+ </description>
+ <name>yarn.resourcemanager.nodestore-rootdir.num-retries</name>
+ <value>1000</value>
+ </property>
+
+ <property>
+ <description>
+ Interval in ms between retries while trying to make root directory for node store.
+ </description>
+ <name>yarn.resourcemanager.nodestore-rootdir.retry-interval-ms</name>
+ <value>1000</value>
+ </property>
+
</configuration>
diff --git
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/nodelabels/TestFileSystemNodeLabelsStore.java
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/nodelabels/TestFileSystemNodeLabelsStore.java
index 099684318f4..a861b0654ea 100644
---
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/nodelabels/TestFileSystemNodeLabelsStore.java
+++
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/nodelabels/TestFileSystemNodeLabelsStore.java
@@ -359,9 +359,6 @@ public class TestFileSystemNodeLabelsStore extends NodeLabelTestBase {
mockStore.setFs(mockFs);
verifyMkdirsCount(mockStore, true, 1);
- verifyMkdirsCount(mockStore, false, 2);
- verifyMkdirsCount(mockStore, true, 3);
- verifyMkdirsCount(mockStore, false, 4);
}
private void verifyMkdirsCount(FileSystemNodeLabelsStore store,
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]