HDFS-12496. Make QuorumJournalManager timeout properties configurable. Contributed by Ajay Kumar.
Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/b9e423fa Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/b9e423fa Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/b9e423fa Branch: refs/heads/YARN-6592 Commit: b9e423fa8d30ea89244f6ec018a8064cc87d94a9 Parents: b9db0af Author: Arpit Agarwal <a...@apache.org> Authored: Thu Sep 21 08:44:43 2017 -0700 Committer: Arpit Agarwal <a...@apache.org> Committed: Thu Sep 21 08:44:43 2017 -0700 ---------------------------------------------------------------------- .../org/apache/hadoop/hdfs/DFSConfigKeys.java | 4 ++ .../qjournal/client/QuorumJournalManager.java | 39 +++++++++----------- .../src/main/resources/hdfs-default.xml | 11 ++++++ 3 files changed, 33 insertions(+), 21 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hadoop/blob/b9e423fa/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java ---------------------------------------------------------------------- diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java index 91f3bb9..b4842f9 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java @@ -725,6 +725,10 @@ public class DFSConfigKeys extends CommonConfigurationKeys { "dfs.edit.log.transfer.bandwidthPerSec"; public static final long DFS_EDIT_LOG_TRANSFER_RATE_DEFAULT = 0; //no throttling + public static final String DFS_QJM_OPERATIONS_TIMEOUT = + "dfs.qjm.operations.timeout"; + public static final long DFS_QJM_OPERATIONS_TIMEOUT_DEFAULT = 60000; + // Datanode File IO Stats public static final String DFS_DATANODE_ENABLE_FILEIO_FAULT_INJECTION_KEY = "dfs.datanode.enable.fileio.fault.injection"; http://git-wip-us.apache.org/repos/asf/hadoop/blob/b9e423fa/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/QuorumJournalManager.java ---------------------------------------------------------------------- diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/QuorumJournalManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/QuorumJournalManager.java index 97c0050..f66e2c0 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/QuorumJournalManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/QuorumJournalManager.java @@ -27,6 +27,7 @@ import java.util.List; import java.util.Map; import java.util.Map.Entry; import java.util.PriorityQueue; +import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeoutException; import org.apache.commons.logging.Log; @@ -76,18 +77,10 @@ public class QuorumJournalManager implements JournalManager { private final int newEpochTimeoutMs; private final int writeTxnsTimeoutMs; - // Since these don't occur during normal operation, we can - // use rather lengthy timeouts, and don't need to make them - // configurable. - private static final int FORMAT_TIMEOUT_MS = 60000; - private static final int HASDATA_TIMEOUT_MS = 60000; - private static final int CAN_ROLL_BACK_TIMEOUT_MS = 60000; - private static final int FINALIZE_TIMEOUT_MS = 60000; - private static final int PRE_UPGRADE_TIMEOUT_MS = 60000; - private static final int ROLL_BACK_TIMEOUT_MS = 60000; - private static final int DISCARD_SEGMENTS_TIMEOUT_MS = 60000; - private static final int UPGRADE_TIMEOUT_MS = 60000; - private static final int GET_JOURNAL_CTIME_TIMEOUT_MS = 60000; + // This timeout is used for calls that don't occur during normal operation + // e.g. format, upgrade operations and a few others. So we can use rather + // lengthy timeouts by default. + private final int timeoutMs; private final Configuration conf; private final URI uri; @@ -141,6 +134,10 @@ public class QuorumJournalManager implements JournalManager { this.writeTxnsTimeoutMs = conf.getInt( DFSConfigKeys.DFS_QJOURNAL_WRITE_TXNS_TIMEOUT_KEY, DFSConfigKeys.DFS_QJOURNAL_WRITE_TXNS_TIMEOUT_DEFAULT); + this.timeoutMs = (int) conf.getTimeDuration(DFSConfigKeys + .DFS_QJM_OPERATIONS_TIMEOUT, + DFSConfigKeys.DFS_QJM_OPERATIONS_TIMEOUT_DEFAULT, TimeUnit + .MILLISECONDS); } protected List<AsyncLogger> createLoggers( @@ -201,7 +198,7 @@ public class QuorumJournalManager implements JournalManager { public void format(NamespaceInfo nsInfo) throws IOException { QuorumCall<AsyncLogger,Void> call = loggers.format(nsInfo); try { - call.waitFor(loggers.size(), loggers.size(), 0, FORMAT_TIMEOUT_MS, + call.waitFor(loggers.size(), loggers.size(), 0, timeoutMs, "format"); } catch (InterruptedException e) { throw new IOException("Interrupted waiting for format() response"); @@ -220,7 +217,7 @@ public class QuorumJournalManager implements JournalManager { loggers.isFormatted(); try { - call.waitFor(loggers.size(), 0, 0, HASDATA_TIMEOUT_MS, "hasSomeData"); + call.waitFor(loggers.size(), 0, 0, timeoutMs, "hasSomeData"); } catch (InterruptedException e) { throw new IOException("Interrupted while determining if JNs have data"); } catch (TimeoutException e) { @@ -505,7 +502,7 @@ public class QuorumJournalManager implements JournalManager { public void doPreUpgrade() throws IOException { QuorumCall<AsyncLogger, Void> call = loggers.doPreUpgrade(); try { - call.waitFor(loggers.size(), loggers.size(), 0, PRE_UPGRADE_TIMEOUT_MS, + call.waitFor(loggers.size(), loggers.size(), 0, timeoutMs, "doPreUpgrade"); if (call.countExceptions() > 0) { @@ -522,7 +519,7 @@ public class QuorumJournalManager implements JournalManager { public void doUpgrade(Storage storage) throws IOException { QuorumCall<AsyncLogger, Void> call = loggers.doUpgrade(storage); try { - call.waitFor(loggers.size(), loggers.size(), 0, UPGRADE_TIMEOUT_MS, + call.waitFor(loggers.size(), loggers.size(), 0, timeoutMs, "doUpgrade"); if (call.countExceptions() > 0) { @@ -539,7 +536,7 @@ public class QuorumJournalManager implements JournalManager { public void doFinalize() throws IOException { QuorumCall<AsyncLogger, Void> call = loggers.doFinalize(); try { - call.waitFor(loggers.size(), loggers.size(), 0, FINALIZE_TIMEOUT_MS, + call.waitFor(loggers.size(), loggers.size(), 0, timeoutMs, "doFinalize"); if (call.countExceptions() > 0) { @@ -558,7 +555,7 @@ public class QuorumJournalManager implements JournalManager { QuorumCall<AsyncLogger, Boolean> call = loggers.canRollBack(storage, prevStorage, targetLayoutVersion); try { - call.waitFor(loggers.size(), loggers.size(), 0, CAN_ROLL_BACK_TIMEOUT_MS, + call.waitFor(loggers.size(), loggers.size(), 0, timeoutMs, "lockSharedStorage"); if (call.countExceptions() > 0) { @@ -591,7 +588,7 @@ public class QuorumJournalManager implements JournalManager { public void doRollback() throws IOException { QuorumCall<AsyncLogger, Void> call = loggers.doRollback(); try { - call.waitFor(loggers.size(), loggers.size(), 0, ROLL_BACK_TIMEOUT_MS, + call.waitFor(loggers.size(), loggers.size(), 0, timeoutMs, "doRollback"); if (call.countExceptions() > 0) { @@ -609,7 +606,7 @@ public class QuorumJournalManager implements JournalManager { QuorumCall<AsyncLogger, Void> call = loggers.discardSegments(startTxId); try { call.waitFor(loggers.size(), loggers.size(), 0, - DISCARD_SEGMENTS_TIMEOUT_MS, "discardSegments"); + timeoutMs, "discardSegments"); if (call.countExceptions() > 0) { call.rethrowException( "Could not perform discardSegments of one or more JournalNodes"); @@ -628,7 +625,7 @@ public class QuorumJournalManager implements JournalManager { QuorumCall<AsyncLogger, Long> call = loggers.getJournalCTime(); try { call.waitFor(loggers.size(), loggers.size(), 0, - GET_JOURNAL_CTIME_TIMEOUT_MS, "getJournalCTime"); + timeoutMs, "getJournalCTime"); if (call.countExceptions() > 0) { call.rethrowException("Could not journal CTime for one " http://git-wip-us.apache.org/repos/asf/hadoop/blob/b9e423fa/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml ---------------------------------------------------------------------- diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml b/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml index 9327a2c..4e4ee33 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml @@ -4629,4 +4629,15 @@ </description> </property> + <property> + <name>dfs.qjm.operations.timeout</name> + <value>60s</value> + <description> + Common key to set timeout for related operations in + QuorumJournalManager. This setting supports multiple time unit suffixes + as described in dfs.heartbeat.interval. + If no suffix is specified then milliseconds is assumed. + </description> + </property> + </configuration> --------------------------------------------------------------------- To unsubscribe, e-mail: common-commits-unsubscr...@hadoop.apache.org For additional commands, e-mail: common-commits-h...@hadoop.apache.org