HDDS-834. Datanode goes OOM because of segment size. Contributed by Mukul Kumar Singh.
Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/a9482817 Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/a9482817 Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/a9482817 Branch: refs/heads/HDFS-13891 Commit: a94828170684793b80efdd76dc8a3167e324c0ea Parents: 3fade86 Author: Shashikant Banerjee <shashik...@apache.org> Authored: Wed Nov 14 15:53:22 2018 +0530 Committer: Shashikant Banerjee <shashik...@apache.org> Committed: Wed Nov 14 15:53:22 2018 +0530 ---------------------------------------------------------------------- .../apache/hadoop/hdds/scm/ScmConfigKeys.java | 2 +- .../common/src/main/resources/ozone-default.xml | 4 +-- .../server/ratis/ContainerStateMachine.java | 27 +++++++++++++++----- 3 files changed, 24 insertions(+), 9 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hadoop/blob/a9482817/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java ---------------------------------------------------------------------- diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java index cedcc43..b748d69 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java @@ -74,7 +74,7 @@ public final class ScmConfigKeys { public static final String DFS_CONTAINER_RATIS_SEGMENT_SIZE_KEY = "dfs.container.ratis.segment.size"; public static final int DFS_CONTAINER_RATIS_SEGMENT_SIZE_DEFAULT = - 1 * 1024 * 1024 * 1024; + 16 * 1024; public static final String DFS_CONTAINER_RATIS_SEGMENT_PREALLOCATED_SIZE_KEY = "dfs.container.ratis.segment.preallocated.size"; public static final int 
http://git-wip-us.apache.org/repos/asf/hadoop/blob/a9482817/hadoop-hdds/common/src/main/resources/ozone-default.xml ---------------------------------------------------------------------- diff --git a/hadoop-hdds/common/src/main/resources/ozone-default.xml b/hadoop-hdds/common/src/main/resources/ozone-default.xml index 54bffd5..e94e7e1 100644 --- a/hadoop-hdds/common/src/main/resources/ozone-default.xml +++ b/hadoop-hdds/common/src/main/resources/ozone-default.xml @@ -175,10 +175,10 @@ </property> <property> <name>dfs.container.ratis.segment.size</name> - <value>1073741824</value> + <value>16384</value> <tag>OZONE, RATIS, PERFORMANCE</tag> <description>The size of the raft segment used by Apache Ratis on datanodes. - (1 GB by default) + (16 KB by default) </description> </property> <property> http://git-wip-us.apache.org/repos/asf/hadoop/blob/a9482817/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/ContainerStateMachine.java ---------------------------------------------------------------------- diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/ContainerStateMachine.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/ContainerStateMachine.java index 3899bde..a3b496a 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/ContainerStateMachine.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/ContainerStateMachine.java @@ -120,7 +120,8 @@ public class ContainerStateMachine extends BaseStateMachine { createContainerFutureMap; private ExecutorService[] executors; private final int numExecutors; - private final Map<Long, Long> containerCommandCompletionMap; + private final Map<Long, Long> applyTransactionCompletionMap; + private long lastIndex; /** 
* CSM metrics. */ @@ -138,7 +139,8 @@ public class ContainerStateMachine extends BaseStateMachine { this.executors = executors.toArray(new ExecutorService[numExecutors]); this.writeChunkFutureMap = new ConcurrentHashMap<>(); this.createContainerFutureMap = new ConcurrentHashMap<>(); - containerCommandCompletionMap = new ConcurrentHashMap<>(); + applyTransactionCompletionMap = new ConcurrentHashMap<>(); + this.lastIndex = RaftServerConstants.INVALID_LOG_INDEX; } @Override @@ -162,10 +164,12 @@ public class ContainerStateMachine extends BaseStateMachine { private long loadSnapshot(SingleFileSnapshotInfo snapshot) { if (snapshot == null) { - TermIndex empty = TermIndex.newTermIndex(0, 0); + TermIndex empty = TermIndex.newTermIndex(0, + RaftServerConstants.INVALID_LOG_INDEX); LOG.info("The snapshot info is null." + "Setting the last applied index to:" + empty); setLastAppliedTermIndex(empty); + lastIndex = RaftServerConstants.INVALID_LOG_INDEX; return RaftServerConstants.INVALID_LOG_INDEX; } @@ -174,6 +178,7 @@ public class ContainerStateMachine extends BaseStateMachine { snapshot.getFile().getPath().toFile()); LOG.info("Setting the last applied index to " + last); setLastAppliedTermIndex(last); + lastIndex = last.getIndex(); return last.getIndex(); } @@ -471,7 +476,7 @@ public class ContainerStateMachine extends BaseStateMachine { Long appliedTerm = null; long appliedIndex = -1; for(long i = getLastAppliedTermIndex().getIndex() + 1;; i++) { - final Long removed = containerCommandCompletionMap.remove(i); + final Long removed = applyTransactionCompletionMap.remove(i); if (removed == null) { break; } @@ -479,7 +484,7 @@ public class ContainerStateMachine extends BaseStateMachine { appliedIndex = i; } if (appliedTerm != null) { - updateLastAppliedTermIndex(appliedIndex, appliedTerm); + updateLastAppliedTermIndex(appliedTerm, appliedIndex); } } @@ -489,6 +494,15 @@ public class ContainerStateMachine extends BaseStateMachine { @Override public CompletableFuture<Message> 
applyTransaction(TransactionContext trx) { long index = trx.getLogEntry().getIndex(); + + // ApplyTransaction call can come with an entryIndex much greater than + // lastIndex updated because in between entries in the raft log can be + // appended because raft config persistence. Just add a dummy entry + // for those. + for (long i = lastIndex + 1; i < index; i++) { + LOG.info("Gap in indexes at:{} detected, adding dummy entries ", i); + applyTransactionCompletionMap.put(i, trx.getLogEntry().getTerm()); + } try { metrics.incNumApplyTransactionsOps(); ContainerCommandRequestProto requestProto = @@ -553,9 +567,10 @@ public class ContainerStateMachine extends BaseStateMachine { }); } + lastIndex = index; future.thenAccept(m -> { final Long previous = - containerCommandCompletionMap + applyTransactionCompletionMap .put(index, trx.getLogEntry().getTerm()); Preconditions.checkState(previous == null); updateLastApplied(); --------------------------------------------------------------------- To unsubscribe, e-mail: common-commits-unsubscr...@hadoop.apache.org For additional commands, e-mail: common-commits-h...@hadoop.apache.org