HDFS-12607. [READ] Even one dead datanode with PROVIDED storage results in ProvidedStorageInfo being marked as FAILED
Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/71d0a825 Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/71d0a825 Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/71d0a825 Branch: refs/heads/YARN-6592 Commit: 71d0a825711387fe06396323a9ca6a5af0ade415 Parents: 98f5ed5 Author: Virajith Jalaparti <viraj...@apache.org> Authored: Mon Nov 6 11:05:59 2017 -0800 Committer: Chris Douglas <cdoug...@apache.org> Committed: Fri Dec 15 17:51:39 2017 -0800 ---------------------------------------------------------------------- .../blockmanagement/DatanodeDescriptor.java | 6 ++- .../blockmanagement/ProvidedStorageMap.java | 40 +++++++++++++------- .../TestNameNodeProvidedImplementation.java | 40 ++++++++++++++++++++ 3 files changed, 71 insertions(+), 15 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hadoop/blob/71d0a825/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeDescriptor.java ---------------------------------------------------------------------- diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeDescriptor.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeDescriptor.java index e3d6582..c17ab4c 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeDescriptor.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeDescriptor.java @@ -455,8 +455,10 @@ public class DatanodeDescriptor extends DatanodeInfo { totalDfsUsed += report.getDfsUsed(); totalNonDfsUsed += report.getNonDfsUsed(); - if (StorageType.PROVIDED.equals( - report.getStorage().getStorageType())) { + // for PROVIDED storages, do not call updateStorage() unless + // 
DatanodeStorageInfo already exists! + if (StorageType.PROVIDED.equals(report.getStorage().getStorageType()) + && storageMap.get(report.getStorage().getStorageID()) == null) { continue; } DatanodeStorageInfo storage = updateStorage(report.getStorage()); http://git-wip-us.apache.org/repos/asf/hadoop/blob/71d0a825/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/ProvidedStorageMap.java ---------------------------------------------------------------------- diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/ProvidedStorageMap.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/ProvidedStorageMap.java index a848d50..3d19775 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/ProvidedStorageMap.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/ProvidedStorageMap.java @@ -66,7 +66,6 @@ public class ProvidedStorageMap { // limit to a single provider for now private RwLock lock; private BlockManager bm; - private boolean hasDNs = false; private BlockAliasMap aliasMap; private final String storageId; @@ -123,6 +122,11 @@ public class ProvidedStorageMap { BlockReportContext context) throws IOException { if (providedEnabled && storageId.equals(s.getStorageID())) { if (StorageType.PROVIDED.equals(s.getStorageType())) { + if (providedStorageInfo.getState() == State.FAILED + && s.getState() == State.NORMAL) { + providedStorageInfo.setState(State.NORMAL); + LOG.info("Provided storage transitioning to state " + State.NORMAL); + } processProvidedStorageReport(context); dn.injectStorage(providedStorageInfo); return providedDescriptor.getProvidedStorage(dn, s); @@ -135,21 +139,14 @@ public class ProvidedStorageMap { private void processProvidedStorageReport(BlockReportContext context) throws IOException { assert lock.hasWriteLock() : "Not 
holding write lock"; - if (hasDNs) { - return; - } - if (providedStorageInfo.getBlockReportCount() == 0) { + if (providedStorageInfo.getBlockReportCount() == 0 + || providedDescriptor.activeProvidedDatanodes() == 0) { LOG.info("Calling process first blk report from storage: " + providedStorageInfo); // first pass; periodic refresh should call bm.processReport bm.processFirstBlockReport(providedStorageInfo, new ProvidedBlockList(aliasMap.getReader(null).iterator())); - } else { - bm.processReport(providedStorageInfo, - new ProvidedBlockList(aliasMap.getReader(null).iterator()), - context); } - hasDNs = true; } @VisibleForTesting @@ -167,9 +164,10 @@ public class ProvidedStorageMap { public void removeDatanode(DatanodeDescriptor dnToRemove) { if (providedEnabled) { assert lock.hasWriteLock() : "Not holding write lock"; - int remainingDatanodes = providedDescriptor.remove(dnToRemove); - if (remainingDatanodes == 0) { - hasDNs = false; + providedDescriptor.remove(dnToRemove); + // if all datanodes fail, set the block report count to 0 + if (providedDescriptor.activeProvidedDatanodes() == 0) { + providedStorageInfo.setBlockReportCount(0); } } } @@ -466,6 +464,22 @@ public class ProvidedStorageMap { return false; } } + + @Override + void setState(DatanodeStorage.State state) { + if (state == State.FAILED) { + // The state should change to FAILED only when there are no active + // datanodes with PROVIDED storage. + ProvidedDescriptor dn = (ProvidedDescriptor) getDatanodeDescriptor(); + if (dn.activeProvidedDatanodes() == 0) { + LOG.info("Provided storage {} transitioning to state {}", + this, State.FAILED); + super.setState(state); + } + } else { + super.setState(state); + } + } } /** * Used to emulate block reports for provided blocks. 
http://git-wip-us.apache.org/repos/asf/hadoop/blob/71d0a825/hadoop-tools/hadoop-fs2img/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameNodeProvidedImplementation.java ---------------------------------------------------------------------- diff --git a/hadoop-tools/hadoop-fs2img/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameNodeProvidedImplementation.java b/hadoop-tools/hadoop-fs2img/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameNodeProvidedImplementation.java index 2170baa..aae04be 100644 --- a/hadoop-tools/hadoop-fs2img/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameNodeProvidedImplementation.java +++ b/hadoop-tools/hadoop-fs2img/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameNodeProvidedImplementation.java @@ -492,4 +492,44 @@ public class TestNameNodeProvidedImplementation { dnInfos[0].getXferAddr()); } } + + @Test(timeout=300000) + public void testTransientDeadDatanodes() throws Exception { + createImage(new FSTreeWalk(NAMEPATH, conf), NNDIRPATH, + FixedBlockResolver.class); + // 2 Datanodes, 1 PROVIDED and other DISK + startCluster(NNDIRPATH, 2, null, + new StorageType[][] { + {StorageType.PROVIDED}, + {StorageType.DISK}}, + false); + + DataNode providedDatanode = cluster.getDataNodes().get(0); + + DFSClient client = new DFSClient(new InetSocketAddress("localhost", + cluster.getNameNodePort()), cluster.getConfiguration(0)); + + for (int i= 0; i < numFiles; i++) { + String filename = "/" + filePrefix + i + fileSuffix; + + DatanodeInfo[] dnInfos = getAndCheckBlockLocations(client, filename, 1); + // location should be the provided DN. 
+ assertTrue(dnInfos[0].getDatanodeUuid() + .equals(providedDatanode.getDatanodeUuid())); + + // NameNode thinks the datanode is down + BlockManagerTestUtil.noticeDeadDatanode( + cluster.getNameNode(), + providedDatanode.getDatanodeId().getXferAddr()); + cluster.waitActive(); + cluster.triggerHeartbeats(); + Thread.sleep(1000); + + // should find the block on the same provided datanode again. + dnInfos = getAndCheckBlockLocations(client, filename, 1); + assertTrue( + dnInfos[0].getDatanodeUuid() + .equals(providedDatanode.getDatanodeUuid())); + } + } } --------------------------------------------------------------------- To unsubscribe, e-mail: common-commits-unsubscr...@hadoop.apache.org For additional commands, e-mail: common-commits-h...@hadoop.apache.org