This is an automated email from the ASF dual-hosted git repository. ayushsaxena pushed a commit to branch trunk in repository https://gitbox.apache.org/repos/asf/hadoop.git
The following commit(s) were added to refs/heads/trunk by this push: new 429da63 HDFS-15186. Erasure Coding: Decommission may generate the parity block's content with all 0 in some case. Contributed by Yao Guangdong. 429da63 is described below commit 429da635ec70f9abe5ab71e24c9f2eec0aa36e18 Author: Ayush Saxena <ayushsax...@apache.org> AuthorDate: Fri Feb 28 00:22:37 2020 +0530 HDFS-15186. Erasure Coding: Decommission may generate the parity block's content with all 0 in some case. Contributed by Yao Guangdong. --- .../hdfs/server/blockmanagement/BlockManager.java | 6 ++- .../server/blockmanagement/DatanodeDescriptor.java | 3 +- .../hadoop/hdfs/TestDecommissionWithStriped.java | 63 ++++++++++++++++++++++ 3 files changed, 69 insertions(+), 3 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java index 605f502..262e0c2 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java @@ -2445,14 +2445,16 @@ public class BlockManager implements BlockStatsMXBean { if (priority != LowRedundancyBlocks.QUEUE_HIGHEST_PRIORITY && (!node.isDecommissionInProgress() && !node.isEnteringMaintenance()) && node.getNumberOfBlocksToBeReplicated() >= maxReplicationStreams) { - if (isStriped && state == StoredReplicaState.LIVE) { + if (isStriped && (state == StoredReplicaState.LIVE + || state == StoredReplicaState.DECOMMISSIONING)) { liveBusyBlockIndices.add(blockIndex); } continue; // already reached replication limit } if (node.getNumberOfBlocksToBeReplicated() >= replicationStreamsHardLimit) { - if (isStriped && state == StoredReplicaState.LIVE) { + if (isStriped && (state == StoredReplicaState.LIVE + || state == 
StoredReplicaState.DECOMMISSIONING)) { liveBusyBlockIndices.add(blockIndex); } continue; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeDescriptor.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeDescriptor.java index 9035fd3..3fa9b3a 100755 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeDescriptor.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeDescriptor.java @@ -639,7 +639,8 @@ public class DatanodeDescriptor extends DatanodeInfo { pendingReplicationWithoutTargets++; } - void decrementPendingReplicationWithoutTargets() { + @VisibleForTesting + public void decrementPendingReplicationWithoutTargets() { pendingReplicationWithoutTargets--; } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDecommissionWithStriped.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDecommissionWithStriped.java index be3abab..f7dbe50 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDecommissionWithStriped.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDecommissionWithStriped.java @@ -351,6 +351,69 @@ public class TestDecommissionWithStriped { } /** + * Decommission may generate the parity block's content with all 0 + * in some case. + * @throws Exception + */ + @Test(timeout = 120000) + public void testDecommission2NodeWithBusyNode() throws Exception { + byte busyDNIndex = 6; + byte decommissionDNIndex = 6; + byte decommissionDNIndex2 = 8; + //1. 
create EC file + final Path ecFile = new Path(ecDir, "testDecommission2NodeWithBusyNode"); + int writeBytes = cellSize * dataBlocks; + writeStripedFile(dfs, ecFile, writeBytes); + + Assert.assertEquals(0, bm.numOfUnderReplicatedBlocks()); + FileChecksum fileChecksum1 = dfs.getFileChecksum(ecFile, writeBytes); + + //2. make one DN busy + final INodeFile fileNode = cluster.getNamesystem().getFSDirectory() + .getINode4Write(ecFile.toString()).asFile(); + BlockInfo firstBlock = fileNode.getBlocks()[0]; + DatanodeStorageInfo[] dnStorageInfos = bm.getStorages(firstBlock); + DatanodeDescriptor busyNode = dnStorageInfos[busyDNIndex] + .getDatanodeDescriptor(); + for (int j = 0; j < replicationStreamsHardLimit; j++) { + busyNode.incrementPendingReplicationWithoutTargets(); + } + + //3. start decommissioning two nodes + List<DatanodeInfo> decommissionNodes = new ArrayList<>(); + decommissionNodes.add(dnStorageInfos[decommissionDNIndex] + .getDatanodeDescriptor()); + decommissionNodes.add(dnStorageInfos[decommissionDNIndex2] + .getDatanodeDescriptor()); + decommissionNode(0, decommissionNodes, AdminStates.DECOMMISSION_INPROGRESS); + + //4. wait for decommissioning and not busy block to replicate(9-2+1=8) + GenericTestUtils.waitFor( + () -> bm.countNodes(firstBlock).liveReplicas() >= 8, + 100, 60000); + + //5. release the busy DN so that the decommissioning and busy block can replicate + busyNode.decrementPendingReplicationWithoutTargets(); + + //6. mark the nodes as decommissioned to finish the decommission + decommissionNode(0, decommissionNodes, AdminStates.DECOMMISSIONED); + + //7. Busy DN shouldn't be reconstructed + DatanodeStorageInfo[] newDnStorageInfos = bm.getStorages(firstBlock); + Assert.assertEquals("Busy DN shouldn't be reconstructed", + dnStorageInfos[busyDNIndex].getStorageID(), + newDnStorageInfos[busyDNIndex].getStorageID()); + + //8. 
check the checksum of a file + FileChecksum fileChecksum2 = dfs.getFileChecksum(ecFile, writeBytes); + Assert.assertEquals("Checksum mismatches!", fileChecksum1, fileChecksum2); + + //9. check the data is correct + StripedFileTestUtil.checkData(dfs, ecFile, writeBytes, decommissionNodes, + null, blockGroupSize); + } + + /** * Tests to verify that the file checksum should be able to compute after the * decommission operation. * --------------------------------------------------------------------- To unsubscribe, e-mail: common-commits-unsubscr...@hadoop.apache.org For additional commands, e-mail: common-commits-h...@hadoop.apache.org