Repository: hadoop

Updated Branches:
  refs/heads/branch-2 1efba62fd -> 7bad74809
  refs/heads/branch-2.8 c1f3cd765 -> 4f5846f1e
  refs/heads/branch-2.8.1 3bf038446 -> e2a817a9a
HDFS-11445. FSCK shows overall health status as corrupt even one replica is corrupt. Contributed by Brahma Reddy Battula.

Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/7bad7480
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/7bad7480
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/7bad7480

Branch: refs/heads/branch-2
Commit: 7bad748091b6a5f90e919c781a915e13000e1be6
Parents: 1efba62
Author: Brahma Reddy Battula <bra...@apache.org>
Authored: Thu May 25 22:38:12 2017 +0800
Committer: Brahma Reddy Battula <bra...@apache.org>
Committed: Thu May 25 22:38:12 2017 +0800

----------------------------------------------------------------------
 .../hdfs/server/blockmanagement/BlockInfo.java  | 18 +++++------
 .../server/blockmanagement/BlockManager.java    | 27 +++++++++++++++--
 .../hdfs/server/namenode/FSNamesystem.java      |  3 +-
 .../hadoop/hdfs/server/namenode/TestFsck.java   | 32 ++++++++++++++++++++
 4 files changed, 64 insertions(+), 16 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/hadoop/blob/7bad7480/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockInfo.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockInfo.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockInfo.java
index 8dfb80b..7f945c0 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockInfo.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockInfo.java
@@ -25,7 +25,6 @@ import com.google.common.base.Preconditions;
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.hdfs.protocol.Block;
 import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.BlockUCState;
-import org.apache.hadoop.hdfs.server.namenode.NameNode;
 import org.apache.hadoop.util.LightWeightGSet;
 
 import static org.apache.hadoop.hdfs.server.namenode.INodeId.INVALID_INODE_ID;
@@ -391,28 +390,25 @@ public abstract class BlockInfo extends Block
    * Process the recorded replicas. When about to commit or finish the
    * pipeline recovery sort out bad replicas.
    * @param genStamp The final generation stamp for the block.
+   * @return staleReplica's List.
    */
-  public void setGenerationStampAndVerifyReplicas(long genStamp) {
+  public List<ReplicaUnderConstruction> setGenerationStampAndVerifyReplicas(
+      long genStamp) {
     Preconditions.checkState(uc != null && !isComplete());
     // Set the generation stamp for the block.
     setGenerationStamp(genStamp);
-    // Remove the replicas with wrong gen stamp
-    List<ReplicaUnderConstruction> staleReplicas = uc.getStaleReplicas(genStamp);
-    for (ReplicaUnderConstruction r : staleReplicas) {
-      r.getExpectedStorageLocation().removeBlock(this);
-      NameNode.blockStateChangeLog.debug("BLOCK* Removing stale replica {}"
-          + " of {}", r, Block.toString(r));
-    }
+    return uc.getStaleReplicas(genStamp);
   }
 
   /**
    * Commit block's length and generation stamp as reported by the client.
    * Set block state to {@link BlockUCState#COMMITTED}.
    * @param block - contains client reported block length and generation
+   * @return staleReplica's List.
    * @throws IOException if block ids are inconsistent.
    */
-  void commitBlock(Block block) throws IOException {
+  List<ReplicaUnderConstruction> commitBlock(Block block) throws IOException {
     if (getBlockId() != block.getBlockId()) {
       throw new IOException("Trying to commit inconsistent block: id = "
           + block.getBlockId() + ", expected id = " + getBlockId());
@@ -421,6 +417,6 @@ public abstract class BlockInfo extends Block
     uc.commit();
     this.setNumBytes(block.getNumBytes());
     // Sort out invalid replicas.
-    setGenerationStampAndVerifyReplicas(block.getGenerationStamp());
+    return setGenerationStampAndVerifyReplicas(block.getGenerationStamp());
   }
 }

http://git-wip-us.apache.org/repos/asf/hadoop/blob/7bad7480/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
index 2aaad73..5fe285e 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
@@ -764,7 +764,7 @@ public class BlockManager implements BlockStatsMXBean {
    * @throws IOException if the block does not have at least a minimal number
    * of replicas reported from data-nodes.
    */
-  private static boolean commitBlock(final BlockInfo block,
+  private boolean commitBlock(final BlockInfo block,
       final Block commitBlock) throws IOException {
     if (block.getBlockUCState() == BlockUCState.COMMITTED)
       return false;
@@ -775,7 +775,9 @@ public class BlockManager implements BlockStatsMXBean {
       throw new IOException("Commit block with mismatching GS. NN has "
           + block + ", client submits " + commitBlock);
     }
-    block.commitBlock(commitBlock);
+    List<ReplicaUnderConstruction> staleReplicas =
+        block.commitBlock(commitBlock);
+    removeStaleReplicas(staleReplicas, block);
     return true;
   }
 
@@ -899,7 +901,8 @@ public class BlockManager implements BlockStatsMXBean {
    * when tailing edit logs as a Standby.
    */
   public void forceCompleteBlock(final BlockInfo block) throws IOException {
-    block.commitBlock(block);
+    List<ReplicaUnderConstruction> staleReplicas = block.commitBlock(block);
+    removeStaleReplicas(staleReplicas, block);
     completeBlock(block, null, true);
   }
 
@@ -3290,6 +3293,17 @@
     }
   }
 
+  private void removeStaleReplicas(List<ReplicaUnderConstruction> staleReplicas,
+      BlockInfo block) {
+    for (ReplicaUnderConstruction r : staleReplicas) {
+      removeStoredBlock(block,
+          r.getExpectedStorageLocation().getDatanodeDescriptor());
+      NameNode.blockStateChangeLog
+          .debug("BLOCK* Removing stale replica {}" + " of {}", r,
+              Block.toString(r));
+    }
+  }
+
   /**
    * Get all valid locations of the block & add the block to results
    * return the length of the added block; 0 if the block is not added
@@ -3636,6 +3650,13 @@
     return blocksMap.getStoredBlock(block);
   }
 
+  public void updateLastBlock(BlockInfo lastBlock, ExtendedBlock newBlock) {
+    lastBlock.setNumBytes(newBlock.getNumBytes());
+    List<ReplicaUnderConstruction> staleReplicas = lastBlock
+        .setGenerationStampAndVerifyReplicas(newBlock.getGenerationStamp());
+    removeStaleReplicas(staleReplicas, lastBlock);
+  }
+
   /** updates a block in under replication queue */
   private void updateNeededReplications(final BlockInfo block,
       final int curReplicasDelta, int expectedReplicasDelta) {

http://git-wip-us.apache.org/repos/asf/hadoop/blob/7bad7480/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
index 093d6f7..dc349f1 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
@@ -4917,8 +4917,7 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
     }
 
     // Update old block with the new generation stamp and new length
-    blockinfo.setNumBytes(newBlock.getNumBytes());
-    blockinfo.setGenerationStampAndVerifyReplicas(newBlock.getGenerationStamp());
+    blockManager.updateLastBlock(blockinfo, newBlock);
 
     // find the DatanodeDescriptor objects
     final DatanodeStorageInfo[] storages = blockManager.getDatanodeManager()

http://git-wip-us.apache.org/repos/asf/hadoop/blob/7bad7480/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFsck.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFsck.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFsck.java
index c419933..d51e0cb 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFsck.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFsck.java
@@ -78,6 +78,7 @@ import org.apache.hadoop.hdfs.DFSTestUtil;
 import org.apache.hadoop.hdfs.DFSUtil;
 import org.apache.hadoop.hdfs.DistributedFileSystem;
 import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.apache.hadoop.hdfs.MiniDFSNNTopology;
 import org.apache.hadoop.fs.StorageType;
 import org.apache.hadoop.hdfs.client.HdfsClientConfigKeys;
 import org.apache.hadoop.hdfs.protocol.Block;
@@ -98,6 +99,7 @@ import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeManager;
 import org.apache.hadoop.hdfs.server.blockmanagement.HostConfigManager;
 import org.apache.hadoop.hdfs.server.namenode.FSDirectory.DirOp;
 import org.apache.hadoop.hdfs.server.namenode.NamenodeFsck.Result;
+import org.apache.hadoop.hdfs.server.namenode.ha.HATestUtil;
 import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocols;
 import org.apache.hadoop.hdfs.tools.DFSck;
 import org.apache.hadoop.hdfs.util.HostsFileWriter;
@@ -1986,4 +1988,34 @@ public class TestFsck {
       }
     }
   }
+
+  @Test(timeout = 300000)
+  public void testFsckCorruptWhenOneReplicaIsCorrupt()
+      throws Exception {
+    try (MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
+        .nnTopology(MiniDFSNNTopology.simpleHATopology()).numDataNodes(2)
+        .build()) {
+      cluster.waitActive();
+      FileSystem fs = HATestUtil.configureFailoverFs(cluster, conf);
+      cluster.transitionToActive(0);
+      String filePath = "/appendTest";
+      Path fileName = new Path(filePath);
+      DFSTestUtil.createFile(fs, fileName, 512, (short) 2, 0);
+      DFSTestUtil.waitReplication(fs, fileName, (short) 2);
+      Assert.assertTrue("File not created", fs.exists(fileName));
+      cluster.getDataNodes().get(1).shutdown();
+      DFSTestUtil.appendFile(fs, fileName, "appendCorruptBlock");
+      cluster.restartDataNode(1, true);
+      GenericTestUtils.waitFor(new Supplier<Boolean>() {
+        @Override public Boolean get() {
+          return (
+              cluster.getNameNode(0).getNamesystem().getCorruptReplicaBlocks()
+                  > 0);
+        }
+      }, 100, 5000);
+
+      DFSTestUtil.appendFile(fs, fileName, "appendCorruptBlock");
+      runFsck(cluster.getConfiguration(0), 0, true, "/");
+    }
+  }
 }
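----------------------------------------------------------------------

For readers who want to reproduce the check outside of the test, the sketch below is not part of this patch; it simply mirrors what TestFsck.runFsck() does internally by driving the standard DFSck tool through ToolRunner. The class name FsckStatusCheck, the Configuration defaults, and the path "/" are illustrative assumptions; it expects fs.defaultFS to point at a running HDFS cluster.

import java.io.ByteArrayOutputStream;
import java.io.PrintStream;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.tools.DFSck;
import org.apache.hadoop.util.ToolRunner;

public class FsckStatusCheck {
  public static void main(String[] args) throws Exception {
    // Assumes fs.defaultFS in the Configuration points at a live HDFS cluster.
    Configuration conf = new Configuration();
    ByteArrayOutputStream bytes = new ByteArrayOutputStream();
    PrintStream out = new PrintStream(bytes, true, "UTF-8");

    // Run the equivalent of "hdfs fsck /" in-process; exit code 0 means the
    // namespace is reported healthy, non-zero means corrupt or errored.
    int errCode = ToolRunner.run(new DFSck(conf, out), new String[] {"/"});
    String report = bytes.toString("UTF-8");

    System.out.println("fsck exit code: " + errCode);
    // With this fix, a single corrupt replica of a block that still has a
    // healthy replica no longer flips the overall status to CORRUPT.
    System.out.println(report.contains("HEALTHY") ? "HEALTHY" : "NOT HEALTHY");
  }
}

The new test exercises the same path: runFsck(cluster.getConfiguration(0), 0, true, "/") expects exit code 0 (healthy) even while getCorruptReplicaBlocks() reports a stale replica on the restarted datanode.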