Repository: hadoop
Updated Branches:
  refs/heads/branch-2.8 e426d1bb3 -> 8514ab8cb
HDFS-6520. hdfs fsck passes invalid length value when creating BlockReader (Xiao Chen via cmccabe)

(cherry picked from commit 188f65287d5b2f26a8862c88198f83ac59035016)
(cherry picked from commit 387d3f25c192dc89484c3372bbc401a57175ff8e)


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/8514ab8c
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/8514ab8c
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/8514ab8c

Branch: refs/heads/branch-2.8
Commit: 8514ab8cb534777a20ed62a62f42eb40c5eb2c1d
Parents: e426d1b
Author: Colin Patrick Mccabe <cmcc...@cloudera.com>
Authored: Wed Apr 6 11:28:34 2016 -0700
Committer: Colin Patrick Mccabe <cmcc...@cloudera.com>
Committed: Wed Apr 6 11:38:19 2016 -0700

----------------------------------------------------------------------
 .../apache/hadoop/hdfs/BlockReaderFactory.java  |   4 +-
 .../hdfs/server/namenode/NamenodeFsck.java      |   2 +-
 .../blockmanagement/TestBlockTokenWithDFS.java  |   2 +-
 .../datanode/TestDataNodeVolumeFailure.java     |   2 +-
 .../hadoop/hdfs/server/namenode/TestFsck.java   | 122 ++++++++++++++++++-
 5 files changed, 126 insertions(+), 6 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hadoop/blob/8514ab8c/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/BlockReaderFactory.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/BlockReaderFactory.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/BlockReaderFactory.java
index 5c7bbd7..7becbb9 100644
--- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/BlockReaderFactory.java
+++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/BlockReaderFactory.java
@@ -150,7 +150,7 @@ public class BlockReaderFactory implements ShortCircuitReplicaCreator {
   private ClientContext clientContext;
 
   /**
-   * Number of bytes to read. -1 indicates no limit.
+   * Number of bytes to read. Must be set to a non-negative value.
    */
   private long length = -1;
 
@@ -341,6 +341,8 @@ public class BlockReaderFactory implements ShortCircuitReplicaCreator {
    */
   public BlockReader build() throws IOException {
     Preconditions.checkNotNull(configuration);
+    Preconditions
+        .checkState(length >= 0, "Length must be set to a non-negative value");
     BlockReader reader = tryToCreateExternalBlockReader();
     if (reader != null) {
       return reader;

http://git-wip-us.apache.org/repos/asf/hadoop/blob/8514ab8c/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NamenodeFsck.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NamenodeFsck.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NamenodeFsck.java
index 80c2f27..8f1b921 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NamenodeFsck.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NamenodeFsck.java
@@ -848,7 +848,7 @@ public class NamenodeFsck implements DataEncryptionKeyFactory {
         setBlock(block).
         setBlockToken(lblock.getBlockToken()).
         setStartOffset(0).
-        setLength(-1).
+        setLength(block.getNumBytes()).
         setVerifyChecksum(true).
         setClientName("fsck").
         setDatanodeInfo(chosenNode).

http://git-wip-us.apache.org/repos/asf/hadoop/blob/8514ab8c/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlockTokenWithDFS.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlockTokenWithDFS.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlockTokenWithDFS.java
index e39ab3e..763aeb5 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlockTokenWithDFS.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlockTokenWithDFS.java
@@ -155,7 +155,7 @@ public class TestBlockTokenWithDFS {
         setBlockToken(lblock.getBlockToken()).
         setInetSocketAddress(targetAddr).
         setStartOffset(0).
-        setLength(-1).
+        setLength(0).
         setVerifyChecksum(true).
         setClientName("TestBlockTokenWithDFS").
         setDatanodeInfo(nodes[0]).

http://git-wip-us.apache.org/repos/asf/hadoop/blob/8514ab8c/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeVolumeFailure.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeVolumeFailure.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeVolumeFailure.java
index 05e6da1..5acbeb0 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeVolumeFailure.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeVolumeFailure.java
@@ -521,7 +521,7 @@ public class TestDataNodeVolumeFailure {
         "test-blockpoolid", block.getBlockId())).
         setBlockToken(lblock.getBlockToken()).
         setStartOffset(0).
-        setLength(-1).
+        setLength(0).
         setVerifyChecksum(true).
         setClientName("TestDataNodeVolumeFailure").
         setDatanodeInfo(datanode).
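
For illustration only (a sketch, not code from this commit): the hunks above capture the whole contract change — BlockReaderFactory.build() now rejects the default length of -1, NamenodeFsck supplies the real block size via block.getNumBytes(), and tests that only probe access pass 0. The helper name readWholeBlock and its parameters below are hypothetical, and the factory is assumed to arrive already configured (Configuration, file name, target address, and so on), since those setter calls sit outside the hunks shown here.

import java.io.IOException;

import org.apache.hadoop.hdfs.BlockReader;
import org.apache.hadoop.hdfs.BlockReaderFactory;
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
import org.apache.hadoop.hdfs.protocol.LocatedBlock;

public class BlockReaderLengthSketch {
  /**
   * Hypothetical helper showing the builder chain under the new contract.
   * The factory is assumed to already carry its Configuration and the other
   * required fields set at the real call sites.
   */
  static BlockReader readWholeBlock(BlockReaderFactory factory,
      LocatedBlock lblock, DatanodeInfo chosenNode) throws IOException {
    ExtendedBlock block = lblock.getBlock();
    return factory.
        setBlock(block).
        setBlockToken(lblock.getBlockToken()).
        setStartOffset(0).
        // -1 ("no limit") is no longer accepted by build(): fsck passes the
        // block's actual byte count; tests that never read data pass 0.
        setLength(block.getNumBytes()).
        setVerifyChecksum(true).
        setClientName("fsck").
        setDatanodeInfo(chosenNode).
        build();
  }
}
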
http://git-wip-us.apache.org/repos/asf/hadoop/blob/8514ab8c/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFsck.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFsck.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFsck.java
index bc9bc4f..f1c09ef 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFsck.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFsck.java
@@ -35,6 +35,7 @@ import java.io.BufferedReader;
 import java.io.ByteArrayOutputStream;
 import java.io.File;
 import java.io.FileNotFoundException;
+import java.io.FileOutputStream;
 import java.io.FileReader;
 import java.io.IOException;
 import java.io.PrintStream;
@@ -46,6 +47,8 @@ import java.net.InetAddress;
 import java.net.InetSocketAddress;
 import java.nio.channels.FileChannel;
 import java.security.PrivilegedExceptionAction;
+import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
@@ -54,12 +57,15 @@ import java.util.Set;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
+import com.google.common.base.Supplier;
 import org.apache.commons.logging.impl.Log4JLogger;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FSDataOutputStream;
 import org.apache.hadoop.fs.FileContext;
 import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.LocatedFileStatus;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.RemoteIterator;
 import org.apache.hadoop.fs.UnresolvedLinkException;
 import org.apache.hadoop.fs.permission.FsPermission;
 import org.apache.hadoop.hdfs.DFSClient;
@@ -98,6 +104,7 @@ import org.apache.log4j.Level;
 import org.apache.log4j.Logger;
 import org.apache.log4j.PatternLayout;
 import org.apache.log4j.RollingFileAppender;
+import org.junit.Assert;
 import org.junit.Test;
 
 import com.google.common.collect.Sets;
@@ -141,7 +148,7 @@ public class TestFsck {
       assertEquals(expectedErrCode, errCode);
     }
     GenericTestUtils.setLogLevel(FSPermissionChecker.LOG, Level.INFO);
-    FSImage.LOG.error("OUTPUT = " + bStream.toString());
+    FSImage.LOG.info("OUTPUT = " + bStream.toString());
     return bStream.toString();
   }
 
@@ -461,7 +468,25 @@ public class TestFsck {
         }
       }
     }
-
+
+    public void corruptBlocks(MiniDFSCluster cluster) throws IOException {
+      for (int corruptIdx : blocksToCorrupt) {
+        // Corrupt a block by deleting it
+        ExtendedBlock block = dfsClient.getNamenode().getBlockLocations(name,
+            blockSize * corruptIdx, Long.MAX_VALUE).get(0).getBlock();
+        for (int i = 0; i < numDataNodes; i++) {
+          File blockFile = cluster.getBlockFile(i, block);
+          if(blockFile != null && blockFile.exists()) {
+            FileOutputStream blockFileStream =
+                new FileOutputStream(blockFile, false);
+            blockFileStream.write("corrupt".getBytes());
+            blockFileStream.close();
+            FSImage.LOG.info("Corrupted block file " + blockFile);
+          }
+        }
+      }
+    }
+
     public void checkSalvagedRemains() throws IOException {
       int chainIdx = 0;
       HdfsFileStatus status = dfsClient.getFileInfo(name);
@@ -1734,4 +1759,97 @@ public class TestFsck {
       if (cluster != null) {cluster.shutdown();}
     }
   }
+
+  @Test (timeout = 300000)
+  public void testFsckMoveAfterCorruption() throws Exception {
+    final int DFS_BLOCK_SIZE = 512 * 1024;
+    final int NUM_DATANODES = 1;
+    final int REPLICATION = 1;
+    MiniDFSCluster cluster = null;
+    try {
+      final Configuration conf = new HdfsConfiguration();
+      conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, DFS_BLOCK_SIZE);
+      conf.setLong(DFSConfigKeys.DFS_BLOCKREPORT_INTERVAL_MSEC_KEY, 1000L);
+      conf.setInt(DFSConfigKeys.DFS_DATANODE_DIRECTORYSCAN_INTERVAL_KEY, 1);
+      conf.setInt(DFSConfigKeys.DFS_REPLICATION_KEY, REPLICATION);
+      cluster = new MiniDFSCluster.Builder(conf).build();
+      DistributedFileSystem dfs = cluster.getFileSystem();
+      cluster.waitActive();
+
+      final String srcDir = "/srcdat";
+      final DFSTestUtil util = new DFSTestUtil.Builder().setName("TestFsck")
+          .setMinSize(DFS_BLOCK_SIZE * 2).setMaxSize(DFS_BLOCK_SIZE * 3)
+          .setNumFiles(1).build();
+      util.createFiles(dfs, srcDir, (short) REPLICATION);
+      final String fileNames[] = util.getFileNames(srcDir);
+      FSImage.LOG.info("Created files: " + Arrays.toString(fileNames));
+
+      // Run fsck here. The output is automatically logged for easier debugging
+      String outStr = runFsck(conf, 0, true, "/", "-files", "-blocks");
+      assertTrue(outStr.contains(NamenodeFsck.HEALTHY_STATUS));
+
+      // Corrupt the first block
+      final DFSClient dfsClient = new DFSClient(
+          new InetSocketAddress("localhost", cluster.getNameNodePort()), conf);
+      final String blockFileToCorrupt = fileNames[0];
+      final CorruptedTestFile ctf = new CorruptedTestFile(blockFileToCorrupt,
+          Sets.newHashSet(0), dfsClient, NUM_DATANODES, DFS_BLOCK_SIZE);
+      ctf.corruptBlocks(cluster);
+
+      // Wait for fsck to discover all the missing blocks
+      GenericTestUtils.waitFor(new Supplier<Boolean>() {
+        @Override
+        public Boolean get() {
+          try {
+            final String str = runFsck(conf, 1, false, "/");
+            String numCorrupt = null;
+            for (String line : str.split(LINE_SEPARATOR)) {
+              Matcher m = numCorruptBlocksPattern.matcher(line);
+              if (m.matches()) {
+                numCorrupt = m.group(1);
+                break;
+              }
+            }
+            if (numCorrupt == null) {
+              Assert.fail("Cannot find corrupt blocks count in fsck output.");
+            }
+            if (Integer.parseInt(numCorrupt) == ctf.getTotalMissingBlocks()) {
+              assertTrue(str.contains(NamenodeFsck.CORRUPT_STATUS));
+              return true;
+            }
+          } catch (Exception e) {
+            FSImage.LOG.error("Exception caught", e);
+            Assert.fail("Caught unexpected exception.");
+          }
+          return false;
+        }
+      }, 1000, 60000);
+
+      runFsck(conf, 1, true, "/", "-files", "-blocks", "-racks");
+      FSImage.LOG.info("Moving blocks to lost+found");
+      // Fsck will return error since we corrupted a block
+      runFsck(conf, 1, false, "/", "-move");
+
+      final List<LocatedFileStatus> retVal = new ArrayList<>();
+      final RemoteIterator<LocatedFileStatus> iter =
+          dfs.listFiles(new Path("/lost+found"), true);
+      while (iter.hasNext()) {
+        retVal.add(iter.next());
+      }
+      FSImage.LOG.info("Items in lost+found: " + retVal);
+
+      // Expect all good blocks moved, only corrupted block skipped.
+      long totalLength = 0;
+      for (LocatedFileStatus lfs: retVal) {
+        totalLength += lfs.getLen();
+      }
+      Assert.assertTrue("Nothing is moved to lost+found!", totalLength > 0);
+      util.cleanup(dfs, srcDir);
+    } finally {
+      if (cluster != null) {
+        cluster.shutdown();
+      }
+    }
+  }
+
 }
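
One more illustrative sketch, again not part of the commit: the practical effect of the new Preconditions.checkState in build() is that a reader whose length was never set now fails fast on the client with an IllegalStateException instead of sending a length of -1 to the datanode, which is the failure mode testFsckMoveAfterCorruption exercises from the fsck side. The helper and class name below are hypothetical; the factory is assumed to be fully configured except that setLength() was never called.

import org.apache.hadoop.hdfs.BlockReaderFactory;

public class BuildWithoutLengthSketch {
  /**
   * Hypothetical negative case: 'factory' has everything set except length,
   * so the field still holds its default of -1 and build() must fail fast.
   */
  static void expectFailFast(BlockReaderFactory factory) throws Exception {
    try {
      factory.build();
      throw new AssertionError("build() accepted the default length of -1");
    } catch (IllegalStateException expected) {
      // Message from the new check: "Length must be set to a non-negative value"
    }
  }
}
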