HDFS-6520. hdfs fsck passes invalid length value when creating BlockReader (Xiao Chen via cmccabe)


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/188f6528
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/188f6528
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/188f6528

Branch: refs/heads/HDFS-7240
Commit: 188f65287d5b2f26a8862c88198f83ac59035016
Parents: 221b3a8
Author: Colin Patrick Mccabe <cmcc...@cloudera.com>
Authored: Wed Apr 6 11:28:34 2016 -0700
Committer: Colin Patrick Mccabe <cmcc...@cloudera.com>
Committed: Wed Apr 6 11:28:34 2016 -0700

----------------------------------------------------------------------
 .../apache/hadoop/hdfs/BlockReaderFactory.java  |   4 +-
 .../hdfs/server/namenode/NamenodeFsck.java      |   2 +-
 .../blockmanagement/TestBlockTokenWithDFS.java  |   2 +-
 .../datanode/TestDataNodeVolumeFailure.java     |   2 +-
 .../hadoop/hdfs/server/namenode/TestFsck.java   | 122 ++++++++++++++++++-
 5 files changed, 126 insertions(+), 6 deletions(-)
----------------------------------------------------------------------
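
For context, the root cause was that NamenodeFsck built its BlockReader with setLength(-1), which the builder used to treat as "no limit"; the builder now rejects any negative length at build() time, and fsck passes the real block size instead. A rough sketch of the corrected call, using only the chained setters visible in this patch (the constructor argument and the surrounding variables block, lblock and chosenNode are assumptions standing in for the fsck caller's context):

  // Sketch, not the verbatim NamenodeFsck code: build a reader for one block
  // with an explicit, non-negative length.
  BlockReader blockReader = new BlockReaderFactory(dfs.getConf()).  // assumed ctor/context
      setBlock(block).
      setBlockToken(lblock.getBlockToken()).
      setStartOffset(0).
      setLength(block.getNumBytes()).   // previously -1; now the block's byte count
      setVerifyChecksum(true).
      setClientName("fsck").
      setDatanodeInfo(chosenNode).
      build();                          // build() now enforces length >= 0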


http://git-wip-us.apache.org/repos/asf/hadoop/blob/188f6528/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/BlockReaderFactory.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/BlockReaderFactory.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/BlockReaderFactory.java
index 8a0050f..7af4609 100644
--- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/BlockReaderFactory.java
+++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/BlockReaderFactory.java
@@ -150,7 +150,7 @@ public class BlockReaderFactory implements ShortCircuitReplicaCreator {
   private ClientContext clientContext;
 
   /**
-   * Number of bytes to read.  -1 indicates no limit.
+   * Number of bytes to read. Must be set to a non-negative value.
    */
   private long length = -1;
 
@@ -341,6 +341,8 @@ public class BlockReaderFactory implements ShortCircuitReplicaCreator {
    */
   public BlockReader build() throws IOException {
     Preconditions.checkNotNull(configuration);
+    Preconditions
+        .checkState(length >= 0, "Length must be set to a non-negative value");
     BlockReader reader = tryToCreateExternalBlockReader();
     if (reader != null) {
       return reader;
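
The new check above relies on Guava's Preconditions.checkState, so a caller that never sets a length (the field still defaults to -1) now fails fast with an IllegalStateException instead of shipping a bogus length to the datanode. A self-contained toy illustration, assuming only Guava on the classpath:

  import com.google.common.base.Preconditions;

  public class LengthCheckDemo {
    public static void main(String[] args) {
      long length = -1;  // the builder's default; caller never called setLength()
      // Throws IllegalStateException("Length must be set to a non-negative value")
      Preconditions.checkState(length >= 0,
          "Length must be set to a non-negative value");
    }
  }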

http://git-wip-us.apache.org/repos/asf/hadoop/blob/188f6528/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NamenodeFsck.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NamenodeFsck.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NamenodeFsck.java
index 291ba56..80f510c 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NamenodeFsck.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NamenodeFsck.java
@@ -931,7 +931,7 @@ public class NamenodeFsck implements DataEncryptionKeyFactory {
             setBlock(block).
             setBlockToken(lblock.getBlockToken()).
             setStartOffset(0).
-            setLength(-1).
+            setLength(block.getNumBytes()).
             setVerifyChecksum(true).
             setClientName("fsck").
             setDatanodeInfo(chosenNode).

http://git-wip-us.apache.org/repos/asf/hadoop/blob/188f6528/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlockTokenWithDFS.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlockTokenWithDFS.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlockTokenWithDFS.java
index 8e65ff6..aa46de2 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlockTokenWithDFS.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlockTokenWithDFS.java
@@ -158,7 +158,7 @@ public class TestBlockTokenWithDFS {
           setBlockToken(lblock.getBlockToken()).
           setInetSocketAddress(targetAddr).
           setStartOffset(0).
-          setLength(-1).
+          setLength(0).
           setVerifyChecksum(true).
           setClientName("TestBlockTokenWithDFS").
           setDatanodeInfo(nodes[0]).

http://git-wip-us.apache.org/repos/asf/hadoop/blob/188f6528/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeVolumeFailure.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeVolumeFailure.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeVolumeFailure.java
index 5da5cd9..73f02bd 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeVolumeFailure.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeVolumeFailure.java
@@ -523,7 +523,7 @@ public class TestDataNodeVolumeFailure {
                     "test-blockpoolid", block.getBlockId())).
       setBlockToken(lblock.getBlockToken()).
       setStartOffset(0).
-      setLength(-1).
+      setLength(0).
       setVerifyChecksum(true).
       setClientName("TestDataNodeVolumeFailure").
       setDatanodeInfo(datanode).

http://git-wip-us.apache.org/repos/asf/hadoop/blob/188f6528/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFsck.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFsck.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFsck.java
index 1818db8..7cb6edc 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFsck.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFsck.java
@@ -35,6 +35,7 @@ import java.io.BufferedReader;
 import java.io.ByteArrayOutputStream;
 import java.io.File;
 import java.io.FileNotFoundException;
+import java.io.FileOutputStream;
 import java.io.FileReader;
 import java.io.IOException;
 import java.io.PrintStream;
@@ -46,6 +47,8 @@ import java.net.InetAddress;
 import java.net.InetSocketAddress;
 import java.nio.channels.FileChannel;
 import java.security.PrivilegedExceptionAction;
+import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
@@ -54,12 +57,15 @@ import java.util.Set;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
+import com.google.common.base.Supplier;
 import org.apache.commons.logging.impl.Log4JLogger;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FSDataOutputStream;
 import org.apache.hadoop.fs.FileContext;
 import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.LocatedFileStatus;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.RemoteIterator;
 import org.apache.hadoop.fs.UnresolvedLinkException;
 import org.apache.hadoop.fs.permission.FsPermission;
 import org.apache.hadoop.hdfs.DFSClient;
@@ -101,6 +107,7 @@ import org.apache.log4j.Level;
 import org.apache.log4j.Logger;
 import org.apache.log4j.PatternLayout;
 import org.apache.log4j.RollingFileAppender;
+import org.junit.Assert;
 import org.junit.Test;
 
 import com.google.common.collect.Sets;
@@ -147,7 +154,7 @@ public class TestFsck {
       assertEquals(expectedErrCode, errCode);
     }
     GenericTestUtils.setLogLevel(FSPermissionChecker.LOG, Level.INFO);
-    FSImage.LOG.error("OUTPUT = " + bStream.toString());
+    FSImage.LOG.info("OUTPUT = " + bStream.toString());
     return bStream.toString();
   }
 
@@ -475,7 +482,25 @@ public class TestFsck {
         }
       }
     }
-    
+
+    public void corruptBlocks(MiniDFSCluster cluster) throws IOException {
+      for (int corruptIdx : blocksToCorrupt) {
+        // Corrupt a block by deleting it
+        ExtendedBlock block = dfsClient.getNamenode().getBlockLocations(name,
+            blockSize * corruptIdx, Long.MAX_VALUE).get(0).getBlock();
+        for (int i = 0; i < numDataNodes; i++) {
+          File blockFile = cluster.getBlockFile(i, block);
+          if(blockFile != null && blockFile.exists()) {
+            FileOutputStream blockFileStream =
+                new FileOutputStream(blockFile, false);
+            blockFileStream.write("corrupt".getBytes());
+            blockFileStream.close();
+            FSImage.LOG.info("Corrupted block file " + blockFile);
+          }
+        }
+      }
+    }
+
     public void checkSalvagedRemains() throws IOException {
       int chainIdx = 0;
       HdfsFileStatus status = dfsClient.getFileInfo(name);
@@ -1890,4 +1915,97 @@ public class TestFsck {
       if (cluster != null) {cluster.shutdown();}
     }
   }
+
+  @Test (timeout = 300000)
+  public void testFsckMoveAfterCorruption() throws Exception {
+    final int DFS_BLOCK_SIZE = 512 * 1024;
+    final int NUM_DATANODES = 1;
+    final int REPLICATION = 1;
+    MiniDFSCluster cluster = null;
+    try {
+      final Configuration conf = new HdfsConfiguration();
+      conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, DFS_BLOCK_SIZE);
+      conf.setLong(DFSConfigKeys.DFS_BLOCKREPORT_INTERVAL_MSEC_KEY, 1000L);
+      conf.setInt(DFSConfigKeys.DFS_DATANODE_DIRECTORYSCAN_INTERVAL_KEY, 1);
+      conf.setInt(DFSConfigKeys.DFS_REPLICATION_KEY, REPLICATION);
+      cluster = new MiniDFSCluster.Builder(conf).build();
+      DistributedFileSystem dfs = cluster.getFileSystem();
+      cluster.waitActive();
+
+      final String srcDir = "/srcdat";
+      final DFSTestUtil util = new DFSTestUtil.Builder().setName("TestFsck")
+          .setMinSize(DFS_BLOCK_SIZE * 2).setMaxSize(DFS_BLOCK_SIZE * 3)
+          .setNumFiles(1).build();
+      util.createFiles(dfs, srcDir, (short) REPLICATION);
+      final String fileNames[] = util.getFileNames(srcDir);
+      FSImage.LOG.info("Created files: " + Arrays.toString(fileNames));
+
+      // Run fsck here. The output is automatically logged for easier debugging
+      String outStr = runFsck(conf, 0, true, "/", "-files", "-blocks");
+      assertTrue(outStr.contains(NamenodeFsck.HEALTHY_STATUS));
+
+      // Corrupt the first block
+      final DFSClient dfsClient = new DFSClient(
+          new InetSocketAddress("localhost", cluster.getNameNodePort()), conf);
+      final String blockFileToCorrupt = fileNames[0];
+      final CorruptedTestFile ctf = new CorruptedTestFile(blockFileToCorrupt,
+          Sets.newHashSet(0), dfsClient, NUM_DATANODES, DFS_BLOCK_SIZE);
+      ctf.corruptBlocks(cluster);
+
+      // Wait for fsck to discover all the missing blocks
+      GenericTestUtils.waitFor(new Supplier<Boolean>() {
+        @Override
+        public Boolean get() {
+          try {
+            final String str = runFsck(conf, 1, false, "/");
+            String numCorrupt = null;
+            for (String line : str.split(LINE_SEPARATOR)) {
+              Matcher m = numCorruptBlocksPattern.matcher(line);
+              if (m.matches()) {
+                numCorrupt = m.group(1);
+                break;
+              }
+            }
+            if (numCorrupt == null) {
+              Assert.fail("Cannot find corrupt blocks count in fsck output.");
+            }
+            if (Integer.parseInt(numCorrupt) == ctf.getTotalMissingBlocks()) {
+              assertTrue(str.contains(NamenodeFsck.CORRUPT_STATUS));
+              return true;
+            }
+          } catch (Exception e) {
+            FSImage.LOG.error("Exception caught", e);
+            Assert.fail("Caught unexpected exception.");
+          }
+          return false;
+        }
+      }, 1000, 60000);
+
+      runFsck(conf, 1, true, "/", "-files", "-blocks", "-racks");
+      FSImage.LOG.info("Moving blocks to lost+found");
+      // Fsck will return error since we corrupted a block
+      runFsck(conf, 1, false, "/", "-move");
+
+      final List<LocatedFileStatus> retVal = new ArrayList<>();
+      final RemoteIterator<LocatedFileStatus> iter =
+          dfs.listFiles(new Path("/lost+found"), true);
+      while (iter.hasNext()) {
+        retVal.add(iter.next());
+      }
+      FSImage.LOG.info("Items in lost+found: " + retVal);
+
+      // Expect all good blocks moved, only corrupted block skipped.
+      long totalLength = 0;
+      for (LocatedFileStatus lfs: retVal) {
+        totalLength += lfs.getLen();
+      }
+      Assert.assertTrue("Nothing is moved to lost+found!", totalLength > 0);
+      util.cleanup(dfs, srcDir);
+    } finally {
+      if (cluster != null) {
+        cluster.shutdown();
+      }
+    }
+  }
+
 }
