Repository: hadoop Updated Branches: refs/heads/branch-2 456fe08a6 -> 2d1d2dde1
HDFS-10360. DataNode may format directory and lose blocks if current/VERSION is missing. (Wei-Chiu Chuang via lei) (cherry picked from commit bae11b95d492e0be7c5576097424879c2539b474) Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/2d1d2dde Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/2d1d2dde Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/2d1d2dde Branch: refs/heads/branch-2 Commit: 2d1d2dde17e472a487b6ac2078529271ceadfb3f Parents: 456fe08 Author: Lei Xu <l...@apache.org> Authored: Tue May 17 16:36:48 2016 -0700 Committer: Lei Xu <l...@apache.org> Committed: Tue May 17 16:37:36 2016 -0700 ---------------------------------------------------------------------- .../hadoop/hdfs/server/common/Storage.java | 65 ++++++++++++++++++-- .../hdfs/server/datanode/DataStorage.java | 2 +- .../TestDataNodeVolumeFailureReporting.java | 36 +++++++++++ .../hdfs/server/datanode/TestDataStorage.java | 26 ++++++++ 4 files changed, 124 insertions(+), 5 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hadoop/blob/2d1d2dde/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/Storage.java ---------------------------------------------------------------------- diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/Storage.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/Storage.java index fa7e23b..6f1ee19 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/Storage.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/Storage.java @@ -25,7 +25,10 @@ import java.io.RandomAccessFile; import java.lang.management.ManagementFactory; import java.nio.channels.FileLock; import java.nio.channels.OverlappingFileLockException; +import 
java.nio.file.DirectoryStream; +import java.nio.file.Files; import java.util.ArrayList; +import java.util.Arrays; import java.util.Iterator; import java.util.List; import java.util.Properties; @@ -345,9 +348,12 @@ public abstract class Storage extends StorageInfo { */ public void clearDirectory() throws IOException { File curDir = this.getCurrentDir(); - if (curDir.exists()) + if (curDir.exists()) { + File[] files = FileUtil.listFiles(curDir); + LOG.info("Will remove files: " + Arrays.toString(files)); if (!(FileUtil.fullyDelete(curDir))) throw new IOException("Cannot remove current directory: " + curDir); + } if (!curDir.mkdirs()) throw new IOException("Cannot create directory " + curDir); } @@ -470,16 +476,60 @@ public abstract class Storage extends StorageInfo { } /** - * Check consistency of the storage directory + * Check to see if current/ directory is empty. This method is used + * before determining to format the directory. + * + * @throws InconsistentFSStateException if not empty. + * @throws IOException if unable to list files under the directory. + */ + private void checkEmptyCurrent() throws InconsistentFSStateException, + IOException { + File currentDir = getCurrentDir(); + if(!currentDir.exists()) { + // if current/ does not exist, it's safe to format it. + return; + } + try(DirectoryStream<java.nio.file.Path> dirStream = + Files.newDirectoryStream(currentDir.toPath())) { + if (dirStream.iterator().hasNext()) { + throw new InconsistentFSStateException(root, + "Can't format the storage directory because the current/ " + + "directory is not empty."); + } + } + } + + /** + * Check consistency of the storage directory. + * + * @param startOpt a startup option. + * @param storage The Storage object that manages this StorageDirectory. + * + * @return state {@link StorageState} of the storage directory + * @throws InconsistentFSStateException if directory state is not + * consistent and cannot be recovered. 
+ * @throws IOException + */ + public StorageState analyzeStorage(StartupOption startOpt, Storage storage) + throws IOException { + return analyzeStorage(startOpt, storage, false); + } + + /** + * Check consistency of the storage directory. * * @param startOpt a startup option. + * @param storage The Storage object that manages this StorageDirectory. + * @param checkCurrentIsEmpty if true, make sure current/ directory + * is empty before determining to format it. * * @return state {@link StorageState} of the storage directory * @throws InconsistentFSStateException if directory state is not * consistent and cannot be recovered. * @throws IOException */ - public StorageState analyzeStorage(StartupOption startOpt, Storage storage) + public StorageState analyzeStorage(StartupOption startOpt, Storage storage, + boolean checkCurrentIsEmpty) throws IOException { assert root != null : "root is null"; boolean hadMkdirs = false; @@ -516,8 +566,12 @@ public abstract class Storage extends StorageInfo { // If startOpt is HOTSWAP, it returns NOT_FORMATTED for empty directory, // while it also checks the layout version. 
if (startOpt == HdfsServerConstants.StartupOption.FORMAT || - (startOpt == StartupOption.HOTSWAP && hadMkdirs)) + (startOpt == StartupOption.HOTSWAP && hadMkdirs)) { + if (checkCurrentIsEmpty) { + checkEmptyCurrent(); + } return StorageState.NOT_FORMATTED; + } if (startOpt != HdfsServerConstants.StartupOption.IMPORT) { storage.checkOldLayoutStorage(this); @@ -542,6 +596,9 @@ public abstract class Storage extends StorageInfo { if (hasPrevious) throw new InconsistentFSStateException(root, "version file in current directory is missing."); + if (checkCurrentIsEmpty) { + checkEmptyCurrent(); + } return StorageState.NOT_FORMATTED; } http://git-wip-us.apache.org/repos/asf/hadoop/blob/2d1d2dde/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataStorage.java ---------------------------------------------------------------------- diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataStorage.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataStorage.java index 3844c8e..0e6b339 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataStorage.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataStorage.java @@ -267,7 +267,7 @@ public class DataStorage extends Storage { List<Callable<StorageDirectory>> callables) throws IOException { StorageDirectory sd = new StorageDirectory(dataDir, null, false); try { - StorageState curState = sd.analyzeStorage(startOpt, this); + StorageState curState = sd.analyzeStorage(startOpt, this, true); // sd is locked but not opened switch (curState) { case NORMAL: http://git-wip-us.apache.org/repos/asf/hadoop/blob/2d1d2dde/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeVolumeFailureReporting.java ---------------------------------------------------------------------- diff --git 
a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeVolumeFailureReporting.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeVolumeFailureReporting.java index b1ea5ae..c76fa2c 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeVolumeFailureReporting.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeVolumeFailureReporting.java @@ -22,6 +22,7 @@ import static org.apache.hadoop.test.MetricsAsserts.getMetrics; import static org.hamcrest.core.Is.is; import static org.junit.Assert.assertArrayEquals; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertNull; import static org.junit.Assert.assertThat; import static org.junit.Assert.assertTrue; @@ -444,6 +445,41 @@ public class TestDataNodeVolumeFailureReporting { checkFailuresAtNameNode(dm, dns.get(1), true); } + @Test + public void testAutoFormatEmptyDirectory() throws Exception { + // remove the version file + File dn1Vol1 = cluster.getStorageDir(0, 0); + File current = new File(dn1Vol1, "current"); + File currentVersion = new File(current, "VERSION"); + currentVersion.delete(); + // restart the data node + assertTrue(cluster.restartDataNodes(true)); + // the DN should tolerate one volume failure. 
+ cluster.waitActive(); + ArrayList<DataNode> dns = cluster.getDataNodes(); + DataNode dn = dns.get(0); + assertFalse("DataNode should not reformat if VERSION is missing", + currentVersion.exists()); + + // Make sure DN's JMX sees the failed volume + final String[] expectedFailedVolumes = {dn1Vol1.getAbsolutePath()}; + DataNodeTestUtils.triggerHeartbeat(dn); + FsDatasetSpi<?> fsd = dn.getFSDataset(); + assertEquals(expectedFailedVolumes.length, fsd.getNumFailedVolumes()); + assertArrayEquals(expectedFailedVolumes, fsd.getFailedStorageLocations()); + // there shouldn't be any more volume failures due to I/O failure + checkFailuresAtDataNode(dn, 0, false, expectedFailedVolumes); + + // The NN reports one volume failure + final DatanodeManager dm = cluster.getNamesystem().getBlockManager(). + getDatanodeManager(); + long dnCapacity = DFSTestUtil.getDatanodeCapacity(dm, 0); + DFSTestUtil.waitForDatanodeStatus(dm, 1, 0, 1, + (1*dnCapacity), WAIT_FOR_HEARTBEATS); + checkAggregateFailuresAtNameNode(false, 1); + checkFailuresAtNameNode(dm, dns.get(0), false, dn1Vol1.getAbsolutePath()); + } + /** * Checks the NameNode for correct values of aggregate counters tracking failed * volumes across all DataNodes. 
http://git-wip-us.apache.org/repos/asf/hadoop/blob/2d1d2dde/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataStorage.java ---------------------------------------------------------------------- diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataStorage.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataStorage.java index c55dbae..405d2e9 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataStorage.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataStorage.java @@ -167,6 +167,32 @@ public class TestDataStorage { } @Test + public void testMissingVersion() throws IOException, + URISyntaxException { + final int numLocations = 1; + final int numNamespace = 1; + List<StorageLocation> locations = createStorageLocations(numLocations); + + StorageLocation firstStorage = locations.get(0); + Storage.StorageDirectory sd = new Storage.StorageDirectory( + firstStorage.getFile()); + // the directory is not initialized so VERSION does not exist + // create a fake directory under current/ + File currentDir = new File(sd.getCurrentDir(), + "BP-787466439-172.26.24.43-1462305406642"); + assertTrue("unable to mkdir " + currentDir.getName(), currentDir.mkdirs()); + + // Add volumes for multiple namespaces. + List<NamespaceInfo> namespaceInfos = createNamespaceInfos(numNamespace); + for (NamespaceInfo ni : namespaceInfos) { + storage.addStorageLocations(mockDN, ni, locations, START_OPT); + } + + // It should not format the directory because VERSION is missing. 
+ assertTrue("Storage directory was formatted", currentDir.exists()); + } + + @Test public void testRecoverTransitionReadFailure() throws IOException { final int numLocations = 3; List<StorageLocation> locations = --------------------------------------------------------------------- To unsubscribe, e-mail: common-commits-unsubscr...@hadoop.apache.org For additional commands, e-mail: common-commits-h...@hadoop.apache.org