Repository: hadoop
Updated Branches:
  refs/heads/branch-2 8854c7801 -> 8bffaa46f
HDFS-7373. Clean up temporary files after fsimage transfer failures. Contributed by Kihwal Lee
(cherry picked from commit c0d666c74e9ea76564a2458c6c0a78ae7afa9fea)


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/8bffaa46
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/8bffaa46
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/8bffaa46

Branch: refs/heads/branch-2
Commit: 8bffaa46fca8b07dff407935e3c30ec3a036286f
Parents: 8854c78
Author: Kihwal Lee <kih...@apache.org>
Authored: Thu Dec 18 13:00:18 2014 -0600
Committer: Kihwal Lee <kih...@apache.org>
Committed: Thu Dec 18 13:00:18 2014 -0600

----------------------------------------------------------------------
 hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt   |  3 +++
 .../hdfs/server/namenode/TransferFsImage.java | 21 ++++++++++++++++++++
 .../hdfs/server/namenode/TestCheckpoint.java  | 19 ++++++++++++++++++
 3 files changed, 43 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hadoop/blob/8bffaa46/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
index 49d834d..21137b7 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
+++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
@@ -211,6 +211,9 @@ Release 2.7.0 - UNRELEASED
     HDFS-7531. Improve the concurrent access on FsVolumeList
     (Lei Xu via Colin P. McCabe)
 
+    HDFS-7373. Clean up temporary files after fsimage transfer failures.
+    (kihwal)
+
   OPTIMIZATIONS
 
     HDFS-7454. Reduce memory footprint for AclEntries in NameNode.


http://git-wip-us.apache.org/repos/asf/hadoop/blob/8bffaa46/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/TransferFsImage.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/TransferFsImage.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/TransferFsImage.java
index 160371a..1f52ff7 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/TransferFsImage.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/TransferFsImage.java
@@ -528,10 +528,18 @@ public class TransferFsImage {
         fos.getChannel().force(true);
         fos.close();
       }
+
+      // Something went wrong and did not finish reading.
+      // Remove the temporary files.
+      if (!finishedReceiving) {
+        deleteTmpFiles(localPaths);
+      }
+
       if (finishedReceiving && received != advertisedSize) {
         // only throw this exception if we think we read all of it on our end
         // -- otherwise a client-side IOException would be masked by this
         // exception that makes it look like a server-side problem!
+        deleteTmpFiles(localPaths);
         throw new IOException("File " + url + " received length " + received +
                               " is not of the advertised size " +
                               advertisedSize);
@@ -548,6 +556,7 @@ public class TransferFsImage {
 
       if (advertisedDigest != null &&
           !computedDigest.equals(advertisedDigest)) {
+        deleteTmpFiles(localPaths);
         throw new IOException("File " + url + " computed digest " +
             computedDigest + " does not match advertised digest " +
             advertisedDigest);
@@ -558,6 +567,18 @@ public class TransferFsImage {
     }
   }
 
+  private static void deleteTmpFiles(List<File> files) {
+    if (files == null) {
+      return;
+    }
+
+    LOG.info("Deleting temporary files: " + files);
+    for (File file : files) {
+      file.delete();  // ignore the return value
+    }
+  }
+
+
   private static MD5Hash parseMD5Header(HttpURLConnection connection) {
     String header = connection.getHeaderField(MD5_HEADER);
     return (header != null) ? new MD5Hash(header) : null;


http://git-wip-us.apache.org/repos/asf/hadoop/blob/8bffaa46/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestCheckpoint.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestCheckpoint.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestCheckpoint.java
index e34bc2b..88adffe 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestCheckpoint.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestCheckpoint.java
@@ -643,6 +643,22 @@ public class TestCheckpoint {
     });
   }
 
+  private void checkTempImages(NNStorage storage) throws IOException {
+    List<File> dirs = new ArrayList<File>();
+    dirs.add(storage.getStorageDir(0).getCurrentDir());
+    dirs.add(storage.getStorageDir(1).getCurrentDir());
+
+    for (File dir : dirs) {
+      File[] list = dir.listFiles();
+      for (File f : list) {
+        // Throw an exception if a temp image file is found.
+        if (f.getName().contains(NNStorage.NameNodeFile.IMAGE_NEW.getName())) {
+          throw new IOException("Found " + f);
+        }
+      }
+    }
+  }
+
   /**
    * Simulate 2NN failing to send the whole file (error type 3)
    * The length header in the HTTP transfer should prevent
@@ -704,6 +720,9 @@ public class TestCheckpoint {
       GenericTestUtils.assertExceptionContains(exceptionSubstring, e);
     }
     Mockito.reset(faultInjector);
+    // Make sure there are no temporary files left around.
+    checkTempImages(cluster.getNameNode().getFSImage().getStorage());
+    checkTempImages(secondary.getFSImage().getStorage());
 
     secondary.shutdown(); // secondary namenode crash!
     secondary = null;
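----------------------------------------------------------------------

In short, the patch enforces one invariant: an fsimage download is written
into temporary files, and every failure path (incomplete read, length
mismatch, digest mismatch) must delete those temp files before the exception
propagates. Below is a minimal, self-contained sketch of that pattern. The
names (TempFileReceiver, receiveToTempFiles) are illustrative stand-ins, not
the actual TransferFsImage API, and the sketch collapses the patch's
per-throw deleteTmpFiles() calls into a single "finished" flag checked in a
finally block.

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.List;

public class TempFileReceiver {

  /**
   * Stream the source into every destination temp file; if anything goes
   * wrong, delete the partial files before the exception propagates. This
   * is the invariant HDFS-7373 enforces in TransferFsImage.
   */
  static void receiveToTempFiles(InputStream in, List<File> tmpFiles,
      long advertisedSize) throws IOException {
    boolean finished = false;
    FileOutputStream[] outs = new FileOutputStream[tmpFiles.size()];
    try {
      for (int i = 0; i < outs.length; i++) {
        outs[i] = new FileOutputStream(tmpFiles.get(i));
      }
      long received = 0;
      byte[] buf = new byte[64 * 1024];
      int n;
      while ((n = in.read(buf)) > 0) {
        // Like TransferFsImage, write each chunk to the temp file in
        // every storage directory.
        for (FileOutputStream out : outs) {
          out.write(buf, 0, n);
        }
        received += n;
      }
      if (received != advertisedSize) {
        // The same length check the patch now guards with deleteTmpFiles().
        throw new IOException("Received " + received
            + " bytes but advertised size was " + advertisedSize);
      }
      finished = true;
    } finally {
      for (FileOutputStream out : outs) {
        if (out != null) {
          out.close();
        }
      }
      // Every failure path (short read, length mismatch, I/O error) lands
      // here with finished == false: remove the partial files.
      if (!finished) {
        deleteTmpFiles(tmpFiles);
      }
    }
  }

  /** Best-effort deletion, mirroring deleteTmpFiles() in the patch. */
  static void deleteTmpFiles(List<File> files) {
    if (files == null) {
      return;
    }
    for (File f : files) {
      f.delete(); // ignore the return value, as the patch does
    }
  }
}

Note that the real code keeps the separate finishedReceiving flag for a
reason spelled out in the diff's own comment: the size-mismatch exception is
only thrown when the read completed, so that a genuine client-side
IOException is not masked by an error that looks like a server-side problem.
The cleanup, however, now runs on both paths.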
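The new test helper checkTempImages() asserts the same invariant from the
storage side: after a failed checkpoint, no file whose name contains the
IMAGE_NEW temp-image prefix may remain in a storage directory's current/
directory. A standalone version of that check might look like the following
sketch; the "fsimage.ckpt" literal is an assumption standing in for
NNStorage.NameNodeFile.IMAGE_NEW.getName(), and the directory argument
stands in for StorageDirectory.getCurrentDir().

import java.io.File;
import java.io.IOException;

public class TempImageCheck {

  /**
   * Fail if a leftover checkpoint temp image is found in the given
   * directory, mirroring checkTempImages() in TestCheckpoint.
   */
  static void assertNoTempImages(File currentDir) throws IOException {
    File[] list = currentDir.listFiles();
    if (list == null) {
      // Directory is missing or unreadable; nothing to scan.
      return;
    }
    for (File f : list) {
      // "fsimage.ckpt" is assumed to match the IMAGE_NEW temp-file prefix.
      if (f.getName().contains("fsimage.ckpt")) {
        throw new IOException("Found leftover temp image: " + f);
      }
    }
  }
}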