HDFS-7830. DataNode does not release the volume lock when adding a volume 
fails. (Lei Xu via Colin P. McCabe)

(cherry picked from commit 5c1036d598051cf6af595740f1ab82092b0b6554)
(cherry picked from commit eefca23e8c5e474de1e25bf2ec8a5b266bbe8cfe)

Conflicts:
        hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetImpl.java
        hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetTestUtil.java
        hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/TestFsDatasetImpl.java


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/c723f3b1
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/c723f3b1
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/c723f3b1

Branch: refs/heads/sjlee/hdfs-merge
Commit: c723f3b1bd9eab261ab5edca33c4dae5ce3d0d30
Parents: 65ae3e2
Author: Colin Patrick Mccabe <cmcc...@cloudera.com>
Authored: Tue Mar 10 18:20:25 2015 -0700
Committer: Sangjin Lee <sj...@apache.org>
Committed: Thu Aug 13 00:06:16 2015 -0700

----------------------------------------------------------------------
 .../hadoop/hdfs/server/common/Storage.java      |  2 +-
 .../datanode/fsdataset/impl/FsDatasetImpl.java  | 16 ++++++-
 .../datanode/TestDataNodeHotSwapVolumes.java    | 34 ++------------
 .../fsdataset/impl/FsDatasetTestUtil.java       | 49 ++++++++++++++++++++
 .../fsdataset/impl/TestFsDatasetImpl.java       | 41 ++++++++++++++++
 5 files changed, 109 insertions(+), 33 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hadoop/blob/c723f3b1/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/Storage.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/Storage.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/Storage.java
index 14b52ce..8d0129a 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/Storage.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/Storage.java
@@ -672,7 +672,7 @@ public abstract class Storage extends StorageInfo {
      */
     public void lock() throws IOException {
       if (isShared()) {
-        LOG.info("Locking is disabled");
+        LOG.info("Locking is disabled for " + this.root);
         return;
       }
       FileLock newLock = tryLock();

http://git-wip-us.apache.org/repos/asf/hadoop/blob/c723f3b1/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetImpl.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetImpl.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetImpl.java
index cbcf6b8..f24d644 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetImpl.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetImpl.java
@@ -46,6 +46,7 @@ import javax.management.NotCompliantMBeanException;
 import javax.management.ObjectName;
 import javax.management.StandardMBean;
 
+import com.google.common.annotations.VisibleForTesting;
 import com.google.common.collect.Lists;
 import com.google.common.base.Preconditions;
 import org.apache.commons.logging.Log;
@@ -322,6 +323,12 @@ class FsDatasetImpl implements FsDatasetSpi<FsVolumeImpl> {
     LOG.info("Added volume - " + dir + ", StorageType: " + storageType);
   }
 
+  @VisibleForTesting
+  public FsVolumeImpl createFsVolume(String storageUuid, File currentDir,
+      StorageType storageType) throws IOException {
+    return new FsVolumeImpl(this, storageUuid, currentDir, conf, storageType);
+  }
+
   @Override
   public void addVolume(final StorageLocation location,
       final List<NamespaceInfo> nsInfos)
@@ -335,8 +342,8 @@ class FsDatasetImpl implements FsDatasetSpi<FsVolumeImpl> {
     final Storage.StorageDirectory sd = builder.getStorageDirectory();
 
     StorageType storageType = location.getStorageType();
-    final FsVolumeImpl fsVolume = new FsVolumeImpl(
-        this, sd.getStorageUuid(), sd.getCurrentDir(), this.conf, storageType);
+    final FsVolumeImpl fsVolume =
+        createFsVolume(sd.getStorageUuid(), sd.getCurrentDir(), storageType);
     final ReplicaMap tempVolumeMap = new ReplicaMap(fsVolume);
     ArrayList<IOException> exceptions = Lists.newArrayList();
 
@@ -352,6 +359,11 @@ class FsDatasetImpl implements FsDatasetSpi<FsVolumeImpl> {
       }
     }
     if (!exceptions.isEmpty()) {
+      try {
+        sd.unlock();
+      } catch (IOException e) {
+        exceptions.add(e);
+      }
       throw MultipleIOException.createIOException(exceptions);
     }
 

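The core of the fix above is a cleanup-on-failure pattern: once the storage directory lock has been taken, every failure path must release it before rethrowing, and a failure of the unlock itself is folded into the same aggregated exception rather than masking the original errors. A minimal standalone sketch of that pattern follows, using hypothetical Lockable/addWithCleanup names rather than the actual HDFS classes:

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

public class CleanupOnFailureSketch {
  interface Lockable {
    void unlock() throws IOException;
  }

  // Run each step, collecting per-step failures. If any occurred, release
  // the lock before throwing, and keep an unlock failure alongside the
  // original errors so no exception is lost.
  static void addWithCleanup(Lockable sd, List<Runnable> steps)
      throws IOException {
    List<IOException> exceptions = new ArrayList<>();
    for (Runnable step : steps) {
      try {
        step.run();
      } catch (RuntimeException e) {
        exceptions.add(new IOException("step failed", e));
      }
    }
    if (!exceptions.isEmpty()) {
      try {
        sd.unlock();        // the step HDFS-7830 adds: don't leak the lock
      } catch (IOException e) {
        exceptions.add(e);  // fold the unlock failure into the same report
      }
      throw new IOException("add failed: " + exceptions);
    }
  }
}
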
http://git-wip-us.apache.org/repos/asf/hadoop/blob/c723f3b1/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeHotSwapVolumes.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeHotSwapVolumes.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeHotSwapVolumes.java
index 7ed0421..cde7264 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeHotSwapVolumes.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeHotSwapVolumes.java
@@ -36,18 +36,14 @@ import org.apache.hadoop.hdfs.protocol.BlockListAsLongs;
 import org.apache.hadoop.hdfs.server.common.Storage;
 import org.apache.hadoop.hdfs.server.datanode.fsdataset.FsDatasetSpi;
 import org.apache.hadoop.hdfs.server.datanode.fsdataset.FsVolumeSpi;
+import org.apache.hadoop.hdfs.server.datanode.fsdataset.impl.FsDatasetTestUtil;
 import org.apache.hadoop.hdfs.server.protocol.DatanodeStorage;
-import org.apache.hadoop.io.IOUtils;
 import org.apache.hadoop.test.GenericTestUtils;
 import org.junit.After;
 import org.junit.Test;
 
 import java.io.File;
 import java.io.IOException;
-import java.io.RandomAccessFile;
-import java.nio.channels.FileChannel;
-import java.nio.channels.FileLock;
-import java.nio.channels.OverlappingFileLockException;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collection;
@@ -66,7 +62,6 @@ import static org.hamcrest.CoreMatchers.not;
 import static org.hamcrest.core.Is.is;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertFalse;
-import static org.junit.Assert.assertNotNull;
 import static org.junit.Assert.assertThat;
 import static org.junit.Assert.assertTrue;
 import static org.junit.Assert.fail;
@@ -528,31 +523,10 @@ public class TestDataNodeHotSwapVolumes {
   private static void assertFileLocksReleased(Collection<String> dirs)
       throws IOException {
     for (String dir: dirs) {
-      StorageLocation sl = StorageLocation.parse(dir);
-      File lockFile = new File(sl.getFile(), Storage.STORAGE_FILE_LOCK);
-      RandomAccessFile raf = null;
-      FileChannel channel = null;
-      FileLock lock = null;
       try {
-        raf = new RandomAccessFile(lockFile, "rws");
-        channel = raf.getChannel();
-        lock = channel.tryLock();
-        assertNotNull(String.format(
-          "Lock file at %s appears to be held by a different process.",
-          lockFile.getAbsolutePath()), lock);
-      } catch (OverlappingFileLockException e) {
-        fail(String.format("Must release lock file at %s.",
-          lockFile.getAbsolutePath()));
-      } finally {
-        if (lock != null) {
-          try {
-            lock.release();
-          } catch (IOException e) {
-            LOG.warn(String.format("I/O error releasing file lock %s.",
-              lockFile.getAbsolutePath()), e);
-          }
-        }
-        IOUtils.cleanup(null, channel, raf);
+        FsDatasetTestUtil.assertFileLockReleased(dir);
+      } catch (IOException e) {
+        LOG.warn(e);
       }
     }
   }

http://git-wip-us.apache.org/repos/asf/hadoop/blob/c723f3b1/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetTestUtil.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetTestUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetTestUtil.java
index f9e30e1..52e011b 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetTestUtil.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetTestUtil.java
@@ -19,13 +19,24 @@ package org.apache.hadoop.hdfs.server.datanode.fsdataset.impl;
 
 import java.io.File;
 import java.io.IOException;
+import java.io.RandomAccessFile;
+import java.nio.channels.FileChannel;
+import java.nio.channels.FileLock;
+import java.nio.channels.OverlappingFileLockException;
 import java.util.Collection;
+import java.util.Random;
 
 import org.apache.hadoop.hdfs.protocol.Block;
 import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
+import org.apache.hadoop.hdfs.server.common.Storage;
 import org.apache.hadoop.hdfs.server.datanode.DataNode;
 import org.apache.hadoop.hdfs.server.datanode.ReplicaInfo;
+import org.apache.hadoop.hdfs.server.datanode.StorageLocation;
 import org.apache.hadoop.hdfs.server.datanode.fsdataset.FsDatasetSpi;
+import org.apache.hadoop.io.IOUtils;
+
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.fail;
 
 public class FsDatasetTestUtil {
 
@@ -72,4 +83,42 @@ public class FsDatasetTestUtil {
     FsDatasetImpl fsDataset = ((FsDatasetImpl) dn.getFSDataset());
     ((FsDatasetImpl.LazyWriter) fsDataset.lazyWriter.getRunnable()).stop();
   }
+
+  /**
+   * Asserts that the storage lock file in the given directory has been
+   * released.  This method works by trying to acquire the lock file itself.  If
+   * locking fails here, then the main code must have failed to release it.
+   *
+   * @param dir the storage directory to check
+   * @throws IOException if there is an unexpected I/O error
+   */
+  public static void assertFileLockReleased(String dir) throws IOException {
+    StorageLocation sl = StorageLocation.parse(dir);
+    File lockFile = new File(sl.getFile(), Storage.STORAGE_FILE_LOCK);
+    RandomAccessFile raf = new RandomAccessFile(lockFile, "rws");
+    FileChannel channel = raf.getChannel();
+    try {
+      FileLock lock = channel.tryLock();
+      assertNotNull(String.format(
+          "Lock file at %s appears to be held by a different process.",
+          lockFile.getAbsolutePath()), lock);
+      if (lock != null) {
+        try {
+          lock.release();
+        } catch (IOException e) {
+          FsDatasetImpl.LOG.warn(String.format("I/O error releasing file lock %s.",
+              lockFile.getAbsolutePath()), e);
+          throw e;
+        }
+      }
+    } catch (OverlappingFileLockException e) {
+      fail(String.format("Must release lock file at %s.",
+          lockFile.getAbsolutePath()));
+    } finally {
+      try {
+        channel.close();
+        raf.close();
+      } catch (IOException ignore) {}
+    }
+  }
 }
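
The helper above leans on the java.nio.channels locking contract: FileChannel.tryLock() returns null when another process holds the lock, and throws OverlappingFileLockException when this same JVM already holds it, so either outcome means the lock was never released. A self-contained probe using the same contract, written against the plain JDK with a placeholder path rather than the HDFS Storage types:

import java.io.File;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.nio.channels.FileLock;
import java.nio.channels.OverlappingFileLockException;

public class LockProbe {
  /** Returns true iff no process (including this JVM) holds the lock file. */
  static boolean isLockReleased(File lockFile) throws IOException {
    try (RandomAccessFile raf = new RandomAccessFile(lockFile, "rws")) {
      FileLock lock = raf.getChannel().tryLock();
      if (lock == null) {
        return false;        // held by a different process
      }
      lock.release();        // we acquired it, so it was free
      return true;
    } catch (OverlappingFileLockException e) {
      return false;          // held elsewhere in this same JVM
    }
  }

  public static void main(String[] args) throws IOException {
    // Placeholder path; in the test this is <storage dir>/in_use.lock.
    System.out.println(isLockReleased(new File("/tmp/in_use.lock")));
  }
}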

http://git-wip-us.apache.org/repos/asf/hadoop/blob/c723f3b1/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/TestFsDatasetImpl.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/TestFsDatasetImpl.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/TestFsDatasetImpl.java
index 09934c2..0a9776d 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/TestFsDatasetImpl.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/TestFsDatasetImpl.java
@@ -36,11 +36,13 @@ import org.apache.hadoop.hdfs.server.datanode.ReplicaInfo;
 import org.apache.hadoop.hdfs.server.datanode.StorageLocation;
 import org.apache.hadoop.hdfs.server.datanode.fsdataset.RoundRobinVolumeChoosingPolicy;
 import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo;
+import org.apache.hadoop.io.MultipleIOException;
 import org.apache.hadoop.test.GenericTestUtils;
 import org.apache.hadoop.util.DiskChecker;
 import org.apache.hadoop.util.StringUtils;
 import org.junit.Before;
 import org.junit.Test;
+import org.mockito.Matchers;
 import org.mockito.invocation.InvocationOnMock;
 import org.mockito.stubbing.Answer;
 
@@ -59,13 +61,16 @@ import static org.junit.Assert.assertSame;
 import static org.junit.Assert.assertTrue;
 import static org.junit.Assert.fail;
 import static org.mockito.Matchers.any;
+import static org.mockito.Matchers.anyList;
 import static org.mockito.Matchers.anyListOf;
 import static org.mockito.Matchers.anyString;
 import static org.mockito.Matchers.eq;
 import static org.mockito.Mockito.doAnswer;
+import static org.mockito.Mockito.doReturn;
 import static org.mockito.Mockito.doThrow;
 import static org.mockito.Mockito.mock;
 import static org.mockito.Mockito.never;
+import static org.mockito.Mockito.spy;
 import static org.mockito.Mockito.times;
 import static org.mockito.Mockito.verify;
 import static org.mockito.Mockito.when;
@@ -321,4 +326,40 @@ public class TestFsDatasetImpl {
     assertFalse(volumeList.getVolumes().contains(brokenVolume));
     assertEquals(NUM_VOLUMES - 1, volumeList.getVolumes().size());
   }
+
+  @Test
+  public void testAddVolumeFailureReleasesInUseLock() throws IOException {
+    FsDatasetImpl spyDataset = spy(dataset);
+    FsVolumeImpl mockVolume = mock(FsVolumeImpl.class);
+    File badDir = new File(BASE_DIR, "bad");
+    badDir.mkdirs();
+    doReturn(mockVolume).when(spyDataset)
+        .createFsVolume(anyString(), any(File.class), any(StorageType.class));
+    doThrow(new IOException("Failed to getVolumeMap()"))
+      .when(mockVolume).getVolumeMap(
+        anyString(),
+        any(ReplicaMap.class),
+        any(RamDiskReplicaLruTracker.class));
+
+    Storage.StorageDirectory sd = createStorageDirectory(badDir);
+    sd.lock();
+    DataStorage.VolumeBuilder builder = new DataStorage.VolumeBuilder(storage, sd);
+    when(storage.prepareVolume(eq(datanode), eq(badDir),
+        Matchers.<List<NamespaceInfo>>any()))
+        .thenReturn(builder);
+
+    StorageLocation location = StorageLocation.parse(badDir.toString());
+    List<NamespaceInfo> nsInfos = Lists.newArrayList();
+    for (String bpid : BLOCK_POOL_IDS) {
+      nsInfos.add(new NamespaceInfo(0, CLUSTER_ID, bpid, 1));
+    }
+
+    try {
+      spyDataset.addVolume(location, nsInfos);
+      fail("Expect to throw MultipleIOException");
+    } catch (MultipleIOException e) {
+    }
+
+    FsDatasetTestUtil.assertFileLockReleased(badDir.toString());
+  }
 }
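
This test works because the patch introduced createFsVolume() as a @VisibleForTesting factory seam: a Mockito spy wraps the real dataset, doReturn(...) reroutes only that factory call to a mock volume, and the mock's failure forces addVolume down the new unlock path. A minimal illustration of the spy-plus-factory-seam pattern, using hypothetical Service/Worker names rather than the HDFS classes:

import static org.mockito.Mockito.doReturn;
import static org.mockito.Mockito.doThrow;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.spy;

public class FactorySeamSketch {
  static class Worker {
    public void run() {}
  }

  static class Service {
    // Factory seam: tests override this one call, everything else stays real.
    public Worker createWorker() {
      return new Worker();
    }

    public boolean start() {
      try {
        createWorker().run();
        return true;
      } catch (RuntimeException e) {
        return false;          // failure path under test
      }
    }
  }

  public static void main(String[] args) {
    Service svc = spy(new Service());
    Worker bad = mock(Worker.class);
    doThrow(new RuntimeException("boom")).when(bad).run();
    doReturn(bad).when(svc).createWorker();  // reroute only the factory call
    if (svc.start()) {                       // real start(), mocked worker
      throw new AssertionError("expected the failure path");
    }
  }
}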
