HDFS-7830. DataNode does not release the volume lock when adding a volume fails. (Lei Xu via Colin P. McCabe)
(cherry picked from commit 5c1036d598051cf6af595740f1ab82092b0b6554) (cherry picked from commit eefca23e8c5e474de1e25bf2ec8a5b266bbe8cfe) Conflicts: hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetImpl.java hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetTestUtil.java hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/TestFsDatasetImpl.java Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/c723f3b1 Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/c723f3b1 Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/c723f3b1 Branch: refs/heads/sjlee/hdfs-merge Commit: c723f3b1bd9eab261ab5edca33c4dae5ce3d0d30 Parents: 65ae3e2 Author: Colin Patrick Mccabe <cmcc...@cloudera.com> Authored: Tue Mar 10 18:20:25 2015 -0700 Committer: Sangjin Lee <sj...@apache.org> Committed: Thu Aug 13 00:06:16 2015 -0700 ---------------------------------------------------------------------- .../hadoop/hdfs/server/common/Storage.java | 2 +- .../datanode/fsdataset/impl/FsDatasetImpl.java | 16 ++++++- .../datanode/TestDataNodeHotSwapVolumes.java | 34 ++------------ .../fsdataset/impl/FsDatasetTestUtil.java | 49 ++++++++++++++++++++ .../fsdataset/impl/TestFsDatasetImpl.java | 41 ++++++++++++++++ 5 files changed, 109 insertions(+), 33 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hadoop/blob/c723f3b1/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/Storage.java ---------------------------------------------------------------------- diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/Storage.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/Storage.java index 14b52ce..8d0129a 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/Storage.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/Storage.java @@ -672,7 +672,7 @@ public abstract class Storage extends StorageInfo { */ public void lock() throws IOException { if (isShared()) { - LOG.info("Locking is disabled"); + LOG.info("Locking is disabled for " + this.root); return; } FileLock newLock = tryLock(); http://git-wip-us.apache.org/repos/asf/hadoop/blob/c723f3b1/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetImpl.java ---------------------------------------------------------------------- diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetImpl.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetImpl.java index cbcf6b8..f24d644 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetImpl.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetImpl.java @@ -46,6 +46,7 @@ import javax.management.NotCompliantMBeanException; import javax.management.ObjectName; import javax.management.StandardMBean; +import com.google.common.annotations.VisibleForTesting; import com.google.common.collect.Lists; import com.google.common.base.Preconditions; import org.apache.commons.logging.Log; @@ -322,6 +323,12 @@ class FsDatasetImpl implements FsDatasetSpi<FsVolumeImpl> { LOG.info("Added volume - " + dir + ", StorageType: " + storageType); } + @VisibleForTesting + public FsVolumeImpl createFsVolume(String storageUuid, File currentDir, + StorageType storageType) throws IOException { + return new FsVolumeImpl(this, storageUuid, currentDir, conf, storageType); + } + @Override public void addVolume(final StorageLocation location, final List<NamespaceInfo> nsInfos) @@ -335,8 +342,8 @@ class FsDatasetImpl implements FsDatasetSpi<FsVolumeImpl> { final Storage.StorageDirectory sd = builder.getStorageDirectory(); StorageType storageType = location.getStorageType(); - final FsVolumeImpl fsVolume = new FsVolumeImpl( - this, sd.getStorageUuid(), sd.getCurrentDir(), this.conf, storageType); + final FsVolumeImpl fsVolume = + createFsVolume(sd.getStorageUuid(), sd.getCurrentDir(), storageType); final ReplicaMap tempVolumeMap = new ReplicaMap(fsVolume); ArrayList<IOException> exceptions = Lists.newArrayList(); @@ -352,6 +359,11 @@ class FsDatasetImpl implements FsDatasetSpi<FsVolumeImpl> { } } if (!exceptions.isEmpty()) { + try { + sd.unlock(); + } catch (IOException e) { + exceptions.add(e); + } throw MultipleIOException.createIOException(exceptions); } http://git-wip-us.apache.org/repos/asf/hadoop/blob/c723f3b1/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeHotSwapVolumes.java ---------------------------------------------------------------------- diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeHotSwapVolumes.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeHotSwapVolumes.java index 7ed0421..cde7264 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeHotSwapVolumes.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeHotSwapVolumes.java @@ -36,18 +36,14 @@ import org.apache.hadoop.hdfs.protocol.BlockListAsLongs; import org.apache.hadoop.hdfs.server.common.Storage; import org.apache.hadoop.hdfs.server.datanode.fsdataset.FsDatasetSpi; import org.apache.hadoop.hdfs.server.datanode.fsdataset.FsVolumeSpi; +import org.apache.hadoop.hdfs.server.datanode.fsdataset.impl.FsDatasetTestUtil; import org.apache.hadoop.hdfs.server.protocol.DatanodeStorage; -import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.test.GenericTestUtils; import org.junit.After; import org.junit.Test; import java.io.File; import java.io.IOException; -import java.io.RandomAccessFile; -import java.nio.channels.FileChannel; -import java.nio.channels.FileLock; -import java.nio.channels.OverlappingFileLockException; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; @@ -66,7 +62,6 @@ import static org.hamcrest.CoreMatchers.not; import static org.hamcrest.core.Is.is; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertThat; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; @@ -528,31 +523,10 @@ public class TestDataNodeHotSwapVolumes { private static void assertFileLocksReleased(Collection<String> dirs) throws IOException { for (String dir: dirs) { - StorageLocation sl = StorageLocation.parse(dir); - File lockFile = new File(sl.getFile(), Storage.STORAGE_FILE_LOCK); - RandomAccessFile raf = null; - FileChannel channel = null; - FileLock lock = null; try { - raf = new RandomAccessFile(lockFile, "rws"); - channel = raf.getChannel(); - lock = channel.tryLock(); - assertNotNull(String.format( - "Lock file at %s appears to be held by a different process.", - lockFile.getAbsolutePath()), lock); - } catch (OverlappingFileLockException e) { - fail(String.format("Must release lock file at %s.", - lockFile.getAbsolutePath())); - } finally { - if (lock != null) { - try { - lock.release(); - } catch (IOException e) { - LOG.warn(String.format("I/O error releasing file lock %s.", - lockFile.getAbsolutePath()), e); - } - } - IOUtils.cleanup(null, channel, raf); + FsDatasetTestUtil.assertFileLockReleased(dir); + } catch (IOException e) { + LOG.warn(e); } } } http://git-wip-us.apache.org/repos/asf/hadoop/blob/c723f3b1/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetTestUtil.java ---------------------------------------------------------------------- diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetTestUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetTestUtil.java index f9e30e1..52e011b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetTestUtil.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetTestUtil.java @@ -19,13 +19,24 @@ package org.apache.hadoop.hdfs.server.datanode.fsdataset.impl; import java.io.File; import java.io.IOException; +import java.io.RandomAccessFile; +import java.nio.channels.FileChannel; +import java.nio.channels.FileLock; +import java.nio.channels.OverlappingFileLockException; import java.util.Collection; +import java.util.Random; import org.apache.hadoop.hdfs.protocol.Block; import org.apache.hadoop.hdfs.protocol.ExtendedBlock; +import org.apache.hadoop.hdfs.server.common.Storage; import org.apache.hadoop.hdfs.server.datanode.DataNode; import org.apache.hadoop.hdfs.server.datanode.ReplicaInfo; +import org.apache.hadoop.hdfs.server.datanode.StorageLocation; import org.apache.hadoop.hdfs.server.datanode.fsdataset.FsDatasetSpi; +import org.apache.hadoop.io.IOUtils; + +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.fail; public class FsDatasetTestUtil { @@ -72,4 +83,42 @@ public class FsDatasetTestUtil { FsDatasetImpl fsDataset = ((FsDatasetImpl) dn.getFSDataset()); ((FsDatasetImpl.LazyWriter) fsDataset.lazyWriter.getRunnable()).stop(); } + + /** + * Asserts that the storage lock file in the given directory has been + * released. This method works by trying to acquire the lock file itself. If + * locking fails here, then the main code must have failed to release it. + * + * @param dir the storage directory to check + * @throws IOException if there is an unexpected I/O error + */ + public static void assertFileLockReleased(String dir) throws IOException { + StorageLocation sl = StorageLocation.parse(dir); + File lockFile = new File(sl.getFile(), Storage.STORAGE_FILE_LOCK); + RandomAccessFile raf = new RandomAccessFile(lockFile, "rws"); + FileChannel channel = raf.getChannel(); + try { + FileLock lock = channel.tryLock(); + assertNotNull(String.format( + "Lock file at %s appears to be held by a different process.", + lockFile.getAbsolutePath()), lock); + if (lock != null) { + try { + lock.release(); + } catch (IOException e) { + FsDatasetImpl.LOG.warn(String.format("I/O error releasing file lock %s.", + lockFile.getAbsolutePath()), e); + throw e; + } + } + } catch (OverlappingFileLockException e) { + fail(String.format("Must release lock file at %s.", + lockFile.getAbsolutePath())); + } finally { + try { + channel.close(); + raf.close(); + } catch (IOException ignore) {} + } + } } http://git-wip-us.apache.org/repos/asf/hadoop/blob/c723f3b1/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/TestFsDatasetImpl.java ---------------------------------------------------------------------- diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/TestFsDatasetImpl.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/TestFsDatasetImpl.java index 09934c2..0a9776d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/TestFsDatasetImpl.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/TestFsDatasetImpl.java @@ -36,11 +36,13 @@ import org.apache.hadoop.hdfs.server.datanode.ReplicaInfo; import org.apache.hadoop.hdfs.server.datanode.StorageLocation; import org.apache.hadoop.hdfs.server.datanode.fsdataset.RoundRobinVolumeChoosingPolicy; import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo; +import org.apache.hadoop.io.MultipleIOException; import org.apache.hadoop.test.GenericTestUtils; import org.apache.hadoop.util.DiskChecker; import org.apache.hadoop.util.StringUtils; import org.junit.Before; import org.junit.Test; +import org.mockito.Matchers; import org.mockito.invocation.InvocationOnMock; import org.mockito.stubbing.Answer; @@ -59,13 +61,16 @@ import static org.junit.Assert.assertSame; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; import static org.mockito.Matchers.any; +import static org.mockito.Matchers.anyList; import static org.mockito.Matchers.anyListOf; import static org.mockito.Matchers.anyString; import static org.mockito.Matchers.eq; import static org.mockito.Mockito.doAnswer; +import static org.mockito.Mockito.doReturn; import static org.mockito.Mockito.doThrow; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.never; +import static org.mockito.Mockito.spy; import static org.mockito.Mockito.times; import static org.mockito.Mockito.verify; import static org.mockito.Mockito.when; @@ -321,4 +326,40 @@ public class TestFsDatasetImpl { assertFalse(volumeList.getVolumes().contains(brokenVolume)); assertEquals(NUM_VOLUMES - 1, volumeList.getVolumes().size()); } + + @Test + public void testAddVolumeFailureReleasesInUseLock() throws IOException { + FsDatasetImpl spyDataset = spy(dataset); + FsVolumeImpl mockVolume = mock(FsVolumeImpl.class); + File badDir = new File(BASE_DIR, "bad"); + badDir.mkdirs(); + doReturn(mockVolume).when(spyDataset) + .createFsVolume(anyString(), any(File.class), any(StorageType.class)); + doThrow(new IOException("Failed to getVolumeMap()")) + .when(mockVolume).getVolumeMap( + anyString(), + any(ReplicaMap.class), + any(RamDiskReplicaLruTracker.class)); + + Storage.StorageDirectory sd = createStorageDirectory(badDir); + sd.lock(); + DataStorage.VolumeBuilder builder = new DataStorage.VolumeBuilder(storage, sd); + when(storage.prepareVolume(eq(datanode), eq(badDir), + Matchers.<List<NamespaceInfo>>any())) + .thenReturn(builder); + + StorageLocation location = StorageLocation.parse(badDir.toString()); + List<NamespaceInfo> nsInfos = Lists.newArrayList(); + for (String bpid : BLOCK_POOL_IDS) { + nsInfos.add(new NamespaceInfo(0, CLUSTER_ID, bpid, 1)); + } + + try { + spyDataset.addVolume(location, nsInfos); + fail("Expect to throw MultipleIOException"); + } catch (MultipleIOException e) { + } + + FsDatasetTestUtil.assertFileLockReleased(badDir.toString()); + } }