HDFS-8778. TestBlockReportRateLimiting#testLeaseExpiration can deadlock. (Contributed by Arpit Agarwal)
Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/3ec0a044
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/3ec0a044
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/3ec0a044
Branch: refs/heads/HADOOP-12111
Commit: 3ec0a0444f75c8743289ec7c8645d4bdf51fc45a
Parents: edcaae4
Author: Arpit Agarwal <[email protected]>
Authored: Wed Jul 15 14:08:58 2015 -0700
Committer: Arpit Agarwal <[email protected]>
Committed: Wed Jul 15 14:08:58 2015 -0700
----------------------------------------------------------------------
 hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 5 +-
 .../TestBlockReportRateLimiting.java | 64 ++++++--------------
 2 files changed, 23 insertions(+), 46 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hadoop/blob/3ec0a044/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
index 20bdef0..8f6dd41 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
+++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
@@ -1044,6 +1044,9 @@ Release 2.8.0 - UNRELEASED
 HDFS-7608: hdfs dfsclient newConnectedPeer has no write timeout (Xiaoyu Yao via Colin P. McCabe)
+ HDFS-8778. TestBlockReportRateLimiting#testLeaseExpiration can deadlock.
+ (Arpit Agarwal)
+
 Release 2.7.2 - UNRELEASED
 INCOMPATIBLE CHANGES
@@ -1059,7 +1062,7 @@ Release 2.7.2 - UNRELEASED
 HDFS-8722. Optimize datanode writes for small writes and flushes (kihwal)
 BUG FIXES
-
+
 Release 2.7.1 - 2015-07-06
 INCOMPATIBLE CHANGES
http://git-wip-us.apache.org/repos/asf/hadoop/blob/3ec0a044/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlockReportRateLimiting.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlockReportRateLimiting.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlockReportRateLimiting.java
index fc5f9e7..86a7511 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlockReportRateLimiting.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlockReportRateLimiting.java
@@ -24,7 +24,6 @@ import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_FULL_BLOCK_REPOR
 import com.google.common.base.Joiner;
 import com.google.common.base.Supplier;
 import com.google.common.util.concurrent.Uninterruptibles;
-import org.apache.commons.lang.mutable.MutableObject;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
@@ -42,8 +41,6 @@ import org.junit.Test;
 import java.io.IOException;
 import java.util.HashSet;
 import java.util.List;
-import java.util.concurrent.ArrayBlockingQueue;
-import java.util.concurrent.BlockingQueue;
 import java.util.concurrent.Semaphore;
 import java.util.concurrent.TimeUnit;
 import java.util.concurrent.atomic.AtomicReference;
@@ -174,13 +171,11 @@ public class TestBlockReportRateLimiting {
 conf.setLong(DFS_NAMENODE_FULL_BLOCK_REPORT_LEASE_LENGTH_MS, 100L);
 final Semaphore gotFbrSem = new Semaphore(0);
- final AtomicReference<String> failure = new AtomicReference<String>("");
+ final AtomicReference<String> failure = new AtomicReference<>();
 final AtomicReference<MiniDFSCluster> cluster =
- new AtomicReference<>(null);
- final BlockingQueue<Integer> datanodeToStop =
- new ArrayBlockingQueue<Integer>(1);
+ new AtomicReference<>();
+ final AtomicReference<String> datanodeToStop = new AtomicReference<>();
 final BlockManagerFaultInjector injector = new BlockManagerFaultInjector() {
- private String uuidToStop = "";
 @Override
 public void incomingBlockReportRpc(DatanodeID nodeID,
@@ -189,11 +184,9 @@ public class TestBlockReportRateLimiting {
 setFailure(failure, "Got unexpected rate-limiting-" +
 "bypassing full block report RPC from " + nodeID);
 }
- synchronized (this) {
- if (uuidToStop.equals(nodeID.getDatanodeUuid())) {
- throw new IOException("Injecting failure into block " +
- "report RPC for " + nodeID);
- }
+ if (nodeID.getXferAddr().equals(datanodeToStop.get())) {
+ throw new IOException("Injecting failure into block " +
+ "report RPC for " + nodeID);
 }
 gotFbrSem.release();
 }
@@ -204,43 +197,24 @@ public class TestBlockReportRateLimiting {
 if (leaseId == 0) {
 return;
 }
- synchronized (this) {
- if (uuidToStop.isEmpty()) {
- MiniDFSCluster cl;
- do {
- cl = cluster.get();
- } while (cl == null);
- int datanodeIndexToStop = getDatanodeIndex(cl, node);
- uuidToStop = node.getDatanodeUuid();
- datanodeToStop.add(Integer.valueOf(datanodeIndexToStop));
- }
- }
- }
-
- private int getDatanodeIndex(MiniDFSCluster cl,
- DatanodeDescriptor node) {
- List<DataNode> datanodes = cl.getDataNodes();
- for (int i = 0; i < datanodes.size(); i++) {
- DataNode datanode = datanodes.get(i);
- if (datanode.getDatanodeUuid().equals(node.getDatanodeUuid())) {
- return i;
- }
- }
- throw new RuntimeException("Failed to find UUID " +
- node.getDatanodeUuid() + " in the list of datanodes.");
+ datanodeToStop.compareAndSet(null, node.getXferAddr());
 }
 @Override
 public void removeBlockReportLease(DatanodeDescriptor node, long leaseId) {
 }
 };
- BlockManagerFaultInjector.instance = injector;
- cluster.set(new MiniDFSCluster.Builder(conf).numDataNodes(2).build());
- cluster.get().waitActive();
- int datanodeIndexToStop = datanodeToStop.take();
- cluster.get().stopDataNode(datanodeIndexToStop);
- gotFbrSem.acquire();
- cluster.get().shutdown();
- Assert.assertEquals("", failure.get());
+ try {
+ BlockManagerFaultInjector.instance = injector;
+ cluster.set(new MiniDFSCluster.Builder(conf).numDataNodes(2).build());
+ cluster.get().waitActive();
+ Assert.assertNotNull(cluster.get().stopDataNode(datanodeToStop.get()));
+ gotFbrSem.acquire();
+ Assert.assertNull(failure.get());
+ } finally {
+ if (cluster.get() != null) {
+ cluster.get().shutdown();
+ }
+ }
 }
 }
