Repository: hadoop Updated Branches: refs/heads/trunk d42806160 -> 1290e3c64
HDFS-10240. Race between close/recoverLease leads to missing block. Contributed by Jinglun, zhouyingchao and Wei-Chiu Chuang. Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/1290e3c6 Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/1290e3c6 Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/1290e3c6 Branch: refs/heads/trunk Commit: 1290e3c647092f0bfbb250731a6805aba1be8e4b Parents: d428061 Author: Wei-Chiu Chuang <weic...@apache.org> Authored: Thu Aug 16 16:29:38 2018 -0700 Committer: Wei-Chiu Chuang <weic...@apache.org> Committed: Thu Aug 16 16:29:38 2018 -0700 ---------------------------------------------------------------------- .../hdfs/server/blockmanagement/BlockInfo.java | 4 ++ .../server/blockmanagement/BlockManager.java | 4 ++ .../hdfs/server/datanode/BPServiceActor.java | 3 +- .../hadoop/hdfs/server/datanode/DataNode.java | 10 +++ .../apache/hadoop/hdfs/TestLeaseRecovery2.java | 65 ++++++++++++++++++++ .../hdfs/server/datanode/DataNodeTestUtils.java | 3 + 6 files changed, 88 insertions(+), 1 deletion(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hadoop/blob/1290e3c6/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockInfo.java ---------------------------------------------------------------------- diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockInfo.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockInfo.java index 111ade1..43f4f47 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockInfo.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockInfo.java @@ -262,6 +262,10 @@ public abstract class BlockInfo extends Block return getBlockUCState().equals(BlockUCState.COMPLETE); } + public boolean isUnderRecovery() { + return getBlockUCState().equals(BlockUCState.UNDER_RECOVERY); + } + public final boolean isCompleteOrCommitted() { final BlockUCState state = getBlockUCState(); return state.equals(BlockUCState.COMPLETE) || http://git-wip-us.apache.org/repos/asf/hadoop/blob/1290e3c6/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java ---------------------------------------------------------------------- diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java index d8a3aa3..17f6f6e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java @@ -985,6 +985,10 @@ public class BlockManager implements BlockStatsMXBean { return false; // no blocks in file yet if(lastBlock.isComplete()) return false; // already completed (e.g. by syncBlock) + if(lastBlock.isUnderRecovery()) { + throw new IOException("Commit or complete block " + commitBlock + + ", whereas it is under recovery."); + } final boolean committed = commitBlock(lastBlock, commitBlock); if (committed && lastBlock.isStriped()) { http://git-wip-us.apache.org/repos/asf/hadoop/blob/1290e3c6/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPServiceActor.java ---------------------------------------------------------------------- diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPServiceActor.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPServiceActor.java index f09ff66..8f7a186 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPServiceActor.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPServiceActor.java @@ -684,7 +684,8 @@ class BPServiceActor implements Runnable { } } } - if (ibrManager.sendImmediately() || sendHeartbeat) { + if (!dn.areIBRDisabledForTests() && + (ibrManager.sendImmediately()|| sendHeartbeat)) { ibrManager.sendIBRs(bpNamenode, bpRegistration, bpos.getBlockPoolId()); } http://git-wip-us.apache.org/repos/asf/hadoop/blob/1290e3c6/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java ---------------------------------------------------------------------- diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java index ea3bab6..c980395 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java @@ -331,6 +331,7 @@ public class DataNode extends ReconfigurableBase ThreadGroup threadGroup = null; private DNConf dnConf; private volatile boolean heartbeatsDisabledForTests = false; + private volatile boolean ibrDisabledForTests = false; private volatile boolean cacheReportsDisabledForTests = false; private DataStorage storage = null; @@ -1334,6 +1335,15 @@ public class DataNode extends ReconfigurableBase } @VisibleForTesting + void setIBRDisabledForTest(boolean disabled) { + this.ibrDisabledForTests = disabled; + } + + @VisibleForTesting + boolean areIBRDisabledForTests() { + return this.ibrDisabledForTests; + } + void setCacheReportsDisabledForTest(boolean disabled) { this.cacheReportsDisabledForTests = disabled; } http://git-wip-us.apache.org/repos/asf/hadoop/blob/1290e3c6/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestLeaseRecovery2.java ---------------------------------------------------------------------- diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestLeaseRecovery2.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestLeaseRecovery2.java index a96d8b3..940e13e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestLeaseRecovery2.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestLeaseRecovery2.java @@ -25,6 +25,7 @@ import static org.mockito.Mockito.doNothing; import static org.mockito.Mockito.spy; import java.io.IOException; +import java.util.ArrayList; import java.util.HashMap; import java.util.Map; @@ -164,6 +165,70 @@ public class TestLeaseRecovery2 { } @Test + public void testCloseWhileRecoverLease() throws Exception { + // test recoverLease + // set the soft limit to be 1 hour but recoverLease should + // close the file immediately + cluster.setLeasePeriod(LONG_LEASE_PERIOD, LONG_LEASE_PERIOD); + int size = AppendTestUtil.nextInt(FILE_SIZE); + String filestr = "/testCloseWhileRecoverLease"; + + AppendTestUtil.LOG.info("filestr=" + filestr); + Path filepath = new Path(filestr); + FSDataOutputStream stm = dfs.create(filepath, true, BUF_SIZE, + REPLICATION_NUM, BLOCK_SIZE); + assertTrue(dfs.dfs.exists(filestr)); + + // hflush file + AppendTestUtil.LOG.info("hflush"); + stm.hflush(); + + // Pause DN block report. + // Let client recover lease, and then close the file, and then let DN + // report blocks. + ArrayList<DataNode> dataNodes = cluster.getDataNodes(); + for (DataNode dn: dataNodes) { + DataNodeTestUtils.setHeartbeatsDisabledForTests(dn, false); + } + + LOG.info("pause IBR"); + for (DataNode dn: dataNodes) { + DataNodeTestUtils.pauseIBR(dn); + } + + AppendTestUtil.LOG.info("size=" + size); + stm.write(buffer, 0, size); + + // hflush file + AppendTestUtil.LOG.info("hflush"); + stm.hflush(); + + LOG.info("recover lease"); + dfs.recoverLease(filepath); + try { + stm.close(); + fail("close() should fail because the file is under recovery."); + } catch (IOException ioe) { + GenericTestUtils.assertExceptionContains( + "whereas it is under recovery", ioe); + } + + for (DataNode dn: dataNodes) { + DataNodeTestUtils.setHeartbeatsDisabledForTests(dn, false); + } + + LOG.info("trigger heartbeats"); + // resume DN block report + for (DataNode dn: dataNodes) { + DataNodeTestUtils.triggerHeartbeat(dn); + } + + stm.close(); + assertEquals(cluster.getNamesystem().getBlockManager(). + getMissingBlocksCount(), 0); + } + + @Test public void testLeaseRecoverByAnotherUser() throws Exception { byte [] actual = new byte[FILE_SIZE]; cluster.setLeasePeriod(SHORT_LEASE_PERIOD, LONG_LEASE_PERIOD); http://git-wip-us.apache.org/repos/asf/hadoop/blob/1290e3c6/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/DataNodeTestUtils.java ---------------------------------------------------------------------- diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/DataNodeTestUtils.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/DataNodeTestUtils.java index 19d9dfc..25eca88 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/DataNodeTestUtils.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/DataNodeTestUtils.java @@ -98,6 +98,9 @@ public class DataNodeTestUtils { } } + public static void pauseIBR(DataNode dn) { + dn.setIBRDisabledForTest(true); + } public static InterDatanodeProtocol createInterDatanodeProtocolProxy( DataNode dn, DatanodeID datanodeid, final Configuration conf, boolean connectToDnViaHostname) throws IOException { --------------------------------------------------------------------- To unsubscribe, e-mail: common-commits-unsubscr...@hadoop.apache.org For additional commands, e-mail: common-commits-h...@hadoop.apache.org