HDFS-7916. 'reportBadBlocks' from datanodes to standby Node BPServiceActor goes for infinite loop. Contributed by Rushabh Shah.
Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/ea11590a Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/ea11590a Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/ea11590a Branch: refs/heads/HDFS-7240 Commit: ea11590aad952b5b560a5101d064adf27d8656db Parents: cbea5d2 Author: Kihwal Lee <kih...@apache.org> Authored: Mon May 11 14:30:35 2015 -0500 Committer: Kihwal Lee <kih...@apache.org> Committed: Mon May 11 14:31:58 2015 -0500 ---------------------------------------------------------------------- hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 3 ++ .../hdfs/server/datanode/ErrorReportAction.java | 4 ++ .../server/datanode/ReportBadBlockAction.java | 4 ++ .../server/datanode/TestBPOfferService.java | 54 ++++++++++++++++++++ 4 files changed, 65 insertions(+) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hadoop/blob/ea11590a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt ---------------------------------------------------------------------- diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index 8060644..b67caed 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -814,6 +814,9 @@ Release 2.7.1 - UNRELEASED HDFS-8254. Standby namenode doesn't process DELETED_BLOCK if the add block request is in edit log. (Rushabh S Shah via kihwal) + HDFS-7916. 'reportBadBlocks' from datanodes to standby Node BPServiceActor + goes for infinite loop (Rushabh S Shah via kihwal) + Release 2.7.0 - 2015-04-20 INCOMPATIBLE CHANGES http://git-wip-us.apache.org/repos/asf/hadoop/blob/ea11590a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/ErrorReportAction.java ---------------------------------------------------------------------- diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/ErrorReportAction.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/ErrorReportAction.java index 331822a..b7a9dae 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/ErrorReportAction.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/ErrorReportAction.java @@ -22,6 +22,7 @@ import java.io.IOException; import org.apache.hadoop.hdfs.protocolPB.DatanodeProtocolClientSideTranslatorPB; import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration; +import org.apache.hadoop.ipc.RemoteException; /** @@ -43,6 +44,9 @@ public class ErrorReportAction implements BPServiceActorAction { DatanodeRegistration bpRegistration) throws BPServiceActorActionException { try { bpNamenode.errorReport(bpRegistration, errorCode, errorMessage); + } catch (RemoteException re) { + DataNode.LOG.info("trySendErrorReport encountered RemoteException " + + "errorMessage: " + errorMessage + " errorCode: " + errorCode, re); } catch(IOException e) { throw new BPServiceActorActionException("Error reporting " + "an error to namenode: "); http://git-wip-us.apache.org/repos/asf/hadoop/blob/ea11590a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/ReportBadBlockAction.java ---------------------------------------------------------------------- diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/ReportBadBlockAction.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/ReportBadBlockAction.java index 7155eae..671a1fe 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/ReportBadBlockAction.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/ReportBadBlockAction.java @@ -26,6 +26,7 @@ import org.apache.hadoop.hdfs.protocol.ExtendedBlock; import org.apache.hadoop.hdfs.protocol.LocatedBlock; import org.apache.hadoop.hdfs.protocolPB.DatanodeProtocolClientSideTranslatorPB; import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration; +import org.apache.hadoop.ipc.RemoteException; /** * ReportBadBlockAction is an instruction issued by {{BPOfferService}} to @@ -59,6 +60,9 @@ public class ReportBadBlockAction implements BPServiceActorAction { try { bpNamenode.reportBadBlocks(locatedBlock); + } catch (RemoteException re) { + DataNode.LOG.info("reportBadBlock encountered RemoteException for " + + "block: " + block , re); } catch (IOException e) { throw new BPServiceActorActionException("Failed to report bad block " + block + " to namenode: "); http://git-wip-us.apache.org/repos/asf/hadoop/blob/ea11590a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestBPOfferService.java ---------------------------------------------------------------------- diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestBPOfferService.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestBPOfferService.java index 3aa9a7b..64cc78b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestBPOfferService.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestBPOfferService.java @@ -55,6 +55,9 @@ import org.apache.hadoop.hdfs.server.protocol.StorageBlockReport; import org.apache.hadoop.hdfs.server.protocol.StorageReceivedDeletedBlocks; import org.apache.hadoop.hdfs.server.protocol.StorageReport; import org.apache.hadoop.hdfs.server.protocol.VolumeFailureSummary; +import org.apache.hadoop.ipc.RemoteException; +import org.apache.hadoop.ipc.StandbyException; +import org.apache.hadoop.ipc.protobuf.RpcHeaderProtos.RpcResponseHeaderProto.RpcErrorCodeProto; import org.apache.hadoop.test.GenericTestUtils; import org.apache.hadoop.test.PathUtils; import org.apache.hadoop.util.Time; @@ -621,4 +624,55 @@ public class TestBPOfferService { bpos.stop(); } } + + /** + * This test case doesn't add the reportBadBlock request to + * {@link BPServiceActor#bpThreadEnqueue} when the Standby namenode throws + * {@link StandbyException} + * @throws Exception + */ + @Test + public void testReportBadBlocksWhenNNThrowsStandbyException() + throws Exception { + BPOfferService bpos = setupBPOSForNNs(mockNN1, mockNN2); + bpos.start(); + try { + waitForInitialization(bpos); + // Should start with neither NN as active. + assertNull(bpos.getActiveNN()); + // Have NN1 claim active at txid 1 + mockHaStatuses[0] = new NNHAStatusHeartbeat(HAServiceState.ACTIVE, 1); + bpos.triggerHeartbeatForTests(); + // Now mockNN1 is acting like active namenode and mockNN2 as Standby + assertSame(mockNN1, bpos.getActiveNN()); + // Return nothing when active Active Namenode calls reportBadBlocks + Mockito.doNothing().when(mockNN1).reportBadBlocks + (Mockito.any(LocatedBlock[].class)); + + RemoteException re = new RemoteException(StandbyException.class. + getName(), "Operation category WRITE is not supported in state " + + "standby", RpcErrorCodeProto.ERROR_APPLICATION); + // Return StandbyException wrapped in RemoteException when Standby NN + // calls reportBadBlocks + Mockito.doThrow(re).when(mockNN2).reportBadBlocks + (Mockito.any(LocatedBlock[].class)); + + bpos.reportBadBlocks(FAKE_BLOCK, mockFSDataset.getVolume(FAKE_BLOCK) + .getStorageID(), mockFSDataset.getVolume(FAKE_BLOCK) + .getStorageType()); + // Send heartbeat so that the BpServiceActor can report bad block to + // namenode + bpos.triggerHeartbeatForTests(); + Mockito.verify(mockNN2, Mockito.times(1)) + .reportBadBlocks(Mockito.any(LocatedBlock[].class)); + + // Trigger another heartbeat, this will send reportBadBlock again if it + // is present in the queue. + bpos.triggerHeartbeatForTests(); + Mockito.verify(mockNN2, Mockito.times(1)) + .reportBadBlocks(Mockito.any(LocatedBlock[].class)); + } finally { + bpos.stop(); + } + } }