This is an automated email from the ASF dual-hosted git repository.

jhung pushed a commit to branch branch-2.10 in repository https://gitbox.apache.org/repos/asf/hadoop.git
commit 70c2a2992f4488a76351701af2faf7f6e5b05678 Author: Chen Liang <cli...@apache.org> AuthorDate: Thu Dec 12 12:12:44 2019 -0800 HDFS-15036. Active NameNode should not silently fail the image transfer. Contributed by Chen Liang. --- .../hadoop/hdfs/server/namenode/ImageServlet.java | 6 ++++++ .../hdfs/server/namenode/ha/StandbyCheckpointer.java | 12 +++++++++++- .../org/apache/hadoop/hdfs/TestRollingUpgrade.java | 20 ++++++++++++++++++++ 3 files changed, 37 insertions(+), 1 deletion(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ImageServlet.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ImageServlet.java index ad8b159..3dcc168 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ImageServlet.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ImageServlet.java @@ -573,7 +573,13 @@ public class ImageServlet extends HttpServlet { long timeDelta = TimeUnit.MILLISECONDS.toSeconds( now - lastCheckpointTime); + // Since the goal of the check below is to prevent overly + // frequent upload from Standby, the check should only be done + // for the periodical upload from Standby. For the other + // scenarios such as rollback image and ckpt file, they skip + // this check, see HDFS-15036 for more info. 
if (checkRecentImageEnable && + NameNodeFile.IMAGE.equals(parsedParams.getNameNodeFile()) && timeDelta < checkpointPeriod && txid - lastCheckpointTxid < checkpointTxnCount) { // only when at least one of two conditions are met we accept diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/StandbyCheckpointer.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/StandbyCheckpointer.java index c05a0da..5cac972 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/StandbyCheckpointer.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/StandbyCheckpointer.java @@ -289,10 +289,20 @@ public class StandbyCheckpointer { // TODO should there be some smarts here about retries nodes that // are not the active NN? CheckpointReceiverEntry receiverEntry = checkpointReceivers.get(url); - if (upload.get() == TransferFsImage.TransferResult.SUCCESS) { + TransferFsImage.TransferResult uploadResult = upload.get(); + if (uploadResult == TransferFsImage.TransferResult.SUCCESS) { receiverEntry.setLastUploadTime(monotonicNow()); receiverEntry.setIsPrimary(true); } else { + // Getting here means image upload is explicitly rejected + // by the other node. This could happen if: + // 1. the other is also a standby, or + // 2. the other is active, but already accepted another + // newer image, or + // 3. the other is active but has a recent enough image. + // All these are valid cases, just log for information. 
+ LOG.info("Image upload rejected by the other NameNode: " + + uploadResult); receiverEntry.setIsPrimary(false); } } catch (ExecutionException e) { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestRollingUpgrade.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestRollingUpgrade.java index 749ec96..579c5ef 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestRollingUpgrade.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestRollingUpgrade.java @@ -57,6 +57,7 @@ import org.junit.Assert; import static org.junit.Assert.assertTrue; import org.junit.Test; +import static org.apache.hadoop.hdfs.server.namenode.ImageServlet.RECENT_IMAGE_CHECK_ENABLED; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNotEquals; import static org.junit.Assert.assertNull; @@ -430,7 +431,22 @@ public class TestRollingUpgrade { testFinalize(3); } + @Test(timeout = 300000) + public void testFinalizeWithDeltaCheck() throws Exception { + testFinalize(2, true); + } + + @Test(timeout = 300000) + public void testFinalizeWithMultipleNNDeltaCheck() throws Exception { + testFinalize(3, true); + } + private void testFinalize(int nnCount) throws Exception { + testFinalize(nnCount, false); + } + + private void testFinalize(int nnCount, boolean skipImageDeltaCheck) + throws Exception { final Configuration conf = new HdfsConfiguration(); MiniQJMHACluster cluster = null; final Path foo = new Path("/foo"); @@ -449,6 +465,10 @@ public class TestRollingUpgrade { dfsCluster.restartNameNodes(); dfsCluster.transitionToActive(0); + + dfsCluster.getNameNode(0).getHttpServer() + .setAttribute(RECENT_IMAGE_CHECK_ENABLED, skipImageDeltaCheck); + DistributedFileSystem dfs = dfsCluster.getFileSystem(0); dfs.mkdirs(foo); --------------------------------------------------------------------- To unsubscribe, e-mail: common-commits-unsubscr...@hadoop.apache.org For 
additional commands, e-mail: common-commits-h...@hadoop.apache.org