[ https://issues.apache.org/jira/browse/HDFS-13220?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16915401#comment-16915401 ]
Hadoop QA commented on HDFS-13220: ---------------------------------- | (x) *{color:red}-1 overall{color}* | \\ \\ || Vote || Subsystem || Runtime || Comment || | {color:blue}0{color} | {color:blue} reexec {color} | {color:blue} 12m 7s{color} | {color:blue} Docker mode activated. {color} | || || || || {color:brown} Prechecks {color} || | {color:green}+1{color} | {color:green} @author {color} | {color:green} 0m 0s{color} | {color:green} The patch does not contain any @author tags. {color} | | {color:red}-1{color} | {color:red} test4tests {color} | {color:red} 0m 0s{color} | {color:red} The patch doesn't appear to include any new or modified tests. Please justify why no new tests are needed for this patch. Also please list what manual steps were performed to verify this patch. {color} | || || || || {color:brown} trunk Compile Tests {color} || | {color:green}+1{color} | {color:green} mvninstall {color} | {color:green} 28m 38s{color} | {color:green} trunk passed {color} | | {color:green}+1{color} | {color:green} compile {color} | {color:green} 1m 8s{color} | {color:green} trunk passed {color} | | {color:green}+1{color} | {color:green} checkstyle {color} | {color:green} 0m 58s{color} | {color:green} trunk passed {color} | | {color:green}+1{color} | {color:green} mvnsite {color} | {color:green} 1m 19s{color} | {color:green} trunk passed {color} | | {color:green}+1{color} | {color:green} shadedclient {color} | {color:green} 15m 54s{color} | {color:green} branch has no errors when building and testing our client artifacts. 
{color} | | {color:green}+1{color} | {color:green} findbugs {color} | {color:green} 2m 32s{color} | {color:green} trunk passed {color} | | {color:green}+1{color} | {color:green} javadoc {color} | {color:green} 1m 1s{color} | {color:green} trunk passed {color} | || || || || {color:brown} Patch Compile Tests {color} || | {color:green}+1{color} | {color:green} mvninstall {color} | {color:green} 1m 13s{color} | {color:green} the patch passed {color} | | {color:green}+1{color} | {color:green} compile {color} | {color:green} 1m 6s{color} | {color:green} the patch passed {color} | | {color:green}+1{color} | {color:green} javac {color} | {color:green} 1m 6s{color} | {color:green} the patch passed {color} | | {color:green}+1{color} | {color:green} checkstyle {color} | {color:green} 0m 42s{color} | {color:green} the patch passed {color} | | {color:green}+1{color} | {color:green} mvnsite {color} | {color:green} 1m 10s{color} | {color:green} the patch passed {color} | | {color:red}-1{color} | {color:red} whitespace {color} | {color:red} 0m 0s{color} | {color:red} The patch has 2 line(s) that end in whitespace. Use git apply --whitespace=fix <<patch_file>>. Refer https://git-scm.com/docs/git-apply {color} | | {color:green}+1{color} | {color:green} shadedclient {color} | {color:green} 14m 29s{color} | {color:green} patch has no errors when building and testing our client artifacts. {color} | | {color:green}+1{color} | {color:green} findbugs {color} | {color:green} 2m 42s{color} | {color:green} the patch passed {color} | | {color:green}+1{color} | {color:green} javadoc {color} | {color:green} 1m 2s{color} | {color:green} the patch passed {color} | || || || || {color:brown} Other Tests {color} || | {color:red}-1{color} | {color:red} unit {color} | {color:red} 56m 11s{color} | {color:red} hadoop-hdfs in the patch failed. {color} | | {color:red}-1{color} | {color:red} asflicense {color} | {color:red} 0m 36s{color} | {color:red} The patch generated 7 ASF License warnings. 
{color} | | {color:black}{color} | {color:black} {color} | {color:black}142m 48s{color} | {color:black} {color} | \\ \\ || Reason || Tests || | Failed junit tests | hadoop.hdfs.TestFileCreation | | | hadoop.hdfs.server.balancer.TestBalancerWithNodeGroup | | | hadoop.hdfs.TestBlockTokenWrappingQOP | | | hadoop.hdfs.TestBlocksScheduledCounter | | | hadoop.hdfs.TestMaintenanceState | | | hadoop.hdfs.TestDFSStripedInputStream | | | hadoop.hdfs.server.balancer.TestBalancerService | | | hadoop.hdfs.server.datanode.TestDataNodeInitStorage | | | hadoop.hdfs.TestDFSStripedOutputStreamWithFailure | | | hadoop.hdfs.server.datanode.TestBPOfferService | | | hadoop.hdfs.server.datanode.TestDataNodeMetrics | | | hadoop.hdfs.TestErasureCodingExerciseAPIs | | | hadoop.hdfs.TestPread | | | hadoop.hdfs.TestErasureCodingPolicies | | | hadoop.hdfs.TestErasureCodingPoliciesWithRandomECPolicy | | | hadoop.hdfs.TestHDFSFileSystemContract | | | hadoop.hdfs.server.balancer.TestBalancerWithSaslDataTransfer | | | hadoop.hdfs.TestFileChecksumCompositeCrc | | | hadoop.hdfs.server.balancer.TestBalancerRPCDelay | | | hadoop.hdfs.TestQuota | \\ \\ || Subsystem || Report/Notes || | Docker | Client=19.03.1 Server=19.03.1 Image:yetus/hadoop:bdbca0e53b4 | | JIRA Issue | HDFS-13220 | | JIRA Patch URL | https://issues.apache.org/jira/secure/attachment/12978532/HDFS-13220.patch | | Optional Tests | dupname asflicense compile javac javadoc mvninstall mvnsite unit shadedclient findbugs checkstyle | | uname | Linux fa8cc60a5398 4.15.0-58-generic #64-Ubuntu SMP Tue Aug 6 11:12:41 UTC 2019 x86_64 x86_64 x86_64 GNU/Linux | | Build tool | maven | | Personality | /testptch/patchprocess/precommit/personality/provided.sh | | git revision | trunk / d2225c8 | | maven | version: Apache Maven 3.3.9 | | Default Java | 1.8.0_222 | | findbugs | v3.1.0-RC1 | | whitespace | https://builds.apache.org/job/PreCommit-HDFS-Build/27669/artifact/out/whitespace-eol.txt | | unit | 
https://builds.apache.org/job/PreCommit-HDFS-Build/27669/artifact/out/patch-unit-hadoop-hdfs-project_hadoop-hdfs.txt | | Test Results | https://builds.apache.org/job/PreCommit-HDFS-Build/27669/testReport/ | | asflicense | https://builds.apache.org/job/PreCommit-HDFS-Build/27669/artifact/out/patch-asflicense-problems.txt | | Max. process+thread count | 3781 (vs. ulimit of 5500) | | modules | C: hadoop-hdfs-project/hadoop-hdfs U: hadoop-hdfs-project/hadoop-hdfs | | Console output | https://builds.apache.org/job/PreCommit-HDFS-Build/27669/console | | Powered by | Apache Yetus 0.8.0 http://yetus.apache.org | This message was automatically generated. > Change lastCheckpointTime to use fsimage mostRecentCheckpointTime > ----------------------------------------------------------------- > > Key: HDFS-13220 > URL: https://issues.apache.org/jira/browse/HDFS-13220 > Project: Hadoop HDFS > Issue Type: Bug > Components: namenode > Reporter: Nie Gus > Assignee: hemanthboyina > Priority: Minor > Attachments: HDFS-13220.patch > > > We found that our standby NN did not do the checkpoint, and the checkpoint > alert kept firing. We use the JMX last checkpoint time and > dfs.namenode.checkpoint.period to do the monitoring check. > > We then checked the code and logs, and found that the standby NN is using monotonicNow, not > the fsimage checkpoint time, so when the standby NN restarts or switches to Active, > the > lastCheckpointTime in doWork will be reset. So there is a risk that a standby NN > restart or a standby/active switch will cause a checkpoint delay. > StandbyCheckpointer.java > {code:java} > private void doWork() { > final long checkPeriod = 1000 * checkpointConf.getCheckPeriod(); > // Reset checkpoint time so that we don't always checkpoint > // on startup. 
> lastCheckpointTime = monotonicNow(); > while (shouldRun) { > boolean needRollbackCheckpoint = namesystem.isNeedRollbackFsImage(); > if (!needRollbackCheckpoint) { > try { > Thread.sleep(checkPeriod); > } catch (InterruptedException ie) { > } > if (!shouldRun) { > break; > } > } > try { > // We may have lost our ticket since last checkpoint, log in again, just in > case > if (UserGroupInformation.isSecurityEnabled()) { > UserGroupInformation.getCurrentUser().checkTGTAndReloginFromKeytab(); > } > final long now = monotonicNow(); > final long uncheckpointed = countUncheckpointedTxns(); > final long secsSinceLast = (now - lastCheckpointTime) / 1000; > boolean needCheckpoint = needRollbackCheckpoint; > if (needCheckpoint) { > LOG.info("Triggering a rollback fsimage for rolling upgrade."); > } else if (uncheckpointed >= checkpointConf.getTxnCount()) { > LOG.info("Triggering checkpoint because there have been " + > uncheckpointed + " txns since the last checkpoint, which " + > "exceeds the configured threshold " + > checkpointConf.getTxnCount()); > needCheckpoint = true; > } else if (secsSinceLast >= checkpointConf.getPeriod()) { > LOG.info("Triggering checkpoint because it has been " + > secsSinceLast + " seconds since the last checkpoint, which " + > "exceeds the configured interval " + checkpointConf.getPeriod()); > needCheckpoint = true; > } > synchronized (cancelLock) { > if (now < preventCheckpointsUntil) { > LOG.info("But skipping this checkpoint since we are about to failover!"); > canceledCount++; > continue; > } > assert canceler == null; > canceler = new Canceler(); > } > if (needCheckpoint) { > doCheckpoint(); > // reset needRollbackCheckpoint to false only when we finish a ckpt > // for rollback image > if (needRollbackCheckpoint > && namesystem.getFSImage().hasRollbackFSImage()) { > namesystem.setCreatedRollbackImages(true); > namesystem.setNeedRollbackFsImage(false); > } > lastCheckpointTime = now; > } > } catch (SaveNamespaceCancelledException ce) { > 
> LOG.info("Checkpoint was cancelled: " + ce.getMessage()); > canceledCount++; > } catch (InterruptedException ie) { > LOG.info("Interrupted during checkpointing", ie); > // Probably requested shutdown. > continue; > } catch (Throwable t) { > LOG.error("Exception in doCheckpoint", t); > } finally { > synchronized (cancelLock) { > canceler = null; > } > } > } > } > } > {code} > > Can we use the fsimage's mostRecentCheckpointTime to do the check instead? > > Thanks, > Gus -- This message was sent by Atlassian Jira (v8.3.2#803003) --------------------------------------------------------------------- To unsubscribe, e-mail: hdfs-issues-unsubscr...@hadoop.apache.org For additional commands, e-mail: hdfs-issues-h...@hadoop.apache.org