[ https://issues.apache.org/jira/browse/HDFS-16143?focusedWorklogId=631850&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-631850 ]
ASF GitHub Bot logged work on HDFS-16143: ----------------------------------------- Author: ASF GitHub Bot Created on: 30/Jul/21 19:14 Start Date: 30/Jul/21 19:14 Worklog Time Spent: 10m Work Description: xkrogen commented on a change in pull request #3235: URL: https://github.com/apache/hadoop/pull/3235#discussion_r680158212 ########## File path: hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestEditLogTailer.java ########## @@ -452,26 +472,20 @@ public void testStandbyTriggersLogRollsWhenTailInProgressEdits() private static void waitForStandbyToCatchUpWithInProgressEdits( final NameNode standby, final long activeTxId, int maxWaitSec) throws Exception { - GenericTestUtils.waitFor(new Supplier<Boolean>() { - @Override - public Boolean get() { - long standbyTxId = standby.getNamesystem().getFSImage() - .getLastAppliedTxId(); - return (standbyTxId >= activeTxId); - } - }, 100, maxWaitSec * 1000); + GenericTestUtils.waitFor(() -> { + long standbyTxId = standby.getNamesystem().getFSImage() + .getLastAppliedTxId(); + return (standbyTxId >= activeTxId); + }, 100, TimeUnit.SECONDS.toMillis(maxWaitSec)); } private static void checkForLogRoll(final NameNode active, final long origTxId, int maxWaitSec) throws Exception { - GenericTestUtils.waitFor(new Supplier<Boolean>() { - @Override - public Boolean get() { - long curSegmentTxId = active.getNamesystem().getFSImage().getEditLog() - .getCurSegmentTxId(); - return (origTxId != curSegmentTxId); - } - }, 100, maxWaitSec * 1000); + GenericTestUtils.waitFor(() -> { + long curSegmentTxId = active.getNamesystem().getFSImage().getEditLog() + .getCurSegmentTxId(); + return (origTxId != curSegmentTxId); + }, 500, TimeUnit.SECONDS.toMillis(maxWaitSec)); Review comment: why is the check interval increased from 100ms to 500ms? ########## File path: hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/EditLogTailer.java ########## @@ -423,21 +423,22 @@ void triggerActiveLogRoll() { try { future = rollEditsRpcExecutor.submit(getNameNodeProxy()); future.get(rollEditsTimeoutMs, TimeUnit.MILLISECONDS); - lastRollTimeMs = monotonicNow(); + resetLastRollTimeMs(); lastRollTriggerTxId = lastLoadedTxnId; - } catch (ExecutionException e) { + } catch (ExecutionException | InterruptedException e) { LOG.warn("Unable to trigger a roll of the active NN", e); } catch (TimeoutException e) { - if (future != null) { - future.cancel(true); - } + future.cancel(true); LOG.warn(String.format( "Unable to finish rolling edits in %d ms", rollEditsTimeoutMs)); - } catch (InterruptedException e) { - LOG.warn("Unable to trigger a roll of the active NN", e); } } + @VisibleForTesting + public void resetLastRollTimeMs() { Review comment: package-private instead of public? ########## File path: hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestEditLogTailer.java ########## @@ -433,15 +440,28 @@ public void testStandbyTriggersLogRollsWhenTailInProgressEdits() NameNodeAdapter.mkdirs(active, getDirPath(i), new PermissionStatus("test", "test", new FsPermission((short)00755)), true); + // reset lastRollTimeMs in EditLogTailer. + active.getNamesystem().getEditLogTailer().resetLastRollTimeMs(); } - boolean exceptionThrown = false; + // We should explicitly update lastRollTimeMs in EditLogTailer + // so that our timeout test provided just below can take advantage + // of validation: (monotonicNow() - lastRollTimeMs) > logRollPeriodMs + // provided in EditLogTailer#tooLongSinceLastLoad(). + active.getNamesystem().getEditLogTailer().resetLastRollTimeMs(); Review comment: if you just updated the last roll time on L444 above, why do we need to do it again? ########## File path: hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/EditLogTailer.java ########## @@ -423,21 +423,22 @@ void triggerActiveLogRoll() { try { future = rollEditsRpcExecutor.submit(getNameNodeProxy()); future.get(rollEditsTimeoutMs, TimeUnit.MILLISECONDS); - lastRollTimeMs = monotonicNow(); + resetLastRollTimeMs(); lastRollTriggerTxId = lastLoadedTxnId; - } catch (ExecutionException e) { + } catch (ExecutionException | InterruptedException e) { LOG.warn("Unable to trigger a roll of the active NN", e); } catch (TimeoutException e) { - if (future != null) { - future.cancel(true); - } + future.cancel(true); Review comment: why is the null-check removed here? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: common-issues-unsubscr...@hadoop.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking ------------------- Worklog Id: (was: 631850) Time Spent: 4h (was: 3h 50m) > TestEditLogTailer#testStandbyTriggersLogRollsWhenTailInProgressEdits is flaky > ----------------------------------------------------------------------------- > > Key: HDFS-16143 > URL: https://issues.apache.org/jira/browse/HDFS-16143 > Project: Hadoop HDFS > Issue Type: Sub-task > Components: test > Reporter: Akira Ajisaka > Assignee: Viraj Jasani > Priority: Major > Labels: pull-request-available > Attachments: patch-unit-hadoop-hdfs-project_hadoop-hdfs.txt > > Time Spent: 4h > Remaining Estimate: 0h > > https://ci-hadoop.apache.org/job/hadoop-multibranch/job/PR-3229/1/artifact/out/patch-unit-hadoop-hdfs-project_hadoop-hdfs.txt > {quote} > [ERROR] > testStandbyTriggersLogRollsWhenTailInProgressEdits[0](org.apache.hadoop.hdfs.server.namenode.ha.TestEditLogTailer) > Time elapsed: 6.862 s <<< FAILURE! > java.lang.AssertionError > at org.junit.Assert.fail(Assert.java:87) > at org.junit.Assert.assertTrue(Assert.java:42) > at org.junit.Assert.assertTrue(Assert.java:53) > at > org.apache.hadoop.hdfs.server.namenode.ha.TestEditLogTailer.testStandbyTriggersLogRollsWhenTailInProgressEdits(TestEditLogTailer.java:444) > {quote} -- This message was sent by Atlassian Jira (v8.3.4#803005) --------------------------------------------------------------------- To unsubscribe, e-mail: hdfs-issues-unsubscr...@hadoop.apache.org For additional commands, e-mail: hdfs-issues-h...@hadoop.apache.org