[ https://issues.apache.org/jira/browse/BEAM-3776?focusedWorklogId=79572&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-79572 ]
ASF GitHub Bot logged work on BEAM-3776: ---------------------------------------- Author: ASF GitHub Bot Created on: 12/Mar/18 18:46 Start Date: 12/Mar/18 18:46 Worklog Time Spent: 10m Work Description: huygaa11 commented on a change in pull request #4793: [BEAM-3776] Fix issue with merging late windows where a watermark hold could be added behind the input watermark. URL: https://github.com/apache/beam/pull/4793#discussion_r173896102 ########## File path: runners/core-java/src/test/java/org/apache/beam/runners/core/ReduceFnRunnerTest.java ########## @@ -873,6 +907,288 @@ public void testWatermarkHoldAndLateData() throws Exception { tester.assertHasOnlyGlobalAndFinishedSetsFor(); } + @Test + public void testMergingWatermarkHoldAndLateDataSpecific() throws Exception { + LinkedList<LinkedList<Action>> configurations = new LinkedList<>(); + + // Simple: late new window + LinkedList<Action> actions = new LinkedList<>(); + actions.add(Action.inputWatermark(40)); + actions.add(Action.times(1)); + configurations.add(actions); + + // Simple: late new window, closed and extended. 
+ actions = new LinkedList<>(); + actions.add(Action.inputWatermark(40)); + actions.add(Action.times(1)); + actions.add(Action.times(10)); + configurations.add(actions); + + // Simple: late new window, closed and merged + actions = new LinkedList<>(); + actions.add(Action.inputWatermark(40)); + actions.add(Action.times(1)); + actions.add(Action.times(14)); + actions.add(Action.times(6)); + configurations.add(actions); + + // Simple: late new window, extended past watermark + actions = new LinkedList<>(); + actions.add(Action.inputWatermark(40)); + actions.add(Action.times(25)); + actions.add(Action.times(33)); + configurations.add(actions); + + // Simple: late new window, extended past watermark, extend more + actions = new LinkedList<>(); + actions.add(Action.inputWatermark(40)); + actions.add(Action.times(25)); + actions.add(Action.times(33)); + actions.add(Action.times(43)); + configurations.add(actions); + + // Simple: late new window, extended past watermark, extend more + actions = new LinkedList<>(); + actions.add(Action.inputWatermark(40)); + actions.add(Action.times(25)); + actions.add(Action.times(33)); + actions.add(Action.times(11)); + configurations.add(actions); + + // Simple: new window closes, then extended + actions = new LinkedList<>(); + actions.add(Action.times(11)); + actions.add(Action.inputWatermark(40)); + actions.add(Action.times(18)); + configurations.add(actions); + + // Merging: new window closes, then extended then merged with new window + actions = new LinkedList<>(); + actions.add(Action.times(11)); + actions.add(Action.inputWatermark(40)); + actions.add(Action.times(18)); + actions.add(Action.times(41)); + actions.add(Action.times(27, 33)); + configurations.add(actions); + + // Merging: late window, merges with new window + actions = new LinkedList<>(); + actions.add(Action.inputWatermark(40)); + actions.add(Action.times(29)); + actions.add(Action.times(41)); + configurations.add(actions); + + // Merging: late window, new window 
joined + actions = new LinkedList<>(); + actions.add(Action.inputWatermark(40)); + actions.add(Action.times(29)); + actions.add(Action.times(45)); + actions.add(Action.times(36)); + configurations.add(actions); + + // Merging: late window, new window all at once + actions = new LinkedList<>(); + actions.add(Action.inputWatermark(40)); + actions.add(Action.times(29, 45, 36)); + configurations.add(actions); + + actions = new LinkedList<>(); + actions.add(Action.inputWatermark(40)); + actions.add(Action.times(25)); + actions.add(Action.times(42)); + actions.add(Action.times(33)); + configurations.add(actions); + + actions = new LinkedList<>(); + actions.add(Action.inputWatermark(40)); + actions.add(Action.times(25)); + actions.add(Action.times(42)); + actions.add(Action.times(33, 21)); + actions.add(Action.inputWatermark(50)); + actions.add(Action.times(12)); + configurations.add(actions); + + for (LinkedList<Action> configuration : configurations) { + System.out.println("Running config " + configuration.toString()); + MetricsContainerImpl container = new MetricsContainerImpl("any"); + MetricsEnvironment.setCurrentContainer(container); + // Test handling of late data. Specifically, ensure the watermark hold is correct. 
+ Duration allowedLateness = Duration.standardMinutes(1); + Duration gapDuration = Duration.millis(10); + System.out.printf("Gap duration %s\n", gapDuration); + ReduceFnTester<Integer, Iterable<Integer>, IntervalWindow> tester = + ReduceFnTester.nonCombining( + WindowingStrategy.of(Sessions.withGapDuration(gapDuration)) + .withMode(AccumulationMode.DISCARDING_FIRED_PANES) + .withTrigger( + Repeatedly.forever( + AfterWatermark.pastEndOfWindow() + .withLateFirings(AfterPane.elementCountAtLeast(1)))) + .withAllowedLateness(allowedLateness)); + tester.setAutoAdvanceOutputWatermark(false); + + // Input watermark -> null + assertEquals(null, tester.getWatermarkHold()); + assertEquals(null, tester.getOutputWatermark()); + + int maxTs = 0; + long watermark = 0; + for (Action action : configuration) { + if (action.times != null) { + System.out.println("Injecting " + action.times.toString()); + injectElements(tester, action.times); + int maxLocalTs = Ordering.natural().max(action.times); + if (maxLocalTs > maxTs) { + maxTs = maxLocalTs; + } + } + if (action.inputWatermark > watermark) { + watermark = action.inputWatermark; + System.out.println("Advancing watermark to " + new Instant(watermark)); + tester.advanceInputWatermark(new Instant(watermark)); + } + Instant hold = tester.getWatermarkHold(); + if (hold != null) { + assertThat(hold, greaterThanOrEqualTo(new Instant(watermark))); + assertThat(watermark, lessThan(maxTs + gapDuration.getMillis())); + } + } + if (gapDuration.getMillis() + maxTs > watermark) { + watermark = gapDuration.getMillis() + maxTs; + tester.advanceInputWatermark(new Instant(watermark)); + } + System.out.print(tester.extractOutput()); + assertThat(tester.getWatermarkHold(), nullValue()); + tester.advanceInputWatermark(new Instant(watermark).plus(allowedLateness)); + assertThat(tester.getWatermarkHold(), nullValue()); + + // Nothing dropped. 
+ long droppedElements = + container + .getCounter( + MetricName.named(ReduceFnRunner.class, + ReduceFnRunner.DROPPED_DUE_TO_CLOSED_WINDOW)) + .getCumulative() + .longValue(); + assertEquals(0, droppedElements); + } + } + + @Test + public void testMergingLateWatermarkHolds() throws Exception { + MetricsContainerImpl container = new MetricsContainerImpl("any"); + MetricsEnvironment.setCurrentContainer(container); + ReduceFnTester<Integer, Iterable<Integer>, IntervalWindow> tester = + ReduceFnTester.nonCombining( + WindowingStrategy.of(Sessions.withGapDuration(Duration.millis(10))) + .withMode(AccumulationMode.DISCARDING_FIRED_PANES) + .withTrigger( + Repeatedly.forever( + AfterWatermark.pastEndOfWindow() + .withLateFirings(AfterPane.elementCountAtLeast(10)))) + .withAllowedLateness(Duration.standardMinutes(100))); + tester.setAutoAdvanceOutputWatermark(false); + + // Input watermark -> null + assertEquals(null, tester.getWatermarkHold()); + assertEquals(null, tester.getOutputWatermark()); + + tester.advanceInputWatermark(new Instant(20)); + // Add two late elements that cause a window to merge. + injectElements(tester, Arrays.asList(3)); + assertThat(tester.getWatermarkHold(), nullValue()); + injectElements(tester, Arrays.asList(4)); + assertEquals( + new Instant(4).plus(9).plus(Duration.standardMinutes(100)), Review comment: Document where the numbers 4, 9, 100 came from. ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. 
For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking ------------------- Worklog Id: (was: 79572) Time Spent: 1.5h (was: 1h 20m) > StateMerging.mergeWatermarks sets a late watermark hold for late merging > windows that depend only on the window > --------------------------------------------------------------------------------------------------------------- > > Key: BEAM-3776 > URL: https://issues.apache.org/jira/browse/BEAM-3776 > Project: Beam > Issue Type: Bug > Components: runner-core > Affects Versions: 2.1.0, 2.2.0, 2.3.0 > Reporter: Sam Whittle > Assignee: Sam Whittle > Priority: Critical > Time Spent: 1.5h > Remaining Estimate: 0h > > WatermarkHold.addElementHold and WatermarkHold.addGarbageCollectionHold take care > to not add holds that would be before the input watermark. > However, WatermarkHold.onMerge calls StateMerging.mergeWatermarks which, if the > watermark hold depends only on the window, sets a hold for the end of the window > regardless of the input watermark. > Thus, if you have a WindowingStrategy such as: > WindowingStrategy.of(Sessions.withGapDuration(gapDuration)) > .withMode(AccumulationMode.DISCARDING_FIRED_PANES) > .withTrigger( > Repeatedly.forever( > AfterWatermark.pastEndOfWindow() > .withLateFirings(AfterPane.elementCountAtLeast(10)))) > .withAllowedLateness(allowedLateness)) > and you merge windows that are late, you might end up holding the watermark > until the allowedLateness has passed. -- This message was sent by Atlassian JIRA (v7.6.3#76005)