ableegoldman commented on a change in pull request #10407: URL: https://github.com/apache/kafka/pull/10407#discussion_r602938154
########## File path: streams/src/test/java/org/apache/kafka/streams/processor/internals/TaskManagerTest.java ########## @@ -816,6 +817,106 @@ public void shouldCleanAndReviveCorruptedStandbyTasksBeforeCommittingNonCorrupte verify(consumer); } + @Test + public void shouldCloseAndReviveUncorruptedTasksWhenTimeoutExceptionThrownFromCommit() { + final ProcessorStateManager stateManager = EasyMock.createStrictMock(ProcessorStateManager.class); + stateManager.markChangelogAsCorrupted(taskId00Partitions); + replay(stateManager); + + final StateMachineTask corruptedActive = new StateMachineTask(taskId00, taskId00Partitions, true, stateManager); + final StateMachineTask unCorruptedActive = new StateMachineTask(taskId01, taskId01Partitions, true, stateManager) { + @Override + public void markChangelogAsCorrupted(final Collection<TopicPartition> partitions) { + fail("Should not try to mark changelogs as corrupted for uncorrupted task"); + } + + @Override + public void maybeInitTaskTimeoutOrThrow(final long currentWallClockMs, + final Exception cause) { + throw new TimeoutException(); + } + }; + final Map<TopicPartition, OffsetAndMetadata> offsets = singletonMap(t1p1, new OffsetAndMetadata(0L, null)); + unCorruptedActive.setCommittableOffsetsAndMetadata(offsets); + + // handleAssignment + final Map<TaskId, Set<TopicPartition>> assignment = new HashMap<>(); + assignment.putAll(taskId00Assignment); + assignment.putAll(taskId01Assignment); + expect(activeTaskCreator.createTasks(anyObject(), eq(assignment))).andStubReturn(asList(corruptedActive, unCorruptedActive)); + topologyBuilder.addSubscribedTopicsFromAssignment(anyObject(), anyString()); + expectLastCall().anyTimes(); + + expectRestoreToBeCompleted(consumer, changeLogReader); + + consumer.commitSync(offsets); + expectLastCall().andThrow(new TimeoutException()); + + expect(consumer.assignment()).andStubReturn(union(HashSet::new, taskId00Partitions, taskId01Partitions)); + + replay(activeTaskCreator, standbyTaskCreator, 
topologyBuilder, consumer, changeLogReader); + + taskManager.handleAssignment(assignment, emptyMap()); + assertThat(taskManager.tryToCompleteRestoration(time.milliseconds(), null), is(true)); + + // make sure this will be committed and throw + assertThat(unCorruptedActive.state(), is(Task.State.RUNNING)); + assertThat(corruptedActive.state(), is(Task.State.RUNNING)); + + unCorruptedActive.setCommitNeeded(); + + corruptedActive.setChangelogOffsets(singletonMap(t1p0, 0L)); + taskManager.handleCorruption(singleton(taskId00)); + + assertThat(corruptedActive.commitPrepared, is(true)); Review comment: Seems like we don't reset `commitPrepared` during revive. ~good point. I guess we should reset all of those in the `StateMachineTask#revive`~ Edit: actually, I think that for `commitPrepared` at least we should _not_ reset it, since we only use that flag to verify that we did, indeed, prepare a commit. But at least `commitNeeded` should probably be cleared in `StateMachineTask#closeXXX`. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org