Nikita-Shupletsov commented on code in PR #21365: URL: https://github.com/apache/kafka/pull/21365#discussion_r2744816224
########## streams/integration-tests/src/test/java/org/apache/kafka/streams/integration/RebalanceTaskClosureIntegrationTest.java: ########## @@ -0,0 +1,213 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.kafka.streams.integration; + +import org.apache.kafka.clients.consumer.ConsumerConfig; +import org.apache.kafka.common.serialization.LongSerializer; +import org.apache.kafka.common.serialization.Serdes; +import org.apache.kafka.common.serialization.StringSerializer; +import org.apache.kafka.common.utils.Bytes; +import org.apache.kafka.common.utils.MockTime; +import org.apache.kafka.streams.CloseOptions; +import org.apache.kafka.streams.KafkaStreams; +import org.apache.kafka.streams.KeyValue; +import org.apache.kafka.streams.StreamsConfig; +import org.apache.kafka.streams.TopologyWrapper; +import org.apache.kafka.streams.integration.utils.EmbeddedKafkaCluster; +import org.apache.kafka.streams.integration.utils.IntegrationTestUtils; +import org.apache.kafka.streams.processor.StateStore; +import org.apache.kafka.streams.processor.StateStoreContext; +import org.apache.kafka.streams.processor.internals.StreamThread; +import org.apache.kafka.streams.state.KeyValueStore; +import 
org.apache.kafka.streams.state.StoreBuilder; +import org.apache.kafka.streams.state.internals.AbstractStoreBuilder; +import org.apache.kafka.streams.state.internals.CacheFlushListener; +import org.apache.kafka.streams.state.internals.CachedStateStore; +import org.apache.kafka.streams.state.internals.RocksDBStore; +import org.apache.kafka.test.MockApiProcessorSupplier; +import org.apache.kafka.test.TestUtils; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.TestInfo; + +import java.io.IOException; +import java.time.Duration; +import java.util.List; +import java.util.Properties; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.atomic.AtomicInteger; + +import static org.apache.kafka.streams.utils.TestUtils.safeUniqueTestName; +import static org.junit.jupiter.api.Assertions.assertEquals; + +public class RebalanceTaskClosureIntegrationTest { + + private static final int NUM_BROKERS = 1; + protected static final String INPUT_TOPIC_NAME = "input-topic"; + private static final int NUM_PARTITIONS = 3; + + private final EmbeddedKafkaCluster cluster = new EmbeddedKafkaCluster(NUM_BROKERS); + + private KafkaStreamsWrapper streams1; + private KafkaStreamsWrapper streams2; + private String safeTestName; + + @BeforeEach + public void before(final TestInfo testInfo) throws InterruptedException, IOException { + cluster.start(); + cluster.createTopic(INPUT_TOPIC_NAME, NUM_PARTITIONS, 1); + safeTestName = safeUniqueTestName(testInfo); + + } + + @AfterEach + public void after() { + cluster.stop(); + if (streams1 != null) { + streams1.close(Duration.ofSeconds(30)); + } + if (streams2 != null) { + streams2.close(Duration.ofSeconds(30)); + } + } + + @Test + public void shouldClosePendingTasksToInitAfterRebalance() throws Exception { + final CountDownLatch recycleLatch = new CountDownLatch(1); + final CountDownLatch pendingShutdownLatch = new CountDownLatch(1); + // 
Count how many times we initialize and close stores + final AtomicInteger initCount = new AtomicInteger(); + final AtomicInteger closeCount = new AtomicInteger(); + final StoreBuilder<KeyValueStore<Bytes, byte[]>> storeBuilder = new AbstractStoreBuilder<>("testStateStore", Serdes.Integer(), Serdes.ByteArray(), new MockTime()) { + + @Override + public KeyValueStore<Bytes, byte[]> build() { + return new TestRocksDBStore(name, recycleLatch, pendingShutdownLatch, initCount, closeCount); + } + }; + + final TopologyWrapper topology = new TopologyWrapper(); + topology.addSource("ingest", INPUT_TOPIC_NAME); + topology.addProcessor("my-processor", new MockApiProcessorSupplier<>(), "ingest"); + topology.addStateStore(storeBuilder, "my-processor"); + + streams1 = new KafkaStreamsWrapper(topology, props("1")); + streams1.setStreamThreadStateListener((t, newState, oldState) -> { + if (newState == StreamThread.State.PENDING_SHUTDOWN) { + pendingShutdownLatch.countDown(); + } + }); + streams1.start(); + + TestUtils.waitForCondition(() -> streams1.state() == KafkaStreams.State.RUNNING, "Streams never reached RUNNING state"); + + streams2 = new KafkaStreamsWrapper(topology, props("2")); + streams2.start(); + + TestUtils.waitForCondition(() -> streams2.state() == KafkaStreams.State.RUNNING, "Streams never reached RUNNING state"); + + // starting the second KS app triggered a rebalance. Which in turn will recycle active tasks that need to become standby. + // That's exactly what we are waiting for + recycleLatch.await(); + + // sending a message to disable retries in the consumer client. if there are no messages, it retries the whole sequence of actions, + // including the rebalance data. 
which we don't want, because we just staged the right condition + IntegrationTestUtils.produceKeyValuesSynchronously(INPUT_TOPIC_NAME, List.of(new KeyValue<>(1L, "key")), Review Comment: I was talking about this code: https://github.com/apache/kafka/blob/trunk/clients/src/main/java/org/apache/kafka/clients/consumer/internals/ClassicKafkaConsumer.java#L639-L671 so updateAssignmentMetadataIfNeeded calls onAssignment and so on, which in turn at some point will call clearCache. where we set up a synchronization point in this test. however if pollForFetches returned nothing, we will retry the whole thing again, which will call onAssignment again, and it will cause problems in the test, because we will not know how many times we get this onAssignment called. it can be one, it can be two, it can be more. so in order to always call it once, we want pollForFetches to return something to break the loop right after we get to the right place (in our case it's clearCache) I will update the comment to make it more clear what I mean -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
