mimaison commented on a change in pull request #9224: URL: https://github.com/apache/kafka/pull/9224#discussion_r538876091
########## File path: connect/runtime/src/test/java/org/apache/kafka/connect/util/clusters/EmbeddedKafkaCluster.java ########## @@ -278,6 +283,11 @@ protected boolean hasState(KafkaServer server, Predicate<BrokerState> desiredSta return false; } } + + public boolean sslEnabled() { + final String listeners = brokerConfig.getProperty(KafkaConfig$.MODULE$.ListenersProp()); + return (listeners != null && listeners.contains("SSL")) ? true : false; Review comment: We can return `listeners != null && listeners.contains("SSL")` ########## File path: connect/mirror/src/test/java/org/apache/kafka/connect/mirror/integration/MirrorConnectorsIntegrationSSLTest.java ########## @@ -0,0 +1,98 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.kafka.connect.mirror.integration; + +import org.apache.kafka.clients.consumer.Consumer; +import org.apache.kafka.common.network.Mode; +import org.apache.kafka.connect.mirror.MirrorCheckpointConnector; +import org.apache.kafka.connect.mirror.MirrorHeartbeatConnector; +import org.apache.kafka.connect.mirror.MirrorMakerConfig; +import org.apache.kafka.connect.mirror.MirrorSourceConnector; +import org.apache.kafka.test.IntegrationTest; +import org.apache.kafka.test.TestSslUtils; +import org.apache.kafka.test.TestUtils; +import kafka.server.KafkaConfig$; + +import org.junit.experimental.categories.Category; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import java.util.Map; +import java.util.concurrent.TimeUnit; + +import org.junit.Before; +import org.junit.After; +import org.junit.Test; + +/** + * Tests MM2 replication with SSL enabled at backup kafka cluster + */ +@Category(IntegrationTest.class) +public class MirrorConnectorsIntegrationSSLTest extends MirrorConnectorsIntegrationBaseTest { + + private static final Logger log = LoggerFactory.getLogger(MirrorConnectorsIntegrationSSLTest.class); + + private static final List<Class> CONNECTOR_LIST = + Arrays.asList(MirrorSourceConnector.class, MirrorCheckpointConnector.class, MirrorHeartbeatConnector.class); + + @Before + public void setup() throws InterruptedException { + try { + Map<String, Object> sslConfig = TestSslUtils.createSslConfig(false, true, Mode.SERVER, TestUtils.tempFile(), "testCert"); + backupBrokerProps.put(KafkaConfig$.MODULE$.ListenersProp(), "SSL://localhost:0"); + backupBrokerProps.put(KafkaConfig$.MODULE$.InterBrokerListenerNameProp(), "SSL"); + backupBrokerProps.putAll(sslConfig); + } catch (final Exception e) { + throw new RuntimeException(e); Review comment: This still needs to be addressed ########## File path: connect/mirror/src/test/java/org/apache/kafka/connect/mirror/integration/MirrorConnectorsIntegrationBaseTest.java ########## @@ -0,0 +1,577 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.kafka.connect.mirror.integration; + +import org.apache.kafka.clients.admin.Admin; +import org.apache.kafka.clients.admin.Config; +import org.apache.kafka.clients.admin.ConfigEntry; +import org.apache.kafka.clients.admin.DescribeConfigsResult; +import org.apache.kafka.clients.consumer.Consumer; +import org.apache.kafka.clients.consumer.ConsumerRecords; +import org.apache.kafka.clients.consumer.OffsetAndMetadata; +import org.apache.kafka.common.config.ConfigResource; +import org.apache.kafka.common.config.TopicConfig; +import org.apache.kafka.common.utils.Exit; +import org.apache.kafka.common.TopicPartition; +import org.apache.kafka.connect.connector.Connector; +import org.apache.kafka.connect.mirror.MirrorClient; +import org.apache.kafka.connect.mirror.MirrorHeartbeatConnector; +import org.apache.kafka.connect.mirror.MirrorMakerConfig; +import org.apache.kafka.connect.mirror.MirrorSourceConnector; +import org.apache.kafka.connect.mirror.SourceAndTarget; +import org.apache.kafka.connect.mirror.MirrorCheckpointConnector; +import org.apache.kafka.connect.util.clusters.EmbeddedConnectCluster; +import org.apache.kafka.connect.util.clusters.EmbeddedKafkaCluster; +import org.apache.kafka.test.IntegrationTest; +import static org.apache.kafka.test.TestUtils.waitForCondition; + +import java.time.Duration; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.List; +import java.util.Collections; +import java.util.HashMap; +import java.util.Iterator; +import java.util.Map; +import java.util.Properties; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.stream.Collectors; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.assertNotNull; +import org.junit.Test; +import org.junit.experimental.categories.Category; +import org.junit.After; +import org.junit.Before; + +import static org.apache.kafka.connect.mirror.TestUtils.generateRecords; + +/** + * Tests MM2 replication and failover/failback logic. + * + * MM2 is configured with active/active replication between two Kafka clusters. Tests validate that + * records sent to either cluster arrive at the other cluster. Then, a consumer group is migrated from + * one cluster to the other and back. Tests validate that consumer offsets are translated and replicated + * between clusters during this failover and failback. + */ +@Category(IntegrationTest.class) +public abstract class MirrorConnectorsIntegrationBaseTest { + private static final Logger log = LoggerFactory.getLogger(MirrorConnectorsIntegrationBaseTest.class); + + private static final int NUM_RECORDS_PER_PARTITION = 10; + private static final int NUM_PARTITIONS = 10; + private static final int NUM_RECORDS_PRODUCED = NUM_PARTITIONS * NUM_RECORDS_PER_PARTITION; + private static final int RECORD_TRANSFER_DURATION_MS = 30_000; + private static final int CHECKPOINT_DURATION_MS = 20_000; + private static final int RECORD_CONSUME_DURATION_MS = 20_000; + private static final int OFFSET_SYNC_DURATION_MS = 30_000; + private static final int NUM_WORKERS = 3; + private static final Duration CONSUMER_POLL_TIMEOUT_MS = Duration.ofMillis(500); + private static final int BROKER_RESTART_TIMEOUT_MS = 10_000; + protected static final String PRIMARY_CLUSTER_ALIAS = "primary"; + protected static final String BACKUP_CLUSTER_ALIAS = "backup"; + private static final List<Class<? extends Connector>> CONNECTOR_LIST = + Arrays.asList(MirrorSourceConnector.class, MirrorCheckpointConnector.class, MirrorHeartbeatConnector.class); + + protected Map<String, String> mm2Props = new HashMap<>(); + private MirrorMakerConfig mm2Config; + private EmbeddedConnectCluster primary; + private EmbeddedConnectCluster backup; + + private final AtomicBoolean exited = new AtomicBoolean(false); + protected Properties primaryBrokerProps = new Properties(); + protected Properties backupBrokerProps = new Properties(); + protected Map<String, String> primaryWorkerProps = new HashMap<>(); + protected Map<String, String> backupWorkerProps = new HashMap<>(); + + @Before + public void startClusters() throws InterruptedException { + primaryBrokerProps.put("auto.create.topics.enable", "false"); + backupBrokerProps.put("auto.create.topics.enable", "false"); + + mm2Props.putAll(basicMM2Config()); + + mm2Config = new MirrorMakerConfig(mm2Props); + primaryWorkerProps = mm2Config.workerConfig(new SourceAndTarget(BACKUP_CLUSTER_ALIAS, PRIMARY_CLUSTER_ALIAS)); + backupWorkerProps.putAll(mm2Config.workerConfig(new SourceAndTarget(PRIMARY_CLUSTER_ALIAS, BACKUP_CLUSTER_ALIAS))); + + primary = new EmbeddedConnectCluster.Builder() + .name(PRIMARY_CLUSTER_ALIAS + "-connect-cluster") + .numWorkers(NUM_WORKERS) + .numBrokers(1) + .brokerProps(primaryBrokerProps) + .workerProps(primaryWorkerProps) + .build(); + + backup = new EmbeddedConnectCluster.Builder() + .name(BACKUP_CLUSTER_ALIAS + "-connect-cluster") + .numWorkers(NUM_WORKERS) + .numBrokers(1) + .brokerProps(backupBrokerProps) + .workerProps(backupWorkerProps) + .build(); + + primary.start(); + primary.assertions().assertAtLeastNumWorkersAreUp(NUM_WORKERS, + "Workers of " + PRIMARY_CLUSTER_ALIAS + "-connect-cluster did not start in time."); + + backup.start(); + backup.assertions().assertAtLeastNumWorkersAreUp(NUM_WORKERS, + "Workers of " + BACKUP_CLUSTER_ALIAS + "-connect-cluster did not start in time."); + + createTopics(); + + warmUpConsumer(); + + log.info(PRIMARY_CLUSTER_ALIAS + " REST service: {}", primary.endpointForResource("connectors")); + log.info(BACKUP_CLUSTER_ALIAS + " REST service: {}", backup.endpointForResource("connectors")); + log.info(PRIMARY_CLUSTER_ALIAS + " brokers: {}", primary.kafka().bootstrapServers()); + log.info(BACKUP_CLUSTER_ALIAS + " brokers: {}", backup.kafka().bootstrapServers()); + + // now that the brokers are running, we can finish setting up the Connectors + mm2Props.put(PRIMARY_CLUSTER_ALIAS + ".bootstrap.servers", primary.kafka().bootstrapServers()); + mm2Props.put(BACKUP_CLUSTER_ALIAS + ".bootstrap.servers", backup.kafka().bootstrapServers()); + + Exit.setExitProcedure((status, errorCode) -> exited.set(true)); + } + + @After + public void shutdownClusters() throws Exception { + for (String x : primary.connectors()) { + primary.deleteConnector(x); + } + for (String x : backup.connectors()) { + backup.deleteConnector(x); + } + deleteAllTopics(primary.kafka()); + deleteAllTopics(backup.kafka()); + primary.stop(); + backup.stop(); + try { + assertFalse(exited.get()); + } finally { + Exit.resetExitProcedure(); + } + } + + @Test + public void testReplication() throws Exception { + produceMessages(primary, "test-topic-1"); + produceMessages(backup, "test-topic-1"); + String consumerGroupName = "consumer-group-testReplication"; + Map<String, Object> consumerProps = new HashMap<String, Object>() {{ + put("group.id", consumerGroupName); + put("auto.offset.reset", "latest"); + }}; + // create consumers before starting the connectors so we don't need to wait for discovery + waitForConsumingAllRecords(primary.kafka().createConsumerAndSubscribeTo(consumerProps, "test-topic-1"), 0); + waitForConsumingAllRecords(backup.kafka().createConsumerAndSubscribeTo(consumerProps, "test-topic-1"), 0); + + mm2Config = new MirrorMakerConfig(mm2Props); + + waitUntilMirrorMakerIsRunning(backup, CONNECTOR_LIST, mm2Config, PRIMARY_CLUSTER_ALIAS, BACKUP_CLUSTER_ALIAS); + waitUntilMirrorMakerIsRunning(primary, CONNECTOR_LIST, mm2Config, BACKUP_CLUSTER_ALIAS, PRIMARY_CLUSTER_ALIAS); + + MirrorClient primaryClient = new MirrorClient(mm2Config.clientConfig(PRIMARY_CLUSTER_ALIAS)); + MirrorClient backupClient = new MirrorClient(mm2Config.clientConfig(BACKUP_CLUSTER_ALIAS)); + + assertEquals("topic config was not synced", TopicConfig.CLEANUP_POLICY_COMPACT, + getTopicConfig(backup.kafka(), "primary.test-topic-1", TopicConfig.CLEANUP_POLICY_CONFIG)); + + assertEquals("Records were not produced to primary cluster.", NUM_RECORDS_PRODUCED, + primary.kafka().consume(NUM_RECORDS_PRODUCED, RECORD_TRANSFER_DURATION_MS, "test-topic-1").count()); + assertEquals("Records were not replicated to backup cluster.", NUM_RECORDS_PRODUCED, + backup.kafka().consume(NUM_RECORDS_PRODUCED, RECORD_TRANSFER_DURATION_MS, "primary.test-topic-1").count()); + assertEquals("Records were not produced to backup cluster.", NUM_RECORDS_PRODUCED, + backup.kafka().consume(NUM_RECORDS_PRODUCED, RECORD_TRANSFER_DURATION_MS, "test-topic-1").count()); + assertEquals("Records were not replicated to primary cluster.", NUM_RECORDS_PRODUCED, + primary.kafka().consume(NUM_RECORDS_PRODUCED, RECORD_TRANSFER_DURATION_MS, "backup.test-topic-1").count()); + + assertEquals("Primary cluster doesn't have all records from both clusters.", NUM_RECORDS_PRODUCED * 2, + primary.kafka().consume(NUM_RECORDS_PRODUCED * 2, RECORD_TRANSFER_DURATION_MS, "backup.test-topic-1", "test-topic-1").count()); + assertEquals("Backup cluster doesn't have all records from both clusters.", NUM_RECORDS_PRODUCED * 2, + backup.kafka().consume(NUM_RECORDS_PRODUCED * 2, RECORD_TRANSFER_DURATION_MS, "primary.test-topic-1", "test-topic-1").count()); + + assertTrue("Heartbeats were not emitted to primary cluster.", primary.kafka().consume(1, + RECORD_TRANSFER_DURATION_MS, "heartbeats").count() > 0); + assertTrue("Heartbeats were not emitted to backup cluster.", backup.kafka().consume(1, + RECORD_TRANSFER_DURATION_MS, "heartbeats").count() > 0); + assertTrue("Heartbeats were not replicated downstream to backup cluster.", backup.kafka().consume(1, + RECORD_TRANSFER_DURATION_MS, "primary.heartbeats").count() > 0); + assertTrue("Heartbeats were not replicated downstream to primary cluster.", primary.kafka().consume(1, + RECORD_TRANSFER_DURATION_MS, "backup.heartbeats").count() > 0); + + assertTrue("Did not find upstream primary cluster.", backupClient.upstreamClusters().contains(PRIMARY_CLUSTER_ALIAS)); + assertEquals("Did not calculate replication hops correctly.", 1, backupClient.replicationHops(PRIMARY_CLUSTER_ALIAS)); + assertTrue("Did not find upstream backup cluster.", primaryClient.upstreamClusters().contains(BACKUP_CLUSTER_ALIAS)); + assertEquals("Did not calculate replication hops correctly.", 1, primaryClient.replicationHops(BACKUP_CLUSTER_ALIAS)); + assertTrue("Checkpoints were not emitted downstream to backup cluster.", backup.kafka().consume(1, + CHECKPOINT_DURATION_MS, "primary.checkpoints.internal").count() > 0); + + Map<TopicPartition, OffsetAndMetadata> backupOffsets = backupClient.remoteConsumerOffsets(consumerGroupName, PRIMARY_CLUSTER_ALIAS, + Duration.ofMillis(CHECKPOINT_DURATION_MS)); + + assertTrue("Offsets not translated downstream to backup cluster. Found: " + backupOffsets, backupOffsets.containsKey( + new TopicPartition("primary.test-topic-1", 0))); + + // Failover consumer group to backup cluster. + try (Consumer<byte[], byte[]> primaryConsumer = backup.kafka().createConsumer(Collections.singletonMap("group.id", consumerGroupName))) { + primaryConsumer.assign(backupOffsets.keySet()); + backupOffsets.forEach(primaryConsumer::seek); + primaryConsumer.poll(CONSUMER_POLL_TIMEOUT_MS); + primaryConsumer.commitAsync(); + + assertTrue("Consumer failedover to zero offset.", primaryConsumer.position(new TopicPartition("primary.test-topic-1", 0)) > 0); + assertTrue("Consumer failedover beyond expected offset.", primaryConsumer.position( + new TopicPartition("primary.test-topic-1", 0)) <= NUM_RECORDS_PRODUCED); + assertTrue("Checkpoints were not emitted upstream to primary cluster.", primary.kafka().consume(1, + CHECKPOINT_DURATION_MS, "backup.checkpoints.internal").count() > 0); + } + + waitForCondition(() -> { + return primaryClient.remoteConsumerOffsets(consumerGroupName, BACKUP_CLUSTER_ALIAS, + Duration.ofMillis(CHECKPOINT_DURATION_MS)).containsKey(new TopicPartition("backup.test-topic-1", 0)); + }, CHECKPOINT_DURATION_MS, "Offsets not translated downstream to primary cluster."); + + waitForCondition(() -> { + return primaryClient.remoteConsumerOffsets(consumerGroupName, BACKUP_CLUSTER_ALIAS, + Duration.ofMillis(CHECKPOINT_DURATION_MS)).containsKey(new TopicPartition("test-topic-1", 0)); + }, CHECKPOINT_DURATION_MS, "Offsets not translated upstream to primary cluster."); + + Map<TopicPartition, OffsetAndMetadata> primaryOffsets = primaryClient.remoteConsumerOffsets(consumerGroupName, BACKUP_CLUSTER_ALIAS, + Duration.ofMillis(CHECKPOINT_DURATION_MS)); + + // Failback consumer group to primary cluster + try (Consumer<byte[], byte[]> backupConsumer = primary.kafka().createConsumer(Collections.singletonMap("group.id", consumerGroupName))) { + backupConsumer.assign(primaryOffsets.keySet()); + primaryOffsets.forEach(backupConsumer::seek); + backupConsumer.poll(CONSUMER_POLL_TIMEOUT_MS); + backupConsumer.commitAsync(); + + assertTrue("Consumer failedback to zero upstream offset.", backupConsumer.position(new TopicPartition("test-topic-1", 0)) > 0); + assertTrue("Consumer failedback to zero downstream offset.", backupConsumer.position(new TopicPartition("backup.test-topic-1", 0)) > 0); + assertTrue("Consumer failedback beyond expected upstream offset.", backupConsumer.position( + new TopicPartition("test-topic-1", 0)) <= NUM_RECORDS_PRODUCED); + assertTrue("Consumer failedback beyond expected downstream offset.", backupConsumer.position( + new TopicPartition("backup.test-topic-1", 0)) <= NUM_RECORDS_PRODUCED); + } + + // create more matching topics + primary.kafka().createTopic("test-topic-2", NUM_PARTITIONS); + backup.kafka().createTopic("test-topic-3", NUM_PARTITIONS); + + // only produce messages to the first partition + produceMessages(primary, "test-topic-2", 1); + produceMessages(backup, "test-topic-3", 1); + + // expect total consumed messages equals to NUM_RECORDS_PER_PARTITION + assertEquals("Records were not produced to primary cluster.", NUM_RECORDS_PER_PARTITION, + primary.kafka().consume(NUM_RECORDS_PER_PARTITION, RECORD_TRANSFER_DURATION_MS, "test-topic-2").count()); + assertEquals("Records were not produced to backup cluster.", NUM_RECORDS_PER_PARTITION, + backup.kafka().consume(NUM_RECORDS_PER_PARTITION, RECORD_TRANSFER_DURATION_MS, "test-topic-3").count()); + + assertEquals("New topic was not replicated to primary cluster.", NUM_RECORDS_PER_PARTITION, + primary.kafka().consume(NUM_RECORDS_PER_PARTITION, 2 * RECORD_TRANSFER_DURATION_MS, "backup.test-topic-3").count()); + assertEquals("New topic was not replicated to backup cluster.", NUM_RECORDS_PER_PARTITION, + backup.kafka().consume(NUM_RECORDS_PER_PARTITION, 2 * RECORD_TRANSFER_DURATION_MS, "primary.test-topic-2").count()); + + } + + @Test + public void testReplicationWithEmptyPartition() throws Exception { + String consumerGroupName = "consumer-group-testReplicationWithEmptyPartition"; + Map<String, Object> consumerProps = Collections.singletonMap("group.id", consumerGroupName); + + // create topic + String topic = "test-topic-with-empty-partition"; + primary.kafka().createTopic(topic, NUM_PARTITIONS); + + // produce to all test-topic-empty's partitions, except the last partition + produceMessages(primary, topic, NUM_PARTITIONS - 1); + + // consume before starting the connectors so we don't need to wait for discovery + int expectedRecords = NUM_RECORDS_PER_PARTITION * (NUM_PARTITIONS - 1); + try (Consumer<byte[], byte[]> primaryConsumer = primary.kafka().createConsumerAndSubscribeTo(consumerProps, topic)) { + waitForConsumingAllRecords(primaryConsumer, expectedRecords); + } + + // one way replication from primary to backup + mm2Props.put(BACKUP_CLUSTER_ALIAS + "->" + PRIMARY_CLUSTER_ALIAS + ".enabled", "false"); + mm2Config = new MirrorMakerConfig(mm2Props); + waitUntilMirrorMakerIsRunning(backup, CONNECTOR_LIST, mm2Config, PRIMARY_CLUSTER_ALIAS, BACKUP_CLUSTER_ALIAS); + + // sleep few seconds to have MM2 finish replication so that "end" consumer will consume some record + Thread.sleep(TimeUnit.SECONDS.toMillis(3)); + + // consume all records from backup cluster + try (Consumer<byte[], byte[]> backupConsumer = backup.kafka().createConsumerAndSubscribeTo(consumerProps, + PRIMARY_CLUSTER_ALIAS + "." + topic)) { + waitForConsumingAllRecords(backupConsumer, expectedRecords); + } + + Admin backupClient = backup.kafka().createAdminClient(); + // retrieve the consumer group offset from backup cluster + Map<TopicPartition, OffsetAndMetadata> remoteOffsets = + backupClient.listConsumerGroupOffsets(consumerGroupName).partitionsToOffsetAndMetadata().get(); + // pinpoint the offset of the last partition which does not receive records + OffsetAndMetadata offset = remoteOffsets.get(new TopicPartition(PRIMARY_CLUSTER_ALIAS + "." + topic, NUM_PARTITIONS - 1)); + // offset of the last partition should exist, but its value should be 0 + assertNotNull("Offset of last partition was not replicated", offset); + assertEquals("Offset of last partition is not zero", 0, offset.offset()); + } + + @Test + public void testOneWayReplicationWithAutoOffsetSync() throws InterruptedException { + produceMessages(primary, "test-topic-1"); + String consumerGroupName = "consumer-group-testOneWayReplicationWithAutoOffsetSync"; + Map<String, Object> consumerProps = new HashMap<String, Object>() {{ + put("group.id", consumerGroupName); + put("auto.offset.reset", "earliest"); + }}; + // create consumers before starting the connectors so we don't need to wait for discovery + try (Consumer<byte[], byte[]> primaryConsumer = primary.kafka().createConsumerAndSubscribeTo(consumerProps, + "test-topic-1")) { + // we need to wait for consuming all the records for MM2 replicating the expected offsets + waitForConsumingAllRecords(primaryConsumer, NUM_RECORDS_PRODUCED); + } + + // enable automated consumer group offset sync + mm2Props.put("sync.group.offsets.enabled", "true"); + mm2Props.put("sync.group.offsets.interval.seconds", "1"); + // one way replication from primary to backup + mm2Props.put(BACKUP_CLUSTER_ALIAS + "->" + PRIMARY_CLUSTER_ALIAS + ".enabled", "false"); + + mm2Config = new MirrorMakerConfig(mm2Props); + + waitUntilMirrorMakerIsRunning(backup, CONNECTOR_LIST, mm2Config, PRIMARY_CLUSTER_ALIAS, BACKUP_CLUSTER_ALIAS); + + // create a consumer at backup cluster with same consumer group Id to consume 1 topic + Consumer<byte[], byte[]> backupConsumer = backup.kafka().createConsumerAndSubscribeTo( + consumerProps, "primary.test-topic-1"); + + waitForConsumerGroupOffsetSync(backup, backupConsumer, Collections.singletonList("primary.test-topic-1"), + consumerGroupName, NUM_RECORDS_PRODUCED); + + ConsumerRecords<byte[], byte[]> records = backupConsumer.poll(CONSUMER_POLL_TIMEOUT_MS); + + // the size of consumer record should be zero, because the offsets of the same consumer group + // have been automatically synchronized from primary to backup by the background job, so no + // more records to consume from the replicated topic by the same consumer group at backup cluster + assertEquals("consumer record size is not zero", 0, records.count()); + + // now create a new topic in primary cluster + primary.kafka().createTopic("test-topic-2", NUM_PARTITIONS); + backup.kafka().createTopic("primary.test-topic-2", 1); + // produce some records to the new topic in primary cluster + produceMessages(primary, "test-topic-2"); + + // create a consumer at primary cluster to consume the new topic + try (Consumer<byte[], byte[]> consumer1 = primary.kafka().createConsumerAndSubscribeTo(Collections.singletonMap( + "group.id", "consumer-group-1"), "test-topic-2")) { + // we need to wait for consuming all the records for MM2 replicating the expected offsets + waitForConsumingAllRecords(consumer1, NUM_RECORDS_PRODUCED); + } + + // create a consumer at backup cluster with same consumer group Id to consume old and new topic + backupConsumer = backup.kafka().createConsumerAndSubscribeTo(Collections.singletonMap( + "group.id", consumerGroupName), "primary.test-topic-1", "primary.test-topic-2"); + + waitForConsumerGroupOffsetSync(backup, backupConsumer, Arrays.asList("primary.test-topic-1", "primary.test-topic-2"), + consumerGroupName, NUM_RECORDS_PRODUCED); + + records = backupConsumer.poll(CONSUMER_POLL_TIMEOUT_MS); + // similar reasoning as above, no more records to consume by the same consumer group at backup cluster + assertEquals("consumer record size is not zero", 0, records.count()); + backupConsumer.close(); + } + + /* + * launch the connectors on kafka connect cluster and check if they are running + */ + private static void waitUntilMirrorMakerIsRunning(EmbeddedConnectCluster connectCluster, + List<Class<? extends Connector>> connectorClasses, MirrorMakerConfig mm2Config, + String primary, String backup) throws InterruptedException { + for (int i = 0; i < connectorClasses.size(); i++) { Review comment: We could use a "for each" loop here, something like: ``` for (Class<? extends Connector> connector : connectorClasses) { connectCluster.configureConnector(connector.getSimpleName(), mm2Config.connectorBaseConfig( new SourceAndTarget(primary, backup), connector)); } ``` ########## File path: connect/mirror/src/test/java/org/apache/kafka/connect/mirror/integration/MirrorConnectorsIntegrationBaseTest.java ########## @@ -0,0 +1,617 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.kafka.connect.mirror.integration; + +import org.apache.kafka.clients.admin.Admin; +import org.apache.kafka.clients.admin.Config; +import org.apache.kafka.clients.admin.ConfigEntry; +import org.apache.kafka.clients.admin.DescribeConfigsResult; +import org.apache.kafka.clients.consumer.Consumer; +import org.apache.kafka.clients.consumer.ConsumerRecords; +import org.apache.kafka.clients.consumer.OffsetAndMetadata; +import org.apache.kafka.clients.CommonClientConfigs; +import org.apache.kafka.common.config.ConfigResource; +import org.apache.kafka.common.config.TopicConfig; +import org.apache.kafka.common.config.SslConfigs; +import org.apache.kafka.common.config.types.Password; +import org.apache.kafka.common.utils.Exit; +import org.apache.kafka.common.TopicPartition; +import org.apache.kafka.connect.connector.Connector; +import org.apache.kafka.connect.mirror.MirrorClient; +import org.apache.kafka.connect.mirror.MirrorHeartbeatConnector; +import org.apache.kafka.connect.mirror.MirrorMakerConfig; +import org.apache.kafka.connect.mirror.MirrorSourceConnector; +import org.apache.kafka.connect.mirror.SourceAndTarget; +import org.apache.kafka.connect.mirror.MirrorCheckpointConnector; +import org.apache.kafka.connect.util.clusters.EmbeddedConnectCluster; +import org.apache.kafka.connect.util.clusters.EmbeddedKafkaCluster; +import org.apache.kafka.test.IntegrationTest; +import static org.apache.kafka.test.TestUtils.waitForCondition; + +import java.time.Duration; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.List; +import java.util.Collections; +import java.util.HashMap; +import java.util.Iterator; +import java.util.Map; +import java.util.Properties; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.stream.Collectors; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.assertNotNull; +import org.junit.Test; +import org.junit.experimental.categories.Category; + +import static org.apache.kafka.connect.mirror.TestUtils.generateRecords; + +/** + * Tests MM2 replication and failover/failback logic. + * + * MM2 is configured with active/active replication between two Kafka clusters. Tests validate that + * records sent to either cluster arrive at the other cluster. Then, a consumer group is migrated from + * one cluster to the other and back. Tests validate that consumer offsets are translated and replicated + * between clusters during this failover and failback. + */ +@Category(IntegrationTest.class) +public abstract class MirrorConnectorsIntegrationBaseTest { + private static final Logger log = LoggerFactory.getLogger(MirrorConnectorsIntegrationBaseTest.class); + + private static final int NUM_RECORDS_PER_PARTITION = 10; + private static final int NUM_PARTITIONS = 10; + private static final int NUM_RECORDS_PRODUCED = NUM_PARTITIONS * NUM_RECORDS_PER_PARTITION; + private static final int RECORD_TRANSFER_DURATION_MS = 30_000; + private static final int CHECKPOINT_DURATION_MS = 20_000; + private static final int RECORD_CONSUME_DURATION_MS = 20_000; + private static final int OFFSET_SYNC_DURATION_MS = 30_000; + private static final int NUM_WORKERS = 3; + private static final int CONSUMER_POLL_TIMEOUT_MS = 500; + private static final int BROKER_RESTART_TIMEOUT_MS = 10_000; + private static final long DEFAULT_PRODUCE_SEND_DURATION_MS = TimeUnit.SECONDS.toMillis(120); + private static final String PRIMARY_CLUSTER_ALIAS = "primary"; + private static final String BACKUP_CLUSTER_ALIAS = "backup"; + private static final List<Class<? extends Connector>> CONNECTOR_LIST = + Arrays.asList(MirrorSourceConnector.class, MirrorCheckpointConnector.class, MirrorHeartbeatConnector.class); + + private Map<String, String> mm2Props; + private MirrorMakerConfig mm2Config; + private EmbeddedConnectCluster primary; + private EmbeddedConnectCluster backup; + + private final AtomicBoolean exited = new AtomicBoolean(false); + protected Properties primaryBrokerProps = new Properties(); + protected Properties backupBrokerProps = new Properties(); + private Map<String, String> primaryWorkerProps = new HashMap<>(); + private Map<String, String> backupWorkerProps = new HashMap<>(); + abstract Map<String, Object> getSslConfig(); + + protected void startClusters() throws InterruptedException { + primaryBrokerProps.put("auto.create.topics.enable", "false"); + backupBrokerProps.put("auto.create.topics.enable", "false"); + + mm2Props = basicMM2Config(); + + final Map<String, Object> sslConfig = getSslConfig(); + if (sslConfig != null) { + Properties sslProps = new Properties(); + sslProps.put(SslConfigs.SSL_TRUSTSTORE_LOCATION_CONFIG, sslConfig.get(SslConfigs.SSL_TRUSTSTORE_LOCATION_CONFIG)); + sslProps.put(SslConfigs.SSL_TRUSTSTORE_PASSWORD_CONFIG, ((Password) sslConfig.get(SslConfigs.SSL_TRUSTSTORE_PASSWORD_CONFIG)).value()); + sslProps.put(CommonClientConfigs.SECURITY_PROTOCOL_CONFIG, "SSL"); + + // set SSL config for kafka connect worker + backupWorkerProps.putAll(sslProps.entrySet().stream().collect(Collectors.toMap( + e -> String.valueOf(e.getKey()), e -> String.valueOf(e.getValue())))); + + mm2Props.putAll(sslProps.entrySet().stream().collect(Collectors.toMap( + e -> BACKUP_CLUSTER_ALIAS + "." + String.valueOf(e.getKey()), e -> String.valueOf(e.getValue())))); + // set SSL config for producer used by source task in MM2 + mm2Props.putAll(sslProps.entrySet().stream().collect(Collectors.toMap( + e -> BACKUP_CLUSTER_ALIAS + ".producer." + String.valueOf(e.getKey()), e -> String.valueOf(e.getValue())))); + } + + mm2Config = new MirrorMakerConfig(mm2Props); + primaryWorkerProps = mm2Config.workerConfig(new SourceAndTarget(BACKUP_CLUSTER_ALIAS, PRIMARY_CLUSTER_ALIAS)); + backupWorkerProps.putAll(mm2Config.workerConfig(new SourceAndTarget(PRIMARY_CLUSTER_ALIAS, BACKUP_CLUSTER_ALIAS))); + + primary = new EmbeddedConnectCluster.Builder() + .name(PRIMARY_CLUSTER_ALIAS + "-connect-cluster") + .numWorkers(3) + .numBrokers(1) + .brokerProps(primaryBrokerProps) + .workerProps(primaryWorkerProps) + .build(); + + backup = new EmbeddedConnectCluster.Builder() + .name(BACKUP_CLUSTER_ALIAS + "-connect-cluster") + .numWorkers(3) + .numBrokers(1) + .brokerProps(backupBrokerProps) + .workerProps(backupWorkerProps) + .build(); + + primary.start(); + primary.assertions().assertAtLeastNumWorkersAreUp(3, + "Workers of " + PRIMARY_CLUSTER_ALIAS + "-connect-cluster did not start in time."); + + backup.start(); + backup.assertions().assertAtLeastNumWorkersAreUp(3, + "Workers of " + BACKUP_CLUSTER_ALIAS + "-connect-cluster did not start in time."); + + createTopics(); + + warmUpConsumer(); + + log.info(PRIMARY_CLUSTER_ALIAS + " REST service: {}", primary.endpointForResource("connectors")); + log.info(BACKUP_CLUSTER_ALIAS + " REST service: {}", backup.endpointForResource("connectors")); + log.info(PRIMARY_CLUSTER_ALIAS + " brokers: {}", primary.kafka().bootstrapServers()); + log.info(BACKUP_CLUSTER_ALIAS + " brokers: {}", backup.kafka().bootstrapServers()); + + // now that the brokers are running, we can finish setting up the Connectors + mm2Props.put(PRIMARY_CLUSTER_ALIAS + ".bootstrap.servers", primary.kafka().bootstrapServers()); + mm2Props.put(BACKUP_CLUSTER_ALIAS + ".bootstrap.servers", backup.kafka().bootstrapServers()); + + Exit.setExitProcedure((status, errorCode) -> exited.set(true)); + } + + public void shutdownClusters() { + for (String x : primary.connectors()) { + primary.deleteConnector(x); + } + for (String x : backup.connectors()) { + backup.deleteConnector(x); + } + deleteAllTopics(primary.kafka()); + deleteAllTopics(backup.kafka()); + primary.stop(); + backup.stop(); + try { + assertFalse(exited.get()); + } finally { + Exit.resetExitProcedure(); + } + } + + @Test + public void testReplication() throws InterruptedException { + produceMessages(primary, "test-topic-1"); + produceMessages(backup, "test-topic-1"); + String consumerGroupName = "consumer-group-testReplication"; + Map<String, Object> consumerProps = new HashMap<String, Object>() {{ + put("group.id", consumerGroupName); + put("auto.offset.reset", "latest"); + }}; + // create consumers before starting the connectors so we don't need to wait for discovery + Consumer<byte[], byte[]> primaryConsumer = primary.kafka().createConsumerAndSubscribeTo(consumerProps, "test-topic-1"); + waitForConsumingAllRecords(primaryConsumer, 0); + + Consumer<byte[], byte[]> backupConsumer = backup.kafka().createConsumerAndSubscribeTo(consumerProps, "test-topic-1"); + waitForConsumingAllRecords(backupConsumer, 0); + + mm2Config = new MirrorMakerConfig(mm2Props); + + waitUntilMirrorMakerIsRunning(backup, CONNECTOR_LIST, mm2Config, PRIMARY_CLUSTER_ALIAS, BACKUP_CLUSTER_ALIAS); + waitUntilMirrorMakerIsRunning(primary, CONNECTOR_LIST, mm2Config, BACKUP_CLUSTER_ALIAS, PRIMARY_CLUSTER_ALIAS); + + MirrorClient primaryClient = new MirrorClient(mm2Config.clientConfig(PRIMARY_CLUSTER_ALIAS)); + MirrorClient backupClient = new MirrorClient(mm2Config.clientConfig(BACKUP_CLUSTER_ALIAS)); + + assertEquals("topic config was not synced", TopicConfig.CLEANUP_POLICY_COMPACT, + getTopicConfig(backup.kafka(), "primary.test-topic-1", TopicConfig.CLEANUP_POLICY_CONFIG)); + + assertEquals("Records were not produced to primary cluster.", NUM_RECORDS_PRODUCED, + primary.kafka().consume(NUM_RECORDS_PRODUCED, RECORD_TRANSFER_DURATION_MS, "test-topic-1").count()); + assertEquals("Records were not replicated to backup cluster.", NUM_RECORDS_PRODUCED, + backup.kafka().consume(NUM_RECORDS_PRODUCED, RECORD_TRANSFER_DURATION_MS, "primary.test-topic-1").count()); + assertEquals("Records were not produced to backup cluster.", NUM_RECORDS_PRODUCED, + backup.kafka().consume(NUM_RECORDS_PRODUCED, RECORD_TRANSFER_DURATION_MS, "test-topic-1").count()); + assertEquals("Records were not replicated to primary cluster.", NUM_RECORDS_PRODUCED, + primary.kafka().consume(NUM_RECORDS_PRODUCED, RECORD_TRANSFER_DURATION_MS, "backup.test-topic-1").count()); + + assertEquals("Primary cluster doesn't have all records from both clusters.", NUM_RECORDS_PRODUCED * 2, + primary.kafka().consume(NUM_RECORDS_PRODUCED * 2, RECORD_TRANSFER_DURATION_MS, "backup.test-topic-1", "test-topic-1").count()); + assertEquals("Backup cluster doesn't have all records from both clusters.", NUM_RECORDS_PRODUCED * 2, + backup.kafka().consume(NUM_RECORDS_PRODUCED * 2, RECORD_TRANSFER_DURATION_MS, "primary.test-topic-1", "test-topic-1").count()); + + assertTrue("Heartbeats were not emitted to primary cluster.", primary.kafka().consume(1, + RECORD_TRANSFER_DURATION_MS, "heartbeats").count() > 0); + assertTrue("Heartbeats were not emitted to backup cluster.", backup.kafka().consume(1, + RECORD_TRANSFER_DURATION_MS, "heartbeats").count() > 0); + assertTrue("Heartbeats were not replicated downstream to backup cluster.", backup.kafka().consume(1, + RECORD_TRANSFER_DURATION_MS, "primary.heartbeats").count() > 0); + assertTrue("Heartbeats were not replicated downstream to primary cluster.", primary.kafka().consume(1, + RECORD_TRANSFER_DURATION_MS, "backup.heartbeats").count() > 0); + + assertTrue("Did not find upstream primary cluster.", backupClient.upstreamClusters().contains(PRIMARY_CLUSTER_ALIAS)); + assertEquals("Did not calculate replication hops correctly.", 1, backupClient.replicationHops(PRIMARY_CLUSTER_ALIAS)); + assertTrue("Did not find upstream backup cluster.", primaryClient.upstreamClusters().contains(BACKUP_CLUSTER_ALIAS)); + assertEquals("Did not calculate replication hops correctly.", 1, primaryClient.replicationHops(BACKUP_CLUSTER_ALIAS)); + assertTrue("Checkpoints were not emitted downstream to backup cluster.", backup.kafka().consume(1, + CHECKPOINT_DURATION_MS, "primary.checkpoints.internal").count() > 0); + + Map<TopicPartition, OffsetAndMetadata> backupOffsets = backupClient.remoteConsumerOffsets(consumerGroupName, PRIMARY_CLUSTER_ALIAS, + Duration.ofMillis(CHECKPOINT_DURATION_MS)); + + assertTrue("Offsets not translated downstream to backup cluster. Found: " + backupOffsets, backupOffsets.containsKey( + new TopicPartition("primary.test-topic-1", 0))); + + // Failover consumer group to backup cluster. + primaryConsumer = backup.kafka().createConsumer(Collections.singletonMap("group.id", consumerGroupName)); + primaryConsumer.assign(backupOffsets.keySet()); + backupOffsets.forEach(primaryConsumer::seek); + primaryConsumer.poll(Duration.ofMillis(CONSUMER_POLL_TIMEOUT_MS)); + primaryConsumer.commitAsync(); + + assertTrue("Consumer failedover to zero offset.", primaryConsumer.position(new TopicPartition("primary.test-topic-1", 0)) > 0); + assertTrue("Consumer failedover beyond expected offset.", primaryConsumer.position( + new TopicPartition("primary.test-topic-1", 0)) <= NUM_RECORDS_PRODUCED); + assertTrue("Checkpoints were not emitted upstream to primary cluster.", primary.kafka().consume(1, + CHECKPOINT_DURATION_MS, "backup.checkpoints.internal").count() > 0); + + primaryConsumer.close(); + + waitForCondition(() -> { + try { + return primaryClient.remoteConsumerOffsets(consumerGroupName, BACKUP_CLUSTER_ALIAS, + Duration.ofMillis(CHECKPOINT_DURATION_MS)).containsKey(new TopicPartition("backup.test-topic-1", 0)); + } catch (Throwable e) { + return false; + } + }, CHECKPOINT_DURATION_MS, "Offsets not translated downstream to primary cluster."); + + waitForCondition(() -> { + try { + return primaryClient.remoteConsumerOffsets(consumerGroupName, BACKUP_CLUSTER_ALIAS, + Duration.ofMillis(CHECKPOINT_DURATION_MS)).containsKey(new TopicPartition("test-topic-1", 0)); + } catch (Throwable e) { + return false; + } + }, CHECKPOINT_DURATION_MS, "Offsets not translated upstream to primary cluster."); + + Map<TopicPartition, OffsetAndMetadata> primaryOffsets = primaryClient.remoteConsumerOffsets(consumerGroupName, BACKUP_CLUSTER_ALIAS, + Duration.ofMillis(CHECKPOINT_DURATION_MS)); + + // Failback consumer group to primary cluster + backupConsumer = primary.kafka().createConsumer(Collections.singletonMap("group.id", consumerGroupName)); + backupConsumer.assign(primaryOffsets.keySet()); + primaryOffsets.forEach(backupConsumer::seek); + backupConsumer.poll(Duration.ofMillis(CONSUMER_POLL_TIMEOUT_MS)); + backupConsumer.commitAsync(); + + assertTrue("Consumer failedback to zero upstream offset.", backupConsumer.position(new TopicPartition("test-topic-1", 0)) > 0); + assertTrue("Consumer failedback to zero downstream offset.", backupConsumer.position(new TopicPartition("backup.test-topic-1", 0)) > 0); + assertTrue("Consumer failedback beyond expected upstream offset.", backupConsumer.position( + new TopicPartition("test-topic-1", 0)) <= NUM_RECORDS_PRODUCED); + assertTrue("Consumer failedback beyond expected downstream offset.", backupConsumer.position( + new TopicPartition("backup.test-topic-1", 0)) <= NUM_RECORDS_PRODUCED); + + backupConsumer.close(); + + // create more matching topics + primary.kafka().createTopic("test-topic-2", NUM_PARTITIONS); + backup.kafka().createTopic("test-topic-3", NUM_PARTITIONS); + + // only produce messages to the first partition + produceMessages(primary, "test-topic-2", 1); + produceMessages(backup, "test-topic-3", 1); + + // expect total consumed messages equals to NUM_RECORDS_PER_PARTITION + assertEquals("Records were not produced to primary cluster.", NUM_RECORDS_PER_PARTITION, + primary.kafka().consume(NUM_RECORDS_PER_PARTITION, RECORD_TRANSFER_DURATION_MS, "test-topic-2").count()); + assertEquals("Records were not produced to backup cluster.", NUM_RECORDS_PER_PARTITION, + backup.kafka().consume(NUM_RECORDS_PER_PARTITION, RECORD_TRANSFER_DURATION_MS, "test-topic-3").count()); + + assertEquals("New topic was not replicated to primary cluster.", NUM_RECORDS_PER_PARTITION, + primary.kafka().consume(NUM_RECORDS_PER_PARTITION, 2 * RECORD_TRANSFER_DURATION_MS, "backup.test-topic-3").count()); + assertEquals("New topic was not replicated to backup cluster.", NUM_RECORDS_PER_PARTITION, + backup.kafka().consume(NUM_RECORDS_PER_PARTITION, 2 * RECORD_TRANSFER_DURATION_MS, "primary.test-topic-2").count()); + + } + + @Test + public void testReplicationWithEmptyPartition() throws Exception { + String consumerGroupName = "consumer-group-testReplicationWithEmptyPartition"; + Map<String, Object> consumerProps = Collections.singletonMap("group.id", consumerGroupName); + + // create topic + String topic = "test-topic-with-empty-partition"; + primary.kafka().createTopic(topic, NUM_PARTITIONS); + + // produce to all test-topic-empty's partitions, except the last partition + produceMessages(primary, topic, NUM_PARTITIONS - 1); + + // consume before starting the connectors so we don't need to wait for discovery + int expectedRecords = NUM_RECORDS_PER_PARTITION * (NUM_PARTITIONS - 1); + try (Consumer<byte[], byte[]> primaryConsumer = primary.kafka().createConsumerAndSubscribeTo(consumerProps, topic)) { + waitForConsumingAllRecords(primaryConsumer, expectedRecords); + } + + // one way replication from primary to backup + mm2Props.put(BACKUP_CLUSTER_ALIAS + "->" + PRIMARY_CLUSTER_ALIAS + ".enabled", "false"); + mm2Config = new MirrorMakerConfig(mm2Props); + waitUntilMirrorMakerIsRunning(backup, CONNECTOR_LIST, mm2Config, PRIMARY_CLUSTER_ALIAS, BACKUP_CLUSTER_ALIAS); + + // sleep few seconds to have MM2 finish replication so that "end" consumer will consume some record + Thread.sleep(TimeUnit.SECONDS.toMillis(3)); + + // consume all records from backup cluster + try (Consumer<byte[], byte[]> backupConsumer = backup.kafka().createConsumerAndSubscribeTo(consumerProps, + PRIMARY_CLUSTER_ALIAS + "." + topic)) { + waitForConsumingAllRecords(backupConsumer, expectedRecords); + } + + Admin backupClient = backup.kafka().createAdminClient(); + // retrieve the consumer group offset from backup cluster + Map<TopicPartition, OffsetAndMetadata> remoteOffsets = + backupClient.listConsumerGroupOffsets(consumerGroupName).partitionsToOffsetAndMetadata().get(); + // pinpoint the offset of the last partition which does not receive records + OffsetAndMetadata offset = remoteOffsets.get(new TopicPartition(PRIMARY_CLUSTER_ALIAS + "." + topic, NUM_PARTITIONS - 1)); + // offset of the last partition should exist, but its value should be 0 + assertNotNull("Offset of last partition was not replicated", offset); + assertEquals("Offset of last partition is not zero", 0, offset.offset()); + } + + @Test + public void testOneWayReplicationWithAutoOffsetSync() throws InterruptedException { + produceMessages(primary, "test-topic-1"); + String consumerGroupName = "consumer-group-testOneWayReplicationWithAutoOffsetSync"; + Map<String, Object> consumerProps = new HashMap<String, Object>() {{ + put("group.id", consumerGroupName); + put("auto.offset.reset", "earliest"); + }}; + // create consumers before starting the connectors so we don't need to wait for discovery + try (Consumer<byte[], byte[]> primaryConsumer = primary.kafka().createConsumerAndSubscribeTo(consumerProps, + "test-topic-1")) { + // we need to wait for consuming all the records for MM2 replicating the expected offsets + waitForConsumingAllRecords(primaryConsumer, NUM_RECORDS_PRODUCED); + } + + // enable automated consumer group offset sync + mm2Props.put("sync.group.offsets.enabled", "true"); + mm2Props.put("sync.group.offsets.interval.seconds", "1"); + // one way replication from primary to backup + mm2Props.put(BACKUP_CLUSTER_ALIAS + "->" + PRIMARY_CLUSTER_ALIAS + ".enabled", "false"); + + mm2Config = new MirrorMakerConfig(mm2Props); + + waitUntilMirrorMakerIsRunning(backup, CONNECTOR_LIST, mm2Config, PRIMARY_CLUSTER_ALIAS, BACKUP_CLUSTER_ALIAS); + + // create a consumer at backup cluster with same consumer group Id to consume 1 topic + Consumer<byte[], byte[]> backupConsumer = backup.kafka().createConsumerAndSubscribeTo( + consumerProps, "primary.test-topic-1"); + + waitForConsumerGroupOffsetSync(backup, backupConsumer, Collections.singletonList("primary.test-topic-1"), + consumerGroupName, NUM_RECORDS_PRODUCED); + + ConsumerRecords<byte[], byte[]> records = backupConsumer.poll(Duration.ofMillis(CONSUMER_POLL_TIMEOUT_MS)); + + // the size of consumer record should be zero, because the offsets of the same consumer group + // have been automatically synchronized from primary to backup by the background job, so no + // more records to consume from the replicated topic by the same consumer group at backup cluster + assertEquals("consumer record size is not zero", 0, records.count()); + + // now create a new topic in primary cluster + primary.kafka().createTopic("test-topic-2", NUM_PARTITIONS); + backup.kafka().createTopic("primary.test-topic-2", 1); + // produce some records to the new topic in primary cluster + produceMessages(primary, "test-topic-2"); + + // create a consumer at primary cluster to consume the new topic + try (Consumer<byte[], byte[]> consumer1 = primary.kafka().createConsumerAndSubscribeTo(Collections.singletonMap( + "group.id", "consumer-group-1"), "test-topic-2")) { + // we need to wait for consuming all the records for MM2 replicating the expected offsets + waitForConsumingAllRecords(consumer1, NUM_RECORDS_PRODUCED); + } + + // create a consumer at backup cluster with same consumer group Id to consume old and new topic + backupConsumer = backup.kafka().createConsumerAndSubscribeTo(Collections.singletonMap( + "group.id", consumerGroupName), "primary.test-topic-1", "primary.test-topic-2"); + + waitForConsumerGroupOffsetSync(backup, backupConsumer, Arrays.asList("primary.test-topic-1", "primary.test-topic-2"), + consumerGroupName, NUM_RECORDS_PRODUCED); + + records = backupConsumer.poll(Duration.ofMillis(CONSUMER_POLL_TIMEOUT_MS)); + // similar reasoning as above, no more records to consume by the same consumer group at backup cluster + assertEquals("consumer record size is not zero", 0, records.count()); + backupConsumer.close(); + } + + /* + * launch the connectors on kafka connect cluster and check if they are running + */ + private static void waitUntilMirrorMakerIsRunning(EmbeddedConnectCluster connectCluster, + List<Class<? extends Connector>> connectorClasses, MirrorMakerConfig mm2Config, + String primary, String backup) throws InterruptedException { + for (int i = 0; i < connectorClasses.size(); i++) { + String connector = connectorClasses.get(i).getSimpleName(); + connectCluster.configureConnector(connector, mm2Config.connectorBaseConfig( + new SourceAndTarget(primary, backup), connectorClasses.get(i))); + } + + // we wait for the connector and tasks to come up for each connector, so that when we do the + // actual testing, we are certain that the tasks are up and running; this will prevent + // flaky tests where the connector and tasks didn't start up in time for the tests to be run + List<String> connectorNames = connectorClasses.stream().map(x -> x.getSimpleName()) + .collect(Collectors.toList()); + for (String connector : connectorNames) { + connectCluster.assertions().assertConnectorAndAtLeastNumTasksAreRunning(connector, 1, + "Connector " + connector + " tasks did not start in time on cluster: " + connectCluster); + } + } + + /* + * delete all topics of the input kafka cluster + */ + private static void deleteAllTopics(EmbeddedKafkaCluster cluster) { + Admin client = cluster.createAdminClient(); + try { + client.deleteTopics(client.listTopics().names().get()); + } catch (Throwable e) { + // should not run into exception normally. In case of Exception, + // simply fail the test and investigate + } + } + + /* + * retrieve the config value based on the input cluster, topic and config name + */ + private static String getTopicConfig(EmbeddedKafkaCluster cluster, String topic, String configName) { + Admin client = cluster.createAdminClient(); + Collection<ConfigResource> cr = Collections.singleton( + new ConfigResource(ConfigResource.Type.TOPIC, topic)); + try { + DescribeConfigsResult configsResult = client.describeConfigs(cr); + Config allConfigs = (Config) configsResult.all().get().values().toArray()[0]; + Iterator<ConfigEntry> configIterator = allConfigs.entries().iterator(); + while (configIterator.hasNext()) { + ConfigEntry currentConfig = configIterator.next(); + if (currentConfig.name().equals(configName)) { + return currentConfig.value(); + } + } + } catch (Throwable e) { + // should not run into exception normally. In case of Exception, + // simply fail the test and investigate + } + return null; + } + + /* + * produce messages to the cluster and topic + */ + protected void produceMessages(EmbeddedConnectCluster cluster, String topicName) { + Map<String, String> recordSent = generateRecords(NUM_RECORDS_PRODUCED); + for (Map.Entry<String, String> entry : recordSent.entrySet()) { + cluster.kafka().produce(topicName, entry.getKey(), entry.getValue()); + } + } + + /* + * produce messages to the cluster and topic partition less than numPartitions + */ + protected void produceMessages(EmbeddedConnectCluster cluster, String topicName, int numPartitions) { + int cnt = 0; + for (int r = 0; r < NUM_RECORDS_PER_PARTITION; r++) + for (int p = 0; p < numPartitions; p++) + cluster.kafka().produce(topicName, p, "key", "value-" + cnt++); + } + + /* + * given consumer group, topics and expected number of records, make sure the consumer group + * offsets are eventually synced to the expected offset numbers + */ + private static <T> void waitForConsumerGroupOffsetSync(EmbeddedConnectCluster connect, + Consumer<T, T> consumer, List<String> topics, String consumerGroupId, int numRecords) + throws InterruptedException { + Admin adminClient = connect.kafka().createAdminClient(); + List<TopicPartition> tps = new ArrayList<>(NUM_PARTITIONS * topics.size()); + for (int partitionIndex = 0; partitionIndex < NUM_PARTITIONS; partitionIndex++) { + for (String topic : topics) { + tps.add(new TopicPartition(topic, partitionIndex)); + } + } + long expectedTotalOffsets = numRecords * topics.size(); + + waitForCondition(() -> { + Map<TopicPartition, OffsetAndMetadata> consumerGroupOffsets = + adminClient.listConsumerGroupOffsets(consumerGroupId).partitionsToOffsetAndMetadata().get(); + long consumerGroupOffsetTotal = consumerGroupOffsets.values().stream() + .mapToLong(metadata -> metadata.offset()).sum(); + + Map<TopicPartition, Long> offsets = consumer.endOffsets(tps, Duration.ofMillis(CONSUMER_POLL_TIMEOUT_MS)); + long totalOffsets = offsets.values().stream().mapToLong(l -> l).sum(); + + // make sure the consumer group offsets are synced to expected number + return totalOffsets == expectedTotalOffsets && consumerGroupOffsetTotal > 0; + }, OFFSET_SYNC_DURATION_MS, "Consumer group offset sync is not complete in time"); + } + + /* + * make sure the consumer to consume expected number of records + */ + private static <T> void waitForConsumingAllRecords(Consumer<T, T> consumer, int numExpectedRecords) + throws InterruptedException { + final AtomicInteger totalConsumedRecords = new AtomicInteger(0); + waitForCondition(() -> { + ConsumerRecords<T, T> records = consumer.poll(Duration.ofMillis(CONSUMER_POLL_TIMEOUT_MS)); + return numExpectedRecords == totalConsumedRecords.addAndGet(records.count()); + }, RECORD_CONSUME_DURATION_MS, "Consumer cannot consume all records in time"); + consumer.commitSync(); + consumer.close(); Review comment: Yes but the code that created the consumer should close it. If I call `waitForConsumingAllRecords()`, I'd not expect it to close my consumer instance. ########## File path: connect/mirror/src/test/java/org/apache/kafka/connect/mirror/integration/MirrorConnectorsIntegrationBaseTest.java ########## @@ -0,0 +1,577 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.kafka.connect.mirror.integration; + +import org.apache.kafka.clients.admin.Admin; +import org.apache.kafka.clients.admin.Config; +import org.apache.kafka.clients.admin.ConfigEntry; +import org.apache.kafka.clients.admin.DescribeConfigsResult; +import org.apache.kafka.clients.consumer.Consumer; +import org.apache.kafka.clients.consumer.ConsumerRecords; +import org.apache.kafka.clients.consumer.OffsetAndMetadata; +import org.apache.kafka.common.config.ConfigResource; +import org.apache.kafka.common.config.TopicConfig; +import org.apache.kafka.common.utils.Exit; +import org.apache.kafka.common.TopicPartition; +import org.apache.kafka.connect.connector.Connector; +import org.apache.kafka.connect.mirror.MirrorClient; +import org.apache.kafka.connect.mirror.MirrorHeartbeatConnector; +import org.apache.kafka.connect.mirror.MirrorMakerConfig; +import org.apache.kafka.connect.mirror.MirrorSourceConnector; +import org.apache.kafka.connect.mirror.SourceAndTarget; +import org.apache.kafka.connect.mirror.MirrorCheckpointConnector; +import org.apache.kafka.connect.util.clusters.EmbeddedConnectCluster; +import org.apache.kafka.connect.util.clusters.EmbeddedKafkaCluster; +import org.apache.kafka.test.IntegrationTest; +import static org.apache.kafka.test.TestUtils.waitForCondition; + +import java.time.Duration; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.List; +import java.util.Collections; +import java.util.HashMap; +import java.util.Iterator; +import java.util.Map; +import java.util.Properties; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.stream.Collectors; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.assertNotNull; +import org.junit.Test; +import org.junit.experimental.categories.Category; +import org.junit.After; +import org.junit.Before; + +import static org.apache.kafka.connect.mirror.TestUtils.generateRecords; + +/** + * Tests MM2 replication and failover/failback logic. + * + * MM2 is configured with active/active replication between two Kafka clusters. Tests validate that + * records sent to either cluster arrive at the other cluster. Then, a consumer group is migrated from + * one cluster to the other and back. Tests validate that consumer offsets are translated and replicated + * between clusters during this failover and failback. + */ +@Category(IntegrationTest.class) +public abstract class MirrorConnectorsIntegrationBaseTest { + private static final Logger log = LoggerFactory.getLogger(MirrorConnectorsIntegrationBaseTest.class); + + private static final int NUM_RECORDS_PER_PARTITION = 10; + private static final int NUM_PARTITIONS = 10; + private static final int NUM_RECORDS_PRODUCED = NUM_PARTITIONS * NUM_RECORDS_PER_PARTITION; + private static final int RECORD_TRANSFER_DURATION_MS = 30_000; + private static final int CHECKPOINT_DURATION_MS = 20_000; + private static final int RECORD_CONSUME_DURATION_MS = 20_000; + private static final int OFFSET_SYNC_DURATION_MS = 30_000; + private static final int NUM_WORKERS = 3; + private static final Duration CONSUMER_POLL_TIMEOUT_MS = Duration.ofMillis(500); + private static final int BROKER_RESTART_TIMEOUT_MS = 10_000; + protected static final String PRIMARY_CLUSTER_ALIAS = "primary"; + protected static final String BACKUP_CLUSTER_ALIAS = "backup"; + private static final List<Class<? extends Connector>> CONNECTOR_LIST = + Arrays.asList(MirrorSourceConnector.class, MirrorCheckpointConnector.class, MirrorHeartbeatConnector.class); + + protected Map<String, String> mm2Props = new HashMap<>(); + private MirrorMakerConfig mm2Config; + private EmbeddedConnectCluster primary; + private EmbeddedConnectCluster backup; + + private final AtomicBoolean exited = new AtomicBoolean(false); + protected Properties primaryBrokerProps = new Properties(); + protected Properties backupBrokerProps = new Properties(); + protected Map<String, String> primaryWorkerProps = new HashMap<>(); + protected Map<String, String> backupWorkerProps = new HashMap<>(); + + @Before + public void startClusters() throws InterruptedException { + primaryBrokerProps.put("auto.create.topics.enable", "false"); + backupBrokerProps.put("auto.create.topics.enable", "false"); + + mm2Props.putAll(basicMM2Config()); + + mm2Config = new MirrorMakerConfig(mm2Props); + primaryWorkerProps = mm2Config.workerConfig(new SourceAndTarget(BACKUP_CLUSTER_ALIAS, PRIMARY_CLUSTER_ALIAS)); + backupWorkerProps.putAll(mm2Config.workerConfig(new SourceAndTarget(PRIMARY_CLUSTER_ALIAS, BACKUP_CLUSTER_ALIAS))); + + primary = new EmbeddedConnectCluster.Builder() + .name(PRIMARY_CLUSTER_ALIAS + "-connect-cluster") + .numWorkers(NUM_WORKERS) + .numBrokers(1) + .brokerProps(primaryBrokerProps) + .workerProps(primaryWorkerProps) + .build(); + + backup = new EmbeddedConnectCluster.Builder() + .name(BACKUP_CLUSTER_ALIAS + "-connect-cluster") + .numWorkers(NUM_WORKERS) + .numBrokers(1) + .brokerProps(backupBrokerProps) + .workerProps(backupWorkerProps) + .build(); + + primary.start(); + primary.assertions().assertAtLeastNumWorkersAreUp(NUM_WORKERS, + "Workers of " + PRIMARY_CLUSTER_ALIAS + "-connect-cluster did not start in time."); + + backup.start(); + backup.assertions().assertAtLeastNumWorkersAreUp(NUM_WORKERS, + "Workers of " + BACKUP_CLUSTER_ALIAS + "-connect-cluster did not start in time."); + + createTopics(); + + warmUpConsumer(); + + log.info(PRIMARY_CLUSTER_ALIAS + " REST service: {}", primary.endpointForResource("connectors")); + log.info(BACKUP_CLUSTER_ALIAS + " REST service: {}", backup.endpointForResource("connectors")); + log.info(PRIMARY_CLUSTER_ALIAS + " brokers: {}", primary.kafka().bootstrapServers()); + log.info(BACKUP_CLUSTER_ALIAS + " brokers: {}", backup.kafka().bootstrapServers()); + + // now that the brokers are running, we can finish setting up the Connectors + mm2Props.put(PRIMARY_CLUSTER_ALIAS + ".bootstrap.servers", primary.kafka().bootstrapServers()); + mm2Props.put(BACKUP_CLUSTER_ALIAS + ".bootstrap.servers", backup.kafka().bootstrapServers()); + + Exit.setExitProcedure((status, errorCode) -> exited.set(true)); + } + + @After + public void shutdownClusters() throws Exception { + for (String x : primary.connectors()) { + primary.deleteConnector(x); + } + for (String x : backup.connectors()) { + backup.deleteConnector(x); + } + deleteAllTopics(primary.kafka()); + deleteAllTopics(backup.kafka()); + primary.stop(); + backup.stop(); + try { + assertFalse(exited.get()); + } finally { + Exit.resetExitProcedure(); + } + } + + @Test + public void testReplication() throws Exception { + produceMessages(primary, "test-topic-1"); + produceMessages(backup, "test-topic-1"); + String consumerGroupName = "consumer-group-testReplication"; + Map<String, Object> consumerProps = new HashMap<String, Object>() {{ + put("group.id", consumerGroupName); + put("auto.offset.reset", "latest"); + }}; + // create consumers before starting the connectors so we don't need to wait for discovery + waitForConsumingAllRecords(primary.kafka().createConsumerAndSubscribeTo(consumerProps, "test-topic-1"), 0); + waitForConsumingAllRecords(backup.kafka().createConsumerAndSubscribeTo(consumerProps, "test-topic-1"), 0); + + mm2Config = new MirrorMakerConfig(mm2Props); + + waitUntilMirrorMakerIsRunning(backup, CONNECTOR_LIST, mm2Config, PRIMARY_CLUSTER_ALIAS, BACKUP_CLUSTER_ALIAS); + waitUntilMirrorMakerIsRunning(primary, CONNECTOR_LIST, mm2Config, BACKUP_CLUSTER_ALIAS, PRIMARY_CLUSTER_ALIAS); + + MirrorClient primaryClient = new MirrorClient(mm2Config.clientConfig(PRIMARY_CLUSTER_ALIAS)); + MirrorClient backupClient = new MirrorClient(mm2Config.clientConfig(BACKUP_CLUSTER_ALIAS)); + + assertEquals("topic config was not synced", TopicConfig.CLEANUP_POLICY_COMPACT, + getTopicConfig(backup.kafka(), "primary.test-topic-1", TopicConfig.CLEANUP_POLICY_CONFIG)); + + assertEquals("Records were not produced to primary cluster.", NUM_RECORDS_PRODUCED, + primary.kafka().consume(NUM_RECORDS_PRODUCED, RECORD_TRANSFER_DURATION_MS, "test-topic-1").count()); + assertEquals("Records were not replicated to backup cluster.", NUM_RECORDS_PRODUCED, + backup.kafka().consume(NUM_RECORDS_PRODUCED, RECORD_TRANSFER_DURATION_MS, "primary.test-topic-1").count()); + assertEquals("Records were not produced to backup cluster.", NUM_RECORDS_PRODUCED, + backup.kafka().consume(NUM_RECORDS_PRODUCED, RECORD_TRANSFER_DURATION_MS, "test-topic-1").count()); + assertEquals("Records were not replicated to primary cluster.", NUM_RECORDS_PRODUCED, + primary.kafka().consume(NUM_RECORDS_PRODUCED, RECORD_TRANSFER_DURATION_MS, "backup.test-topic-1").count()); + + assertEquals("Primary cluster doesn't have all records from both clusters.", NUM_RECORDS_PRODUCED * 2, + primary.kafka().consume(NUM_RECORDS_PRODUCED * 2, RECORD_TRANSFER_DURATION_MS, "backup.test-topic-1", "test-topic-1").count()); + assertEquals("Backup cluster doesn't have all records from both clusters.", NUM_RECORDS_PRODUCED * 2, + backup.kafka().consume(NUM_RECORDS_PRODUCED * 2, RECORD_TRANSFER_DURATION_MS, "primary.test-topic-1", "test-topic-1").count()); + + assertTrue("Heartbeats were not emitted to primary cluster.", primary.kafka().consume(1, + RECORD_TRANSFER_DURATION_MS, "heartbeats").count() > 0); + assertTrue("Heartbeats were not emitted to backup cluster.", backup.kafka().consume(1, + RECORD_TRANSFER_DURATION_MS, "heartbeats").count() > 0); + assertTrue("Heartbeats were not replicated downstream to backup cluster.", backup.kafka().consume(1, + RECORD_TRANSFER_DURATION_MS, "primary.heartbeats").count() > 0); + assertTrue("Heartbeats were not replicated downstream to primary cluster.", primary.kafka().consume(1, + RECORD_TRANSFER_DURATION_MS, "backup.heartbeats").count() > 0); + + assertTrue("Did not find upstream primary cluster.", backupClient.upstreamClusters().contains(PRIMARY_CLUSTER_ALIAS)); + assertEquals("Did not calculate replication hops correctly.", 1, backupClient.replicationHops(PRIMARY_CLUSTER_ALIAS)); + assertTrue("Did not find upstream backup cluster.", primaryClient.upstreamClusters().contains(BACKUP_CLUSTER_ALIAS)); + assertEquals("Did not calculate replication hops correctly.", 1, primaryClient.replicationHops(BACKUP_CLUSTER_ALIAS)); + assertTrue("Checkpoints were not emitted downstream to backup cluster.", backup.kafka().consume(1, + CHECKPOINT_DURATION_MS, "primary.checkpoints.internal").count() > 0); + + Map<TopicPartition, OffsetAndMetadata> backupOffsets = backupClient.remoteConsumerOffsets(consumerGroupName, PRIMARY_CLUSTER_ALIAS, + Duration.ofMillis(CHECKPOINT_DURATION_MS)); + + assertTrue("Offsets not translated downstream to backup cluster. Found: " + backupOffsets, backupOffsets.containsKey( + new TopicPartition("primary.test-topic-1", 0))); + + // Failover consumer group to backup cluster. + try (Consumer<byte[], byte[]> primaryConsumer = backup.kafka().createConsumer(Collections.singletonMap("group.id", consumerGroupName))) { + primaryConsumer.assign(backupOffsets.keySet()); + backupOffsets.forEach(primaryConsumer::seek); + primaryConsumer.poll(CONSUMER_POLL_TIMEOUT_MS); + primaryConsumer.commitAsync(); + + assertTrue("Consumer failedover to zero offset.", primaryConsumer.position(new TopicPartition("primary.test-topic-1", 0)) > 0); + assertTrue("Consumer failedover beyond expected offset.", primaryConsumer.position( + new TopicPartition("primary.test-topic-1", 0)) <= NUM_RECORDS_PRODUCED); + assertTrue("Checkpoints were not emitted upstream to primary cluster.", primary.kafka().consume(1, + CHECKPOINT_DURATION_MS, "backup.checkpoints.internal").count() > 0); + } + + waitForCondition(() -> { + return primaryClient.remoteConsumerOffsets(consumerGroupName, BACKUP_CLUSTER_ALIAS, + Duration.ofMillis(CHECKPOINT_DURATION_MS)).containsKey(new TopicPartition("backup.test-topic-1", 0)); + }, CHECKPOINT_DURATION_MS, "Offsets not translated downstream to primary cluster."); + + waitForCondition(() -> { + return primaryClient.remoteConsumerOffsets(consumerGroupName, BACKUP_CLUSTER_ALIAS, + Duration.ofMillis(CHECKPOINT_DURATION_MS)).containsKey(new TopicPartition("test-topic-1", 0)); + }, CHECKPOINT_DURATION_MS, "Offsets not translated upstream to primary cluster."); + + Map<TopicPartition, OffsetAndMetadata> primaryOffsets = primaryClient.remoteConsumerOffsets(consumerGroupName, BACKUP_CLUSTER_ALIAS, + Duration.ofMillis(CHECKPOINT_DURATION_MS)); + + // Failback consumer group to primary cluster + try (Consumer<byte[], byte[]> backupConsumer = primary.kafka().createConsumer(Collections.singletonMap("group.id", consumerGroupName))) { + backupConsumer.assign(primaryOffsets.keySet()); + primaryOffsets.forEach(backupConsumer::seek); + backupConsumer.poll(CONSUMER_POLL_TIMEOUT_MS); + backupConsumer.commitAsync(); + + assertTrue("Consumer failedback to zero upstream offset.", backupConsumer.position(new TopicPartition("test-topic-1", 0)) > 0); + assertTrue("Consumer failedback to zero downstream offset.", backupConsumer.position(new TopicPartition("backup.test-topic-1", 0)) > 0); + assertTrue("Consumer failedback beyond expected upstream offset.", backupConsumer.position( + new TopicPartition("test-topic-1", 0)) <= NUM_RECORDS_PRODUCED); + assertTrue("Consumer failedback beyond expected downstream offset.", backupConsumer.position( + new TopicPartition("backup.test-topic-1", 0)) <= NUM_RECORDS_PRODUCED); + } + + // create more matching topics + primary.kafka().createTopic("test-topic-2", NUM_PARTITIONS); + backup.kafka().createTopic("test-topic-3", NUM_PARTITIONS); + + // only produce messages to the first partition + produceMessages(primary, "test-topic-2", 1); + produceMessages(backup, "test-topic-3", 1); + + // expect total consumed messages equals to NUM_RECORDS_PER_PARTITION + assertEquals("Records were not produced to primary cluster.", NUM_RECORDS_PER_PARTITION, + primary.kafka().consume(NUM_RECORDS_PER_PARTITION, RECORD_TRANSFER_DURATION_MS, "test-topic-2").count()); + assertEquals("Records were not produced to backup cluster.", NUM_RECORDS_PER_PARTITION, + backup.kafka().consume(NUM_RECORDS_PER_PARTITION, RECORD_TRANSFER_DURATION_MS, "test-topic-3").count()); + + assertEquals("New topic was not replicated to primary cluster.", NUM_RECORDS_PER_PARTITION, + primary.kafka().consume(NUM_RECORDS_PER_PARTITION, 2 * RECORD_TRANSFER_DURATION_MS, "backup.test-topic-3").count()); + assertEquals("New topic was not replicated to backup cluster.", NUM_RECORDS_PER_PARTITION, + backup.kafka().consume(NUM_RECORDS_PER_PARTITION, 2 * RECORD_TRANSFER_DURATION_MS, "primary.test-topic-2").count()); + + } + + @Test + public void testReplicationWithEmptyPartition() throws Exception { + String consumerGroupName = "consumer-group-testReplicationWithEmptyPartition"; + Map<String, Object> consumerProps = Collections.singletonMap("group.id", consumerGroupName); + + // create topic + String topic = "test-topic-with-empty-partition"; + primary.kafka().createTopic(topic, NUM_PARTITIONS); + + // produce to all test-topic-empty's partitions, except the last partition + produceMessages(primary, topic, NUM_PARTITIONS - 1); + + // consume before starting the connectors so we don't need to wait for discovery + int expectedRecords = NUM_RECORDS_PER_PARTITION * (NUM_PARTITIONS - 1); + try (Consumer<byte[], byte[]> primaryConsumer = primary.kafka().createConsumerAndSubscribeTo(consumerProps, topic)) { + waitForConsumingAllRecords(primaryConsumer, expectedRecords); + } + + // one way replication from primary to backup + mm2Props.put(BACKUP_CLUSTER_ALIAS + "->" + PRIMARY_CLUSTER_ALIAS + ".enabled", "false"); + mm2Config = new MirrorMakerConfig(mm2Props); + waitUntilMirrorMakerIsRunning(backup, CONNECTOR_LIST, mm2Config, PRIMARY_CLUSTER_ALIAS, BACKUP_CLUSTER_ALIAS); + + // sleep few seconds to have MM2 finish replication so that "end" consumer will consume some record + Thread.sleep(TimeUnit.SECONDS.toMillis(3)); + + // consume all records from backup cluster + try (Consumer<byte[], byte[]> backupConsumer = backup.kafka().createConsumerAndSubscribeTo(consumerProps, + PRIMARY_CLUSTER_ALIAS + "." + topic)) { + waitForConsumingAllRecords(backupConsumer, expectedRecords); + } + + Admin backupClient = backup.kafka().createAdminClient(); + // retrieve the consumer group offset from backup cluster + Map<TopicPartition, OffsetAndMetadata> remoteOffsets = + backupClient.listConsumerGroupOffsets(consumerGroupName).partitionsToOffsetAndMetadata().get(); + // pinpoint the offset of the last partition which does not receive records + OffsetAndMetadata offset = remoteOffsets.get(new TopicPartition(PRIMARY_CLUSTER_ALIAS + "." + topic, NUM_PARTITIONS - 1)); + // offset of the last partition should exist, but its value should be 0 + assertNotNull("Offset of last partition was not replicated", offset); + assertEquals("Offset of last partition is not zero", 0, offset.offset()); + } + + @Test + public void testOneWayReplicationWithAutoOffsetSync() throws InterruptedException { + produceMessages(primary, "test-topic-1"); + String consumerGroupName = "consumer-group-testOneWayReplicationWithAutoOffsetSync"; + Map<String, Object> consumerProps = new HashMap<String, Object>() {{ + put("group.id", consumerGroupName); + put("auto.offset.reset", "earliest"); + }}; + // create consumers before starting the connectors so we don't need to wait for discovery + try (Consumer<byte[], byte[]> primaryConsumer = primary.kafka().createConsumerAndSubscribeTo(consumerProps, + "test-topic-1")) { + // we need to wait for consuming all the records for MM2 replicating the expected offsets + waitForConsumingAllRecords(primaryConsumer, NUM_RECORDS_PRODUCED); + } + + // enable automated consumer group offset sync + mm2Props.put("sync.group.offsets.enabled", "true"); + mm2Props.put("sync.group.offsets.interval.seconds", "1"); + // one way replication from primary to backup + mm2Props.put(BACKUP_CLUSTER_ALIAS + "->" + PRIMARY_CLUSTER_ALIAS + ".enabled", "false"); + + mm2Config = new MirrorMakerConfig(mm2Props); + + waitUntilMirrorMakerIsRunning(backup, CONNECTOR_LIST, mm2Config, PRIMARY_CLUSTER_ALIAS, BACKUP_CLUSTER_ALIAS); + + // create a consumer at backup cluster with same consumer group Id to consume 1 topic + Consumer<byte[], byte[]> backupConsumer = backup.kafka().createConsumerAndSubscribeTo( + consumerProps, "primary.test-topic-1"); + + waitForConsumerGroupOffsetSync(backup, backupConsumer, Collections.singletonList("primary.test-topic-1"), + consumerGroupName, NUM_RECORDS_PRODUCED); + + ConsumerRecords<byte[], byte[]> records = backupConsumer.poll(CONSUMER_POLL_TIMEOUT_MS); + + // the size of consumer record should be zero, because the offsets of the same consumer group + // have been automatically synchronized from primary to backup by the background job, so no + // more records to consume from the replicated topic by the same consumer group at backup cluster + assertEquals("consumer record size is not zero", 0, records.count()); + + // now create a new topic in primary cluster + primary.kafka().createTopic("test-topic-2", NUM_PARTITIONS); + backup.kafka().createTopic("primary.test-topic-2", 1); + // produce some records to the new topic in primary cluster + produceMessages(primary, "test-topic-2"); + + // create a consumer at primary cluster to consume the new topic + try (Consumer<byte[], byte[]> consumer1 = primary.kafka().createConsumerAndSubscribeTo(Collections.singletonMap( + "group.id", "consumer-group-1"), "test-topic-2")) { + // we need to wait for consuming all the records for MM2 replicating the expected offsets + waitForConsumingAllRecords(consumer1, NUM_RECORDS_PRODUCED); + } + + // create a consumer at backup cluster with same consumer group Id to consume old and new topic + backupConsumer = backup.kafka().createConsumerAndSubscribeTo(Collections.singletonMap( + "group.id", consumerGroupName), "primary.test-topic-1", "primary.test-topic-2"); + + waitForConsumerGroupOffsetSync(backup, backupConsumer, Arrays.asList("primary.test-topic-1", "primary.test-topic-2"), + consumerGroupName, NUM_RECORDS_PRODUCED); + + records = backupConsumer.poll(CONSUMER_POLL_TIMEOUT_MS); + // similar reasoning as above, no more records to consume by the same consumer group at backup cluster + assertEquals("consumer record size is not zero", 0, records.count()); + backupConsumer.close(); + } + + /* + * launch the connectors on kafka connect cluster and check if they are running + */ + private static void waitUntilMirrorMakerIsRunning(EmbeddedConnectCluster connectCluster, + List<Class<? extends Connector>> connectorClasses, MirrorMakerConfig mm2Config, + String primary, String backup) throws InterruptedException { + for (int i = 0; i < connectorClasses.size(); i++) { + String connector = connectorClasses.get(i).getSimpleName(); + connectCluster.configureConnector(connector, mm2Config.connectorBaseConfig( + new SourceAndTarget(primary, backup), connectorClasses.get(i))); + } + + // we wait for the connector and tasks to come up for each connector, so that when we do the + // actual testing, we are certain that the tasks are up and running; this will prevent + // flaky tests where the connector and tasks didn't start up in time for the tests to be run + List<String> connectorNames = connectorClasses.stream().map(x -> x.getSimpleName()) + .collect(Collectors.toList()); + for (String connector : connectorNames) { + connectCluster.assertions().assertConnectorAndAtLeastNumTasksAreRunning(connector, 1, + "Connector " + connector + " tasks did not start in time on cluster: " + connectCluster); + } + } + + /* + * delete all topics of the input kafka cluster + */ + private static void deleteAllTopics(EmbeddedKafkaCluster cluster) throws Exception { + Admin client = cluster.createAdminClient(); + client.deleteTopics(client.listTopics().names().get()); + } + + /* + * retrieve the config value based on the input cluster, topic and config name + */ + private static String getTopicConfig(EmbeddedKafkaCluster cluster, String topic, String configName) + throws Exception { + Admin client = cluster.createAdminClient(); + Collection<ConfigResource> cr = Collections.singleton( + new ConfigResource(ConfigResource.Type.TOPIC, topic)); + + DescribeConfigsResult configsResult = client.describeConfigs(cr); + Config allConfigs = (Config) configsResult.all().get().values().toArray()[0]; Review comment: We can use `Config.get()` to directly access the configuration we want, see https://kafka.apache.org/26/javadoc/org/apache/kafka/clients/admin/Config.html#get-java.lang.String- ########## File path: connect/mirror/src/test/java/org/apache/kafka/connect/mirror/integration/MirrorConnectorsIntegrationBaseTest.java ########## @@ -0,0 +1,577 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.kafka.connect.mirror.integration; + +import org.apache.kafka.clients.admin.Admin; +import org.apache.kafka.clients.admin.Config; +import org.apache.kafka.clients.admin.ConfigEntry; +import org.apache.kafka.clients.admin.DescribeConfigsResult; +import org.apache.kafka.clients.consumer.Consumer; +import org.apache.kafka.clients.consumer.ConsumerRecords; +import org.apache.kafka.clients.consumer.OffsetAndMetadata; +import org.apache.kafka.common.config.ConfigResource; +import org.apache.kafka.common.config.TopicConfig; +import org.apache.kafka.common.utils.Exit; +import org.apache.kafka.common.TopicPartition; +import org.apache.kafka.connect.connector.Connector; +import org.apache.kafka.connect.mirror.MirrorClient; +import org.apache.kafka.connect.mirror.MirrorHeartbeatConnector; +import org.apache.kafka.connect.mirror.MirrorMakerConfig; +import org.apache.kafka.connect.mirror.MirrorSourceConnector; +import org.apache.kafka.connect.mirror.SourceAndTarget; +import org.apache.kafka.connect.mirror.MirrorCheckpointConnector; +import org.apache.kafka.connect.util.clusters.EmbeddedConnectCluster; +import org.apache.kafka.connect.util.clusters.EmbeddedKafkaCluster; +import org.apache.kafka.test.IntegrationTest; +import static org.apache.kafka.test.TestUtils.waitForCondition; + +import java.time.Duration; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.List; +import java.util.Collections; +import java.util.HashMap; +import java.util.Iterator; +import java.util.Map; +import java.util.Properties; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.stream.Collectors; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.assertNotNull; +import org.junit.Test; +import org.junit.experimental.categories.Category; +import org.junit.After; +import org.junit.Before; + +import static org.apache.kafka.connect.mirror.TestUtils.generateRecords; + +/** + * Tests MM2 replication and failover/failback logic. + * + * MM2 is configured with active/active replication between two Kafka clusters. Tests validate that + * records sent to either cluster arrive at the other cluster. Then, a consumer group is migrated from + * one cluster to the other and back. Tests validate that consumer offsets are translated and replicated + * between clusters during this failover and failback. + */ +@Category(IntegrationTest.class) +public abstract class MirrorConnectorsIntegrationBaseTest { + private static final Logger log = LoggerFactory.getLogger(MirrorConnectorsIntegrationBaseTest.class); + + private static final int NUM_RECORDS_PER_PARTITION = 10; + private static final int NUM_PARTITIONS = 10; + private static final int NUM_RECORDS_PRODUCED = NUM_PARTITIONS * NUM_RECORDS_PER_PARTITION; + private static final int RECORD_TRANSFER_DURATION_MS = 30_000; + private static final int CHECKPOINT_DURATION_MS = 20_000; + private static final int RECORD_CONSUME_DURATION_MS = 20_000; + private static final int OFFSET_SYNC_DURATION_MS = 30_000; + private static final int NUM_WORKERS = 3; + private static final Duration CONSUMER_POLL_TIMEOUT_MS = Duration.ofMillis(500); + private static final int BROKER_RESTART_TIMEOUT_MS = 10_000; + protected static final String PRIMARY_CLUSTER_ALIAS = "primary"; + protected static final String BACKUP_CLUSTER_ALIAS = "backup"; + private static final List<Class<? extends Connector>> CONNECTOR_LIST = + Arrays.asList(MirrorSourceConnector.class, MirrorCheckpointConnector.class, MirrorHeartbeatConnector.class); + + protected Map<String, String> mm2Props = new HashMap<>(); + private MirrorMakerConfig mm2Config; + private EmbeddedConnectCluster primary; + private EmbeddedConnectCluster backup; + + private final AtomicBoolean exited = new AtomicBoolean(false); + protected Properties primaryBrokerProps = new Properties(); + protected Properties backupBrokerProps = new Properties(); + protected Map<String, String> primaryWorkerProps = new HashMap<>(); + protected Map<String, String> backupWorkerProps = new HashMap<>(); + + @Before + public void startClusters() throws InterruptedException { + primaryBrokerProps.put("auto.create.topics.enable", "false"); + backupBrokerProps.put("auto.create.topics.enable", "false"); + + mm2Props.putAll(basicMM2Config()); + + mm2Config = new MirrorMakerConfig(mm2Props); + primaryWorkerProps = mm2Config.workerConfig(new SourceAndTarget(BACKUP_CLUSTER_ALIAS, PRIMARY_CLUSTER_ALIAS)); + backupWorkerProps.putAll(mm2Config.workerConfig(new SourceAndTarget(PRIMARY_CLUSTER_ALIAS, BACKUP_CLUSTER_ALIAS))); + + primary = new EmbeddedConnectCluster.Builder() + .name(PRIMARY_CLUSTER_ALIAS + "-connect-cluster") + .numWorkers(NUM_WORKERS) + .numBrokers(1) + .brokerProps(primaryBrokerProps) + .workerProps(primaryWorkerProps) + .build(); + + backup = new EmbeddedConnectCluster.Builder() + .name(BACKUP_CLUSTER_ALIAS + "-connect-cluster") + .numWorkers(NUM_WORKERS) + .numBrokers(1) + .brokerProps(backupBrokerProps) + .workerProps(backupWorkerProps) + .build(); + + primary.start(); + primary.assertions().assertAtLeastNumWorkersAreUp(NUM_WORKERS, + "Workers of " + PRIMARY_CLUSTER_ALIAS + "-connect-cluster did not start in time."); + + backup.start(); + backup.assertions().assertAtLeastNumWorkersAreUp(NUM_WORKERS, + "Workers of " + BACKUP_CLUSTER_ALIAS + "-connect-cluster did not start in time."); + + createTopics(); + + warmUpConsumer(); + + log.info(PRIMARY_CLUSTER_ALIAS + " REST service: {}", primary.endpointForResource("connectors")); + log.info(BACKUP_CLUSTER_ALIAS + " REST service: {}", backup.endpointForResource("connectors")); + log.info(PRIMARY_CLUSTER_ALIAS + " brokers: {}", primary.kafka().bootstrapServers()); + log.info(BACKUP_CLUSTER_ALIAS + " brokers: {}", backup.kafka().bootstrapServers()); + + // now that the brokers are running, we can finish setting up the Connectors + mm2Props.put(PRIMARY_CLUSTER_ALIAS + ".bootstrap.servers", primary.kafka().bootstrapServers()); + mm2Props.put(BACKUP_CLUSTER_ALIAS + ".bootstrap.servers", backup.kafka().bootstrapServers()); + + Exit.setExitProcedure((status, errorCode) -> exited.set(true)); + } + + @After + public void shutdownClusters() throws Exception { + for (String x : primary.connectors()) { + primary.deleteConnector(x); + } + for (String x : backup.connectors()) { + backup.deleteConnector(x); + } + deleteAllTopics(primary.kafka()); + deleteAllTopics(backup.kafka()); + primary.stop(); + backup.stop(); + try { + assertFalse(exited.get()); + } finally { + Exit.resetExitProcedure(); + } + } + + @Test + public void testReplication() throws Exception { + produceMessages(primary, "test-topic-1"); + produceMessages(backup, "test-topic-1"); + String consumerGroupName = "consumer-group-testReplication"; + Map<String, Object> consumerProps = new HashMap<String, Object>() {{ + put("group.id", consumerGroupName); + put("auto.offset.reset", "latest"); + }}; + // create consumers before starting the connectors so we don't need to wait for discovery + waitForConsumingAllRecords(primary.kafka().createConsumerAndSubscribeTo(consumerProps, "test-topic-1"), 0); + waitForConsumingAllRecords(backup.kafka().createConsumerAndSubscribeTo(consumerProps, "test-topic-1"), 0); + + mm2Config = new MirrorMakerConfig(mm2Props); + + waitUntilMirrorMakerIsRunning(backup, CONNECTOR_LIST, mm2Config, PRIMARY_CLUSTER_ALIAS, BACKUP_CLUSTER_ALIAS); + waitUntilMirrorMakerIsRunning(primary, CONNECTOR_LIST, mm2Config, BACKUP_CLUSTER_ALIAS, PRIMARY_CLUSTER_ALIAS); + + MirrorClient primaryClient = new MirrorClient(mm2Config.clientConfig(PRIMARY_CLUSTER_ALIAS)); + MirrorClient backupClient = new MirrorClient(mm2Config.clientConfig(BACKUP_CLUSTER_ALIAS)); + + assertEquals("topic config was not synced", TopicConfig.CLEANUP_POLICY_COMPACT, + getTopicConfig(backup.kafka(), "primary.test-topic-1", TopicConfig.CLEANUP_POLICY_CONFIG)); + + assertEquals("Records were not produced to primary cluster.", NUM_RECORDS_PRODUCED, + primary.kafka().consume(NUM_RECORDS_PRODUCED, RECORD_TRANSFER_DURATION_MS, "test-topic-1").count()); + assertEquals("Records were not replicated to backup cluster.", NUM_RECORDS_PRODUCED, + backup.kafka().consume(NUM_RECORDS_PRODUCED, RECORD_TRANSFER_DURATION_MS, "primary.test-topic-1").count()); + assertEquals("Records were not produced to backup cluster.", NUM_RECORDS_PRODUCED, + backup.kafka().consume(NUM_RECORDS_PRODUCED, RECORD_TRANSFER_DURATION_MS, "test-topic-1").count()); + assertEquals("Records were not replicated to primary cluster.", NUM_RECORDS_PRODUCED, + primary.kafka().consume(NUM_RECORDS_PRODUCED, RECORD_TRANSFER_DURATION_MS, "backup.test-topic-1").count()); + + assertEquals("Primary cluster doesn't have all records from both clusters.", NUM_RECORDS_PRODUCED * 2, + primary.kafka().consume(NUM_RECORDS_PRODUCED * 2, RECORD_TRANSFER_DURATION_MS, "backup.test-topic-1", "test-topic-1").count()); + assertEquals("Backup cluster doesn't have all records from both clusters.", NUM_RECORDS_PRODUCED * 2, + backup.kafka().consume(NUM_RECORDS_PRODUCED * 2, RECORD_TRANSFER_DURATION_MS, "primary.test-topic-1", "test-topic-1").count()); + + assertTrue("Heartbeats were not emitted to primary cluster.", primary.kafka().consume(1, + RECORD_TRANSFER_DURATION_MS, "heartbeats").count() > 0); + assertTrue("Heartbeats were not emitted to backup cluster.", backup.kafka().consume(1, + RECORD_TRANSFER_DURATION_MS, "heartbeats").count() > 0); + assertTrue("Heartbeats were not replicated downstream to backup cluster.", backup.kafka().consume(1, + RECORD_TRANSFER_DURATION_MS, "primary.heartbeats").count() > 0); + assertTrue("Heartbeats were not replicated downstream to primary cluster.", primary.kafka().consume(1, + RECORD_TRANSFER_DURATION_MS, "backup.heartbeats").count() > 0); + + assertTrue("Did not find upstream primary cluster.", backupClient.upstreamClusters().contains(PRIMARY_CLUSTER_ALIAS)); + assertEquals("Did not calculate replication hops correctly.", 1, backupClient.replicationHops(PRIMARY_CLUSTER_ALIAS)); + assertTrue("Did not find upstream backup cluster.", primaryClient.upstreamClusters().contains(BACKUP_CLUSTER_ALIAS)); + assertEquals("Did not calculate replication hops correctly.", 1, primaryClient.replicationHops(BACKUP_CLUSTER_ALIAS)); + assertTrue("Checkpoints were not emitted downstream to backup cluster.", backup.kafka().consume(1, + CHECKPOINT_DURATION_MS, "primary.checkpoints.internal").count() > 0); + + Map<TopicPartition, OffsetAndMetadata> backupOffsets = backupClient.remoteConsumerOffsets(consumerGroupName, PRIMARY_CLUSTER_ALIAS, + Duration.ofMillis(CHECKPOINT_DURATION_MS)); + + assertTrue("Offsets not translated downstream to backup cluster. Found: " + backupOffsets, backupOffsets.containsKey( + new TopicPartition("primary.test-topic-1", 0))); + + // Failover consumer group to backup cluster. + try (Consumer<byte[], byte[]> primaryConsumer = backup.kafka().createConsumer(Collections.singletonMap("group.id", consumerGroupName))) { + primaryConsumer.assign(backupOffsets.keySet()); + backupOffsets.forEach(primaryConsumer::seek); + primaryConsumer.poll(CONSUMER_POLL_TIMEOUT_MS); + primaryConsumer.commitAsync(); + + assertTrue("Consumer failedover to zero offset.", primaryConsumer.position(new TopicPartition("primary.test-topic-1", 0)) > 0); + assertTrue("Consumer failedover beyond expected offset.", primaryConsumer.position( + new TopicPartition("primary.test-topic-1", 0)) <= NUM_RECORDS_PRODUCED); + assertTrue("Checkpoints were not emitted upstream to primary cluster.", primary.kafka().consume(1, + CHECKPOINT_DURATION_MS, "backup.checkpoints.internal").count() > 0); + } + + waitForCondition(() -> { + return primaryClient.remoteConsumerOffsets(consumerGroupName, BACKUP_CLUSTER_ALIAS, + Duration.ofMillis(CHECKPOINT_DURATION_MS)).containsKey(new TopicPartition("backup.test-topic-1", 0)); + }, CHECKPOINT_DURATION_MS, "Offsets not translated downstream to primary cluster."); + + waitForCondition(() -> { + return primaryClient.remoteConsumerOffsets(consumerGroupName, BACKUP_CLUSTER_ALIAS, + Duration.ofMillis(CHECKPOINT_DURATION_MS)).containsKey(new TopicPartition("test-topic-1", 0)); + }, CHECKPOINT_DURATION_MS, "Offsets not translated upstream to primary cluster."); + + Map<TopicPartition, OffsetAndMetadata> primaryOffsets = primaryClient.remoteConsumerOffsets(consumerGroupName, BACKUP_CLUSTER_ALIAS, + Duration.ofMillis(CHECKPOINT_DURATION_MS)); + + // Failback consumer group to primary cluster + try (Consumer<byte[], byte[]> backupConsumer = primary.kafka().createConsumer(Collections.singletonMap("group.id", consumerGroupName))) { + backupConsumer.assign(primaryOffsets.keySet()); + primaryOffsets.forEach(backupConsumer::seek); + backupConsumer.poll(CONSUMER_POLL_TIMEOUT_MS); + backupConsumer.commitAsync(); + + assertTrue("Consumer failedback to zero upstream offset.", backupConsumer.position(new TopicPartition("test-topic-1", 0)) > 0); + assertTrue("Consumer failedback to zero downstream offset.", backupConsumer.position(new TopicPartition("backup.test-topic-1", 0)) > 0); + assertTrue("Consumer failedback beyond expected upstream offset.", backupConsumer.position( + new TopicPartition("test-topic-1", 0)) <= NUM_RECORDS_PRODUCED); + assertTrue("Consumer failedback beyond expected downstream offset.", backupConsumer.position( + new TopicPartition("backup.test-topic-1", 0)) <= NUM_RECORDS_PRODUCED); + } + + // create more matching topics + primary.kafka().createTopic("test-topic-2", NUM_PARTITIONS); + backup.kafka().createTopic("test-topic-3", NUM_PARTITIONS); + + // only produce messages to the first partition + produceMessages(primary, "test-topic-2", 1); + produceMessages(backup, "test-topic-3", 1); + + // expect total consumed messages equals to NUM_RECORDS_PER_PARTITION + assertEquals("Records were not produced to primary cluster.", NUM_RECORDS_PER_PARTITION, + primary.kafka().consume(NUM_RECORDS_PER_PARTITION, RECORD_TRANSFER_DURATION_MS, "test-topic-2").count()); + assertEquals("Records were not produced to backup cluster.", NUM_RECORDS_PER_PARTITION, + backup.kafka().consume(NUM_RECORDS_PER_PARTITION, RECORD_TRANSFER_DURATION_MS, "test-topic-3").count()); + + assertEquals("New topic was not replicated to primary cluster.", NUM_RECORDS_PER_PARTITION, + primary.kafka().consume(NUM_RECORDS_PER_PARTITION, 2 * RECORD_TRANSFER_DURATION_MS, "backup.test-topic-3").count()); + assertEquals("New topic was not replicated to backup cluster.", NUM_RECORDS_PER_PARTITION, + backup.kafka().consume(NUM_RECORDS_PER_PARTITION, 2 * RECORD_TRANSFER_DURATION_MS, "primary.test-topic-2").count()); + + } + + @Test + public void testReplicationWithEmptyPartition() throws Exception { + String consumerGroupName = "consumer-group-testReplicationWithEmptyPartition"; + Map<String, Object> consumerProps = Collections.singletonMap("group.id", consumerGroupName); + + // create topic + String topic = "test-topic-with-empty-partition"; + primary.kafka().createTopic(topic, NUM_PARTITIONS); + + // produce to all test-topic-empty's partitions, except the last partition + produceMessages(primary, topic, NUM_PARTITIONS - 1); + + // consume before starting the connectors so we don't need to wait for discovery + int expectedRecords = NUM_RECORDS_PER_PARTITION * (NUM_PARTITIONS - 1); + try (Consumer<byte[], byte[]> primaryConsumer = primary.kafka().createConsumerAndSubscribeTo(consumerProps, topic)) { + waitForConsumingAllRecords(primaryConsumer, expectedRecords); + } + + // one way replication from primary to backup + mm2Props.put(BACKUP_CLUSTER_ALIAS + "->" + PRIMARY_CLUSTER_ALIAS + ".enabled", "false"); + mm2Config = new MirrorMakerConfig(mm2Props); + waitUntilMirrorMakerIsRunning(backup, CONNECTOR_LIST, mm2Config, PRIMARY_CLUSTER_ALIAS, BACKUP_CLUSTER_ALIAS); + + // sleep few seconds to have MM2 finish replication so that "end" consumer will consume some record + Thread.sleep(TimeUnit.SECONDS.toMillis(3)); + + // consume all records from backup cluster + try (Consumer<byte[], byte[]> backupConsumer = backup.kafka().createConsumerAndSubscribeTo(consumerProps, + PRIMARY_CLUSTER_ALIAS + "." + topic)) { + waitForConsumingAllRecords(backupConsumer, expectedRecords); + } + + Admin backupClient = backup.kafka().createAdminClient(); + // retrieve the consumer group offset from backup cluster + Map<TopicPartition, OffsetAndMetadata> remoteOffsets = + backupClient.listConsumerGroupOffsets(consumerGroupName).partitionsToOffsetAndMetadata().get(); + // pinpoint the offset of the last partition which does not receive records + OffsetAndMetadata offset = remoteOffsets.get(new TopicPartition(PRIMARY_CLUSTER_ALIAS + "." + topic, NUM_PARTITIONS - 1)); + // offset of the last partition should exist, but its value should be 0 + assertNotNull("Offset of last partition was not replicated", offset); + assertEquals("Offset of last partition is not zero", 0, offset.offset()); + } + + @Test + public void testOneWayReplicationWithAutoOffsetSync() throws InterruptedException { + produceMessages(primary, "test-topic-1"); + String consumerGroupName = "consumer-group-testOneWayReplicationWithAutoOffsetSync"; + Map<String, Object> consumerProps = new HashMap<String, Object>() {{ + put("group.id", consumerGroupName); + put("auto.offset.reset", "earliest"); + }}; + // create consumers before starting the connectors so we don't need to wait for discovery + try (Consumer<byte[], byte[]> primaryConsumer = primary.kafka().createConsumerAndSubscribeTo(consumerProps, + "test-topic-1")) { + // we need to wait for consuming all the records for MM2 replicating the expected offsets + waitForConsumingAllRecords(primaryConsumer, NUM_RECORDS_PRODUCED); + } + + // enable automated consumer group offset sync + mm2Props.put("sync.group.offsets.enabled", "true"); + mm2Props.put("sync.group.offsets.interval.seconds", "1"); + // one way replication from primary to backup + mm2Props.put(BACKUP_CLUSTER_ALIAS + "->" + PRIMARY_CLUSTER_ALIAS + ".enabled", "false"); + + mm2Config = new MirrorMakerConfig(mm2Props); + + waitUntilMirrorMakerIsRunning(backup, CONNECTOR_LIST, mm2Config, PRIMARY_CLUSTER_ALIAS, BACKUP_CLUSTER_ALIAS); + + // create a consumer at backup cluster with same consumer group Id to consume 1 topic + Consumer<byte[], byte[]> backupConsumer = backup.kafka().createConsumerAndSubscribeTo( + consumerProps, "primary.test-topic-1"); + + waitForConsumerGroupOffsetSync(backup, backupConsumer, Collections.singletonList("primary.test-topic-1"), + consumerGroupName, NUM_RECORDS_PRODUCED); + + ConsumerRecords<byte[], byte[]> records = backupConsumer.poll(CONSUMER_POLL_TIMEOUT_MS); + + // the size of consumer record should be zero, because the offsets of the same consumer group + // have been automatically synchronized from primary to backup by the background job, so no + // more records to consume from the replicated topic by the same consumer group at backup cluster + assertEquals("consumer record size is not zero", 0, records.count()); + + // now create a new topic in primary cluster + primary.kafka().createTopic("test-topic-2", NUM_PARTITIONS); + backup.kafka().createTopic("primary.test-topic-2", 1); + // produce some records to the new topic in primary cluster + produceMessages(primary, "test-topic-2"); + + // create a consumer at primary cluster to consume the new topic + try (Consumer<byte[], byte[]> consumer1 = primary.kafka().createConsumerAndSubscribeTo(Collections.singletonMap( + "group.id", "consumer-group-1"), "test-topic-2")) { + // we need to wait for consuming all the records for MM2 replicating the expected offsets + waitForConsumingAllRecords(consumer1, NUM_RECORDS_PRODUCED); + } + + // create a consumer at backup cluster with same consumer group Id to consume old and new topic + backupConsumer = backup.kafka().createConsumerAndSubscribeTo(Collections.singletonMap( + "group.id", consumerGroupName), "primary.test-topic-1", "primary.test-topic-2"); + + waitForConsumerGroupOffsetSync(backup, backupConsumer, Arrays.asList("primary.test-topic-1", "primary.test-topic-2"), + consumerGroupName, NUM_RECORDS_PRODUCED); + + records = backupConsumer.poll(CONSUMER_POLL_TIMEOUT_MS); + // similar reasoning as above, no more records to consume by the same consumer group at backup cluster + assertEquals("consumer record size is not zero", 0, records.count()); + backupConsumer.close(); + } + + /* + * launch the connectors on kafka connect cluster and check if they are running + */ + private static void waitUntilMirrorMakerIsRunning(EmbeddedConnectCluster connectCluster, + List<Class<? extends Connector>> connectorClasses, MirrorMakerConfig mm2Config, + String primary, String backup) throws InterruptedException { + for (int i = 0; i < connectorClasses.size(); i++) { + String connector = connectorClasses.get(i).getSimpleName(); + connectCluster.configureConnector(connector, mm2Config.connectorBaseConfig( + new SourceAndTarget(primary, backup), connectorClasses.get(i))); + } + + // we wait for the connector and tasks to come up for each connector, so that when we do the + // actual testing, we are certain that the tasks are up and running; this will prevent + // flaky tests where the connector and tasks didn't start up in time for the tests to be run + List<String> connectorNames = connectorClasses.stream().map(x -> x.getSimpleName()) Review comment: This intermediate `List` is not really useful. We could just change the loop below to iterate over the connector classes and call `getSimpleName()` on each of them ########## File path: connect/mirror/src/test/java/org/apache/kafka/connect/mirror/integration/MirrorConnectorsIntegrationBaseTest.java ########## @@ -0,0 +1,577 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.kafka.connect.mirror.integration; + +import org.apache.kafka.clients.admin.Admin; +import org.apache.kafka.clients.admin.Config; +import org.apache.kafka.clients.admin.ConfigEntry; +import org.apache.kafka.clients.admin.DescribeConfigsResult; +import org.apache.kafka.clients.consumer.Consumer; +import org.apache.kafka.clients.consumer.ConsumerRecords; +import org.apache.kafka.clients.consumer.OffsetAndMetadata; +import org.apache.kafka.common.config.ConfigResource; +import org.apache.kafka.common.config.TopicConfig; +import org.apache.kafka.common.utils.Exit; +import org.apache.kafka.common.TopicPartition; +import org.apache.kafka.connect.connector.Connector; +import org.apache.kafka.connect.mirror.MirrorClient; +import org.apache.kafka.connect.mirror.MirrorHeartbeatConnector; +import org.apache.kafka.connect.mirror.MirrorMakerConfig; +import org.apache.kafka.connect.mirror.MirrorSourceConnector; +import org.apache.kafka.connect.mirror.SourceAndTarget; +import org.apache.kafka.connect.mirror.MirrorCheckpointConnector; +import org.apache.kafka.connect.util.clusters.EmbeddedConnectCluster; +import org.apache.kafka.connect.util.clusters.EmbeddedKafkaCluster; +import org.apache.kafka.test.IntegrationTest; +import static org.apache.kafka.test.TestUtils.waitForCondition; + +import java.time.Duration; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.List; +import java.util.Collections; +import java.util.HashMap; +import java.util.Iterator; +import java.util.Map; +import java.util.Properties; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.stream.Collectors; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.assertNotNull; +import org.junit.Test; +import org.junit.experimental.categories.Category; +import org.junit.After; +import org.junit.Before; + +import static org.apache.kafka.connect.mirror.TestUtils.generateRecords; + +/** + * Tests MM2 replication and failover/failback logic. + * + * MM2 is configured with active/active replication between two Kafka clusters. Tests validate that + * records sent to either cluster arrive at the other cluster. Then, a consumer group is migrated from + * one cluster to the other and back. Tests validate that consumer offsets are translated and replicated + * between clusters during this failover and failback. + */ +@Category(IntegrationTest.class) +public abstract class MirrorConnectorsIntegrationBaseTest { + private static final Logger log = LoggerFactory.getLogger(MirrorConnectorsIntegrationBaseTest.class); + + private static final int NUM_RECORDS_PER_PARTITION = 10; + private static final int NUM_PARTITIONS = 10; + private static final int NUM_RECORDS_PRODUCED = NUM_PARTITIONS * NUM_RECORDS_PER_PARTITION; + private static final int RECORD_TRANSFER_DURATION_MS = 30_000; + private static final int CHECKPOINT_DURATION_MS = 20_000; + private static final int RECORD_CONSUME_DURATION_MS = 20_000; + private static final int OFFSET_SYNC_DURATION_MS = 30_000; + private static final int NUM_WORKERS = 3; + private static final Duration CONSUMER_POLL_TIMEOUT_MS = Duration.ofMillis(500); + private static final int BROKER_RESTART_TIMEOUT_MS = 10_000; + protected static final String PRIMARY_CLUSTER_ALIAS = "primary"; + protected static final String BACKUP_CLUSTER_ALIAS = "backup"; + private static final List<Class<? extends Connector>> CONNECTOR_LIST = + Arrays.asList(MirrorSourceConnector.class, MirrorCheckpointConnector.class, MirrorHeartbeatConnector.class); + + protected Map<String, String> mm2Props = new HashMap<>(); + private MirrorMakerConfig mm2Config; + private EmbeddedConnectCluster primary; + private EmbeddedConnectCluster backup; + + private final AtomicBoolean exited = new AtomicBoolean(false); + protected Properties primaryBrokerProps = new Properties(); + protected Properties backupBrokerProps = new Properties(); + protected Map<String, String> primaryWorkerProps = new HashMap<>(); + protected Map<String, String> backupWorkerProps = new HashMap<>(); + + @Before + public void startClusters() throws InterruptedException { + primaryBrokerProps.put("auto.create.topics.enable", "false"); + backupBrokerProps.put("auto.create.topics.enable", "false"); + + mm2Props.putAll(basicMM2Config()); + + mm2Config = new MirrorMakerConfig(mm2Props); + primaryWorkerProps = mm2Config.workerConfig(new SourceAndTarget(BACKUP_CLUSTER_ALIAS, PRIMARY_CLUSTER_ALIAS)); + backupWorkerProps.putAll(mm2Config.workerConfig(new SourceAndTarget(PRIMARY_CLUSTER_ALIAS, BACKUP_CLUSTER_ALIAS))); + + primary = new EmbeddedConnectCluster.Builder() + .name(PRIMARY_CLUSTER_ALIAS + "-connect-cluster") + .numWorkers(NUM_WORKERS) + .numBrokers(1) + .brokerProps(primaryBrokerProps) + .workerProps(primaryWorkerProps) + .build(); + + backup = new EmbeddedConnectCluster.Builder() + .name(BACKUP_CLUSTER_ALIAS + "-connect-cluster") + .numWorkers(NUM_WORKERS) + .numBrokers(1) + .brokerProps(backupBrokerProps) + .workerProps(backupWorkerProps) + .build(); + + primary.start(); + primary.assertions().assertAtLeastNumWorkersAreUp(NUM_WORKERS, + "Workers of " + PRIMARY_CLUSTER_ALIAS + "-connect-cluster did not start in time."); + + backup.start(); + backup.assertions().assertAtLeastNumWorkersAreUp(NUM_WORKERS, + "Workers of " + BACKUP_CLUSTER_ALIAS + "-connect-cluster did not start in time."); + + createTopics(); + + warmUpConsumer(); + + log.info(PRIMARY_CLUSTER_ALIAS + " REST service: {}", primary.endpointForResource("connectors")); + log.info(BACKUP_CLUSTER_ALIAS + " REST service: {}", backup.endpointForResource("connectors")); + log.info(PRIMARY_CLUSTER_ALIAS + " brokers: {}", primary.kafka().bootstrapServers()); + log.info(BACKUP_CLUSTER_ALIAS + " brokers: {}", backup.kafka().bootstrapServers()); + + // now that the brokers are running, we can finish setting up the Connectors + mm2Props.put(PRIMARY_CLUSTER_ALIAS + ".bootstrap.servers", primary.kafka().bootstrapServers()); + mm2Props.put(BACKUP_CLUSTER_ALIAS + ".bootstrap.servers", backup.kafka().bootstrapServers()); + + Exit.setExitProcedure((status, errorCode) -> exited.set(true)); + } + + @After + public void shutdownClusters() throws Exception { + for (String x : primary.connectors()) { + primary.deleteConnector(x); + } + for (String x : backup.connectors()) { + backup.deleteConnector(x); + } + deleteAllTopics(primary.kafka()); + deleteAllTopics(backup.kafka()); + primary.stop(); + backup.stop(); + try { + assertFalse(exited.get()); + } finally { + Exit.resetExitProcedure(); + } + } + + @Test + public void testReplication() throws Exception { + produceMessages(primary, "test-topic-1"); + produceMessages(backup, "test-topic-1"); + String consumerGroupName = "consumer-group-testReplication"; + Map<String, Object> consumerProps = new HashMap<String, Object>() {{ + put("group.id", consumerGroupName); + put("auto.offset.reset", "latest"); + }}; + // create consumers before starting the connectors so we don't need to wait for discovery + waitForConsumingAllRecords(primary.kafka().createConsumerAndSubscribeTo(consumerProps, "test-topic-1"), 0); + waitForConsumingAllRecords(backup.kafka().createConsumerAndSubscribeTo(consumerProps, "test-topic-1"), 0); + + mm2Config = new MirrorMakerConfig(mm2Props); + + waitUntilMirrorMakerIsRunning(backup, CONNECTOR_LIST, mm2Config, PRIMARY_CLUSTER_ALIAS, BACKUP_CLUSTER_ALIAS); + waitUntilMirrorMakerIsRunning(primary, CONNECTOR_LIST, mm2Config, BACKUP_CLUSTER_ALIAS, PRIMARY_CLUSTER_ALIAS); + + MirrorClient primaryClient = new MirrorClient(mm2Config.clientConfig(PRIMARY_CLUSTER_ALIAS)); + MirrorClient backupClient = new MirrorClient(mm2Config.clientConfig(BACKUP_CLUSTER_ALIAS)); + + assertEquals("topic config was not synced", TopicConfig.CLEANUP_POLICY_COMPACT, + getTopicConfig(backup.kafka(), "primary.test-topic-1", TopicConfig.CLEANUP_POLICY_CONFIG)); + + assertEquals("Records were not produced to primary cluster.", NUM_RECORDS_PRODUCED, + primary.kafka().consume(NUM_RECORDS_PRODUCED, RECORD_TRANSFER_DURATION_MS, "test-topic-1").count()); + assertEquals("Records were not replicated to backup cluster.", NUM_RECORDS_PRODUCED, + backup.kafka().consume(NUM_RECORDS_PRODUCED, RECORD_TRANSFER_DURATION_MS, "primary.test-topic-1").count()); + assertEquals("Records were not produced to backup cluster.", NUM_RECORDS_PRODUCED, + backup.kafka().consume(NUM_RECORDS_PRODUCED, RECORD_TRANSFER_DURATION_MS, "test-topic-1").count()); + assertEquals("Records were not replicated to primary cluster.", NUM_RECORDS_PRODUCED, + primary.kafka().consume(NUM_RECORDS_PRODUCED, RECORD_TRANSFER_DURATION_MS, "backup.test-topic-1").count()); + + assertEquals("Primary cluster doesn't have all records from both clusters.", NUM_RECORDS_PRODUCED * 2, + primary.kafka().consume(NUM_RECORDS_PRODUCED * 2, RECORD_TRANSFER_DURATION_MS, "backup.test-topic-1", "test-topic-1").count()); + assertEquals("Backup cluster doesn't have all records from both clusters.", NUM_RECORDS_PRODUCED * 2, + backup.kafka().consume(NUM_RECORDS_PRODUCED * 2, RECORD_TRANSFER_DURATION_MS, "primary.test-topic-1", "test-topic-1").count()); + + assertTrue("Heartbeats were not emitted to primary cluster.", primary.kafka().consume(1, + RECORD_TRANSFER_DURATION_MS, "heartbeats").count() > 0); + assertTrue("Heartbeats were not emitted to backup cluster.", backup.kafka().consume(1, + RECORD_TRANSFER_DURATION_MS, "heartbeats").count() > 0); + assertTrue("Heartbeats were not replicated downstream to backup cluster.", backup.kafka().consume(1, + RECORD_TRANSFER_DURATION_MS, "primary.heartbeats").count() > 0); + assertTrue("Heartbeats were not replicated downstream to primary cluster.", primary.kafka().consume(1, + RECORD_TRANSFER_DURATION_MS, "backup.heartbeats").count() > 0); + + assertTrue("Did not find upstream primary cluster.", backupClient.upstreamClusters().contains(PRIMARY_CLUSTER_ALIAS)); + assertEquals("Did not calculate replication hops correctly.", 1, backupClient.replicationHops(PRIMARY_CLUSTER_ALIAS)); + assertTrue("Did not find upstream backup cluster.", primaryClient.upstreamClusters().contains(BACKUP_CLUSTER_ALIAS)); + assertEquals("Did not calculate replication hops correctly.", 1, primaryClient.replicationHops(BACKUP_CLUSTER_ALIAS)); + assertTrue("Checkpoints were not emitted downstream to backup cluster.", backup.kafka().consume(1, + CHECKPOINT_DURATION_MS, "primary.checkpoints.internal").count() > 0); + + Map<TopicPartition, OffsetAndMetadata> backupOffsets = backupClient.remoteConsumerOffsets(consumerGroupName, PRIMARY_CLUSTER_ALIAS, + Duration.ofMillis(CHECKPOINT_DURATION_MS)); + + assertTrue("Offsets not translated downstream to backup cluster. Found: " + backupOffsets, backupOffsets.containsKey( + new TopicPartition("primary.test-topic-1", 0))); + + // Failover consumer group to backup cluster. + try (Consumer<byte[], byte[]> primaryConsumer = backup.kafka().createConsumer(Collections.singletonMap("group.id", consumerGroupName))) { + primaryConsumer.assign(backupOffsets.keySet()); + backupOffsets.forEach(primaryConsumer::seek); + primaryConsumer.poll(CONSUMER_POLL_TIMEOUT_MS); + primaryConsumer.commitAsync(); + + assertTrue("Consumer failedover to zero offset.", primaryConsumer.position(new TopicPartition("primary.test-topic-1", 0)) > 0); + assertTrue("Consumer failedover beyond expected offset.", primaryConsumer.position( + new TopicPartition("primary.test-topic-1", 0)) <= NUM_RECORDS_PRODUCED); + assertTrue("Checkpoints were not emitted upstream to primary cluster.", primary.kafka().consume(1, + CHECKPOINT_DURATION_MS, "backup.checkpoints.internal").count() > 0); + } + + waitForCondition(() -> { + return primaryClient.remoteConsumerOffsets(consumerGroupName, BACKUP_CLUSTER_ALIAS, + Duration.ofMillis(CHECKPOINT_DURATION_MS)).containsKey(new TopicPartition("backup.test-topic-1", 0)); + }, CHECKPOINT_DURATION_MS, "Offsets not translated downstream to primary cluster."); + + waitForCondition(() -> { + return primaryClient.remoteConsumerOffsets(consumerGroupName, BACKUP_CLUSTER_ALIAS, + Duration.ofMillis(CHECKPOINT_DURATION_MS)).containsKey(new TopicPartition("test-topic-1", 0)); + }, CHECKPOINT_DURATION_MS, "Offsets not translated upstream to primary cluster."); + + Map<TopicPartition, OffsetAndMetadata> primaryOffsets = primaryClient.remoteConsumerOffsets(consumerGroupName, BACKUP_CLUSTER_ALIAS, + Duration.ofMillis(CHECKPOINT_DURATION_MS)); + + // Failback consumer group to primary cluster + try (Consumer<byte[], byte[]> backupConsumer = primary.kafka().createConsumer(Collections.singletonMap("group.id", consumerGroupName))) { + backupConsumer.assign(primaryOffsets.keySet()); + primaryOffsets.forEach(backupConsumer::seek); + backupConsumer.poll(CONSUMER_POLL_TIMEOUT_MS); + backupConsumer.commitAsync(); + + assertTrue("Consumer failedback to zero upstream offset.", backupConsumer.position(new TopicPartition("test-topic-1", 0)) > 0); + assertTrue("Consumer failedback to zero downstream offset.", backupConsumer.position(new TopicPartition("backup.test-topic-1", 0)) > 0); + assertTrue("Consumer failedback beyond expected upstream offset.", backupConsumer.position( + new TopicPartition("test-topic-1", 0)) <= NUM_RECORDS_PRODUCED); + assertTrue("Consumer failedback beyond expected downstream offset.", backupConsumer.position( + new TopicPartition("backup.test-topic-1", 0)) <= NUM_RECORDS_PRODUCED); + } + + // create more matching topics + primary.kafka().createTopic("test-topic-2", NUM_PARTITIONS); + backup.kafka().createTopic("test-topic-3", NUM_PARTITIONS); + + // only produce messages to the first partition + produceMessages(primary, "test-topic-2", 1); + produceMessages(backup, "test-topic-3", 1); + + // expect total consumed messages equals to NUM_RECORDS_PER_PARTITION + assertEquals("Records were not produced to primary cluster.", NUM_RECORDS_PER_PARTITION, + primary.kafka().consume(NUM_RECORDS_PER_PARTITION, RECORD_TRANSFER_DURATION_MS, "test-topic-2").count()); + assertEquals("Records were not produced to backup cluster.", NUM_RECORDS_PER_PARTITION, + backup.kafka().consume(NUM_RECORDS_PER_PARTITION, RECORD_TRANSFER_DURATION_MS, "test-topic-3").count()); + + assertEquals("New topic was not replicated to primary cluster.", NUM_RECORDS_PER_PARTITION, + primary.kafka().consume(NUM_RECORDS_PER_PARTITION, 2 * RECORD_TRANSFER_DURATION_MS, "backup.test-topic-3").count()); + assertEquals("New topic was not replicated to backup cluster.", NUM_RECORDS_PER_PARTITION, + backup.kafka().consume(NUM_RECORDS_PER_PARTITION, 2 * RECORD_TRANSFER_DURATION_MS, "primary.test-topic-2").count()); + + } + + @Test + public void testReplicationWithEmptyPartition() throws Exception { + String consumerGroupName = "consumer-group-testReplicationWithEmptyPartition"; + Map<String, Object> consumerProps = Collections.singletonMap("group.id", consumerGroupName); + + // create topic + String topic = "test-topic-with-empty-partition"; + primary.kafka().createTopic(topic, NUM_PARTITIONS); + + // produce to all test-topic-empty's partitions, except the last partition + produceMessages(primary, topic, NUM_PARTITIONS - 1); + + // consume before starting the connectors so we don't need to wait for discovery + int expectedRecords = NUM_RECORDS_PER_PARTITION * (NUM_PARTITIONS - 1); + try (Consumer<byte[], byte[]> primaryConsumer = primary.kafka().createConsumerAndSubscribeTo(consumerProps, topic)) { + waitForConsumingAllRecords(primaryConsumer, expectedRecords); + } + + // one way replication from primary to backup + mm2Props.put(BACKUP_CLUSTER_ALIAS + "->" + PRIMARY_CLUSTER_ALIAS + ".enabled", "false"); + mm2Config = new MirrorMakerConfig(mm2Props); + waitUntilMirrorMakerIsRunning(backup, CONNECTOR_LIST, mm2Config, PRIMARY_CLUSTER_ALIAS, BACKUP_CLUSTER_ALIAS); + + // sleep few seconds to have MM2 finish replication so that "end" consumer will consume some record + Thread.sleep(TimeUnit.SECONDS.toMillis(3)); + + // consume all records from backup cluster + try (Consumer<byte[], byte[]> backupConsumer = backup.kafka().createConsumerAndSubscribeTo(consumerProps, + PRIMARY_CLUSTER_ALIAS + "." + topic)) { + waitForConsumingAllRecords(backupConsumer, expectedRecords); + } + + Admin backupClient = backup.kafka().createAdminClient(); + // retrieve the consumer group offset from backup cluster + Map<TopicPartition, OffsetAndMetadata> remoteOffsets = + backupClient.listConsumerGroupOffsets(consumerGroupName).partitionsToOffsetAndMetadata().get(); + // pinpoint the offset of the last partition which does not receive records + OffsetAndMetadata offset = remoteOffsets.get(new TopicPartition(PRIMARY_CLUSTER_ALIAS + "." + topic, NUM_PARTITIONS - 1)); + // offset of the last partition should exist, but its value should be 0 + assertNotNull("Offset of last partition was not replicated", offset); + assertEquals("Offset of last partition is not zero", 0, offset.offset()); + } + + @Test + public void testOneWayReplicationWithAutoOffsetSync() throws InterruptedException { + produceMessages(primary, "test-topic-1"); + String consumerGroupName = "consumer-group-testOneWayReplicationWithAutoOffsetSync"; + Map<String, Object> consumerProps = new HashMap<String, Object>() {{ + put("group.id", consumerGroupName); + put("auto.offset.reset", "earliest"); + }}; + // create consumers before starting the connectors so we don't need to wait for discovery + try (Consumer<byte[], byte[]> primaryConsumer = primary.kafka().createConsumerAndSubscribeTo(consumerProps, + "test-topic-1")) { + // we need to wait for consuming all the records for MM2 replicating the expected offsets + waitForConsumingAllRecords(primaryConsumer, NUM_RECORDS_PRODUCED); + } + + // enable automated consumer group offset sync + mm2Props.put("sync.group.offsets.enabled", "true"); + mm2Props.put("sync.group.offsets.interval.seconds", "1"); + // one way replication from primary to backup + mm2Props.put(BACKUP_CLUSTER_ALIAS + "->" + PRIMARY_CLUSTER_ALIAS + ".enabled", "false"); + + mm2Config = new MirrorMakerConfig(mm2Props); + + waitUntilMirrorMakerIsRunning(backup, CONNECTOR_LIST, mm2Config, PRIMARY_CLUSTER_ALIAS, BACKUP_CLUSTER_ALIAS); + + // create a consumer at backup cluster with same consumer group Id to consume 1 topic + Consumer<byte[], byte[]> backupConsumer = backup.kafka().createConsumerAndSubscribeTo( + consumerProps, "primary.test-topic-1"); + + waitForConsumerGroupOffsetSync(backup, backupConsumer, Collections.singletonList("primary.test-topic-1"), + consumerGroupName, NUM_RECORDS_PRODUCED); + + ConsumerRecords<byte[], byte[]> records = backupConsumer.poll(CONSUMER_POLL_TIMEOUT_MS); + + // the size of consumer record should be zero, because the offsets of the same consumer group + // have been automatically synchronized from primary to backup by the background job, so no + // more records to consume from the replicated topic by the same consumer group at backup cluster + assertEquals("consumer record size is not zero", 0, records.count()); + + // now create a new topic in primary cluster + primary.kafka().createTopic("test-topic-2", NUM_PARTITIONS); + backup.kafka().createTopic("primary.test-topic-2", 1); + // produce some records to the new topic in primary cluster + produceMessages(primary, "test-topic-2"); + + // create a consumer at primary cluster to consume the new topic + try (Consumer<byte[], byte[]> consumer1 = primary.kafka().createConsumerAndSubscribeTo(Collections.singletonMap( + "group.id", "consumer-group-1"), "test-topic-2")) { + // we need to wait for consuming all the records for MM2 replicating the expected offsets + waitForConsumingAllRecords(consumer1, NUM_RECORDS_PRODUCED); + } + + // create a consumer at backup cluster with same consumer group Id to consume old and new topic + backupConsumer = backup.kafka().createConsumerAndSubscribeTo(Collections.singletonMap( + "group.id", consumerGroupName), "primary.test-topic-1", "primary.test-topic-2"); + + waitForConsumerGroupOffsetSync(backup, backupConsumer, Arrays.asList("primary.test-topic-1", "primary.test-topic-2"), + consumerGroupName, NUM_RECORDS_PRODUCED); + + records = backupConsumer.poll(CONSUMER_POLL_TIMEOUT_MS); + // similar reasoning as above, no more records to consume by the same consumer group at backup cluster + assertEquals("consumer record size is not zero", 0, records.count()); + backupConsumer.close(); + } + + /* + * launch the connectors on kafka connect cluster and check if they are running + */ + private static void waitUntilMirrorMakerIsRunning(EmbeddedConnectCluster connectCluster, + List<Class<? extends Connector>> connectorClasses, MirrorMakerConfig mm2Config, + String primary, String backup) throws InterruptedException { + for (int i = 0; i < connectorClasses.size(); i++) { + String connector = connectorClasses.get(i).getSimpleName(); + connectCluster.configureConnector(connector, mm2Config.connectorBaseConfig( + new SourceAndTarget(primary, backup), connectorClasses.get(i))); + } + + // we wait for the connector and tasks to come up for each connector, so that when we do the + // actual testing, we are certain that the tasks are up and running; this will prevent + // flaky tests where the connector and tasks didn't start up in time for the tests to be run + List<String> connectorNames = connectorClasses.stream().map(x -> x.getSimpleName()) Review comment: I'm assuming it's worth keeping this 2nd loop separate from the first one for performance reasons. At first glance it looks strange to iterate over the same collection twice in a row ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org