[ https://issues.apache.org/jira/browse/CASSANDRA-15160?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17193900#comment-17193900 ]
David Capwell commented on CASSANDRA-15160: ------------------------------------------- Finished testing. What I see is that with the flag you can get the same behavior as 2.1 and without the flag (default) we keep the current behavior. LGTM +1. It would be nice to add the tests below, as they test the different conditions where the neighbors can be empty, also it would be nice if the Python dtests could be jvm dtests but I think we need https://issues.apache.org/jira/browse/CASSANDRA-16120 merged/released before we could, so the Python dtests are fine by me. {code} package org.apache.cassandra.distributed.test; import java.io.IOException; import org.junit.Test; import org.apache.cassandra.distributed.Cluster; import org.apache.cassandra.distributed.api.ConsistencyLevel; import org.apache.cassandra.distributed.api.IInvokableInstance; import org.apache.cassandra.distributed.shared.Versions; import org.assertj.core.api.Assertions; public class RepairFilteringTest extends TestBaseImpl { private static final Versions VERSIONS = Versions.find(); // private static final Versions.Version VERSION = VERSIONS.getLatest(Versions.Major.v22); private static final Versions.Version VERSION = VERSIONS.getLatest(Versions.Major.v4); @Test public void dcFilterOnEmptyDC() throws IOException { try (Cluster cluster = Cluster.build().withVersion(VERSION).withRacks(2, 1, 2).start()) { // 1-2 : datacenter1 // 3-4 : datacenter2 cluster.schemaChange("CREATE KEYSPACE " + KEYSPACE + " WITH replication = {'class': 'NetworkTopologyStrategy', 'datacenter1':2, 'datacenter2':0}"); cluster.schemaChange("CREATE TABLE " + KEYSPACE + ".tbl (id int PRIMARY KEY, i int)"); for (int i = 0; i < 10; i++) cluster.coordinator(1).execute("INSERT INTO " + KEYSPACE + ".tbl (id, i) VALUES (?, ?)", ConsistencyLevel.ALL, i, i); cluster.forEach(i -> i.flush(KEYSPACE)); // choose a node in the DC that doesn't have any replicas IInvokableInstance node = cluster.get(3); 
Assertions.assertThat(node.config().localDatacenter()).isEqualTo("datacenter2"); // fails with "the local data center must be part of the repair" node.nodetoolResult("repair", "-full", "-dc", "datacenter1", "-dc", "datacenter2", "--ignore-unreplicated-keyspaces", "-st", "0", "-et", "1000", KEYSPACE, "tbl") .asserts().success(); } } @Test public void hostFilterDifferentDC() throws IOException { try (Cluster cluster = Cluster.build().withVersion(VERSION).withRacks(2, 1, 2).start()) { // 1-2 : datacenter1 // 3-4 : datacenter2 cluster.schemaChange("CREATE KEYSPACE " + KEYSPACE + " WITH replication = {'class': 'NetworkTopologyStrategy', 'datacenter1':2, 'datacenter2':0}"); cluster.schemaChange("CREATE TABLE " + KEYSPACE + ".tbl (id int PRIMARY KEY, i int)"); for (int i = 0; i < 10; i++) cluster.coordinator(1).execute("INSERT INTO " + KEYSPACE + ".tbl (id, i) VALUES (?, ?)", ConsistencyLevel.ALL, i, i); cluster.forEach(i -> i.flush(KEYSPACE)); // choose a node in the DC that doesn't have any replicas IInvokableInstance node = cluster.get(3); Assertions.assertThat(node.config().localDatacenter()).isEqualTo("datacenter2"); // fails with "Specified hosts [127.0.0.3, 127.0.0.1] do not share range (0,1000] needed for repair. Either restrict repair ranges with -st/-et options, or specify one of the neighbors that share this range with this node: [].. 
Check the logs on the repair participants for further details" node.nodetoolResult("repair", "-full", "-hosts", cluster.get(1).broadcastAddress().getAddress().getHostAddress(), "-hosts", node.broadcastAddress().getAddress().getHostAddress(), "--ignore-unreplicated-keyspaces", "-st", "0", "-et", "1000", KEYSPACE, "tbl") .asserts().success(); } } @Test public void emptyDC() throws IOException { try (Cluster cluster = Cluster.build().withVersion(VERSION).withRacks(2, 1, 2).start()) { // 1-2 : datacenter1 // 3-4 : datacenter2 cluster.schemaChange("CREATE KEYSPACE " + KEYSPACE + " WITH replication = {'class': 'NetworkTopologyStrategy', 'datacenter1':2, 'datacenter2':0}"); cluster.schemaChange("CREATE TABLE " + KEYSPACE + ".tbl (id int PRIMARY KEY, i int)"); for (int i = 0; i < 10; i++) cluster.coordinator(1).execute("INSERT INTO " + KEYSPACE + ".tbl (id, i) VALUES (?, ?)", ConsistencyLevel.ALL, i, i); cluster.forEach(i -> i.flush(KEYSPACE)); // choose a node in the DC that doesn't have any replicas IInvokableInstance node = cluster.get(3); Assertions.assertThat(node.config().localDatacenter()).isEqualTo("datacenter2"); // fails with [2020-09-10 11:30:04,139] Repair command #1 failed with error Nothing to repair for (0,1000] in distributed_test_keyspace - aborting. 
Check the logs on the repair participants for further details node.nodetoolResult("repair", "-full", "--ignore-unreplicated-keyspaces", "-st", "0", "-et", "1000", KEYSPACE, "tbl") .asserts().success(); } } @Test public void mainDC() throws IOException { try (Cluster cluster = Cluster.build().withVersion(VERSION).withRacks(2, 1, 2).start()) { // 1-2 : datacenter1 // 3-4 : datacenter2 cluster.schemaChange("CREATE KEYSPACE " + KEYSPACE + " WITH replication = {'class': 'NetworkTopologyStrategy', 'datacenter1':2, 'datacenter2':0}"); cluster.schemaChange("CREATE TABLE " + KEYSPACE + ".tbl (id int PRIMARY KEY, i int)"); for (int i = 0; i < 10; i++) cluster.coordinator(1).execute("INSERT INTO " + KEYSPACE + ".tbl (id, i) VALUES (?, ?)", ConsistencyLevel.ALL, i, i); cluster.forEach(i -> i.flush(KEYSPACE)); // choose a node in the DC that doesn't have any replicas IInvokableInstance node = cluster.get(1); Assertions.assertThat(node.config().localDatacenter()).isEqualTo("datacenter1"); node.nodetoolResult("repair", "-full", "--ignore-unreplicated-keyspaces", "-st", "0", "-et", "1000", KEYSPACE, "tbl") .asserts().success(); } } } {code} > Add flag to ignore unreplicated keyspaces during repair > ------------------------------------------------------- > > Key: CASSANDRA-15160 > URL: https://issues.apache.org/jira/browse/CASSANDRA-15160 > Project: Cassandra > Issue Type: Improvement > Components: Consistency/Repair > Reporter: Marcus Eriksson > Assignee: Marcus Eriksson > Priority: Normal > > When a repair is triggered on a node in 'dc2' for a keyspace with replication > factor {'dc1':3, 'dc2':0} we just ignore the repair in versions < 4. In 4.0 > we fail the repair to make sure the operator does not think the keyspace is > fully repaired. 
> There might be tooling that relies on the old behaviour though, so we should > add a flag to ignore those unreplicated keyspaces > -- This message was sent by Atlassian Jira (v8.3.4#803005) --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@cassandra.apache.org For additional commands, e-mail: commits-h...@cassandra.apache.org