[ https://issues.apache.org/jira/browse/CASSANDRA-18792?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17765556#comment-17765556 ]
Brandon Williams edited comment on CASSANDRA-18792 at 9/15/23 10:59 AM: ------------------------------------------------------------------------ [Here|https://app.circleci.com/pipelines/github/driftx/cassandra/1269/workflows/d3c9e9e5-cb6b-43fd-84cd-0487d421f7a3/jobs/51409] is another run that confirms it wasn't just luck. Created CASSANDRA-18854 to handle this. was (Author: brandon.williams): [Here|https://app.circleci.com/pipelines/github/driftx/cassandra/1269/workflows/d3c9e9e5-cb6b-43fd-84cd-0487d421f7a3/jobs/51409] is another run that confirms it wasn't just luck. > Test failure: > transient_replication_ring_test.py::TestTransientReplicationRing::test_move_forwards_between_and_cleanup > ---------------------------------------------------------------------------------------------------------------------- > > Key: CASSANDRA-18792 > URL: https://issues.apache.org/jira/browse/CASSANDRA-18792 > Project: Cassandra > Issue Type: Bug > Components: Test/dtest/python > Reporter: Andres de la Peña > Assignee: Brandon Williams > Priority: Normal > Fix For: 5.0.x, 5.x > > > The Python dtest {{Test failure: > transient_replication_ring_test.py::TestTransientReplicationRing::test_move_forwards_between_and_cleanup}} > seems to be flaky at least in {{trunk}}: > * > https://app.circleci.com/pipelines/github/instaclustr/cassandra/2993/workflows/80ac4db3-fc3d-4908-bc39-dfff6ab88871/jobs/105464/tests > * > https://app.circleci.com/pipelines/github/adelapena/cassandra/3128/workflows/b0cf2754-81fd-491e-bac4-cc7fe8b0ac1b/jobs/70390/tests > {code} > ccmlib.node.ToolError: Subprocess ['nodetool', '-h', 'localhost', '-p', > '7200', 'cleanup'] exited with non-zero status; exit status: 2; > stderr: error: Node is involved in cluster membership changes. Not safe to > run cleanup. > -- StackTrace -- > java.lang.RuntimeException: Node is involved in cluster membership changes. > Not safe to run cleanup. 
> at > org.apache.cassandra.service.StorageService.forceKeyspaceCleanup(StorageService.java:4037) > at > java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method) > at > java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:77) > at > java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) > at java.base/java.lang.reflect.Method.invoke(Method.java:568) > at sun.reflect.misc.Trampoline.invoke(MethodUtil.java:72) > at jdk.internal.reflect.GeneratedMethodAccessor1.invoke(Unknown Source) > at > java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) > at java.base/java.lang.reflect.Method.invoke(Method.java:568) > at java.base/sun.reflect.misc.MethodUtil.invoke(MethodUtil.java:262) > at > java.management/com.sun.jmx.mbeanserver.StandardMBeanIntrospector.invokeM2(StandardMBeanIntrospector.java:112) > at > java.management/com.sun.jmx.mbeanserver.StandardMBeanIntrospector.invokeM2(StandardMBeanIntrospector.java:46) > at > java.management/com.sun.jmx.mbeanserver.MBeanIntrospector.invokeM(MBeanIntrospector.java:237) > at > java.management/com.sun.jmx.mbeanserver.PerInterface.invoke(PerInterface.java:138) > at > java.management/com.sun.jmx.mbeanserver.MBeanSupport.invoke(MBeanSupport.java:252) > at > java.management/com.sun.jmx.interceptor.DefaultMBeanServerInterceptor.invoke(DefaultMBeanServerInterceptor.java:814) > at > java.management/com.sun.jmx.mbeanserver.JmxMBeanServer.invoke(JmxMBeanServer.java:802) > at > java.management.rmi/javax.management.remote.rmi.RMIConnectionImpl.doOperation(RMIConnectionImpl.java:1472) > at > java.management.rmi/javax.management.remote.rmi.RMIConnectionImpl$PrivilegedOperation.run(RMIConnectionImpl.java:1310) > at > java.management.rmi/javax.management.remote.rmi.RMIConnectionImpl.doPrivilegedOperation(RMIConnectionImpl.java:1405) > at > 
java.management.rmi/javax.management.remote.rmi.RMIConnectionImpl.invoke(RMIConnectionImpl.java:829) > at > java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method) > at > java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:77) > at > java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) > at java.base/java.lang.reflect.Method.invoke(Method.java:568) > at > java.rmi/sun.rmi.server.UnicastServerRef.dispatch(UnicastServerRef.java:360) > at java.rmi/sun.rmi.transport.Transport$1.run(Transport.java:200) > at java.rmi/sun.rmi.transport.Transport$1.run(Transport.java:197) > at > java.base/java.security.AccessController.doPrivileged(AccessController.java:712) > at java.rmi/sun.rmi.transport.Transport.serviceCall(Transport.java:196) > at > java.rmi/sun.rmi.transport.tcp.TCPTransport.handleMessages(TCPTransport.java:587) > at > java.rmi/sun.rmi.transport.tcp.TCPTransport$ConnectionHandler.run0(TCPTransport.java:828) > at > java.rmi/sun.rmi.transport.tcp.TCPTransport$ConnectionHandler.lambda$run$0(TCPTransport.java:705) > at > java.base/java.security.AccessController.doPrivileged(AccessController.java:399) > at > java.rmi/sun.rmi.transport.tcp.TCPTransport$ConnectionHandler.run(TCPTransport.java:704) > at > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1136) > at > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:635) > at java.base/java.lang.Thread.run(Thread.java:833) > self = <transient_replication_ring_test.TestTransientReplicationRing object > at 0x7f6d38295710> > @flaky(max_runs=1) > @pytest.mark.no_vnodes > def test_move_forwards_between_and_cleanup(self): > """Test moving a node forwards past a neighbor token""" > move_token = '00025' > expected_after_move = [gen_expected(range(0, 26), range(31, 40, 2)), > gen_expected(range(0, 21, 2), range(31, 40)), > gen_expected(range(1, 11, 2), 
range(11, 21, > 2), range(21, 31)), > gen_expected(range(21, 26, 2), range(26, 40))] > expected_after_repair = [gen_expected(range(0, 26)), > gen_expected(range(0, 21), range(31, 40)), > gen_expected(range(21, 31),), > gen_expected(range(26, 40))] > > self.move_test(move_token, expected_after_move, expected_after_repair) > transient_replication_ring_test.py:291: > _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ > _ > transient_replication_ring_test.py:268: in move_test > cleanup_nodes(nodes) > transient_replication_ring_test.py:43: in cleanup_nodes > node.nodetool('cleanup') > ../env3.6/lib/python3.6/site-packages/ccmlib/node.py:1018: in nodetool > return handle_external_tool_process(p, ['nodetool', '-h', 'localhost', > '-p', str(self.jmx_port)] + shlex.split(cmd)) > _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ > _ > process = <subprocess.Popen object at 0x7f6d376055c0> > cmd_args = ['nodetool', '-h', 'localhost', '-p', '7200', 'cleanup'] > def handle_external_tool_process(process, cmd_args): > out, err = process.communicate() > if (out is not None) and isinstance(out, bytes): > out = out.decode() > if (err is not None) and isinstance(err, bytes): > err = err.decode() > rc = process.returncode > > if rc != 0: > > raise ToolError(cmd_args, rc, out, err) > E ccmlib.node.ToolError: Subprocess ['nodetool', '-h', 'localhost', > '-p', '7200', 'cleanup'] exited with non-zero status; exit status: 2; > E stderr: error: Node is involved in cluster membership changes. > Not safe to run cleanup. > E -- StackTrace -- > E java.lang.RuntimeException: Node is involved in cluster > membership changes. Not safe to run cleanup. 
> E at > org.apache.cassandra.service.StorageService.forceKeyspaceCleanup(StorageService.java:4037) > E at > java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method) > E at > java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:77) > E at > java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) > E at java.base/java.lang.reflect.Method.invoke(Method.java:568) > E at sun.reflect.misc.Trampoline.invoke(MethodUtil.java:72) > E at jdk.internal.reflect.GeneratedMethodAccessor1.invoke(Unknown > Source) > E at > java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) > E at java.base/java.lang.reflect.Method.invoke(Method.java:568) > E at > java.base/sun.reflect.misc.MethodUtil.invoke(MethodUtil.java:262) > E at > java.management/com.sun.jmx.mbeanserver.StandardMBeanIntrospector.invokeM2(StandardMBeanIntrospector.java:112) > E at > java.management/com.sun.jmx.mbeanserver.StandardMBeanIntrospector.invokeM2(StandardMBeanIntrospector.java:46) > E at > java.management/com.sun.jmx.mbeanserver.MBeanIntrospector.invokeM(MBeanIntrospector.java:237) > E at > java.management/com.sun.jmx.mbeanserver.PerInterface.invoke(PerInterface.java:138) > E at > java.management/com.sun.jmx.mbeanserver.MBeanSupport.invoke(MBeanSupport.java:252) > E at > java.management/com.sun.jmx.interceptor.DefaultMBeanServerInterceptor.invoke(DefaultMBeanServerInterceptor.java:814) > E at > java.management/com.sun.jmx.mbeanserver.JmxMBeanServer.invoke(JmxMBeanServer.java:802) > E at > java.management.rmi/javax.management.remote.rmi.RMIConnectionImpl.doOperation(RMIConnectionImpl.java:1472) > E at > java.management.rmi/javax.management.remote.rmi.RMIConnectionImpl$PrivilegedOperation.run(RMIConnectionImpl.java:1310) > E at > java.management.rmi/javax.management.remote.rmi.RMIConnectionImpl.doPrivilegedOperation(RMIConnectionImpl.java:1405) > E at > 
java.management.rmi/javax.management.remote.rmi.RMIConnectionImpl.invoke(RMIConnectionImpl.java:829) > E at > java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method) > E at > java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:77) > E at > java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) > E at java.base/java.lang.reflect.Method.invoke(Method.java:568) > E at > java.rmi/sun.rmi.server.UnicastServerRef.dispatch(UnicastServerRef.java:360) > E at > java.rmi/sun.rmi.transport.Transport$1.run(Transport.java:200) > E at > java.rmi/sun.rmi.transport.Transport$1.run(Transport.java:197) > E at > java.base/java.security.AccessController.doPrivileged(AccessController.java:712) > E at > java.rmi/sun.rmi.transport.Transport.serviceCall(Transport.java:196) > E at > java.rmi/sun.rmi.transport.tcp.TCPTransport.handleMessages(TCPTransport.java:587) > E at > java.rmi/sun.rmi.transport.tcp.TCPTransport$ConnectionHandler.run0(TCPTransport.java:828) > E at > java.rmi/sun.rmi.transport.tcp.TCPTransport$ConnectionHandler.lambda$run$0(TCPTransport.java:705) > E at > java.base/java.security.AccessController.doPrivileged(AccessController.java:399) > E at > java.rmi/sun.rmi.transport.tcp.TCPTransport$ConnectionHandler.run(TCPTransport.java:704) > E at > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1136) > E at > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:635) > E at java.base/java.lang.Thread.run(Thread.java:833) > ../env3.6/lib/python3.6/site-packages/ccmlib/node.py:2318: ToolError > {code} > This hasn't been seen yet on Butler but on ad-hoc PRs. 
It can also be reproduced with the multiplexer: > {code} > .circleci/generate.sh -p \ > -e REPEATED_DTESTS_COUNT=500 \ > -e REPEATED_DTESTS=transient_replication_ring_test.py::TestTransientReplicationRing::test_move_forwards_between_and_cleanup > {code} -- This message was sent by Atlassian Jira (v8.20.10#820010) --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@cassandra.apache.org For additional commands, e-mail: commits-h...@cassandra.apache.org