[ https://issues.apache.org/jira/browse/CASSANDRA-19085?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17814613#comment-17814613 ]
Berenguer Blasi commented on CASSANDRA-19085: --------------------------------------------- Thanks for the review [~brandon.williams]. The SCM setting is only for CI to fully exercise with that setting. Only the gossiper fix is needed. Given that Jenkins is back I'll just merge this to prevent any failures arising from it and duplicating efforts. > In-jvm dtest RepairTest fails with storage_compatibility_mode: NONE > ------------------------------------------------------------------- > > Key: CASSANDRA-19085 > URL: https://issues.apache.org/jira/browse/CASSANDRA-19085 > Project: Cassandra > Issue Type: Bug > Components: Consistency/Repair > Reporter: Branimir Lambov > Assignee: Berenguer Blasi > Priority: Normal > Fix For: 5.0-rc, 5.x > > > More precisely, when the {{MessagingService}} version is set to {{VERSION_50}}, > the test fails with an exception that appears to be a genuine problem: > {code:java} > junit.framework.AssertionFailedError: Exception found expected null, but > was:<java.lang.RuntimeException: Did not get replies from all endpoints. 
> at > org.apache.cassandra.service.ActiveRepairService.lambda$prepareForRepair$2(ActiveRepairService.java:678) > at > org.apache.cassandra.concurrent.ExecutionFailure$1.run(ExecutionFailure.java:133) > at > java.base/java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:539) > at java.base/java.util.concurrent.FutureTask.run(FutureTask.java:264) > at > java.base/java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.run(ScheduledThreadPoolExecutor.java:304) > at > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1136) > at > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:635) > at > io.netty.util.concurrent.FastThreadLocalRunnable.run(FastThreadLocalRunnable.java:30) > at java.base/java.lang.Thread.run(Thread.java:833) > > > at > org.apache.cassandra.distributed.test.DistributedRepairUtils.lambda$assertParentRepairSuccess$4(DistributedRepairUtils.java:129) > at > org.apache.cassandra.distributed.test.DistributedRepairUtils.validateExistingParentRepair(DistributedRepairUtils.java:164) > at > org.apache.cassandra.distributed.test.DistributedRepairUtils.assertParentRepairSuccess(DistributedRepairUtils.java:124) > at > org.apache.cassandra.distributed.test.RepairTest.testForcedNormalRepairWithOneNodeDown(RepairTest.java:211) > at > java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method) > at > java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:77) > at > java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) > org.apache.cassandra.distributed.shared.ShutdownException: Uncaught > exceptions were thrown during test > at > org.apache.cassandra.distributed.impl.AbstractCluster.checkAndResetUncaughtExceptions(AbstractCluster.java:1117) > at > org.apache.cassandra.distributed.impl.AbstractCluster.close(AbstractCluster.java:1103) > at > 
org.apache.cassandra.distributed.test.RepairTest.closeCluster(RepairTest.java:160) > at > java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method) > at > java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:77) > at > java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) > Suppressed: java.lang.IllegalStateException: complete already: > (failure: java.lang.RuntimeException: Did not get replies from all endpoints.) > at > org.apache.cassandra.utils.concurrent.AsyncPromise.setSuccess(AsyncPromise.java:106) > at > org.apache.cassandra.service.ActiveRepairService$2.ack(ActiveRepairService.java:721) > at > org.apache.cassandra.service.ActiveRepairService$2.onResponse(ActiveRepairService.java:697) > at > org.apache.cassandra.repair.messages.RepairMessage$2.onResponse(RepairMessage.java:187) > at > org.apache.cassandra.net.ResponseVerbHandler.doVerb(ResponseVerbHandler.java:58) > at > org.apache.cassandra.net.InboundSink.lambda$new$0(InboundSink.java:78) > at > org.apache.cassandra.net.InboundSink$Filtered.accept(InboundSink.java:64) > at > org.apache.cassandra.net.InboundSink$Filtered.accept(InboundSink.java:50) > at > org.apache.cassandra.net.InboundSink.accept(InboundSink.java:97) > at > org.apache.cassandra.net.InboundSink.accept(InboundSink.java:45) > at > org.apache.cassandra.net.InboundMessageHandler$ProcessMessage.run(InboundMessageHandler.java:430) > at > org.apache.cassandra.concurrent.ExecutionFailure$1.run(ExecutionFailure.java:133) > at > org.apache.cassandra.concurrent.SEPWorker.run(SEPWorker.java:143) > at > io.netty.util.concurrent.FastThreadLocalRunnable.run(FastThreadLocalRunnable.java:30) > at java.base/java.lang.Thread.run(Thread.java:833){code} > The updates to {{pending}} in ActiveRepairService are not concurrency-safe, > but fixing them by doing e.g. 
> {code:java} > Index: src/java/org/apache/cassandra/service/ActiveRepairService.java > IDEA additional info: > Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP > <+>UTF-8 > =================================================================== > diff --git a/src/java/org/apache/cassandra/service/ActiveRepairService.java > b/src/java/org/apache/cassandra/service/ActiveRepairService.java > --- a/src/java/org/apache/cassandra/service/ActiveRepairService.java > (revision 04552046f74f596e69e2d98c3f3e522fb5888c99) > +++ b/src/java/org/apache/cassandra/service/ActiveRepairService.java (date > 1700839874092) > @@ -675,7 +675,7 @@ > if (promise.isDone()) > return; > String errorMsg = "Did not get replies from all endpoints."; > - if (promise.tryFailure(new RuntimeException(errorMsg))) > + if (pending.getAndSet(-1) > 0 && promise.tryFailure(new > RuntimeException(errorMsg))) > participateFailed(parentRepairSession, errorMsg); > }, timeoutMillis, MILLISECONDS); > > @@ -703,8 +703,8 @@ > failedNodes.add(from.toString()); > if (failureReason == RequestFailureReason.TIMEOUT) > { > - pending.set(-1); > - > promise.setFailure(failRepairException(parentRepairSession, "Did not get > replies from all endpoints.")); > + if (pending.getAndSet(-1) > 0) > + > promise.setFailure(failRepairException(parentRepairSession, "Did not get > replies from all endpoints.")); > } > else > { > {code} > still results in a test failure: > {code:java} > java.lang.AssertionError: Exception found expected null, but > was:<java.lang.RuntimeException: Did not get replies from all endpoints. 
at > org.apache.cassandra.service.ActiveRepairService.lambda$prepareForRepair$2(ActiveRepairService.java:678) > at > org.apache.cassandra.concurrent.ExecutionFailure$1.run(ExecutionFailure.java:133) > at > java.base/java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:515) > at java.base/java.util.concurrent.FutureTask.run(FutureTask.java:264) > at > java.base/java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.run(ScheduledThreadPoolExecutor.java:304) > at > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128) > at > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628) > at > io.netty.util.concurrent.FastThreadLocalRunnable.run(FastThreadLocalRunnable.java:30) > at java.base/java.lang.Thread.run(Thread.java:829)> > at org.junit.Assert.fail(Assert.java:88) at > org.junit.Assert.failNotNull(Assert.java:755) at > org.junit.Assert.assertNull(Assert.java:737) at > org.apache.cassandra.distributed.test.DistributedRepairUtils.lambda$assertParentRepairSuccess$4(DistributedRepairUtils.java:129) > at > org.apache.cassandra.distributed.test.DistributedRepairUtils.validateExistingParentRepair(DistributedRepairUtils.java:164) > at > org.apache.cassandra.distributed.test.DistributedRepairUtils.assertParentRepairSuccess(DistributedRepairUtils.java:124) > at > org.apache.cassandra.distributed.test.RepairTest.testForcedNormalRepairWithOneNodeDown(RepairTest.java:211) > at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native > Method) at > java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) > at > java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) > at java.base/java.lang.reflect.Method.invoke(Method.java:566) at > org.junit.runners.model.FrameworkMethod$1.runReflectiveCall(FrameworkMethod.java:50) > at > 
org.junit.internal.runners.model.ReflectiveCallable.run(ReflectiveCallable.java:12) > at > org.junit.runners.model.FrameworkMethod.invokeExplosively(FrameworkMethod.java:47) > at > org.junit.internal.runners.statements.InvokeMethod.evaluate(InvokeMethod.java:17) > at > org.junit.internal.runners.statements.RunAfters.evaluate(RunAfters.java:27) > at org.junit.runners.ParentRunner.runLeaf(ParentRunner.java:325) at > org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:78) > at > org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:57) > at org.junit.runners.ParentRunner$3.run(ParentRunner.java:290) at > org.junit.runners.ParentRunner$1.schedule(ParentRunner.java:71) at > org.junit.runners.ParentRunner.runChildren(ParentRunner.java:288) at > org.junit.runners.ParentRunner.access$000(ParentRunner.java:58) at > org.junit.runners.ParentRunner$2.evaluate(ParentRunner.java:268) at > org.junit.internal.runners.statements.RunBefores.evaluate(RunBefores.java:26) > at > org.junit.internal.runners.statements.RunAfters.evaluate(RunAfters.java:27) > at org.junit.runners.ParentRunner.run(ParentRunner.java:363) at > org.junit.runner.JUnitCore.run(JUnitCore.java:137) at > com.intellij.junit4.JUnit4IdeaTestRunner.startRunnerWithArgs(JUnit4IdeaTestRunner.java:69) > at > com.intellij.rt.junit.IdeaTestRunner$Repeater$1.execute(IdeaTestRunner.java:38) > at > com.intellij.rt.execution.junit.TestsRepeater.repeat(TestsRepeater.java:11) > at > com.intellij.rt.junit.IdeaTestRunner$Repeater.startRunnerWithArgs(IdeaTestRunner.java:35) > at > com.intellij.rt.junit.JUnitStarter.prepareStreamsAndStart(JUnitStarter.java:232) > at com.intellij.rt.junit.JUnitStarter.main(JUnitStarter.java:55) > {code} -- This message was sent by Atlassian Jira (v8.20.10#820010) --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@cassandra.apache.org For additional commands, e-mail: 
commits-h...@cassandra.apache.org