[ https://issues.apache.org/jira/browse/CASSANDRA-14831?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
David Capwell reassigned CASSANDRA-14831: ----------------------------------------- Assignee: (was: David Capwell) > Nodetool repair hangs with java.net.SocketException: End-of-stream reached > -------------------------------------------------------------------------- > > Key: CASSANDRA-14831 > URL: https://issues.apache.org/jira/browse/CASSANDRA-14831 > Project: Cassandra > Issue Type: Bug > Components: Consistency/Repair > Reporter: Tania S Engel > Priority: Normal > Fix For: 3.11.x > > Attachments: Cassandra - 14831 Logs.mht > > > Using Cassandra 3.11.1. > Ran >nodetool repair <keyspacename> on a small 3 node cluster from node > 3eef. Node 9160 and 3f5e experienced a stream failure. > *NODE 9160:* > ERROR [STREAM-IN-/fd70:616e:6761:6561:ae1f:6bff:fe12:3f5e:7000] 2018-10-16 > 01:45:00,400 StreamSession.java:593 - [Stream > #103fe070-d0e5-11e8-a993-5929a1c131b4] Streaming error occurred on session > with peer fd70:616e:6761:6561:ae1f:6bff:fe12:3f5e > *java.net.SocketException: End-of-stream reached* > at > org.apache.cassandra.streaming.messages.StreamMessage.deserialize(StreamMessage.java:71) > ~[apache-cassandra-3.11.1.jar:3.11.1] > at > org.apache.cassandra.streaming.ConnectionHandler$IncomingMessageHandler.run(ConnectionHandler.java:311) > ~[apache-cassandra-3.11.1.jar:3.11.1] > at java.lang.Thread.run(Thread.java:748) [na:1.8.0_152] > > *NODE 3f5e:* > ERROR [STREAM-IN-/fd70:616e:6761:6561:ec4:7aff:fece:9160:59676] 2018-10-16 > 01:45:09,474 StreamSession.java:593 - [Stream > #103ef610-d0e5-11e8-a993-5929a1c131b4] Streaming error occurred on session > with peer fd70:616e:6761:6561:ec4:7aff:fece:9160 > java.io.IOException: An existing connection was forcibly closed by the remote > host > at sun.nio.ch.SocketDispatcher.read0(Native Method) ~[na:1.8.0_152] > at sun.nio.ch.SocketDispatcher.read(SocketDispatcher.java:43) ~[na:1.8.0_152] > at sun.nio.ch.IOUtil.readIntoNativeBuffer(IOUtil.java:223) ~[na:1.8.0_152] > at sun.nio.ch.IOUtil.read(IOUtil.java:197) 
~[na:1.8.0_152] > at sun.nio.ch.SocketChannelImpl.read(SocketChannelImpl.java:380) > ~[na:1.8.0_152] > at sun.nio.ch.SocketAdaptor$SocketInputStream.read(SocketAdaptor.java:206) > ~[na:1.8.0_152] > at sun.nio.ch.ChannelInputStream.read(ChannelInputStream.java:103) > ~[na:1.8.0_152] > at java.nio.channels.Channels$ReadableByteChannelImpl.read(Channels.java:385) > ~[na:1.8.0_152] > at > org.apache.cassandra.streaming.messages.StreamMessage.deserialize(StreamMessage.java:56) > ~[apache-cassandra-3.11.1.jar:3.11.1] > at > org.apache.cassandra.streaming.ConnectionHandler$IncomingMessageHandler.run(ConnectionHandler.java:311) > ~[apache-cassandra-3.11.1.jar:3.11.1] > at java.lang.Thread.run(Thread.java:748) [na:1.8.0_152] > > *NODE 3EEF:* > ERROR [RepairJobTask:14] 2018-10-16 01:45:00,457 RepairSession.java:281 - > [repair #f2ab3eb0-d0e4-11e8-9926-bf64f35712c1] Session completed with the > following error > org.apache.cassandra.exceptions.RepairException: [repair > #f2ab3eb0-d0e4-11e8-9926-bf64f35712c1 on logs/{color:#333333}XXXXXX{color}, > [(-8271925838625565988,-8266397600493941101], > (2290821710735817606,2299380749828706426] > …(-8701313305140908434,-8686533141993948378]]] Sync failed between > /fd70:616e:6761:6561:ec4:7aff:fece:9160 and > /fd70:616e:6761:6561:ae1f:6bff:fe12:3f5e > at > org.apache.cassandra.repair.RemoteSyncTask.syncComplete(RemoteSyncTask.java:67) > ~[apache-cassandra-3.11.1.jar:3.11.1] > at > org.apache.cassandra.repair.RepairSession.syncComplete(RepairSession.java:202) > ~[apache-cassandra-3.11.1.jar:3.11.1] > at > org.apache.cassandra.service.ActiveRepairService.handleMessage(ActiveRepairService.java:495) > ~[apache-cassandra-3.11.1.jar:3.11.1] > at > org.apache.cassandra.repair.RepairMessageVerbHandler.doVerb(RepairMessageVerbHandler.java:162) > ~[apache-cassandra-3.11.1.jar:3.11.1] > at > org.apache.cassandra.net.MessageDeliveryTask.run(MessageDeliveryTask.java:66) > ~[apache-cassandra-3.11.1.jar:3.11.1] > at 
java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511) > ~[na:1.8.0_152] > at java.util.concurrent.FutureTask.run(FutureTask.java:266) ~[na:1.8.0_152] > at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) > [na:1.8.0_152] > at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) > [na:1.8.0_152] > at > org.apache.cassandra.concurrent.NamedThreadFactory.lambda$threadLocalDeallocator$0(NamedThreadFactory.java:81) > [apache-cassandra-3.11.1.jar:3.11.1] > at java.lang.Thread.run(Thread.java:748) ~[na:1.8.0_152] > > ERROR [RepairJobTask:14] 2018-10-16 01:45:00,459 RepairRunnable.java:276 - > Repair session f2ab3eb0-d0e4-11e8-9926-bf64f35712c1 for range > [(-8271925838625565988,-8266397600493941101],…(-6146831664074703724,-6117107236121156255], > (4842256698807887573,4848113042863615717], > (-8701313305140908434,-8686533141993948378]] failed with error [repair > #f2ab3eb0-d0e4-11e8-9926-bf64f35712c1 on > logs/auditsearchlog,…(-8701313305140908434,-8686533141993948378]]] Sync > failed between /fd70:616e:6761:6561:ec4:7aff:fece:9160 and > /fd70:616e:6761:6561:ae1f:6bff:fe12:3f5e > org.apache.cassandra.exceptions.RepairException: [repair > #f2ab3eb0-d0e4-11e8-9926-bf64f35712c1 on logs/auditsearchlog, > [(-8271925838625565988,-8266397600493941101], > …(4842256698807887573,4848113042863615717], > (-8701313305140908434,-8686533141993948378]]] Sync failed between > /fd70:616e:6761:6561:ec4:7aff:fece:9160 and > /fd70:616e:6761:6561:ae1f:6bff:fe12:3f5e > at > org.apache.cassandra.repair.RemoteSyncTask.syncComplete(RemoteSyncTask.java:67) > ~[apache-cassandra-3.11.1.jar:3.11.1] > at > org.apache.cassandra.repair.RepairSession.syncComplete(RepairSession.java:202) > ~[apache-cassandra-3.11.1.jar:3.11.1] > at > org.apache.cassandra.service.ActiveRepairService.handleMessage(ActiveRepairService.java:495) > ~[apache-cassandra-3.11.1.jar:3.11.1] > at > 
org.apache.cassandra.repair.RepairMessageVerbHandler.doVerb(RepairMessageVerbHandler.java:162) > ~[apache-cassandra-3.11.1.jar:3.11.1] > at > org.apache.cassandra.net.MessageDeliveryTask.run(MessageDeliveryTask.java:66) > ~[apache-cassandra-3.11.1.jar:3.11.1] > at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511) > ~[na:1.8.0_152] > at java.util.concurrent.FutureTask.run(FutureTask.java:266) ~[na:1.8.0_152] > at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) > [na:1.8.0_152] > at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) > [na:1.8.0_152] > at > org.apache.cassandra.concurrent.NamedThreadFactory.lambda$threadLocalDeallocator$0(NamedThreadFactory.java:81) > [apache-cassandra-3.11.1.jar:3.11.1] > at java.lang.Thread.run(Thread.java:748) ~[na:1.8.0_152] > > *NODETOOL OUTPUT: shows the failure but then never returns.* > > [2018-10-16 01:43:57,310] Starting repair command #8 > (f26bc4b0-d0e4-11e8-9926-bf64f35712c1), repairing keyspace logs with repair > options (parallelism: parallel, primary range: false, incremental: true, job > threads: 1, ColumnFamilies: [], dataCenters: [], hosts: [], # of ranges: 768, > pull repair: false) > [2018-10-16 01:45:00,462] Repair session f2ab3eb0-d0e4-11e8-9926-bf64f35712c1 > for range … > (4842256698807887573,4848113042863615717], > (-8701313305140908434,-8686533141993948378]] failed with error [repair > #f2ab3eb0-d0e4-11e8-9926-bf64f35712c1 on logs/XXXXXX, > [(-8271925838625565988,-8266397600493941101], > (2290821710735817606,2299380749828706426], … > (4842256698807887573,4848113042863615717], > (-8701313305140908434,-8686533141993948378]]] Sync failed between > /fd70:616e:6761:6561:ec4:7aff:fece:9160 and > /fd70:616e:6761:6561:ae1f:6bff:fe12:3f5e (progress: 0%) > > The streaming does continue between the 3 nodes. See the attached partial > logs from all 3 nodes. Then it stops. We never see that the repair command > finished. 
Then about 15 hours later, we ran >nodetool repair logs again. It > failed. This time the error indicated there was an active repair session. The > only thing that seemed to get us out of this state was a reboot of all the > nodes. > > *ERROR [ValidationExecutor:27] 2018-10-16 17:14:39,241 > ActiveRepairService.java:558 - Cannot start multiple repair sessions over the > same sstables* > ERROR [ValidationExecutor:27] 2018-10-16 17:14:39,241 Validator.java:268 - > Failed creating a merkle tree for [repair > #da436780-d166-11e8-9926-bf64f35712c1 on logs/YYYYY, > [(-8271925838625565988,-8266397600493941101], ... > /fd70:616e:6761:6561:ae1f:6bff:fe12:3ee4 (see log for details) > ERROR [ValidationExecutor:27] 2018-10-16 17:14:39,244 > CassandraDaemon.java:228 - Exception in thread > Thread[ValidationExecutor:27,1,main] > java.lang.RuntimeException: Cannot start multiple repair sessions over the > same sstables > at > org.apache.cassandra.service.ActiveRepairService$ParentRepairSession.markSSTablesRepairing(ActiveRepairService.java:559) > ~[apache-cassandra-3.11.1.jar:3.11.1] > at > org.apache.cassandra.db.compaction.CompactionManager.getSSTablesToValidate(CompactionManager.java:1446) > ~[apache-cassandra-3.11.1.jar:3.11.1] > at > org.apache.cassandra.db.compaction.CompactionManager.doValidationCompaction(CompactionManager.java:1348) > ~[apache-cassandra-3.11.1.jar:3.11.1] > at > org.apache.cassandra.db.compaction.CompactionManager.access$700(CompactionManager.java:86) > ~[apache-cassandra-3.11.1.jar:3.11.1] > at > org.apache.cassandra.db.compaction.CompactionManager$13.call(CompactionManager.java:942) > ~[apache-cassandra-3.11.1.jar:3.11.1] > at java.util.concurrent.FutureTask.run(FutureTask.java:266) ~[na:1.8.0_152] > at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) > ~[na:1.8.0_152] > at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) > [na:1.8.0_152] > at > 
org.apache.cassandra.concurrent.NamedThreadFactory.lambda$threadLocalDeallocator$0(NamedThreadFactory.java:81) > [apache-cassandra-3.11.1.jar:3.11.1] > at java.lang.Thread.run(Thread.java:748) ~[na:1.8.0_152] -- This message was sent by Atlassian Jira (v8.3.4#803005) --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@cassandra.apache.org For additional commands, e-mail: commits-h...@cassandra.apache.org