[ https://issues.apache.org/jira/browse/IGNITE-22319?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Nikita Amelchev updated IGNITE-22319: ------------------------------------- Fix Version/s: 2.17 > Node crashes if a snapshot restore cancelled due to network issues > ------------------------------------------------------------------- > > Key: IGNITE-22319 > URL: https://issues.apache.org/jira/browse/IGNITE-22319 > Project: Ignite > Issue Type: Bug > Reporter: Nikita Amelchev > Assignee: Nikita Amelchev > Priority: Major > Fix For: 2.17 > > > Node crashes if a snapshot restore (not the same topology) is cancelled due to > network issues. > There are several possible reasons: > 1. Assertion error (node left or socket timeout): > {noformat} > [2024-05-21T18:39:22,212][ERROR][disco-event-worker-#1479%snapshot.IgniteSnapshotRestoreFromRemoteTest2%][GridEventStorageManager] > Unexpected exception in listener notification for event: DiscoveryEvent > [evtNode=TcpDiscoveryNode [id=1b573ddc-b0eb-4909-978b-9d418c100000, > consistentId=snapshot.IgniteSnapshotRestoreFromRemoteTest0, addrs=ArrayList > [127.0.0.1], sockAddrs=HashSet [/127.0.0.1:47500], discPort=47500, order=1, > intOrder=1, loc=false, ver=2.17.0#20240512-sha1:27cef45b, isClient=false], > topVer=4, msgTemplate=null, > span=o.a.i.i.processors.tracing.NoopSpan@48b53ef5, nodeId8=9992b67a, msg=Node > left, type=NODE_LEFT, tstamp=1716305962203] > java.lang.AssertionError: null > at > org.apache.ignite.internal.processors.cache.persistence.snapshot.IgniteSnapshotManager$SequentialRemoteSnapshotManager.onException(IgniteSnapshotManager.java:4022) > ~[classes/:?] > at > org.apache.ignite.internal.managers.communication.GridIoManager.interruptReceiver(GridIoManager.java:2799) > ~[classes/:?] > at > org.apache.ignite.internal.managers.communication.GridIoManager$6.onEvent(GridIoManager.java:972) > ~[classes/:?] > at > org.apache.ignite.internal.managers.eventstorage.GridEventStorageManager$LocalListenerWrapper.onEvent(GridEventStorageManager.java:1403) > ~[classes/:?] 
> at > org.apache.ignite.internal.managers.eventstorage.GridEventStorageManager.notifyListeners(GridEventStorageManager.java:898) > [classes/:?] > at > org.apache.ignite.internal.managers.eventstorage.GridEventStorageManager.notifyListeners(GridEventStorageManager.java:883) > [classes/:?] > at > org.apache.ignite.internal.managers.eventstorage.GridEventStorageManager.record0(GridEventStorageManager.java:354) > [classes/:?] > at > org.apache.ignite.internal.managers.eventstorage.GridEventStorageManager.record(GridEventStorageManager.java:317) > [classes/:?] > at > org.apache.ignite.internal.managers.discovery.GridDiscoveryManager$DiscoveryWorker.recordEvent(GridDiscoveryManager.java:3036) > [classes/:?] > at > org.apache.ignite.internal.managers.discovery.GridDiscoveryManager$DiscoveryWorker.body0(GridDiscoveryManager.java:3223) > [classes/:?] > at > org.apache.ignite.internal.managers.discovery.GridDiscoveryManager$DiscoveryWorker.body(GridDiscoveryManager.java:3056) > [classes/:?] > at org.apache.ignite.internal.util.worker.GridWorker.run(GridWorker.java:125) > [classes/:?] > at java.base/java.lang.Thread.run(Thread.java:829) [?:?] > {noformat} > 2. Deadlock of system threads. 
> {noformat} > ##### DEADLOCKED Thread > [name="pub-#956%snapshot.IgniteSnapshotRestoreFromRemoteTest2%", id=1056, > state=BLOCKED, blockCnt=10, waitCnt=0] > Lock > [object=o.a.i.i.processors.cache.persistence.snapshot.IgniteSnapshotManager$RemoteSnapshotFilesRecevier@4a114695, > > ownerName=disco-event-worker-#853%snapshot.IgniteSnapshotRestoreFromRemoteTest2%, > ownerId=952] > at > app//o.a.i.i.processors.cache.persistence.snapshot.IgniteSnapshotManager$RemoteSnapshotFilesRecevier.init(IgniteSnapshotManager.java:3698) > at > app//o.a.i.i.processors.cache.persistence.snapshot.IgniteSnapshotManager$SequentialRemoteSnapshotManager.submit(IgniteSnapshotManager.java:3836) > - locked > o.a.i.i.processors.cache.persistence.snapshot.IgniteSnapshotManager$SequentialRemoteSnapshotManager@1da11e24 > at > app//o.a.i.i.processors.cache.persistence.snapshot.IgniteSnapshotManager$SequentialRemoteSnapshotManager.scheduleNext(IgniteSnapshotManager.java:3849) > - locked > o.a.i.i.processors.cache.persistence.snapshot.IgniteSnapshotManager$SequentialRemoteSnapshotManager@1da11e24 > at > app//o.a.i.i.processors.cache.persistence.snapshot.IgniteSnapshotManager$SequentialRemoteSnapshotManager$$Lambda$1787/0x00000008009c1c40.run(Unknown > Source) > at > app//o.a.i.i.util.future.GridFutureAdapter.lambda$listen$8a14a590$1(GridFutureAdapter.java:363) > at > app//o.a.i.i.util.future.GridFutureAdapter$$Lambda$1215/0x00000008007ba040.apply(Unknown > Source) > at > app//o.a.i.i.util.future.GridFutureAdapter.notifyListener(GridFutureAdapter.java:474) > at > app//o.a.i.i.util.future.GridFutureAdapter.unblock(GridFutureAdapter.java:350) > at > app//o.a.i.i.util.future.GridFutureAdapter.unblockAll(GridFutureAdapter.java:338) > at > app//o.a.i.i.util.future.GridFutureAdapter.onDone(GridFutureAdapter.java:586) > at > app//o.a.i.i.processors.cache.persistence.snapshot.IgniteSnapshotManager$RemoteSnapshotFilesRecevier.onDone(IgniteSnapshotManager.java:3773) > - locked > 
o.a.i.i.processors.cache.persistence.snapshot.IgniteSnapshotManager$RemoteSnapshotFilesRecevier@21a97efb > at > app//o.a.i.i.processors.cache.persistence.snapshot.IgniteSnapshotManager$RemoteSnapshotFilesRecevier.onDone(IgniteSnapshotManager.java:3642) > at > app//o.a.i.i.util.future.GridFutureAdapter.onDone(GridFutureAdapter.java:565) > at > app//o.a.i.i.util.future.GridFutureAdapter.onDone(GridFutureAdapter.java:553) > at > app//o.a.i.i.processors.cache.persistence.snapshot.IgniteSnapshotManager$RemoteSnapshotFilesRecevier.acceptException(IgniteSnapshotManager.java:3740) > - locked > o.a.i.i.processors.cache.persistence.snapshot.IgniteSnapshotManager$RemoteSnapshotFilesRecevier@21a97efb > at > app//o.a.i.i.processors.cache.persistence.snapshot.IgniteSnapshotManager$RemoteSnapshotFilesRecevier.acceptFile(IgniteSnapshotManager.java:3754) > - locked > o.a.i.i.processors.cache.persistence.snapshot.IgniteSnapshotManager$RemoteSnapshotFilesRecevier@21a97efb > at > app//o.a.i.i.processors.cache.persistence.snapshot.IgniteSnapshotManager$SequentialRemoteSnapshotManager$1.accept(IgniteSnapshotManager.java:4111) > at > app//o.a.i.i.processors.cache.persistence.snapshot.IgniteSnapshotManager$SequentialRemoteSnapshotManager$1.accept(IgniteSnapshotManager.java:4098) > at > app//o.a.i.i.managers.communication.FileReceiver.receive(FileReceiver.java:95) > at > app//o.a.i.i.managers.communication.GridIoManager.receiveFromChannel(GridIoManager.java:2948) > at > app//o.a.i.i.managers.communication.GridIoManager.processOpenedChannel(GridIoManager.java:2877) > at > app//o.a.i.i.managers.communication.GridIoManager$7.run(GridIoManager.java:1231) > at > java.base@11.0.23/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128) > at > java.base@11.0.23/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628) > at java.base@11.0.23/java.lang.Thread.run(Thread.java:829) > ##### DEADLOCKED Thread > 
[name="disco-event-worker-#853%snapshot.IgniteSnapshotRestoreFromRemoteTest2%", > id=952, state=BLOCKED, blockCnt=1, waitCnt=74] > Lock > [object=o.a.i.i.processors.cache.persistence.snapshot.IgniteSnapshotManager$SequentialRemoteSnapshotManager@1da11e24, > ownerName=pub-#956%snapshot.IgniteSnapshotRestoreFromRemoteTest2%, > ownerId=1056] > at > app//o.a.i.i.processors.cache.persistence.snapshot.IgniteSnapshotManager$SequentialRemoteSnapshotManager.scheduleNext(IgniteSnapshotManager.java:3844) > at > app//o.a.i.i.processors.cache.persistence.snapshot.IgniteSnapshotManager$SequentialRemoteSnapshotManager$$Lambda$1787/0x00000008009c1c40.run(Unknown > Source) > at > app//o.a.i.i.util.future.GridFutureAdapter.lambda$listen$8a14a590$1(GridFutureAdapter.java:363) > at > app//o.a.i.i.util.future.GridFutureAdapter$$Lambda$1215/0x00000008007ba040.apply(Unknown > Source) > at > app//o.a.i.i.util.future.GridFutureAdapter.notifyListener(GridFutureAdapter.java:474) > at > app//o.a.i.i.util.future.GridFutureAdapter.unblock(GridFutureAdapter.java:350) > at > app//o.a.i.i.util.future.GridFutureAdapter.unblockAll(GridFutureAdapter.java:338) > at > app//o.a.i.i.util.future.GridFutureAdapter.onDone(GridFutureAdapter.java:586) > at > app//o.a.i.i.processors.cache.persistence.snapshot.IgniteSnapshotManager$RemoteSnapshotFilesRecevier.onDone(IgniteSnapshotManager.java:3773) > - locked > o.a.i.i.processors.cache.persistence.snapshot.IgniteSnapshotManager$RemoteSnapshotFilesRecevier@4a114695 > at > app//o.a.i.i.processors.cache.persistence.snapshot.IgniteSnapshotManager$RemoteSnapshotFilesRecevier.onDone(IgniteSnapshotManager.java:3642) > at > app//o.a.i.i.util.future.GridFutureAdapter.onDone(GridFutureAdapter.java:565) > at > app//o.a.i.i.util.future.GridFutureAdapter.onDone(GridFutureAdapter.java:553) > at > app//o.a.i.i.processors.cache.persistence.snapshot.IgniteSnapshotManager$RemoteSnapshotFilesRecevier.acceptException(IgniteSnapshotManager.java:3740) > - locked > 
o.a.i.i.processors.cache.persistence.snapshot.IgniteSnapshotManager$RemoteSnapshotFilesRecevier@4a114695 > at > app//o.a.i.i.processors.cache.persistence.snapshot.IgniteSnapshotManager$SequentialRemoteSnapshotManager.lambda$onNodeLeft$0(IgniteSnapshotManager.java:3888) > at > app//o.a.i.i.processors.cache.persistence.snapshot.IgniteSnapshotManager$SequentialRemoteSnapshotManager$$Lambda$1658/0x0000000800926840.accept(Unknown > Source) > at java.base@11.0.23/java.lang.Iterable.forEach(Iterable.java:75) > at > app//o.a.i.i.processors.cache.persistence.snapshot.IgniteSnapshotManager$SequentialRemoteSnapshotManager.onNodeLeft(IgniteSnapshotManager.java:3886) > at > app//o.a.i.i.processors.cache.persistence.snapshot.IgniteSnapshotManager.lambda$start0$20(IgniteSnapshotManager.java:668) > at > app//o.a.i.i.processors.cache.persistence.snapshot.IgniteSnapshotManager$$Lambda$547/0x00000008004edc40.onEvent(Unknown > Source) > at > app//o.a.i.i.managers.eventstorage.GridEventStorageManager$DiscoveryListenerWrapper.onEvent(GridEventStorageManager.java:1453) > at > app//o.a.i.i.managers.eventstorage.GridEventStorageManager.notifyListeners(GridEventStorageManager.java:898) > at > app//o.a.i.i.managers.eventstorage.GridEventStorageManager.notifyListeners(GridEventStorageManager.java:883) > at > app//o.a.i.i.managers.eventstorage.GridEventStorageManager.record0(GridEventStorageManager.java:354) > at > app//o.a.i.i.managers.eventstorage.GridEventStorageManager.record(GridEventStorageManager.java:317) > at > app//o.a.i.i.managers.discovery.GridDiscoveryManager$DiscoveryWorker.recordEvent(GridDiscoveryManager.java:3036) > at > app//o.a.i.i.managers.discovery.GridDiscoveryManager$DiscoveryWorker.body0(GridDiscoveryManager.java:3223) > at > app//o.a.i.i.managers.discovery.GridDiscoveryManager$DiscoveryWorker.body(GridDiscoveryManager.java:3056) > at app//o.a.i.i.util.worker.GridWorker.run(GridWorker.java:125) > at java.base@11.0.23/java.lang.Thread.run(Thread.java:829) > {noformat} -- 
This message was sent by Atlassian Jira (v8.20.10#820010)