[ 
https://issues.apache.org/jira/browse/IGNITE-22319?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Nikita Amelchev updated IGNITE-22319:
-------------------------------------
    Fix Version/s: 2.17

> Node crashes if a snapshot restore is cancelled due to network issues 
> ----------------------------------------------------------------------
>
>                 Key: IGNITE-22319
>                 URL: https://issues.apache.org/jira/browse/IGNITE-22319
>             Project: Ignite
>          Issue Type: Bug
>            Reporter: Nikita Amelchev
>            Assignee: Nikita Amelchev
>            Priority: Major
>             Fix For: 2.17
>
>
> Node crashes if a snapshot restore (not on the same topology) is cancelled 
> due to network issues.
> There are several possible reasons:
> 1. Assertion error (node left or socket timeout):
> {noformat}
> [2024-05-21T18:39:22,212][ERROR][disco-event-worker-#1479%snapshot.IgniteSnapshotRestoreFromRemoteTest2%][GridEventStorageManager]
>  Unexpected exception in listener notification for event: DiscoveryEvent 
> [evtNode=TcpDiscoveryNode [id=1b573ddc-b0eb-4909-978b-9d418c100000, 
> consistentId=snapshot.IgniteSnapshotRestoreFromRemoteTest0, addrs=ArrayList 
> [127.0.0.1], sockAddrs=HashSet [/127.0.0.1:47500], discPort=47500, order=1, 
> intOrder=1, loc=false, ver=2.17.0#20240512-sha1:27cef45b, isClient=false], 
> topVer=4, msgTemplate=null, 
> span=o.a.i.i.processors.tracing.NoopSpan@48b53ef5, nodeId8=9992b67a, msg=Node 
> left, type=NODE_LEFT, tstamp=1716305962203]
> java.lang.AssertionError: null
> at 
> org.apache.ignite.internal.processors.cache.persistence.snapshot.IgniteSnapshotManager$SequentialRemoteSnapshotManager.onException(IgniteSnapshotManager.java:4022)
>  ~[classes/:?]
> at 
> org.apache.ignite.internal.managers.communication.GridIoManager.interruptReceiver(GridIoManager.java:2799)
>  ~[classes/:?]
> at 
> org.apache.ignite.internal.managers.communication.GridIoManager$6.onEvent(GridIoManager.java:972)
>  ~[classes/:?]
> at 
> org.apache.ignite.internal.managers.eventstorage.GridEventStorageManager$LocalListenerWrapper.onEvent(GridEventStorageManager.java:1403)
>  ~[classes/:?]
> at 
> org.apache.ignite.internal.managers.eventstorage.GridEventStorageManager.notifyListeners(GridEventStorageManager.java:898)
>  [classes/:?]
> at 
> org.apache.ignite.internal.managers.eventstorage.GridEventStorageManager.notifyListeners(GridEventStorageManager.java:883)
>  [classes/:?]
> at 
> org.apache.ignite.internal.managers.eventstorage.GridEventStorageManager.record0(GridEventStorageManager.java:354)
>  [classes/:?]
> at 
> org.apache.ignite.internal.managers.eventstorage.GridEventStorageManager.record(GridEventStorageManager.java:317)
>  [classes/:?]
> at 
> org.apache.ignite.internal.managers.discovery.GridDiscoveryManager$DiscoveryWorker.recordEvent(GridDiscoveryManager.java:3036)
>  [classes/:?]
> at 
> org.apache.ignite.internal.managers.discovery.GridDiscoveryManager$DiscoveryWorker.body0(GridDiscoveryManager.java:3223)
>  [classes/:?]
> at 
> org.apache.ignite.internal.managers.discovery.GridDiscoveryManager$DiscoveryWorker.body(GridDiscoveryManager.java:3056)
>  [classes/:?]
> at org.apache.ignite.internal.util.worker.GridWorker.run(GridWorker.java:125) 
> [classes/:?]
> at java.base/java.lang.Thread.run(Thread.java:829) [?:?]
> {noformat}
> 2. Deadlock of system threads:
> {noformat}
> ##### DEADLOCKED Thread 
> [name="pub-#956%snapshot.IgniteSnapshotRestoreFromRemoteTest2%", id=1056, 
> state=BLOCKED, blockCnt=10, waitCnt=0]
>     Lock 
> [object=o.a.i.i.processors.cache.persistence.snapshot.IgniteSnapshotManager$RemoteSnapshotFilesRecevier@4a114695,
>  
> ownerName=disco-event-worker-#853%snapshot.IgniteSnapshotRestoreFromRemoteTest2%,
>  ownerId=952]
>         at 
> app//o.a.i.i.processors.cache.persistence.snapshot.IgniteSnapshotManager$RemoteSnapshotFilesRecevier.init(IgniteSnapshotManager.java:3698)
>         at 
> app//o.a.i.i.processors.cache.persistence.snapshot.IgniteSnapshotManager$SequentialRemoteSnapshotManager.submit(IgniteSnapshotManager.java:3836)
>         - locked 
> o.a.i.i.processors.cache.persistence.snapshot.IgniteSnapshotManager$SequentialRemoteSnapshotManager@1da11e24
>         at 
> app//o.a.i.i.processors.cache.persistence.snapshot.IgniteSnapshotManager$SequentialRemoteSnapshotManager.scheduleNext(IgniteSnapshotManager.java:3849)
>         - locked 
> o.a.i.i.processors.cache.persistence.snapshot.IgniteSnapshotManager$SequentialRemoteSnapshotManager@1da11e24
>         at 
> app//o.a.i.i.processors.cache.persistence.snapshot.IgniteSnapshotManager$SequentialRemoteSnapshotManager$$Lambda$1787/0x00000008009c1c40.run(Unknown
>  Source)
>         at 
> app//o.a.i.i.util.future.GridFutureAdapter.lambda$listen$8a14a590$1(GridFutureAdapter.java:363)
>         at 
> app//o.a.i.i.util.future.GridFutureAdapter$$Lambda$1215/0x00000008007ba040.apply(Unknown
>  Source)
>         at 
> app//o.a.i.i.util.future.GridFutureAdapter.notifyListener(GridFutureAdapter.java:474)
>         at 
> app//o.a.i.i.util.future.GridFutureAdapter.unblock(GridFutureAdapter.java:350)
>         at 
> app//o.a.i.i.util.future.GridFutureAdapter.unblockAll(GridFutureAdapter.java:338)
>         at 
> app//o.a.i.i.util.future.GridFutureAdapter.onDone(GridFutureAdapter.java:586)
>         at 
> app//o.a.i.i.processors.cache.persistence.snapshot.IgniteSnapshotManager$RemoteSnapshotFilesRecevier.onDone(IgniteSnapshotManager.java:3773)
>         - locked 
> o.a.i.i.processors.cache.persistence.snapshot.IgniteSnapshotManager$RemoteSnapshotFilesRecevier@21a97efb
>         at 
> app//o.a.i.i.processors.cache.persistence.snapshot.IgniteSnapshotManager$RemoteSnapshotFilesRecevier.onDone(IgniteSnapshotManager.java:3642)
>         at 
> app//o.a.i.i.util.future.GridFutureAdapter.onDone(GridFutureAdapter.java:565)
>         at 
> app//o.a.i.i.util.future.GridFutureAdapter.onDone(GridFutureAdapter.java:553)
>         at 
> app//o.a.i.i.processors.cache.persistence.snapshot.IgniteSnapshotManager$RemoteSnapshotFilesRecevier.acceptException(IgniteSnapshotManager.java:3740)
>         - locked 
> o.a.i.i.processors.cache.persistence.snapshot.IgniteSnapshotManager$RemoteSnapshotFilesRecevier@21a97efb
>         at 
> app//o.a.i.i.processors.cache.persistence.snapshot.IgniteSnapshotManager$RemoteSnapshotFilesRecevier.acceptFile(IgniteSnapshotManager.java:3754)
>         - locked 
> o.a.i.i.processors.cache.persistence.snapshot.IgniteSnapshotManager$RemoteSnapshotFilesRecevier@21a97efb
>         at 
> app//o.a.i.i.processors.cache.persistence.snapshot.IgniteSnapshotManager$SequentialRemoteSnapshotManager$1.accept(IgniteSnapshotManager.java:4111)
>         at 
> app//o.a.i.i.processors.cache.persistence.snapshot.IgniteSnapshotManager$SequentialRemoteSnapshotManager$1.accept(IgniteSnapshotManager.java:4098)
>         at 
> app//o.a.i.i.managers.communication.FileReceiver.receive(FileReceiver.java:95)
>         at 
> app//o.a.i.i.managers.communication.GridIoManager.receiveFromChannel(GridIoManager.java:2948)
>         at 
> app//o.a.i.i.managers.communication.GridIoManager.processOpenedChannel(GridIoManager.java:2877)
>         at 
> app//o.a.i.i.managers.communication.GridIoManager$7.run(GridIoManager.java:1231)
>         at 
> java.base@11.0.23/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128)
>         at 
> java.base@11.0.23/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628)
>         at java.base@11.0.23/java.lang.Thread.run(Thread.java:829)
> ##### DEADLOCKED Thread 
> [name="disco-event-worker-#853%snapshot.IgniteSnapshotRestoreFromRemoteTest2%",
>  id=952, state=BLOCKED, blockCnt=1, waitCnt=74]
>     Lock 
> [object=o.a.i.i.processors.cache.persistence.snapshot.IgniteSnapshotManager$SequentialRemoteSnapshotManager@1da11e24,
>  ownerName=pub-#956%snapshot.IgniteSnapshotRestoreFromRemoteTest2%, 
> ownerId=1056]
>         at 
> app//o.a.i.i.processors.cache.persistence.snapshot.IgniteSnapshotManager$SequentialRemoteSnapshotManager.scheduleNext(IgniteSnapshotManager.java:3844)
>         at 
> app//o.a.i.i.processors.cache.persistence.snapshot.IgniteSnapshotManager$SequentialRemoteSnapshotManager$$Lambda$1787/0x00000008009c1c40.run(Unknown
>  Source)
>         at 
> app//o.a.i.i.util.future.GridFutureAdapter.lambda$listen$8a14a590$1(GridFutureAdapter.java:363)
>         at 
> app//o.a.i.i.util.future.GridFutureAdapter$$Lambda$1215/0x00000008007ba040.apply(Unknown
>  Source)
>         at 
> app//o.a.i.i.util.future.GridFutureAdapter.notifyListener(GridFutureAdapter.java:474)
>         at 
> app//o.a.i.i.util.future.GridFutureAdapter.unblock(GridFutureAdapter.java:350)
>         at 
> app//o.a.i.i.util.future.GridFutureAdapter.unblockAll(GridFutureAdapter.java:338)
>         at 
> app//o.a.i.i.util.future.GridFutureAdapter.onDone(GridFutureAdapter.java:586)
>         at 
> app//o.a.i.i.processors.cache.persistence.snapshot.IgniteSnapshotManager$RemoteSnapshotFilesRecevier.onDone(IgniteSnapshotManager.java:3773)
>         - locked 
> o.a.i.i.processors.cache.persistence.snapshot.IgniteSnapshotManager$RemoteSnapshotFilesRecevier@4a114695
>         at 
> app//o.a.i.i.processors.cache.persistence.snapshot.IgniteSnapshotManager$RemoteSnapshotFilesRecevier.onDone(IgniteSnapshotManager.java:3642)
>         at 
> app//o.a.i.i.util.future.GridFutureAdapter.onDone(GridFutureAdapter.java:565)
>         at 
> app//o.a.i.i.util.future.GridFutureAdapter.onDone(GridFutureAdapter.java:553)
>         at 
> app//o.a.i.i.processors.cache.persistence.snapshot.IgniteSnapshotManager$RemoteSnapshotFilesRecevier.acceptException(IgniteSnapshotManager.java:3740)
>         - locked 
> o.a.i.i.processors.cache.persistence.snapshot.IgniteSnapshotManager$RemoteSnapshotFilesRecevier@4a114695
>         at 
> app//o.a.i.i.processors.cache.persistence.snapshot.IgniteSnapshotManager$SequentialRemoteSnapshotManager.lambda$onNodeLeft$0(IgniteSnapshotManager.java:3888)
>         at 
> app//o.a.i.i.processors.cache.persistence.snapshot.IgniteSnapshotManager$SequentialRemoteSnapshotManager$$Lambda$1658/0x0000000800926840.accept(Unknown
>  Source)
>         at java.base@11.0.23/java.lang.Iterable.forEach(Iterable.java:75)
>         at 
> app//o.a.i.i.processors.cache.persistence.snapshot.IgniteSnapshotManager$SequentialRemoteSnapshotManager.onNodeLeft(IgniteSnapshotManager.java:3886)
>         at 
> app//o.a.i.i.processors.cache.persistence.snapshot.IgniteSnapshotManager.lambda$start0$20(IgniteSnapshotManager.java:668)
>         at 
> app//o.a.i.i.processors.cache.persistence.snapshot.IgniteSnapshotManager$$Lambda$547/0x00000008004edc40.onEvent(Unknown
>  Source)
>         at 
> app//o.a.i.i.managers.eventstorage.GridEventStorageManager$DiscoveryListenerWrapper.onEvent(GridEventStorageManager.java:1453)
>         at 
> app//o.a.i.i.managers.eventstorage.GridEventStorageManager.notifyListeners(GridEventStorageManager.java:898)
>         at 
> app//o.a.i.i.managers.eventstorage.GridEventStorageManager.notifyListeners(GridEventStorageManager.java:883)
>         at 
> app//o.a.i.i.managers.eventstorage.GridEventStorageManager.record0(GridEventStorageManager.java:354)
>         at 
> app//o.a.i.i.managers.eventstorage.GridEventStorageManager.record(GridEventStorageManager.java:317)
>         at 
> app//o.a.i.i.managers.discovery.GridDiscoveryManager$DiscoveryWorker.recordEvent(GridDiscoveryManager.java:3036)
>         at 
> app//o.a.i.i.managers.discovery.GridDiscoveryManager$DiscoveryWorker.body0(GridDiscoveryManager.java:3223)
>         at 
> app//o.a.i.i.managers.discovery.GridDiscoveryManager$DiscoveryWorker.body(GridDiscoveryManager.java:3056)
>         at app//o.a.i.i.util.worker.GridWorker.run(GridWorker.java:125)
>         at java.base@11.0.23/java.lang.Thread.run(Thread.java:829)
> {noformat}



--
This message was sent by Atlassian Jira
(v8.20.10#820010)

Reply via email to