[ 
https://issues.apache.org/jira/browse/IGNITE-10720?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16744790#comment-16744790
 ] 

Ivan Artukhov commented on IGNITE-10720:
----------------------------------------

I got the following assertion error when starting a server node that had 
previously been stopped (killed) during a checkpoint:

{code:java}
[2019-01-16 15:57:48,646]‌‌[INFO ]‌‌[main] Starting binary memory restore for: 
[-1282194281, -1282194277, -1282194278, -1282194279, -1282194280, -1282194273, 
-1282194274, -623411239, -1282194275, -1282194276, -1282194303, -1282194304, 
-1282194249, -1282194250, -1282194245, -1282194246, -1282194247, -1282194248, 
-1282194241, -1282194242, -1282194243, -1282194244, -1282194272, -2100569601, 
-1282194217, -1282194218, -1282194219, -1282194213, -1282194214, -1282194215, 
-1282194216, -1282194210, -1282194211, -1282194212, -1093321804, -1282194185, 
-1282194186, -1282194187, -1282194188, -1282194181, -1282194182, -1282194183, 
-1282194184, -1282194433, -1309826943, -1282194434, -1282194179, -1282194435, 
-1282194436, -1282194180, 692518463, 692455107, -1282194405, -623474595, 
-1282194401, -1282194402, -1282194403, -1282194404, -1282194429, -1282194430, 
-1282194431, -1282194432, -1282194427, -1282194428, -2131765427, -1282194373, 
-1282194374, -1282194369, -1282194370, -1282194371, -1282194372, -1282194397, 
-1282194398, -1282194399, -1282194400, -1282194396, -1282194341, -1282194342, 
-1282194343, -1282194337, -1282194338, -1282194339, -1282194340, -1282194365, 
-1282194366, -1282194367, -1282194368, -1974223438, -1282194309, -1282194310, 
-1282194311, -1282194312, -1282194305, -1282194306, -1282194307, -1282194308, 
-1282194334, -1282194335, 374280892, -1941895502, -1282194336, 374280891, 
374280890, 374280889, 374280888, 374280887, 374280886, 374280885, 374280884]
[2019-01-16 15:57:52,885]‌‌[INFO ]‌‌[main] Read checkpoint status 
[startMarker=/storage/ssd/prtagent/poc/work/db/poc_tester_server_172_25_1_22_id_0/cp/1547643332579-8f0f1aca-36de-4cfd-a5ea-08daae99c80c-START.bin,
 
endMarker=/storage/ssd/prtagent/poc/work/db/poc_tester_server_172_25_1_22_id_0/cp/1547643312802-93a29247-8743-492c-951e-035a2126ddde-END.bin]
[2019-01-16 15:57:52,885]‌‌[INFO ]‌‌[main] Checking memory state 
[lastValidPos=FileWALPointer [idx=3302, fileOff=1319234, len=2055203], 
lastMarked=FileWALPointer [idx=3338, fileOff=59, len=2055203], 
lastCheckpointId=8f0f1aca-36de-4cfd-a5ea-08daae99c80c]
[2019-01-16 15:57:52,886]‌‌[WARN ]‌‌[main] Ignite node stopped in the middle of 
checkpoint. Will restore memory state and finish checkpoint on node start.
[2019-01-16 15:58:01,586]‌‌[ERROR]‌‌[main] Exception during start processors, 
node will be stopped and close connections
java.lang.AssertionError: 0002ffff00000000
        at 
org.apache.ignite.internal.processors.cache.persistence.pagemem.PageMemoryImpl.writeUnlockPage(PageMemoryImpl.java:1551)
 ~[ignite-core-2.8.0-SNAPSHOT.jar:2.8.0-SNAPSHOT]
        at 
org.apache.ignite.internal.processors.cache.persistence.pagemem.PageMemoryImpl.writeUnlock(PageMemoryImpl.java:470)
 ~[ignite-core-2.8.0-SNAPSHOT.jar:2.8.0-SNAPSHOT]
        at 
org.apache.ignite.internal.processors.cache.persistence.GridCacheDatabaseSharedManager.performBinaryMemoryRestore(GridCacheDatabaseSharedManager.java:2228)
 ~[ignite-core-2.8.0-SNAPSHOT.jar:2.8.0-SNAPSHOT]
        at 
org.apache.ignite.internal.processors.cache.persistence.GridCacheDatabaseSharedManager.restoreBinaryMemory(GridCacheDatabaseSharedManager.java:942)
 ~[ignite-core-2.8.0-SNAPSHOT.jar:2.8.0-SNAPSHOT]
        at 
org.apache.ignite.internal.processors.cache.persistence.GridCacheDatabaseSharedManager.startMemoryRestore(GridCacheDatabaseSharedManager.java:1946)
 ~[ignite-core-2.8.0-SNAPSHOT.jar:2.8.0-SNAPSHOT]
        at 
org.apache.ignite.internal.IgniteKernal.start(IgniteKernal.java:1054) 
[ignite-core-2.8.0-SNAPSHOT.jar:2.8.0-SNAPSHOT]
        at 
org.apache.ignite.internal.IgnitionEx$IgniteNamedInstance.start0(IgnitionEx.java:2041)
 [ignite-core-2.8.0-SNAPSHOT.jar:2.8.0-SNAPSHOT]
        at 
org.apache.ignite.internal.IgnitionEx$IgniteNamedInstance.start(IgnitionEx.java:1732)
 [ignite-core-2.8.0-SNAPSHOT.jar:2.8.0-SNAPSHOT]
        at org.apache.ignite.internal.IgnitionEx.start0(IgnitionEx.java:1158) 
[ignite-core-2.8.0-SNAPSHOT.jar:2.8.0-SNAPSHOT]
        at org.apache.ignite.internal.IgnitionEx.start(IgnitionEx.java:656) 
[ignite-core-2.8.0-SNAPSHOT.jar:2.8.0-SNAPSHOT]
        at org.apache.ignite.IgniteSpring.start(IgniteSpring.java:66) 
[ignite-spring-2.8.0-SNAPSHOT.jar:2.8.0-SNAPSHOT]
        at 
org.apache.ignite.scenario.internal.utils.IgniteNode.start(IgniteNode.java:242) 
[poc-tester-0.1.0-SNAPSHOT.jar:?]
        at 
org.apache.ignite.scenario.internal.utils.IgniteNode.main(IgniteNode.java:82) 
[poc-tester-0.1.0-SNAPSHOT.jar:?]
[2019-01-16 15:58:01,594]‌‌[ERROR]‌‌[main] Got exception while starting (will 
rollback startup routine).
java.lang.AssertionError: 0002ffff00000000
        at 
org.apache.ignite.internal.processors.cache.persistence.pagemem.PageMemoryImpl.writeUnlockPage(PageMemoryImpl.java:1551)
 ~[ignite-core-2.8.0-SNAPSHOT.jar:2.8.0-SNAPSHOT]
        at 
org.apache.ignite.internal.processors.cache.persistence.pagemem.PageMemoryImpl.writeUnlock(PageMemoryImpl.java:470)
 ~[ignite-core-2.8.0-SNAPSHOT.jar:2.8.0-SNAPSHOT]
        at 
org.apache.ignite.internal.processors.cache.persistence.GridCacheDatabaseSharedManager.performBinaryMemoryRestore(GridCacheDatabaseSharedManager.java:2228)
 ~[ignite-core-2.8.0-SNAPSHOT.jar:2.8.0-SNAPSHOT]
        at 
org.apache.ignite.internal.processors.cache.persistence.GridCacheDatabaseSharedManager.restoreBinaryMemory(GridCacheDatabaseSharedManager.java:942)
 ~[ignite-core-2.8.0-SNAPSHOT.jar:2.8.0-SNAPSHOT]
        at 
org.apache.ignite.internal.processors.cache.persistence.GridCacheDatabaseSharedManager.startMemoryRestore(GridCacheDatabaseSharedManager.java:1946)
 ~[ignite-core-2.8.0-SNAPSHOT.jar:2.8.0-SNAPSHOT]
        at 
org.apache.ignite.internal.IgniteKernal.start(IgniteKernal.java:1054) 
[ignite-core-2.8.0-SNAPSHOT.jar:2.8.0-SNAPSHOT]
        at 
org.apache.ignite.internal.IgnitionEx$IgniteNamedInstance.start0(IgnitionEx.java:2041)
 [ignite-core-2.8.0-SNAPSHOT.jar:2.8.0-SNAPSHOT]
        at 
org.apache.ignite.internal.IgnitionEx$IgniteNamedInstance.start(IgnitionEx.java:1732)
 [ignite-core-2.8.0-SNAPSHOT.jar:2.8.0-SNAPSHOT]
        at org.apache.ignite.internal.IgnitionEx.start0(IgnitionEx.java:1158) 
[ignite-core-2.8.0-SNAPSHOT.jar:2.8.0-SNAPSHOT]
        at org.apache.ignite.internal.IgnitionEx.start(IgnitionEx.java:656) 
[ignite-core-2.8.0-SNAPSHOT.jar:2.8.0-SNAPSHOT]
        at org.apache.ignite.IgniteSpring.start(IgniteSpring.java:66) 
[ignite-spring-2.8.0-SNAPSHOT.jar:2.8.0-SNAPSHOT]
        at 
org.apache.ignite.scenario.internal.utils.IgniteNode.start(IgniteNode.java:242) 
[poc-tester-0.1.0-SNAPSHOT.jar:?]
        at 
org.apache.ignite.scenario.internal.utils.IgniteNode.main(IgniteNode.java:82) 
[poc-tester-0.1.0-SNAPSHOT.jar:?]
{code}


> Decrease time to save metadata during checkpoint
> ------------------------------------------------
>
>                 Key: IGNITE-10720
>                 URL: https://issues.apache.org/jira/browse/IGNITE-10720
>             Project: Ignite
>          Issue Type: Improvement
>            Reporter: Anton Kalashnikov
>            Assignee: Anton Kalashnikov
>            Priority: Major
>          Time Spent: 10m
>  Remaining Estimate: 0h
>
> It looks like it is not necessary to save all metadata (such as the free list) 
> under the checkpoint write lock, because doing so sometimes takes too long.



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)

Reply via email to