[ https://issues.apache.org/jira/browse/IGNITE-8320?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16716739#comment-16716739 ]
ASF GitHub Bot commented on IGNITE-8320: ---------------------------------------- Github user Jokser closed the pull request at: https://github.com/apache/ignite/pull/4016 > Page corruption during the rebalancing cache. > --------------------------------------------- > > Key: IGNITE-8320 > URL: https://issues.apache.org/jira/browse/IGNITE-8320 > Project: Ignite > Issue Type: Bug > Components: persistence > Affects Versions: 2.4 > Reporter: Vyacheslav Koptilin > Assignee: Pavel Kovalenko > Priority: Major > Fix For: 2.5 > > > Cache rebalance may result in page memory corruption. > {noformat} > [2018-04-18T14:33:23,260][ERROR][sys-#54][GridCacheIoManager] Failed > processing message [senderId=95f06c25-e6bb-48f7-a3e5-4c05fc1c49be, > msg=GridDhtPartitionSupplyMessage [rebalanceId=37, > topVer=AffinityTopologyVersion [topVer=53, minorTopVer=1], missed=null, > clean=null, msgSize=525350, estimatedKeysCnt=1690216, size=2, parts=[1, 2], > super=GridCacheGroupIdMessage [grpId=-1831596270]]] > org.apache.ignite.IgniteException: Runtime failure on row: Row@33b6805c[ > key: xxxx [idHash=773709078, hash=-630455542, ...], val: xxxx > [idHash=1309051286, hash=-1321165334, ver: GridCacheVersion > [topVer=135435024, order=1523963943331, nodeOrder=4] ] > at > org.apache.ignite.internal.processors.cache.persistence.tree.BPlusTree.doPut(BPlusTree.java:2102) > ~[ignite-core-2.4.4.b1.jar:2.4.4.b1] > at > org.apache.ignite.internal.processors.cache.persistence.tree.BPlusTree.putx(BPlusTree.java:2049) > ~[ignite-core-2.4.4.b1.jar:2.4.4.b1] > at > org.apache.ignite.internal.processors.query.h2.database.H2TreeIndex.putx(H2TreeIndex.java:247) > ~[ignite-indexing-2.4.4.b1.jar:2.4.4.b1] > at > org.apache.ignite.internal.processors.query.h2.opt.GridH2Table.update(GridH2Table.java:454) > ~[ignite-indexing-2.4.4.b1.jar:2.4.4.b1] > at > org.apache.ignite.internal.processors.query.h2.IgniteH2Indexing.store(IgniteH2Indexing.java:653) > ~[ignite-indexing-2.4.4.b1.jar:2.4.4.b1] > at > 
org.apache.ignite.internal.processors.query.GridQueryProcessor.store(GridQueryProcessor.java:1866) > ~[ignite-core-2.4.4.b1.jar:2.4.4.b1] > at > org.apache.ignite.internal.processors.cache.query.GridCacheQueryManager.store(GridCacheQueryManager.java:407) > ~[ignite-core-2.4.4.b1.jar:2.4.4.b1] > at > org.apache.ignite.internal.processors.cache.IgniteCacheOffheapManagerImpl$CacheDataStoreImpl.finishUpdate(IgniteCacheOffheapManagerImpl.java:1391) > ~[ignite-core-2.4.4.b1.jar:2.4.4.b1] > at > org.apache.ignite.internal.processors.cache.IgniteCacheOffheapManagerImpl$CacheDataStoreImpl.invoke(IgniteCacheOffheapManagerImpl.java:1255) > ~[ignite-core-2.4.4.b1.jar:2.4.4.b1] > at > org.apache.ignite.internal.processors.cache.persistence.GridCacheOffheapManager$GridCacheDataStore.invoke(GridCacheOffheapManager.java:1451) > ~[ignite-core-2.4.4.b1.jar:2.4.4.b1] > at > org.apache.ignite.internal.processors.cache.IgniteCacheOffheapManagerImpl.invoke(IgniteCacheOffheapManagerImpl.java:352) > ~[ignite-core-2.4.4.b1.jar:2.4.4.b1] > at > org.apache.ignite.internal.processors.cache.GridCacheMapEntry.storeValue(GridCacheMapEntry.java:3527) > ~[ignite-core-2.4.4.b1.jar:2.4.4.b1] > at > org.apache.ignite.internal.processors.cache.GridCacheMapEntry.initialValue(GridCacheMapEntry.java:2735) > ~[ignite-core-2.4.4.b1.jar:2.4.4.b1] > at > org.apache.ignite.internal.processors.cache.distributed.dht.preloader.GridDhtPartitionDemander.preloadEntry(GridDhtPartitionDemander.java:823) > ~[ignite-core-2.4.4.b1.jar:2.4.4.b1] > at > org.apache.ignite.internal.processors.cache.distributed.dht.preloader.GridDhtPartitionDemander.handleSupplyMessage(GridDhtPartitionDemander.java:704) > ~[ignite-core-2.4.4.b1.jar:2.4.4.b1] > at > org.apache.ignite.internal.processors.cache.distributed.dht.preloader.GridDhtPreloader.handleSupplyMessage(GridDhtPreloader.java:347) > ~[ignite-core-2.4.4.b1.jar:2.4.4.b1] > at > 
org.apache.ignite.internal.processors.cache.GridCachePartitionExchangeManager$5.apply(GridCachePartitionExchangeManager.java:365) > ~[ignite-core-2.4.4.b1.jar:2.4.4.b1] > at > org.apache.ignite.internal.processors.cache.GridCachePartitionExchangeManager$5.apply(GridCachePartitionExchangeManager.java:355) > ~[ignite-core-2.4.4.b1.jar:2.4.4.b1] > at > org.apache.ignite.internal.processors.cache.GridCacheIoManager.processMessage(GridCacheIoManager.java:1054) > [ignite-core-2.4.4.b1.jar:2.4.4.b1] > at > org.apache.ignite.internal.processors.cache.GridCacheIoManager.onMessage0(GridCacheIoManager.java:579) > [ignite-core-2.4.4.b1.jar:2.4.4.b1] > at > org.apache.ignite.internal.processors.cache.GridCacheIoManager.access$700(GridCacheIoManager.java:99) > [ignite-core-2.4.4.b1.jar:2.4.4.b1] > at > org.apache.ignite.internal.processors.cache.GridCacheIoManager$OrderedMessageListener.onMessage(GridCacheIoManager.java:1603) > [ignite-core-2.4.4.b1.jar:2.4.4.b1] > at > org.apache.ignite.internal.managers.communication.GridIoManager.invokeListener(GridIoManager.java:1555) > [ignite-core-2.4.4.b1.jar:2.4.4.b1] > at > org.apache.ignite.internal.managers.communication.GridIoManager.access$4100(GridIoManager.java:126) > [ignite-core-2.4.4.b1.jar:2.4.4.b1] > at > org.apache.ignite.internal.managers.communication.GridIoManager$GridCommunicationMessageSet.unwind(GridIoManager.java:2751) > [ignite-core-2.4.4.b1.jar:2.4.4.b1] > at > org.apache.ignite.internal.managers.communication.GridIoManager.unwindMessageSet(GridIoManager.java:1515) > [ignite-core-2.4.4.b1.jar:2.4.4.b1] > at > org.apache.ignite.internal.managers.communication.GridIoManager.access$4400(GridIoManager.java:126) > [ignite-core-2.4.4.b1.jar:2.4.4.b1] > at > org.apache.ignite.internal.managers.communication.GridIoManager$10.run(GridIoManager.java:1484) > [ignite-core-2.4.4.b1.jar:2.4.4.b1] > at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) > [?:1.8.0_151] > at > 
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) > [?:1.8.0_151] > at java.lang.Thread.run(Thread.java:748) [?:1.8.0_151] > Caused by: java.lang.IllegalStateException: Failed to get page IO instance > (page content is corrupted) > at > org.apache.ignite.internal.processors.cache.persistence.tree.io.IOVersions.forVersion(IOVersions.java:83) > ~[ignite-core-2.4.4.b1.jar:2.4.4.b1] > at > org.apache.ignite.internal.processors.cache.persistence.tree.io.IOVersions.forPage(IOVersions.java:95) > ~[ignite-core-2.4.4.b1.jar:2.4.4.b1] > at > org.apache.ignite.internal.processors.cache.persistence.CacheDataRowAdapter.initFromLink(CacheDataRowAdapter.java:148) > ~[ignite-core-2.4.4.b1.jar:2.4.4.b1] > at > org.apache.ignite.internal.processors.cache.persistence.CacheDataRowAdapter.initFromLink(CacheDataRowAdapter.java:102) > ~[ignite-core-2.4.4.b1.jar:2.4.4.b1] > at > org.apache.ignite.internal.processors.query.h2.database.H2RowFactory.getRow(H2RowFactory.java:61) > ~[ignite-indexing-2.4.4.b1.jar:2.4.4.b1] > at > org.apache.ignite.internal.processors.query.h2.database.H2Tree.createRowFromLink(H2Tree.java:149) > ~[ignite-indexing-2.4.4.b1.jar:2.4.4.b1] > at > org.apache.ignite.internal.processors.query.h2.database.io.H2LeafIO.getLookupRow(H2LeafIO.java:67) > ~[ignite-indexing-2.4.4.b1.jar:2.4.4.b1] > at > org.apache.ignite.internal.processors.query.h2.database.io.H2LeafIO.getLookupRow(H2LeafIO.java:33) > ~[ignite-indexing-2.4.4.b1.jar:2.4.4.b1] > at > org.apache.ignite.internal.processors.query.h2.database.H2Tree.getRow(H2Tree.java:167) > ~[ignite-indexing-2.4.4.b1.jar:2.4.4.b1] > at > org.apache.ignite.internal.processors.query.h2.database.H2Tree.getRow(H2Tree.java:46) > ~[ignite-indexing-2.4.4.b1.jar:2.4.4.b1] > at > org.apache.ignite.internal.processors.cache.persistence.tree.BPlusTree.getRow(BPlusTree.java:4436) > ~[ignite-core-2.4.4.b1.jar:2.4.4.b1] > at > org.apache.ignite.internal.processors.query.h2.database.H2Tree.compare(H2Tree.java:209) > 
~[ignite-indexing-2.4.4.b1.jar:2.4.4.b1] > at > org.apache.ignite.internal.processors.query.h2.database.H2Tree.compare(H2Tree.java:46) > ~[ignite-indexing-2.4.4.b1.jar:2.4.4.b1] > at > org.apache.ignite.internal.processors.cache.persistence.tree.BPlusTree.compare(BPlusTree.java:4423) > ~[ignite-core-2.4.4.b1.jar:2.4.4.b1] > at > org.apache.ignite.internal.processors.cache.persistence.tree.BPlusTree.findInsertionPoint(BPlusTree.java:4343) > ~[ignite-core-2.4.4.b1.jar:2.4.4.b1] > at > org.apache.ignite.internal.processors.cache.persistence.tree.BPlusTree.access$1500(BPlusTree.java:82) > ~[ignite-core-2.4.4.b1.jar:2.4.4.b1] > at > org.apache.ignite.internal.processors.cache.persistence.tree.BPlusTree$Search.run0(BPlusTree.java:270) > ~[ignite-core-2.4.4.b1.jar:2.4.4.b1] > at > org.apache.ignite.internal.processors.cache.persistence.tree.BPlusTree$GetPageHandler.run(BPlusTree.java:4770) > ~[ignite-core-2.4.4.b1.jar:2.4.4.b1] > at > org.apache.ignite.internal.processors.cache.persistence.tree.BPlusTree$GetPageHandler.run(BPlusTree.java:4755) > ~[ignite-core-2.4.4.b1.jar:2.4.4.b1] > at > org.apache.ignite.internal.processors.cache.persistence.tree.util.PageHandler.readPage(PageHandler.java:158) > ~[ignite-core-2.4.4.b1.jar:2.4.4.b1] > at > org.apache.ignite.internal.processors.cache.persistence.DataStructure.read(DataStructure.java:320) > ~[ignite-core-2.4.4.b1.jar:2.4.4.b1] > at > org.apache.ignite.internal.processors.cache.persistence.tree.BPlusTree.putDown(BPlusTree.java:2317) > ~[ignite-core-2.4.4.b1.jar:2.4.4.b1] > at > org.apache.ignite.internal.processors.cache.persistence.tree.BPlusTree.putDown(BPlusTree.java:2329) > ~[ignite-core-2.4.4.b1.jar:2.4.4.b1] > at > org.apache.ignite.internal.processors.cache.persistence.tree.BPlusTree.putDown(BPlusTree.java:2329) > ~[ignite-core-2.4.4.b1.jar:2.4.4.b1] > at > org.apache.ignite.internal.processors.cache.persistence.tree.BPlusTree.doPut(BPlusTree.java:2069) > ~[ignite-core-2.4.4.b1.jar:2.4.4.b1] > ... 
30 more > {noformat} > Possible cause and reproducer: > 1) Start partition eviction > 2) Force kill node (kill -9) after partition file truncate > 3) Start node again and iterate over index > The main problem is that file truncation is not synchronized with the actual > checkpoint, which can lead to a situation where, after crash recovery, we have > links in the index tree to data pages that were already removed during file > truncation. > One of the possible solutions is to mark such partition files for deletion > and safely truncate them on the next checkpoint. > This mechanism can be resurrected from the ignite-2.0.2.b1 branch. > See > {noformat} > org/gridgain/grid/internal/processors/cache/database/GridCacheDatabaseSharedManager.java:3059 > org.gridgain.grid.cache.db.GridCacheOffheapManager#destroyCacheDataStore > {noformat} -- This message was sent by Atlassian JIRA (v7.6.3#76005)