This is an automated email from the ASF dual-hosted git repository.

sk0x50 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ignite.git


The following commit(s) were added to refs/heads/master by this push:
     new 8ec5d50896a IGNITE-17279 Fixed incorrect detection of lost partition 
on the coordinator node. Fixes #10126
8ec5d50896a is described below

commit 8ec5d50896a2b5f2d008d0bb8f67f7f3d7cdf584
Author: Slava Koptilin <[email protected]>
AuthorDate: Wed Jul 6 23:42:54 2022 +0300

    IGNITE-17279 Fixed incorrect detection of lost partition on the coordinator 
node. Fixes #10126
---
 .../dht/topology/GridClientPartitionTopology.java  |  9 +++
 .../dht/topology/GridDhtPartitionTopologyImpl.java |  9 +++
 .../CachePartitionLossWithPersistenceTest.java     | 71 +++++++++++++++++++++-
 .../CachePartitionLossWithRestartsTest.java        |  6 ++
 ...CachePartitionLostAfterSupplierHasLeftTest.java |  5 ++
 5 files changed, 99 insertions(+), 1 deletion(-)

diff --git 
a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/distributed/dht/topology/GridClientPartitionTopology.java
 
b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/distributed/dht/topology/GridClientPartitionTopology.java
index 0314c46c4eb..08b240a8fce 100644
--- 
a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/distributed/dht/topology/GridClientPartitionTopology.java
+++ 
b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/distributed/dht/topology/GridClientPartitionTopology.java
@@ -1316,6 +1316,15 @@ public class GridClientPartitionTopology implements 
GridDhtPartitionTopology {
             for (Map.Entry<Integer, Set<UUID>> entry : 
ownersByUpdCounters.entrySet())
                 part2node.put(entry.getKey(), entry.getValue());
 
+            if (lostParts != null) {
+                for (Integer lostPart : lostParts) {
+                    for (GridDhtPartitionMap partMap : node2part.values()) {
+                        if (partMap.containsKey(lostPart))
+                            partMap.put(lostPart, LOST);
+                    }
+                }
+            }
+
             updateSeq.incrementAndGet();
         }
         finally {
diff --git 
a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/distributed/dht/topology/GridDhtPartitionTopologyImpl.java
 
b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/distributed/dht/topology/GridDhtPartitionTopologyImpl.java
index 728fd4f3857..e85453d76f0 100644
--- 
a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/distributed/dht/topology/GridDhtPartitionTopologyImpl.java
+++ 
b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/distributed/dht/topology/GridDhtPartitionTopologyImpl.java
@@ -2447,6 +2447,15 @@ public class GridDhtPartitionTopologyImpl implements 
GridDhtPartitionTopology {
                     }
                 }
 
+                if (lostParts != null) {
+                    for (Integer lostPart : lostParts) {
+                        for (GridDhtPartitionMap partMap : node2part.values()) 
{
+                            if (partMap.containsKey(lostPart))
+                                partMap.put(lostPart, LOST);
+                        }
+                    }
+                }
+
                 node2part = new GridDhtPartitionFullMap(node2part, 
updateSeq.incrementAndGet());
             }
             finally {
diff --git 
a/modules/core/src/test/java/org/apache/ignite/internal/processors/cache/distributed/CachePartitionLossWithPersistenceTest.java
 
b/modules/core/src/test/java/org/apache/ignite/internal/processors/cache/distributed/CachePartitionLossWithPersistenceTest.java
index 7df1db6b618..c154b744d6b 100644
--- 
a/modules/core/src/test/java/org/apache/ignite/internal/processors/cache/distributed/CachePartitionLossWithPersistenceTest.java
+++ 
b/modules/core/src/test/java/org/apache/ignite/internal/processors/cache/distributed/CachePartitionLossWithPersistenceTest.java
@@ -38,7 +38,11 @@ import org.apache.ignite.internal.IgniteEx;
 import org.apache.ignite.internal.TestRecordingCommunicationSpi;
 import org.apache.ignite.internal.processors.cache.IgniteInternalCache;
 import 
org.apache.ignite.internal.processors.cache.distributed.dht.preloader.GridDhtPartitionDemandMessage;
+import 
org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtLocalPartition;
+import 
org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtPartitionState;
+import 
org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtPartitionTopology;
 import org.apache.ignite.internal.util.typedef.G;
+import org.apache.ignite.internal.util.typedef.internal.CU;
 import org.apache.ignite.lang.IgniteBiPredicate;
 import org.apache.ignite.plugin.extensions.communication.Message;
 import org.apache.ignite.testframework.junits.common.GridCommonAbstractTest;
@@ -46,6 +50,8 @@ import org.junit.Ignore;
 import org.junit.Test;
 
 import static org.apache.ignite.cache.PartitionLossPolicy.READ_WRITE_SAFE;
+import static 
org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtPartitionState.EVICTED;
+import static 
org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtPartitionState.LOST;
 
 /**
  *
@@ -221,6 +227,10 @@ public class CachePartitionLossWithPersistenceTest extends 
GridCommonAbstractTes
             });
         }
 
+        // Check that all lost partitions have the same state on all cluster 
nodes.
+        for (Integer lostPart : lostParts)
+            checkLostPartitionAcrossCluster(DEFAULT_CACHE_NAME, lostPart);
+
         if (partResetMode == 0)
             crd.resetLostPartitions(Collections.singleton(DEFAULT_CACHE_NAME));
 
@@ -228,8 +238,13 @@ public class CachePartitionLossWithPersistenceTest extends 
GridCommonAbstractTes
 
         final Collection<Integer> g2LostParts = 
g2.cache(DEFAULT_CACHE_NAME).lostPartitions();
 
-        if (partResetMode != 0)
+        if (partResetMode != 0) {
+            // Check that all lost partitions have the same state on all 
cluster nodes.
+            for (Integer lostPart : lostParts)
+                checkLostPartitionAcrossCluster(DEFAULT_CACHE_NAME, lostPart);
+
             assertEquals(lostParts, g2LostParts);
+        }
 
         if (partResetMode == 1)
             crd.resetLostPartitions(Collections.singleton(DEFAULT_CACHE_NAME));
@@ -261,4 +276,58 @@ public class CachePartitionLossWithPersistenceTest extends 
GridCommonAbstractTes
                 assertEquals("Partition " + p, 0, 
ignite.cache(DEFAULT_CACHE_NAME).get(p));
         }
     }
+
+    /**
+     * Checks partition states on all nodes.
+     *
+     * @param cacheName Cache name to check.
+     * @param partId Partition to check.
+     * @throws AssertionError If some affinity node does not report the partition as lost (state LOST or EVICTED).
+     */
+    public static void checkLostPartitionAcrossCluster(String cacheName, int 
partId) {
+        for (Ignite grid : G.allGrids()) {
+            IgniteEx g = (IgniteEx)grid;
+
+            boolean affNode = CU.affinityNode(
+                g.localNode(),
+                
g.cache(cacheName).getConfiguration(CacheConfiguration.class).getNodeFilter());
+
+            // skip non affinity nodes
+            if (!affNode)
+                continue;
+
+            GridDhtPartitionTopology top = 
g.context().cache().cacheGroup(CU.cacheId(cacheName)).topology();
+
+            GridDhtLocalPartition p = top.localPartition(partId);
+
+            if (p != null) {
+                GridDhtPartitionState actualState = p.state();
+
+                // check lost partitions
+                if (!top.lostPartitions().contains(partId)) {
+                    fail("Unexpected partition state [partId=" + partId + ", 
nodeId=" + g.localNode().id() +
+                        ", actualPartState=" + actualState + ", 
expectedPartState=" + LOST + ", markedAsLost=false]");
+                }
+
+                // check actual partition state
+                if (actualState != LOST && actualState != EVICTED) {
+                    fail("Unexpected partition state [partId=" + partId + ", 
nodeId=" + g.localNode().id() +
+                        ", actualPartState=" + actualState + ", 
expectedPartState=" + LOST + ", markedAsLost=true]");
+                }
+
+                // check node2part mapping
+                for (Ignite node : G.allGrids()) {
+                    IgniteEx n = (IgniteEx)node;
+
+                    GridDhtPartitionState s = 
top.partitionState(n.localNode().id(), partId);
+
+                    if (s != LOST && s != EVICTED) {
+                        fail("Unexpected partition state [partId=" + partId + 
", nodeId=" + g.localNode().id() +
+                            ", node2partNodeId=" + n.localNode().id() +
+                            ", node2partState=" + s + ", expectedPartState=" + 
LOST + ", markedAsLost=true]");
+                    }
+                }
+            }
+        }
+    }
 }
diff --git 
a/modules/core/src/test/java/org/apache/ignite/internal/processors/cache/distributed/CachePartitionLossWithRestartsTest.java
 
b/modules/core/src/test/java/org/apache/ignite/internal/processors/cache/distributed/CachePartitionLossWithRestartsTest.java
index da527e4284a..e8bfdcc0894 100644
--- 
a/modules/core/src/test/java/org/apache/ignite/internal/processors/cache/distributed/CachePartitionLossWithRestartsTest.java
+++ 
b/modules/core/src/test/java/org/apache/ignite/internal/processors/cache/distributed/CachePartitionLossWithRestartsTest.java
@@ -44,6 +44,8 @@ import org.junit.Test;
 import org.junit.runner.RunWith;
 import org.junit.runners.Parameterized;
 
+import static 
org.apache.ignite.internal.processors.cache.distributed.CachePartitionLossWithPersistenceTest.checkLostPartitionAcrossCluster;
+
 /**
  *
  */
@@ -205,6 +207,10 @@ public class CachePartitionLossWithRestartsTest extends 
GridCommonAbstractTest {
         GridDhtPartitionTopology top = 
startGrid(1).cachex(DEFAULT_CACHE_NAME).context().topology();
         assertEquals(lost1, top.lostPartitions());
 
+        // Check that all lost partitions have the same state on all cluster 
nodes.
+        for (Integer lostPart : lost1)
+            checkLostPartitionAcrossCluster(DEFAULT_CACHE_NAME, lostPart);
+
         // TODO https://issues.apache.org/jira/browse/IGNITE-13053
         grid(1).resetLostPartitions(Collections.singleton(DEFAULT_CACHE_NAME));
 
diff --git 
a/modules/core/src/test/java/org/apache/ignite/internal/processors/cache/distributed/CachePartitionLostAfterSupplierHasLeftTest.java
 
b/modules/core/src/test/java/org/apache/ignite/internal/processors/cache/distributed/CachePartitionLostAfterSupplierHasLeftTest.java
index 6cd5d2e9374..ad8005a092b 100644
--- 
a/modules/core/src/test/java/org/apache/ignite/internal/processors/cache/distributed/CachePartitionLostAfterSupplierHasLeftTest.java
+++ 
b/modules/core/src/test/java/org/apache/ignite/internal/processors/cache/distributed/CachePartitionLostAfterSupplierHasLeftTest.java
@@ -51,6 +51,7 @@ import org.junit.Test;
 import static 
org.apache.ignite.IgniteSystemProperties.IGNITE_PREFER_WAL_REBALANCE;
 import static org.apache.ignite.cache.CacheAtomicityMode.TRANSACTIONAL;
 import static org.apache.ignite.cache.CacheMode.PARTITIONED;
+import static 
org.apache.ignite.internal.processors.cache.distributed.CachePartitionLossWithPersistenceTest.checkLostPartitionAcrossCluster;
 
 /**
  * Test scenario: last supplier has left while a partition on demander is 
cleared before sending first demand request.
@@ -288,6 +289,10 @@ public class CachePartitionLostAfterSupplierHasLeftTest 
extends GridCommonAbstra
 
         assertEquals(PARTS_CNT, lostParts2.size());
 
+        // Check that all lost partitions have the same state on all cluster 
nodes.
+        for (Integer lostPart : lostParts2)
+            checkLostPartitionAcrossCluster(DEFAULT_CACHE_NAME, lostPart);
+
         spi1.stopBlock();
 
         g0.resetLostPartitions(Collections.singletonList(DEFAULT_CACHE_NAME));

Reply via email to