[ https://issues.apache.org/jira/browse/IGNITE-9188?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16585708#comment-16585708 ]
ASF GitHub Bot commented on IGNITE-9188: ---------------------------------------- GitHub user ascherbakoff opened a pull request: https://github.com/apache/ignite/pull/4578 IGNITE-9188 Unexpected eviction leading to data loss You can merge this pull request into a Git repository by running: $ git pull https://github.com/gridgain/apache-ignite ignite-9188 Alternatively you can review and apply these changes as the patch at: https://github.com/apache/ignite/pull/4578.patch To close this pull request, make a commit to your master/trunk branch with (at least) the following in the commit message: This closes #4578 ---- commit b207bd0b992aa09885242c5da0b7f2c6fdd621f2 Author: ascherbakoff <alexey.scherbakoff@...> Date: 2018-08-06T20:46:00Z IGNITE-9188 Unexpected eviction leading to data loss in a scenario. commit 88bfeb16422634761eb9f1e412bbc21231387446 Author: Aleksei Scherbakov <alexey.scherbakoff@...> Date: 2018-08-08T10:03:43Z IGNITE-9188 Unexpected eviction leading to data loss. commit b9a2ecb7ffe80bc3e40d842e211b1037ed0b7af7 Author: Aleksei Scherbakov <alexey.scherbakoff@...> Date: 2018-08-14T14:39:35Z Merge branch 'master' of https://github.com/apache/ignite into ignite-9188 commit cf3a88a9e2268c2f7e35f556d36da0d00fd7cfef Author: Aleksei Scherbakov <alexey.scherbakoff@...> Date: 2018-08-14T17:24:34Z IGNITE-9188 Unexpected eviction leading to data loss. commit d721ce138a4920e4d7c7297c98017ffcbecc81eb Author: Aleksei Scherbakov <alexey.scherbakoff@...> Date: 2018-08-15T17:23:43Z IGNITE-9188 Unexpected eviction leading to data loss. commit 0e04bedfd9727076bf2fe969c4c3fb8c0cb48757 Author: Aleksei Scherbakov <alexey.scherbakoff@...> Date: 2018-08-20T09:10:52Z IGNITE-9188 wip. 
commit 9e1df5550deaf34bdbc8b76b001d16684aa15803 Author: Aleksei Scherbakov <alexey.scherbakoff@...> Date: 2018-08-20T09:11:11Z Merge branch 'master' of https://github.com/apache/ignite into ignite-9188 ---- > Unexpected eviction leading to data loss in a scenario with > stopping/restarting nodes during rebalancing > -------------------------------------------------------------------------------------------------------- > > Key: IGNITE-9188 > URL: https://issues.apache.org/jira/browse/IGNITE-9188 > Project: Ignite > Issue Type: Bug > Reporter: Alexei Scherbakov > Assignee: Alexei Scherbakov > Priority: Major > Fix For: 2.7 > > > Scenario: > 1. Split grid nodes in two groups with distinct partition mapping. One group > holds even partitions, other - odd. Rebalancing of "odd" partitions is only > triggered when number of nodes in grid exceeds n/2 threshold. > 2. Start n/2 nodes, activate, put data into "even" partitions. > 3. Start other n/2 nodes, change BLT, delay rebalancing of "odd" partitions. > 4. Stop newly started nodes before rebalancing is finished. > Expected behavior: partitions in "odd" group will keep owning state. > Actual behavior: "odd" partitions are evicted leading to data loss. > Unit test reproducer: > {noformat} > /* > * Licensed to the Apache Software Foundation (ASF) under one or more > * contributor license agreements. See the NOTICE file distributed with > * this work for additional information regarding copyright ownership. > * The ASF licenses this file to You under the Apache License, Version 2.0 > * (the "License"); you may not use this file except in compliance with > * the License. You may obtain a copy of the License at > * > * http://www.apache.org/licenses/LICENSE-2.0 > * > * Unless required by applicable law or agreed to in writing, software > * distributed under the License is distributed on an "AS IS" BASIS, > * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
> * See the License for the specific language governing permissions and > * limitations under the License. > */ > package org.apache.ignite.internal.processors.cache.distributed; > import java.util.ArrayList; > import java.util.Collection; > import java.util.HashMap; > import java.util.List; > import java.util.Map; > import java.util.UUID; > import org.apache.ignite.Ignite; > import org.apache.ignite.cache.CacheAtomicityMode; > import org.apache.ignite.cache.CacheMode; > import org.apache.ignite.cache.affinity.AffinityFunctionContext; > import org.apache.ignite.cache.affinity.rendezvous.RendezvousAffinityFunction; > import org.apache.ignite.cluster.ClusterNode; > import org.apache.ignite.configuration.CacheConfiguration; > import org.apache.ignite.configuration.DataRegionConfiguration; > import org.apache.ignite.configuration.DataStorageConfiguration; > import org.apache.ignite.configuration.IgniteConfiguration; > import org.apache.ignite.configuration.WALMode; > import org.apache.ignite.internal.TestRecordingCommunicationSpi; > import org.apache.ignite.internal.processors.cache.GridCacheUtils; > import > org.apache.ignite.internal.processors.cache.distributed.dht.GridDhtLocalPartition; > import > org.apache.ignite.internal.processors.cache.distributed.dht.preloader.GridDhtPartitionDemandMessage; > import org.apache.ignite.internal.util.typedef.G; > import org.apache.ignite.internal.util.typedef.internal.CU; > import org.apache.ignite.internal.util.typedef.internal.U; > import org.apache.ignite.lang.IgniteBiPredicate; > import org.apache.ignite.plugin.extensions.communication.Message; > import org.apache.ignite.testframework.junits.common.GridCommonAbstractTest; > import org.jetbrains.annotations.Nullable; > import static > org.apache.ignite.internal.processors.cache.distributed.dht.GridDhtPartitionState.OWNING; > /** > * > */ > public class CacheLostPartitionsRestoreStateTest extends > GridCommonAbstractTest { > /** */ > public static final long MB = 1024 * 1024L; 
> /** */ > public static final String GRP_ATTR = "grp"; > /** */ > public static final int GRIDS_CNT = 6; > /** */ > public static final String CACHE_1 = "filled"; > /** */ > public static final String CACHE_2 = "empty"; > /** */ > public static final String EVEN_GRP = "event"; > /** */ > public static final String ODD_GRP = "odd"; > /** {@inheritDoc} */ > @Override protected IgniteConfiguration getConfiguration(String > igniteInstanceName) throws Exception { > IgniteConfiguration cfg = super.getConfiguration(igniteInstanceName); > cfg.setCommunicationSpi(new TestRecordingCommunicationSpi()); > CacheConfiguration ccfg = new CacheConfiguration("default"); > ccfg.setAffinity(new RendezvousAffinityFunction(false, > CacheConfiguration.MAX_PARTITIONS_COUNT)); > cfg.setCacheConfiguration(ccfg); > cfg.setPeerClassLoadingEnabled(true); > Map<String, Object> attrs = new HashMap<>(); > attrs.put(GRP_ATTR, > grp(getTestIgniteInstanceIndex(igniteInstanceName))); > cfg.setUserAttributes(attrs); > DataStorageConfiguration memCfg = new DataStorageConfiguration() > .setDefaultDataRegionConfiguration( > new > DataRegionConfiguration().setPersistenceEnabled(true).setInitialSize(50 * > MB).setMaxSize(50 * MB)) > .setWalMode(WALMode.LOG_ONLY); > cfg.setDataStorageConfiguration(memCfg); > cfg.setCacheConfiguration(configuration(CACHE_1), > configuration(CACHE_2)); > return cfg; > } > /** > * @param name Name. > */ > private CacheConfiguration configuration(String name) { > return new CacheConfiguration(name). > setCacheMode(CacheMode.PARTITIONED). > setAtomicityMode(CacheAtomicityMode.TRANSACTIONAL). > setBackups(2). > setRebalanceBatchSize(1). > setAffinity(new TestAffinityFunction().setPartitions(32)); > } > /** > * @param idx Index. > */ > private String grp(int idx) { > return idx < GRIDS_CNT / 2 ? EVEN_GRP : ODD_GRP; > } > /** > * @throws Exception if failed. 
> */ > public void test() throws Exception { > try { > Ignite ignite = startGridsMultiThreaded(GRIDS_CNT / 2, false); > ignite.cluster().active(true); > awaitPartitionMapExchange(); > int blockPartId = 1; > int c = 0; > for (int i = 0; i < 1000; i++) { > if (ignite.affinity(CACHE_1).partition(i) == blockPartId) { > ignite.cache(CACHE_1).put(i, i); > c++; > } > } > assertEquals(c, ignite.cache(CACHE_1).size()); > startGridsMultiThreaded(GRIDS_CNT / 2, GRIDS_CNT / 2); > // Prevent rebalancing to new nodes. > for (Ignite ig0 : G.allGrids()) { > TestRecordingCommunicationSpi.spi(ig0).blockMessages(new > IgniteBiPredicate<ClusterNode, Message>() { > @Override public boolean apply(ClusterNode node, Message > message) { > if (message instanceof GridDhtPartitionDemandMessage) > { > assertTrue(node.order() <= GRIDS_CNT / 2); > GridDhtPartitionDemandMessage msg = > (GridDhtPartitionDemandMessage)message; > return msg.groupId() == CU.cacheId(CACHE_1) || > msg.groupId() == CU.cacheId(CACHE_2); > } > return false; > } > }); > } > ignite.cluster().setBaselineTopology(GRIDS_CNT); > for (Ignite ig0 : G.allGrids()) { > if (ig0.cluster().localNode().order() <= GRIDS_CNT / 2) > continue; > TestRecordingCommunicationSpi.spi(ig0).waitForBlocked(); > } > assertEquals(c, ignite.cache(CACHE_1).size()); > assertEquals(c, ignite.cache(CACHE_1).size()); > int i = 0; > while(i < GRIDS_CNT / 2) { > stopGrid(GRIDS_CNT / 2 + i); > i++; > } > awaitPartitionMapExchange(); > for (Ignite ig : G.allGrids()) { > GridDhtLocalPartition locPart = > dht(ig.cache(CACHE_1)).topology().localPartition(blockPartId); > assertNotNull(locPart); > assertTrue(locPart.state() == OWNING); > } > } > finally { > stopAllGrids(); > } > } > /** {@inheritDoc} */ > @Override protected void beforeTest() throws Exception { > cleanPersistenceDir(); > } > /** {@inheritDoc} */ > @Override protected void afterTest() throws Exception { > cleanPersistenceDir(); > } > /** */ > public static class TestAffinityFunction extends > 
RendezvousAffinityFunction { > /** */ > public TestAffinityFunction() { > } > /** */ > public TestAffinityFunction(boolean exclNeighbors) { > super(exclNeighbors); > } > /** */ > public TestAffinityFunction(boolean exclNeighbors, int parts) { > super(exclNeighbors, parts); > } > /** */ > public TestAffinityFunction(int parts, > @Nullable IgniteBiPredicate<ClusterNode, ClusterNode> > backupFilter) { > super(parts, backupFilter); > } > /** {@inheritDoc} */ > @Override public List<List<ClusterNode>> > assignPartitions(AffinityFunctionContext affCtx) { > int parts = partitions(); > List<List<ClusterNode>> assignments = new ArrayList<>(parts); > Map<UUID, Collection<ClusterNode>> neighborhoodCache = > isExcludeNeighbors() ? > GridCacheUtils.neighbors(affCtx.currentTopologySnapshot()) : > null; > List<ClusterNode> nodes = affCtx.currentTopologySnapshot(); > Map<Object, List<ClusterNode>> nodesByGrp = U.newHashMap(2); > for (ClusterNode node : nodes) { > Object grp = node.attribute(GRP_ATTR); > List<ClusterNode> grpNodes = nodesByGrp.get(grp); > if (grpNodes == null) > nodesByGrp.put(grp, (grpNodes = new ArrayList<>())); > grpNodes.add(node); > } > boolean split = nodesByGrp.size() == 2; > for (int i = 0; i < parts; i++) { > List<ClusterNode> partAssignment = assignPartition(i, split ? > nodesByGrp.get(i % 2 == 0 ? EVEN_GRP : ODD_GRP) : > nodes, > affCtx.backups(), neighborhoodCache); > assignments.add(partAssignment); > } > return assignments; > } > } > } > {noformat} -- This message was sent by Atlassian JIRA (v7.6.3#76005)