Roman Shtykh created IGNITE-11620: ------------------------------------- Summary: GridDhtInvalidPartitionException stops the cluster Key: IGNITE-11620 URL: https://issues.apache.org/jira/browse/IGNITE-11620 Project: Ignite Issue Type: Bug Affects Versions: 2.7, 2.6 Reporter: Roman Shtykh
When injecting data and having it expired at the same time rebalancing occurs, *GridDhtInvalidPartitionException* triggers *SYSTEM_WORKER_TERMINATION*. This can cause cascading failures in the cluster and take the whole cluster down. Simple test case: {noformat} import org.apache.ignite.IgniteCache; import org.apache.ignite.configuration.CacheConfiguration; import org.apache.ignite.configuration.IgniteConfiguration; import org.apache.ignite.failure.StopNodeOrHaltFailureHandler; import org.apache.ignite.spi.discovery.tcp.TcpDiscoverySpi; import org.apache.ignite.spi.discovery.tcp.ipfinder.TcpDiscoveryIpFinder; import org.apache.ignite.spi.discovery.tcp.ipfinder.vm.TcpDiscoveryVmIpFinder; import org.apache.ignite.testframework.junits.common.GridCommonAbstractTest; import javax.cache.expiry.CreatedExpiryPolicy; import javax.cache.expiry.Duration; import java.util.concurrent.CountDownLatch; import java.util.concurrent.TimeUnit; import static org.apache.ignite.cache.CacheAtomicityMode.ATOMIC; import static org.apache.ignite.cache.CacheMode.PARTITIONED; /** * */ public class ExpireWhileRebalanceTest extends GridCommonAbstractTest { private static final int ENTRIES = 500000; /** * */ protected static final TcpDiscoveryIpFinder IP_FINDER = new TcpDiscoveryVmIpFinder(true); /** * {@inheritDoc} */ @Override protected IgniteConfiguration getConfiguration(String gridName) throws Exception { IgniteConfiguration cfg = super.getConfiguration(gridName); ((TcpDiscoverySpi) cfg.getDiscoverySpi()).setIpFinder(IP_FINDER); cfg.setFailureHandler(new StopNodeOrHaltFailureHandler()); CacheConfiguration<Object, Object> ccfg = new CacheConfiguration<>(DEFAULT_CACHE_NAME); ccfg.setAtomicityMode(ATOMIC); ccfg.setCacheMode(PARTITIONED); ccfg.setExpiryPolicyFactory(CreatedExpiryPolicy.factoryOf(new Duration(TimeUnit.SECONDS, 1))); cfg.setCacheConfiguration(ccfg); return cfg; } /** * @throws Exception If failed. */ public void testExpireWhileRebalancing() throws Exception { startGridsMultiThreaded(4); IgniteCache<Object, Object> cache = ignite(0).cache(DEFAULT_CACHE_NAME); CountDownLatch latch = new CountDownLatch(1); new Thread(() -> { for (int i = 1; i <= ENTRIES; i++) { cache.put(i, i); if (i % (ENTRIES / 10) == 0) System.out.println(">>> Entries put: " + i); } latch.countDown(); }).start(); // stopping 0 has no effect stopGrid(3); awaitPartitionMapExchange(); startGrid(3); latch.await(10, TimeUnit.SECONDS); } /** * {@inheritDoc} */ @Override protected void afterTest() throws Exception { stopAllGrids(); } } {noformat} -- This message was sent by Atlassian JIRA (v7.6.3#76005)