This is an automated email from the ASF dual-hosted git repository. amashenkov pushed a commit to branch gg-19225 in repository https://gitbox.apache.org/repos/asf/ignite.git
commit 4cc49e4a1f30e6c64d9aac22c81db7ce7794422b Author: Sergey Chugunov <[email protected]> AuthorDate: Wed May 29 18:34:40 2019 +0300 GG-18877 additional heartbeat to prevent FailureProcessor from treating tcp-comm-worker as blocked Signed-off-by: Dmitriy Govorukhin <[email protected]> (cherry-picked from commit #8905c3f) --- .../client/suite/IgniteClientTestSuite.java | 3 + .../spi/communication/tcp/TcpCommunicationSpi.java | 5 + .../ignite/internal/IgniteClientFailuresTest.java | 160 +++++++++++++++++++++ 3 files changed, 168 insertions(+) diff --git a/modules/clients/src/test/java/org/apache/ignite/internal/client/suite/IgniteClientTestSuite.java b/modules/clients/src/test/java/org/apache/ignite/internal/client/suite/IgniteClientTestSuite.java index e0c3249..075e61c 100644 --- a/modules/clients/src/test/java/org/apache/ignite/internal/client/suite/IgniteClientTestSuite.java +++ b/modules/clients/src/test/java/org/apache/ignite/internal/client/suite/IgniteClientTestSuite.java @@ -18,6 +18,7 @@ package org.apache.ignite.internal.client.suite; import junit.framework.JUnit4TestAdapter; import junit.framework.TestSuite; +import org.apache.ignite.internal.IgniteClientFailuresTest; import org.apache.ignite.internal.TaskEventSubjectIdSelfTest; import org.apache.ignite.internal.client.ClientDefaultCacheSelfTest; import org.apache.ignite.internal.client.ClientReconnectionSelfTest; @@ -171,6 +172,8 @@ public class IgniteClientTestSuite extends TestSuite { // SSL params. suite.addTest(new JUnit4TestAdapter(ClientSslParametersTest.class)); + suite.addTest(new JUnit4TestAdapter(IgniteClientFailuresTest.class)); + return suite; } } diff --git a/modules/core/src/main/java/org/apache/ignite/spi/communication/tcp/TcpCommunicationSpi.java b/modules/core/src/main/java/org/apache/ignite/spi/communication/tcp/TcpCommunicationSpi.java index 0776a5d..2a9fb9a 100755 --- a/modules/core/src/main/java/org/apache/ignite/spi/communication/tcp/TcpCommunicationSpi.java +++ b/modules/core/src/main/java/org/apache/ignite/spi/communication/tcp/TcpCommunicationSpi.java @@ -3551,6 +3551,11 @@ public class TcpCommunicationSpi extends IgniteSpiAdapter implements Communicati break; } } + + CommunicationWorker commWorker0 = commWorker; + + if (commWorker0 != null && commWorker0.runner() == Thread.currentThread()) + commWorker0.updateHeartbeat(); } if (client != null) diff --git a/modules/core/src/test/java/org/apache/ignite/internal/IgniteClientFailuresTest.java b/modules/core/src/test/java/org/apache/ignite/internal/IgniteClientFailuresTest.java new file mode 100644 index 0000000..82522ae --- /dev/null +++ b/modules/core/src/test/java/org/apache/ignite/internal/IgniteClientFailuresTest.java @@ -0,0 +1,160 @@ +/* + * Copyright 2019 GridGain Systems, Inc. and Contributors. + * + * Licensed under the GridGain Community Edition License (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.gridgain.com/products/software/community-edition/gridgain-community-edition-license + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.ignite.internal; + +import org.apache.ignite.IgniteCache; +import org.apache.ignite.configuration.CacheConfiguration; +import org.apache.ignite.configuration.IgniteConfiguration; +import org.apache.ignite.internal.cluster.IgniteClusterEx; +import org.apache.ignite.internal.managers.GridManagerAdapter; +import org.apache.ignite.spi.communication.tcp.TcpCommunicationSpi; +import org.apache.ignite.spi.discovery.tcp.TcpDiscoverySpi; +import org.apache.ignite.testframework.GridStringLogger; +import org.apache.ignite.testframework.GridTestUtils; +import org.apache.ignite.testframework.junits.common.GridCommonAbstractTest; +import org.apache.ignite.testframework.junits.logger.GridTestLog4jLogger; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; + +/** + * + */ +public class IgniteClientFailuresTest extends GridCommonAbstractTest { + /** */ + private boolean clientMode; + + /** */ + private GridStringLogger inMemoryLog; + + /** {@inheritDoc} */ + @Override protected IgniteConfiguration getConfiguration(String igniteInstanceName) throws Exception { + IgniteConfiguration cfg = super.getConfiguration(igniteInstanceName); + + cfg.setClientMode(clientMode); + + if (!clientMode) { + cfg.setClientFailureDetectionTimeout(10_000); + + cfg.setSystemWorkerBlockedTimeout(5_000); + + cfg.setGridLogger(inMemoryLog); + } + + return cfg; + } + + /** */ + @Before + public void setupClientFailuresTest() { + stopAllGrids(); + } + + /** */ + @After + public void tearDownClientFailuresTest() { + stopAllGrids(); + } + + /** + * Test verifies that FailureProcessor doesn't treat tcp-comm-worker thread as blocked when + * the thread handles situation of failed client node and thus doesn't print full thread dump into logs. + * + * @throws Exception If failed. + */ + @Test + public void testNoMessagesFromFailureProcessor() throws Exception { + inMemoryLog = new GridStringLogger(false, new GridTestLog4jLogger()); + + inMemoryLog.logLength(1024 * 1024); + + IgniteEx srv = startGrid(0); + + clientMode = true; + + IgniteEx client00 = startGrid("client00"); + + client00.getOrCreateCache(new CacheConfiguration<>("cache0")); + + breakClient(client00); + + boolean waitRes = GridTestUtils.waitForCondition(() -> { + IgniteClusterEx cl = srv.cluster(); + + return (cl.topology(cl.topologyVersion()).size() == 1); + }, 30_000); + + assertTrue(waitRes); + + assertFalse(inMemoryLog.toString().contains("name=tcp-comm-worker")); + } + + /** + * Test verifies that when client node failed but not yet cleaned up from topology (because {@link IgniteConfiguration#clientFailureDetectionTimeout} has not been reached yet) + * it doesn't affect new client connected from the same address. + * + * @throws Exception If failed. + */ + @Test + public void testFailedClientLeavesTopologyAfterTimeout() throws Exception { + IgniteEx srv0 = startGrid(0); + + clientMode = true; + + IgniteEx client00 = startGrid("client00"); + + Thread.sleep(5_000); + + client00.getOrCreateCache(new CacheConfiguration<>("cache0")); + + breakClient(client00); + + final IgniteClusterEx cl = srv0.cluster(); + + assertEquals(2, cl.topology(cl.topologyVersion()).size()); + + IgniteEx client01 = startGrid("client01"); + + assertEquals(3, cl.topology(cl.topologyVersion()).size()); + + boolean waitRes = GridTestUtils.waitForCondition(() -> (cl.topology(cl.topologyVersion()).size() == 2), + 20_000); + + checkCacheOperations(client01.cache("cache0")); + + assertTrue(waitRes); + } + + /** */ + private void checkCacheOperations(IgniteCache cache) { + for (int i = 0; i < 100; i++) + cache.put(i, i); + + for (int i = 0; i < 100; i++) + assertEquals(i, cache.get(i)); + } + + /** */ + private void breakClient(IgniteEx client) { + Object discoSpi = ((Object[])GridTestUtils.getFieldValue(client.context().discovery(), GridManagerAdapter.class, "spis"))[0]; + + Object commSpi = ((Object[])GridTestUtils.getFieldValue(client.context().io(), GridManagerAdapter.class, "spis"))[0]; + + ((TcpCommunicationSpi)commSpi).simulateNodeFailure(); + + ((TcpDiscoverySpi)discoSpi).simulateNodeFailure(); + } +}
