IGNITE-8855 Throttle frequently reconnect client - Fixes #4739. Signed-off-by: Dmitriy Govorukhin <dmitriy.govoruk...@gmail.com>
Project: http://git-wip-us.apache.org/repos/asf/ignite/repo Commit: http://git-wip-us.apache.org/repos/asf/ignite/commit/58150f8f Tree: http://git-wip-us.apache.org/repos/asf/ignite/tree/58150f8f Diff: http://git-wip-us.apache.org/repos/asf/ignite/diff/58150f8f Branch: refs/heads/ignite-7251 Commit: 58150f8fed16255a48b2d110ce81afd02189d73b Parents: 1efec19 Author: ibessonov <bessonov...@gmail.com> Authored: Wed Sep 19 19:05:10 2018 +0300 Committer: Dmitriy Govorukhin <dmitriy.govoruk...@gmail.com> Committed: Wed Sep 19 19:05:10 2018 +0300 ---------------------------------------------------------------------- .../apache/ignite/IgniteSystemProperties.java | 4 ++ .../ignite/spi/discovery/tcp/ClientImpl.java | 47 ++++++++++++++++++++ 2 files changed, 51 insertions(+) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/ignite/blob/58150f8f/modules/core/src/main/java/org/apache/ignite/IgniteSystemProperties.java ---------------------------------------------------------------------- diff --git a/modules/core/src/main/java/org/apache/ignite/IgniteSystemProperties.java b/modules/core/src/main/java/org/apache/ignite/IgniteSystemProperties.java index 9e21814..1db7296 100644 --- a/modules/core/src/main/java/org/apache/ignite/IgniteSystemProperties.java +++ b/modules/core/src/main/java/org/apache/ignite/IgniteSystemProperties.java @@ -506,6 +506,10 @@ public final class IgniteSystemProperties { public static final String IGNITE_DISCOVERY_CLIENT_RECONNECT_HISTORY_SIZE = "IGNITE_DISCOVERY_CLIENT_RECONNECT_HISTORY_SIZE"; + /** Time interval that indicates that client reconnect throttle must be reset to zero. 2 minutes by default. */ + public static final String CLIENT_THROTTLE_RECONNECT_RESET_TIMEOUT_INTERVAL = + "CLIENT_THROTTLE_RECONNECT_RESET_TIMEOUT_INTERVAL"; + /** Number of cache operation retries in case of topology exceptions. */ public static final String IGNITE_CACHE_RETRIES_COUNT = "IGNITE_CACHE_RETRIES_COUNT"; http://git-wip-us.apache.org/repos/asf/ignite/blob/58150f8f/modules/core/src/main/java/org/apache/ignite/spi/discovery/tcp/ClientImpl.java ---------------------------------------------------------------------- diff --git a/modules/core/src/main/java/org/apache/ignite/spi/discovery/tcp/ClientImpl.java b/modules/core/src/main/java/org/apache/ignite/spi/discovery/tcp/ClientImpl.java index 312f737..673290e 100644 --- a/modules/core/src/main/java/org/apache/ignite/spi/discovery/tcp/ClientImpl.java +++ b/modules/core/src/main/java/org/apache/ignite/spi/discovery/tcp/ClientImpl.java @@ -48,6 +48,7 @@ import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ConcurrentMap; import java.util.concurrent.CountDownLatch; import java.util.concurrent.LinkedBlockingDeque; +import java.util.concurrent.ThreadLocalRandom; import java.util.concurrent.atomic.AtomicReference; import javax.net.ssl.SSLException; import org.apache.ignite.Ignite; @@ -60,6 +61,7 @@ import org.apache.ignite.IgniteSystemProperties; import org.apache.ignite.cache.CacheMetrics; import org.apache.ignite.cluster.ClusterMetrics; import org.apache.ignite.cluster.ClusterNode; +import org.apache.ignite.configuration.IgniteConfiguration; import org.apache.ignite.failure.FailureContext; import org.apache.ignite.internal.IgniteClientDisconnectedCheckedException; import org.apache.ignite.internal.IgniteEx; @@ -79,6 +81,7 @@ import org.apache.ignite.internal.util.worker.GridWorker; import org.apache.ignite.internal.worker.WorkersRegistry; import org.apache.ignite.lang.IgniteInClosure; import org.apache.ignite.lang.IgniteUuid; +import org.apache.ignite.spi.IgniteSpiAdapter; import org.apache.ignite.spi.IgniteSpiContext; import org.apache.ignite.spi.IgniteSpiException; import org.apache.ignite.spi.IgniteSpiOperationTimeoutHelper; @@ -147,6 +150,12 @@ class ClientImpl extends TcpDiscoveryImpl { /** */ private static final Object SPI_RECONNECT = "SPI_RECONNECT"; + /** */ + private static final long CLIENT_THROTTLE_RECONNECT_RESET_TIMEOUT = IgniteSystemProperties.getLong( + IgniteSystemProperties.CLIENT_THROTTLE_RECONNECT_RESET_TIMEOUT_INTERVAL, + 2 * 60_000 + ); + /** Remote nodes. */ private final ConcurrentMap<UUID, TcpDiscoveryNode> rmtNodes = new ConcurrentHashMap<>(); @@ -1615,6 +1624,12 @@ class ClientImpl extends TcpDiscoveryImpl { /** */ private boolean nodeAdded; + /** */ + private long lastReconnectTimestamp = -1; + + /** */ + private long currentReconnectDelay = -1; + /** * @param log Logger. */ @@ -1697,6 +1712,8 @@ class ClientImpl extends TcpDiscoveryImpl { locNode.onClientDisconnected(newId); + throttleClientReconnect(); + tryJoin(); } } @@ -1886,6 +1903,36 @@ class ClientImpl extends TcpDiscoveryImpl { } /** + * Wait random delay before trying to reconnect. Delay will grow exponentially every time client is forced to + * reconnect, but only if all these reconnections happened in small period of time (2 minutes). Maximum delay + * could be configured with {@link IgniteSpiAdapter#clientFailureDetectionTimeout()}, default value is + * {@link IgniteConfiguration#DFLT_CLIENT_FAILURE_DETECTION_TIMEOUT}. + * + * @throws InterruptedException If thread is interrupted. + */ + private void throttleClientReconnect() throws InterruptedException { + if (U.currentTimeMillis() - lastReconnectTimestamp > CLIENT_THROTTLE_RECONNECT_RESET_TIMEOUT) + currentReconnectDelay = 0; // Skip pause on first reconnect. + else if (currentReconnectDelay == 0) + currentReconnectDelay = 200; + else { + long maxDelay = spi.failureDetectionTimeoutEnabled() + ? spi.clientFailureDetectionTimeout() + : IgniteConfiguration.DFLT_CLIENT_FAILURE_DETECTION_TIMEOUT; + + currentReconnectDelay = Math.min(maxDelay, (int)(currentReconnectDelay * 1.5)); + } + + if (currentReconnectDelay != 0) { + ThreadLocalRandom random = ThreadLocalRandom.current(); + + Thread.sleep(random.nextLong(currentReconnectDelay / 2, currentReconnectDelay)); + } + + lastReconnectTimestamp = U.currentTimeMillis(); + } + + /** * */ private void onDisconnected() {