IGNITE-6587 Critical system workers monitoring

Signed-off-by: Andrey Gura <ag...@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/ignite/repo
Commit: http://git-wip-us.apache.org/repos/asf/ignite/commit/f59d29b9
Tree: http://git-wip-us.apache.org/repos/asf/ignite/tree/f59d29b9
Diff: http://git-wip-us.apache.org/repos/asf/ignite/diff/f59d29b9

Branch: refs/heads/master
Commit: f59d29b9562a8eaa4e96fabe8e37ca16065a0942
Parents: b18c28e
Author: Andrey Kuznetsov <stku...@gmail.com>
Authored: Mon Sep 24 15:24:48 2018 +0300
Committer: Andrey Gura <ag...@apache.org>
Committed: Mon Sep 24 15:31:13 2018 +0300

----------------------------------------------------------------------
 .../apache/ignite/IgniteSystemProperties.java   |   3 +-
 .../ignite/failure/AbstractFailureHandler.java  |  64 ++++++++
 .../apache/ignite/failure/FailureHandler.java   |   6 +
 .../org/apache/ignite/failure/FailureType.java  |   3 +
 .../ignite/failure/NoOpFailureHandler.java      |   6 +-
 .../failure/RestartProcessFailureHandler.java   |   6 +-
 .../ignite/failure/StopNodeFailureHandler.java  |   6 +-
 .../failure/StopNodeOrHaltFailureHandler.java   |   6 +-
 .../org/apache/ignite/internal/IgnitionEx.java  |  31 ++--
 .../discovery/GridDiscoveryManager.java         |  40 +++--
 .../GridCachePartitionExchangeManager.java      |  45 +++++-
 .../cache/GridCacheSharedTtlCleanupManager.java |   6 +
 .../GridDhtPartitionsExchangeFuture.java        |   9 +-
 .../GridCacheDatabaseSharedManager.java         | 135 +++++++++++++----
 .../cache/persistence/file/FilePageStore.java   |   2 +-
 .../wal/FileWriteAheadLogManager.java           |  89 +++++++++--
 .../wal/FsyncModeFileWriteAheadLogManager.java  |  67 +++++++--
 .../processors/failure/FailureProcessor.java    |  11 +-
 .../platform/compute/PlatformCompute.java       |  11 +-
 .../timeout/GridTimeoutProcessor.java           |  36 ++++-
 .../ignite/internal/util/IgniteUtils.java       |  16 ++
 .../ignite/internal/util/StripedExecutor.java   |  15 +-
 .../internal/util/future/GridFutureAdapter.java |  10 +-
 .../ignite/internal/util/nio/GridNioServer.java |  24 ++-
 .../ignite/internal/util/worker/GridWorker.java |  48 +++++-
 .../util/worker/GridWorkerListener.java         |   5 +
 .../util/worker/GridWorkerListenerAdapter.java  |   9 +-
 .../worker/WorkersControlMXBeanImpl.java        |  10 ++
 .../ignite/internal/worker/WorkersRegistry.java | 149 ++++++++++++++++++-
 .../ignite/mxbean/WorkersControlMXBean.java     |   7 +
 .../communication/tcp/TcpCommunicationSpi.java  |  30 +++-
 .../ignite/spi/discovery/tcp/ClientImpl.java    |  61 ++++++--
 .../ignite/spi/discovery/tcp/ServerImpl.java    |  62 ++++++--
 .../failure/AbstractFailureHandlerTest.java     |   4 +-
 .../failure/SystemWorkersBlockingTest.java      | 100 +++++++++++++
 .../failure/SystemWorkersTerminationTest.java   |  37 ++++-
 .../ignite/failure/TestFailureHandler.java      |   4 +-
 .../ignite/internal/IgniteClientRejoinTest.java |  12 +-
 .../IgnitePdsCorruptedStoreTest.java            |   6 +-
 .../persistence/IgnitePdsPageSizesTest.java     |   2 +
 .../persistence/IgnitePdsTaskCancelingTest.java |   9 +-
 .../wal/IgniteWalHistoryReservationsTest.java   |   2 +
 .../communication/GridCacheMessageSelfTest.java |   6 +-
 .../tcp/TcpDiscoverySegmentationPolicyTest.java |   6 +-
 .../IgniteHadoopFileSystemAbstractSelfTest.java |   2 +-
 .../db/wal/IgniteWalRecoveryTest.java           |  12 ++
 46 files changed, 1061 insertions(+), 169 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/ignite/blob/f59d29b9/modules/core/src/main/java/org/apache/ignite/IgniteSystemProperties.java
----------------------------------------------------------------------
diff --git 
a/modules/core/src/main/java/org/apache/ignite/IgniteSystemProperties.java 
b/modules/core/src/main/java/org/apache/ignite/IgniteSystemProperties.java
index 1db7296..71a633f 100644
--- a/modules/core/src/main/java/org/apache/ignite/IgniteSystemProperties.java
+++ b/modules/core/src/main/java/org/apache/ignite/IgniteSystemProperties.java
@@ -31,6 +31,7 @@ import org.apache.ignite.internal.client.GridClient;
 import org.apache.ignite.internal.marshaller.optimized.OptimizedMarshaller;
 import org.apache.ignite.internal.processors.rest.GridRestCommand;
 import org.apache.ignite.internal.util.GridLogThrottle;
+import org.apache.ignite.internal.util.worker.GridWorker;
 import org.apache.ignite.stream.StreamTransformer;
 import org.jetbrains.annotations.Nullable;
 
@@ -960,7 +961,7 @@ public final class IgniteSystemProperties {
      */
     public static final String IGNITE_DUMP_THREADS_ON_FAILURE = 
"IGNITE_DUMP_THREADS_ON_FAILURE";
 
-    /**
+   /**
      * Throttling timeout in millis which avoid excessive PendingTree access 
on unwind if there is nothing to clean yet.
      *
      * Default is 500 ms.

http://git-wip-us.apache.org/repos/asf/ignite/blob/f59d29b9/modules/core/src/main/java/org/apache/ignite/failure/AbstractFailureHandler.java
----------------------------------------------------------------------
diff --git 
a/modules/core/src/main/java/org/apache/ignite/failure/AbstractFailureHandler.java
 
b/modules/core/src/main/java/org/apache/ignite/failure/AbstractFailureHandler.java
new file mode 100644
index 0000000..6ca6520
--- /dev/null
+++ 
b/modules/core/src/main/java/org/apache/ignite/failure/AbstractFailureHandler.java
@@ -0,0 +1,64 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.ignite.failure;
+
+import java.util.Collections;
+import java.util.Set;
+import org.apache.ignite.Ignite;
+import org.apache.ignite.internal.util.tostring.GridToStringInclude;
+import org.apache.ignite.internal.util.typedef.internal.S;
+
+/**
+ * Abstract superclass for {@link FailureHandler} implementations.
+ * Maintains a set of ignored failure types.
+ */
+public abstract class AbstractFailureHandler implements FailureHandler {
+    /** Failure types for which {@link #handle(Ignite, FailureContext)} is skipped; empty by default. */
+    @GridToStringInclude
+    private Set<FailureType> ignoredFailureTypes = Collections.emptySet();
+
+    /** {@inheritDoc} */
+    @Override public void setIgnoredFailureTypes(Set<FailureType> failureTypes) {
+        ignoredFailureTypes = Collections.unmodifiableSet(failureTypes);
+    }
+
+    /**
+     * Returns unmodifiable set of ignored failure types.
+     */
+    public Set<FailureType> getIgnoredFailureTypes() {
+        return ignoredFailureTypes;
+    }
+
+    /** {@inheritDoc} */
+    @Override public boolean onFailure(Ignite ignite, FailureContext failureCtx) {
+        return !ignoredFailureTypes.contains(failureCtx.type()) && handle(ignite, failureCtx);
+    }
+
+    /**
+     * Actual failure handling. This method is not called for ignored failure types.
+     *
+     * @see #setIgnoredFailureTypes(Set)
+     * @see FailureHandler#onFailure(Ignite, FailureContext)
+     */
+    protected abstract boolean handle(Ignite ignite, FailureContext failureCtx);
+
+    /** {@inheritDoc} */
+    @Override public String toString() {
+        return S.toString(AbstractFailureHandler.class, this);
+    }
+}

http://git-wip-us.apache.org/repos/asf/ignite/blob/f59d29b9/modules/core/src/main/java/org/apache/ignite/failure/FailureHandler.java
----------------------------------------------------------------------
diff --git 
a/modules/core/src/main/java/org/apache/ignite/failure/FailureHandler.java 
b/modules/core/src/main/java/org/apache/ignite/failure/FailureHandler.java
index 8717b16..f325e65 100644
--- a/modules/core/src/main/java/org/apache/ignite/failure/FailureHandler.java
+++ b/modules/core/src/main/java/org/apache/ignite/failure/FailureHandler.java
@@ -17,6 +17,7 @@
 
 package org.apache.ignite.failure;
 
+import java.util.Set;
 import org.apache.ignite.Ignite;
 import org.apache.ignite.configuration.IgniteConfiguration;
 import org.apache.ignite.internal.processors.failure.FailureProcessor;
@@ -36,4 +37,9 @@ public interface FailureHandler {
      * @return Whether kernal context must be invalidated or not.
      */
     public boolean onFailure(Ignite ignite, FailureContext failureCtx);
+
+    /**
+     * Sets failure types to ignore.
+     */
+    public void setIgnoredFailureTypes(Set<FailureType> failureTypes);
 }

http://git-wip-us.apache.org/repos/asf/ignite/blob/f59d29b9/modules/core/src/main/java/org/apache/ignite/failure/FailureType.java
----------------------------------------------------------------------
diff --git 
a/modules/core/src/main/java/org/apache/ignite/failure/FailureType.java 
b/modules/core/src/main/java/org/apache/ignite/failure/FailureType.java
index d933420..fbd5529f 100644
--- a/modules/core/src/main/java/org/apache/ignite/failure/FailureType.java
+++ b/modules/core/src/main/java/org/apache/ignite/failure/FailureType.java
@@ -27,6 +27,9 @@ public enum FailureType {
     /** System worker termination. */
     SYSTEM_WORKER_TERMINATION,
 
+    /** System worker has not updated its heartbeat for a long time. */
+    SYSTEM_WORKER_BLOCKED,
+
     /** Critical error - error which leads to the system's inoperability. */
     CRITICAL_ERROR
 }

http://git-wip-us.apache.org/repos/asf/ignite/blob/f59d29b9/modules/core/src/main/java/org/apache/ignite/failure/NoOpFailureHandler.java
----------------------------------------------------------------------
diff --git 
a/modules/core/src/main/java/org/apache/ignite/failure/NoOpFailureHandler.java 
b/modules/core/src/main/java/org/apache/ignite/failure/NoOpFailureHandler.java
index b998d62..67c258f 100644
--- 
a/modules/core/src/main/java/org/apache/ignite/failure/NoOpFailureHandler.java
+++ 
b/modules/core/src/main/java/org/apache/ignite/failure/NoOpFailureHandler.java
@@ -23,14 +23,14 @@ import org.apache.ignite.internal.util.typedef.internal.S;
 /**
  * Just ignores any failure. It's useful for tests and debugging.
  */
-public class NoOpFailureHandler implements FailureHandler {
+public class NoOpFailureHandler extends AbstractFailureHandler {
     /** {@inheritDoc} */
-    @Override public boolean onFailure(Ignite ignite, FailureContext 
failureCtx) {
+    @Override protected boolean handle(Ignite ignite, FailureContext 
failureCtx) {
         return false;
     }
 
     /** {@inheritDoc} */
     @Override public String toString() {
-        return S.toString(NoOpFailureHandler.class, this);
+        return S.toString(NoOpFailureHandler.class, this, "super", 
super.toString());
     }
 }

http://git-wip-us.apache.org/repos/asf/ignite/blob/f59d29b9/modules/core/src/main/java/org/apache/ignite/failure/RestartProcessFailureHandler.java
----------------------------------------------------------------------
diff --git 
a/modules/core/src/main/java/org/apache/ignite/failure/RestartProcessFailureHandler.java
 
b/modules/core/src/main/java/org/apache/ignite/failure/RestartProcessFailureHandler.java
index fe65b28..299b8af 100644
--- 
a/modules/core/src/main/java/org/apache/ignite/failure/RestartProcessFailureHandler.java
+++ 
b/modules/core/src/main/java/org/apache/ignite/failure/RestartProcessFailureHandler.java
@@ -27,9 +27,9 @@ import org.apache.ignite.internal.util.typedef.internal.U;
  * This handler could be used only with ignite.(sh|bat) script.
  * Process will be terminated using {@link Ignition#restart(boolean)} call.
  */
-public class RestartProcessFailureHandler implements FailureHandler {
+public class RestartProcessFailureHandler extends AbstractFailureHandler {
     /** {@inheritDoc} */
-    @Override public boolean onFailure(Ignite ignite, FailureContext 
failureCtx) {
+    @Override protected boolean handle(Ignite ignite, FailureContext 
failureCtx) {
         new Thread(
             new Runnable() {
                 @Override public void run() {
@@ -46,6 +46,6 @@ public class RestartProcessFailureHandler implements 
FailureHandler {
 
     /** {@inheritDoc} */
     @Override public String toString() {
-        return S.toString(RestartProcessFailureHandler.class, this);
+        return S.toString(RestartProcessFailureHandler.class, this, "super", 
super.toString());
     }
 }

http://git-wip-us.apache.org/repos/asf/ignite/blob/f59d29b9/modules/core/src/main/java/org/apache/ignite/failure/StopNodeFailureHandler.java
----------------------------------------------------------------------
diff --git 
a/modules/core/src/main/java/org/apache/ignite/failure/StopNodeFailureHandler.java
 
b/modules/core/src/main/java/org/apache/ignite/failure/StopNodeFailureHandler.java
index 861d220..4721c50 100644
--- 
a/modules/core/src/main/java/org/apache/ignite/failure/StopNodeFailureHandler.java
+++ 
b/modules/core/src/main/java/org/apache/ignite/failure/StopNodeFailureHandler.java
@@ -25,9 +25,9 @@ import org.apache.ignite.internal.util.typedef.internal.U;
 /**
  * Handler will stop node in case of critical error using {@code 
IgnitionEx.stop(nodeName, true, true)} call.
  */
-public class StopNodeFailureHandler implements FailureHandler {
+public class StopNodeFailureHandler extends AbstractFailureHandler {
     /** {@inheritDoc} */
-    @Override public boolean onFailure(Ignite ignite, FailureContext 
failureCtx) {
+    @Override public boolean handle(Ignite ignite, FailureContext failureCtx) {
         new Thread(
             new Runnable() {
                 @Override public void run() {
@@ -44,6 +44,6 @@ public class StopNodeFailureHandler implements FailureHandler 
{
 
     /** {@inheritDoc} */
     @Override public String toString() {
-        return S.toString(StopNodeFailureHandler.class, this);
+        return S.toString(StopNodeFailureHandler.class, this, "super", 
super.toString());
     }
 }

http://git-wip-us.apache.org/repos/asf/ignite/blob/f59d29b9/modules/core/src/main/java/org/apache/ignite/failure/StopNodeOrHaltFailureHandler.java
----------------------------------------------------------------------
diff --git 
a/modules/core/src/main/java/org/apache/ignite/failure/StopNodeOrHaltFailureHandler.java
 
b/modules/core/src/main/java/org/apache/ignite/failure/StopNodeOrHaltFailureHandler.java
index 24c8b17..ff88609 100644
--- 
a/modules/core/src/main/java/org/apache/ignite/failure/StopNodeOrHaltFailureHandler.java
+++ 
b/modules/core/src/main/java/org/apache/ignite/failure/StopNodeOrHaltFailureHandler.java
@@ -31,7 +31,7 @@ import org.apache.ignite.internal.util.typedef.internal.U;
  * If node can't be stopped during provided {@code timeout} or {@code tryStop} 
value is {@code false}
  * then JVM process will be terminated forcibly using {@code 
Runtime.getRuntime().halt()}.
  */
-public class StopNodeOrHaltFailureHandler implements FailureHandler {
+public class StopNodeOrHaltFailureHandler extends AbstractFailureHandler {
     /** Try stop. */
     private final boolean tryStop;
 
@@ -55,7 +55,7 @@ public class StopNodeOrHaltFailureHandler implements 
FailureHandler {
     }
 
     /** {@inheritDoc} */
-    @Override public boolean onFailure(Ignite ignite, FailureContext 
failureCtx) {
+    @Override protected boolean handle(Ignite ignite, FailureContext 
failureCtx) {
         IgniteLogger log = ignite.log();
 
         if (tryStop) {
@@ -121,6 +121,6 @@ public class StopNodeOrHaltFailureHandler implements 
FailureHandler {
 
     /** {@inheritDoc} */
     @Override public String toString() {
-        return S.toString(StopNodeOrHaltFailureHandler.class, this);
+        return S.toString(StopNodeOrHaltFailureHandler.class, this, "super", 
super.toString());
     }
 }

http://git-wip-us.apache.org/repos/asf/ignite/blob/f59d29b9/modules/core/src/main/java/org/apache/ignite/internal/IgnitionEx.java
----------------------------------------------------------------------
diff --git 
a/modules/core/src/main/java/org/apache/ignite/internal/IgnitionEx.java 
b/modules/core/src/main/java/org/apache/ignite/internal/IgnitionEx.java
index 148bd21..8bef477 100644
--- a/modules/core/src/main/java/org/apache/ignite/internal/IgnitionEx.java
+++ b/modules/core/src/main/java/org/apache/ignite/internal/IgnitionEx.java
@@ -92,8 +92,11 @@ import org.apache.ignite.internal.util.typedef.T2;
 import org.apache.ignite.internal.util.typedef.X;
 import org.apache.ignite.internal.util.typedef.internal.A;
 import org.apache.ignite.internal.util.typedef.internal.CU;
+import org.apache.ignite.internal.util.typedef.internal.S;
 import org.apache.ignite.internal.util.typedef.internal.U;
+import org.apache.ignite.internal.util.worker.GridWorker;
 import org.apache.ignite.internal.worker.WorkersRegistry;
+import org.apache.ignite.lang.IgniteBiInClosure;
 import org.apache.ignite.lang.IgniteBiTuple;
 import org.apache.ignite.lang.IgniteInClosure;
 import org.apache.ignite.logger.LoggerNodeIdAware;
@@ -1817,7 +1820,17 @@ public class IgnitionEx {
 
             validateThreadPoolSize(cfg.getStripedPoolSize(), "stripedPool");
 
-            WorkersRegistry workerRegistry = new WorkersRegistry();
+            WorkersRegistry workerRegistry = new WorkersRegistry(
+                new IgniteBiInClosure<GridWorker, FailureType>() {
+                    @Override public void apply(GridWorker deadWorker, 
FailureType failureType) {
+                        if (grid != null)
+                            grid.context().failure().process(new 
FailureContext(
+                                failureType,
+                                new 
IgniteException(S.toString(GridWorker.class, deadWorker))));
+                    }
+                },
+                cfg.getFailureDetectionTimeout(),
+                log);
 
             stripedExecSvc = new StripedExecutor(
                 cfg.getStripedPoolSize(),
@@ -1888,7 +1901,7 @@ public class IgnitionEx {
                 cfg.getIgfsThreadPoolSize(),
                 cfg.getIgfsThreadPoolSize(),
                 DFLT_THREAD_KEEP_ALIVE_TIME,
-                new LinkedBlockingQueue<Runnable>(),
+                new LinkedBlockingQueue<>(),
                 new IgfsThreadFactory(cfg.getIgniteInstanceName(), "igfs"));
 
             igfsExecSvc.allowCoreThreadTimeOut(true);
@@ -1911,7 +1924,7 @@ public class IgnitionEx {
                     myCfg.getConnectorConfiguration().getThreadPoolSize(),
                     myCfg.getConnectorConfiguration().getThreadPoolSize(),
                     DFLT_THREAD_KEEP_ALIVE_TIME,
-                    new LinkedBlockingQueue<Runnable>(),
+                    new LinkedBlockingQueue<>(),
                     GridIoPolicy.UNDEFINED,
                     oomeHnd
                 );
@@ -1927,7 +1940,7 @@ public class IgnitionEx {
                 myCfg.getUtilityCacheThreadPoolSize(),
                 myCfg.getUtilityCacheThreadPoolSize(),
                 myCfg.getUtilityCacheKeepAliveTime(),
-                new LinkedBlockingQueue<Runnable>(),
+                new LinkedBlockingQueue<>(),
                 GridIoPolicy.UTILITY_CACHE_POOL,
                 oomeHnd);
 
@@ -1939,7 +1952,7 @@ public class IgnitionEx {
                 1,
                 1,
                 DFLT_THREAD_KEEP_ALIVE_TIME,
-                new LinkedBlockingQueue<Runnable>(),
+                new LinkedBlockingQueue<>(),
                 GridIoPolicy.AFFINITY_POOL,
                 oomeHnd);
 
@@ -1954,7 +1967,7 @@ public class IgnitionEx {
                     cpus,
                     cpus * 2,
                     3000L,
-                    new LinkedBlockingQueue<Runnable>(1000),
+                    new LinkedBlockingQueue<>(1000),
                     GridIoPolicy.IDX_POOL,
                     oomeHnd
                 );
@@ -1968,7 +1981,7 @@ public class IgnitionEx {
                 cfg.getQueryThreadPoolSize(),
                 cfg.getQueryThreadPoolSize(),
                 DFLT_THREAD_KEEP_ALIVE_TIME,
-                new LinkedBlockingQueue<Runnable>(),
+                new LinkedBlockingQueue<>(),
                 GridIoPolicy.QUERY_POOL,
                 oomeHnd);
 
@@ -1980,7 +1993,7 @@ public class IgnitionEx {
                 2,
                 2,
                 DFLT_THREAD_KEEP_ALIVE_TIME,
-                new LinkedBlockingQueue<Runnable>(),
+                new LinkedBlockingQueue<>(),
                 GridIoPolicy.SCHEMA_POOL,
                 oomeHnd);
 
@@ -1998,7 +2011,7 @@ public class IgnitionEx {
                         execCfg.getSize(),
                         execCfg.getSize(),
                         DFLT_THREAD_KEEP_ALIVE_TIME,
-                        new LinkedBlockingQueue<Runnable>(),
+                        new LinkedBlockingQueue<>(),
                         GridIoPolicy.UNDEFINED,
                         oomeHnd);
 

http://git-wip-us.apache.org/repos/asf/ignite/blob/f59d29b9/modules/core/src/main/java/org/apache/ignite/internal/managers/discovery/GridDiscoveryManager.java
----------------------------------------------------------------------
diff --git 
a/modules/core/src/main/java/org/apache/ignite/internal/managers/discovery/GridDiscoveryManager.java
 
b/modules/core/src/main/java/org/apache/ignite/internal/managers/discovery/GridDiscoveryManager.java
index b9af961..5ce4cb6 100644
--- 
a/modules/core/src/main/java/org/apache/ignite/internal/managers/discovery/GridDiscoveryManager.java
+++ 
b/modules/core/src/main/java/org/apache/ignite/internal/managers/discovery/GridDiscoveryManager.java
@@ -223,7 +223,7 @@ public class GridDiscoveryManager extends 
GridManagerAdapter<DiscoverySpi> {
     private final DiscoveryWorker discoWrk = new DiscoveryWorker();
 
     /** Discovery event notyfier worker. */
-    private final DiscoveryMessageNotifyerWorker discoNotifierWrk = new 
DiscoveryMessageNotifyerWorker();
+    private final DiscoveryMessageNotifierWorker discoNtfWrk = new 
DiscoveryMessageNotifierWorker();
 
     /** Network segment check worker. */
     private SegmentCheckWorker segChkWrk;
@@ -597,7 +597,7 @@ public class GridDiscoveryManager extends 
GridManagerAdapter<DiscoverySpi> {
             ) {
                 GridFutureAdapter notificationFut = new GridFutureAdapter();
 
-                discoNotifierWrk.submit(notificationFut, () -> {
+                discoNtfWrk.submit(notificationFut, () -> {
                     synchronized (discoEvtMux) {
                         onDiscovery0(type, topVer, node, topSnapshot, 
snapshots, spiCustomMsg);
                     }
@@ -930,7 +930,7 @@ public class GridDiscoveryManager extends 
GridManagerAdapter<DiscoverySpi> {
             }
         });
 
-        new IgniteThread(discoNotifierWrk).start();
+        new IgniteThread(discoNtfWrk).start();
 
         startSpi();
 
@@ -1698,9 +1698,9 @@ public class GridDiscoveryManager extends 
GridManagerAdapter<DiscoverySpi> {
 
         U.join(discoWrk, log);
 
-        U.cancel(discoNotifierWrk);
+        U.cancel(discoNtfWrk);
 
-        U.join(discoNotifierWrk, log);
+        U.join(discoNtfWrk, log);
 
         // Stop SPI itself.
         stopSpi();
@@ -2671,22 +2671,31 @@ public class GridDiscoveryManager extends 
GridManagerAdapter<DiscoverySpi> {
     /**
      *
      */
-    private class DiscoveryMessageNotifyerWorker extends GridWorker {
+    private class DiscoveryMessageNotifierWorker extends GridWorker {
         /** Queue. */
         private final BlockingQueue<T2<GridFutureAdapter, Runnable>> queue = 
new LinkedBlockingQueue<>();
 
         /**
          * Default constructor.
          */
-        protected DiscoveryMessageNotifyerWorker() {
-            super(ctx.igniteInstanceName(), "disco-notyfier-worker", 
GridDiscoveryManager.this.log, ctx.workersRegistry());
+        protected DiscoveryMessageNotifierWorker() {
+            super(ctx.igniteInstanceName(), "disco-notifier-worker", 
GridDiscoveryManager.this.log, ctx.workersRegistry());
         }
 
         /**
          *
          */
         private void body0() throws InterruptedException {
-            T2<GridFutureAdapter, Runnable> notification = queue.take();
+            T2<GridFutureAdapter, Runnable> notification;
+
+            blockingSectionBegin();
+
+            try {
+                notification = queue.take();
+            }
+            finally {
+                blockingSectionEnd();
+            }
 
             try {
                 notification.get2().run();
@@ -2848,6 +2857,8 @@ public class GridDiscoveryManager extends 
GridManagerAdapter<DiscoverySpi> {
             while (!isCancelled()) {
                 try {
                     body0();
+
+                    onIdle();
                 }
                 catch (InterruptedException e) {
                     if (!isCancelled)
@@ -2871,7 +2882,16 @@ public class GridDiscoveryManager extends 
GridManagerAdapter<DiscoverySpi> {
         @SuppressWarnings("DuplicateCondition")
         private void body0() throws InterruptedException {
             GridTuple6<Integer, AffinityTopologyVersion, ClusterNode, 
DiscoCache, Collection<ClusterNode>,
-                DiscoveryCustomMessage> evt = evts.take();
+                DiscoveryCustomMessage> evt;
+
+            blockingSectionBegin();
+
+            try {
+                evt = evts.take();
+            }
+            finally {
+                blockingSectionEnd();
+            }
 
             int type = evt.get1();
 

http://git-wip-us.apache.org/repos/asf/ignite/blob/f59d29b9/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/GridCachePartitionExchangeManager.java
----------------------------------------------------------------------
diff --git 
a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/GridCachePartitionExchangeManager.java
 
b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/GridCachePartitionExchangeManager.java
index ca2d1c8..acfedf8 100644
--- 
a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/GridCachePartitionExchangeManager.java
+++ 
b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/GridCachePartitionExchangeManager.java
@@ -2230,6 +2230,25 @@ public class GridCachePartitionExchangeManager<K, V> 
extends GridCacheSharedMana
     }
 
     /**
+     * Invokes {@link GridWorker#blockingSectionBegin()} for exchange worker.
+     * Should be called from exchange worker thread.
+     */
+    public void exchangerBlockingSectionBegin() {
+        assert exchWorker != null && Thread.currentThread() == 
exchWorker.runner();
+
+        exchWorker.blockingSectionBegin();
+    }
+
+    /**
+     * Invokes {@link GridWorker#blockingSectionEnd()} for exchange worker.
+     * Should be called from exchange worker thread.
+     */
+    public void exchangerBlockingSectionEnd() {
+        assert exchWorker != null && Thread.currentThread() == 
exchWorker.runner();
+
+        exchWorker.blockingSectionEnd();
+    }
+    /**
      * Exchange future thread. All exchanges happen only by one thread and next
      * exchange will not start until previous one completes.
      */
@@ -2483,6 +2502,8 @@ public class GridCachePartitionExchangeManager<K, V> 
extends GridCacheSharedMana
             long cnt = 0;
 
             while (!isCancelled()) {
+                onIdle();
+
                 cnt++;
 
                 CachePartitionExchangeWorkerTask task = null;
@@ -2520,8 +2541,12 @@ public class GridCachePartitionExchangeManager<K, V> 
extends GridCacheSharedMana
                     if (isCancelled())
                         Thread.currentThread().interrupt();
 
+                    updateHeartbeat();
+
                     task = futQ.poll(timeout, MILLISECONDS);
 
+                    updateHeartbeat();
+
                     if (task == null)
                         continue; // Main while loop.
 
@@ -2547,9 +2572,8 @@ public class GridCachePartitionExchangeManager<K, V> 
extends GridCacheSharedMana
                         if (isCancelled())
                             break;
 
-                        if (task instanceof RebalanceReassignExchangeTask) {
+                        if (task instanceof RebalanceReassignExchangeTask)
                             exchId = ((RebalanceReassignExchangeTask) 
task).exchangeId();
-                        }
                         else if (task instanceof ForceRebalanceExchangeTask) {
                             forcePreload = true;
 
@@ -2595,12 +2619,25 @@ public class GridCachePartitionExchangeManager<K, V> 
extends GridCacheSharedMana
                                 long curTimeout = 
cfg.getTransactionConfiguration().getTxTimeoutOnPartitionMapExchange();
 
                                 try {
-                                    resVer = exchFut.get(curTimeout > 0 && 
!txRolledBack ?
-                                            Math.min(curTimeout, dumpTimeout) 
: dumpTimeout, TimeUnit.MILLISECONDS);
+                                    long exchTimeout = curTimeout > 0 && 
!txRolledBack
+                                        ? Math.min(curTimeout, dumpTimeout)
+                                        : dumpTimeout;
+
+                                    blockingSectionBegin();
+
+                                    try {
+                                        resVer = exchFut.get(exchTimeout, TimeUnit.MILLISECONDS);
+                                    } finally {
+                                        blockingSectionEnd();
+                                    }
+
+                                    onIdle();
 
                                     break;
                                 }
                                 catch (IgniteFutureTimeoutCheckedException 
ignored) {
+                                    updateHeartbeat();
+
                                     if (nextDumpTime <= U.currentTimeMillis()) 
{
                                         U.warn(diagnosticLog, "Failed to wait 
for partition map exchange [" +
                                             "topVer=" + 
exchFut.initialVersion() +

http://git-wip-us.apache.org/repos/asf/ignite/blob/f59d29b9/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/GridCacheSharedTtlCleanupManager.java
----------------------------------------------------------------------
diff --git 
a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/GridCacheSharedTtlCleanupManager.java
 
b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/GridCacheSharedTtlCleanupManager.java
index 7adabc3..7a54354 100644
--- 
a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/GridCacheSharedTtlCleanupManager.java
+++ 
b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/GridCacheSharedTtlCleanupManager.java
@@ -134,6 +134,8 @@ public class GridCacheSharedTtlCleanupManager extends 
GridCacheSharedManagerAdap
                     boolean expiredRemains = false;
 
                     for (GridCacheTtlManager mgr : mgrs) {
+                        updateHeartbeat();
+
                         if (mgr.expire(CLEANUP_WORKER_ENTRIES_PROCESS_LIMIT))
                             expiredRemains = true;
 
@@ -141,8 +143,12 @@ public class GridCacheSharedTtlCleanupManager extends GridCacheSharedManagerAdap
                             return;
                     }
 
+                    updateHeartbeat();
+
                     if (!expiredRemains)
                         U.sleep(CLEANUP_WORKER_SLEEP_INTERVAL);
+
+                    onIdle();
                 }
             }
             catch (Throwable t) {

http://git-wip-us.apache.org/repos/asf/ignite/blob/f59d29b9/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/distributed/dht/preloader/GridDhtPartitionsExchangeFuture.java
----------------------------------------------------------------------
diff --git a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/distributed/dht/preloader/GridDhtPartitionsExchangeFuture.java b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/distributed/dht/preloader/GridDhtPartitionsExchangeFuture.java
index 265c48d..f3e8fde 100644
--- a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/distributed/dht/preloader/GridDhtPartitionsExchangeFuture.java
+++ b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/distributed/dht/preloader/GridDhtPartitionsExchangeFuture.java
@@ -1474,7 +1474,14 @@ public class GridDhtPartitionsExchangeFuture extends GridDhtTopologyFutureAdapte
             try {
                 while (true) {
                     try {
-                        releaseLatch.await(waitTimeout, TimeUnit.MILLISECONDS);
+                        cctx.exchange().exchangerBlockingSectionBegin();
+
+                        try {
+                            releaseLatch.await(waitTimeout, TimeUnit.MILLISECONDS);
+                        }
+                        finally {
+                            cctx.exchange().exchangerBlockingSectionEnd();
+                        }
 
                         if (log.isInfoEnabled())
                             log.info("Finished waiting for partitions release latch: " + releaseLatch);

http://git-wip-us.apache.org/repos/asf/ignite/blob/f59d29b9/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/GridCacheDatabaseSharedManager.java
----------------------------------------------------------------------
diff --git a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/GridCacheDatabaseSharedManager.java b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/GridCacheDatabaseSharedManager.java
index 529b52e..d74723e 100755
--- a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/GridCacheDatabaseSharedManager.java
+++ b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/GridCacheDatabaseSharedManager.java
@@ -74,6 +74,7 @@ import org.apache.ignite.events.EventType;
 import org.apache.ignite.failure.FailureContext;
 import org.apache.ignite.failure.FailureType;
 import org.apache.ignite.internal.GridKernalContext;
+import org.apache.ignite.internal.IgniteFutureTimeoutCheckedException;
 import org.apache.ignite.internal.IgniteInternalFuture;
 import org.apache.ignite.internal.IgniteInterruptedCheckedException;
 import org.apache.ignite.internal.NodeStoppingException;
@@ -1456,33 +1457,73 @@ public class GridCacheDatabaseSharedManager extends IgniteCacheDatabaseSharedMan
         if (checkpointLock.writeLock().isHeldByCurrentThread())
             return;
 
-        for (; ; ) {
-            checkpointLock.readLock().lock();
+        long timeout = cctx.gridConfig().getFailureDetectionTimeout();
 
-            if (stopping) {
-                checkpointLock.readLock().unlock();
+        long start = U.currentTimeMillis();
+        long passed;
 
-                throw new IgniteException(new NodeStoppingException("Failed to perform cache update: node is stopping."));
-            }
+        boolean interruped = false;
 
-            if (checkpointLock.getReadHoldCount() > 1 || safeToUpdatePageMemories())
-                break;
-            else {
-                checkpointLock.readLock().unlock();
+        try {
+            for (; ; ) {
+                if ((passed = U.currentTimeMillis() - start) >= timeout)
+                    failCheckpointReadLock();
 
                 try {
-                    checkpointer.wakeupForCheckpoint(0, "too many dirty pages").cpBeginFut.getUninterruptibly();
+                    if (!checkpointLock.readLock().tryLock(timeout - passed, TimeUnit.MILLISECONDS))
+                        failCheckpointReadLock();
                 }
-                catch (IgniteCheckedException e) {
-                    throw new IgniteException("Failed to wait for checkpoint begin.", e);
+                catch (InterruptedException e) {
+                    interruped = true;
+
+                    continue;
+                }
+
+                if (stopping) {
+                    checkpointLock.readLock().unlock();
+
+                    throw new IgniteException(new NodeStoppingException("Failed to perform cache update: node is stopping."));
+                }
+
+                if (checkpointLock.getReadHoldCount() > 1 || safeToUpdatePageMemories())
+                    break;
+                else {
+                    checkpointLock.readLock().unlock();
+
+                    if (U.currentTimeMillis() - start >= timeout)
+                        failCheckpointReadLock();
+
+                    try {
+                        checkpointer.wakeupForCheckpoint(0, "too many dirty pages").cpBeginFut
+                            .getUninterruptibly();
+                    }
+                    catch (IgniteFutureTimeoutCheckedException e) {
+                        failCheckpointReadLock();
+                    }
+                    catch (IgniteCheckedException e) {
+                        throw new IgniteException("Failed to wait for checkpoint begin.", e);
+                    }
                 }
             }
         }
+        finally {
+            if (interruped)
+                Thread.currentThread().interrupt();
+        }
 
         if (ASSERTION_ENABLED)
             CHECKPOINT_LOCK_HOLD_COUNT.set(CHECKPOINT_LOCK_HOLD_COUNT.get() + 1);
     }
 
+    /** */
+    private void failCheckpointReadLock() throws IgniteException {
+        IgniteException e = new IgniteException("Checkpoint read lock acquisition has been timed out.");
+
+        cctx.kernalContext().failure().process(new FailureContext(CRITICAL_ERROR, e));
+
+        throw e;
+    }
+
     /** {@inheritDoc} */
     @Override public boolean checkpointLockIsHeldByThread() {
         return !ASSERTION_ENABLED ||
@@ -3094,6 +3135,8 @@ public class GridCacheDatabaseSharedManager extends IgniteCacheDatabaseSharedMan
                     return;
                 }
 
+                updateHeartbeat();
+
                 currCheckpointPagesCnt = chp.pagesSize;
 
                 writtenPagesCntr = new AtomicInteger();
@@ -3124,6 +3167,11 @@ public class GridCacheDatabaseSharedManager extends IgniteCacheDatabaseSharedMan
                                     updStores,
                                     doneWriteFut,
                                     totalPagesToWriteCnt,
+                                    new Runnable() {
+                                        @Override public void run() {
+                                            updateHeartbeat();
+                                        }
+                                    },
                                     asyncRunner
                                 );
 
@@ -3132,23 +3180,34 @@ public class GridCacheDatabaseSharedManager extends IgniteCacheDatabaseSharedMan
                                 }
                                 catch (RejectedExecutionException ignore) {
                                     // Run the task synchronously.
+                                    updateHeartbeat();
+
                                     write.run();
                                 }
                             }
                         }
                         else {
                             // Single-threaded checkpoint.
+                            updateHeartbeat();
+
                             Runnable write = new WriteCheckpointPages(
                                 tracker,
                                 chp.cpPages,
                                 updStores,
                                 doneWriteFut,
                                 totalPagesToWriteCnt,
+                                new Runnable() {
+                                    @Override public void run() {
+                                        updateHeartbeat();
+                                    }
+                                },
                                 null);
 
                             write.run();
                         }
 
+                        updateHeartbeat();
+
                         // Wait and check for errors.
                         doneWriteFut.get();
 
@@ -3170,7 +3229,14 @@ public class GridCacheDatabaseSharedManager extends IgniteCacheDatabaseSharedMan
                                     return;
                                 }
 
-                                updStoreEntry.getKey().sync();
+                                blockingSectionBegin();
+
+                                try {
+                                    updStoreEntry.getKey().sync();
+                                }
+                                finally {
+                                    blockingSectionEnd();
+                                }
 
                                 syncedPagesCntr.addAndGet(updStoreEntry.getValue().intValue());
                             }
@@ -3390,15 +3456,18 @@ public class GridCacheDatabaseSharedManager extends IgniteCacheDatabaseSharedMan
             boolean cancel = false;
 
             try {
-                long now = U.currentTimeMillis();
-
                 synchronized (this) {
                     long remaining;
 
-                    while ((remaining = scheduledCp.nextCpTs - now) > 0 && !isCancelled()) {
-                        wait(remaining);
+                    while ((remaining = scheduledCp.nextCpTs - U.currentTimeMillis()) > 0 && !isCancelled()) {
+                        blockingSectionBegin();
 
-                        now = U.currentTimeMillis();
+                        try {
+                            wait(remaining);
+                        }
+                        finally {
+                            blockingSectionEnd();
+                        }
                     }
                 }
             }
@@ -3783,15 +3852,21 @@ public class GridCacheDatabaseSharedManager extends IgniteCacheDatabaseSharedMan
         /** Total pages to write, counter may be greater than {@link #writePageIds} size */
         private final int totalPagesToWrite;
 
+        /** */
+        private final Runnable beforePageWrite;
+
         /** If any pages were skipped, new task with remaining pages will be submitted here. */
         private final ExecutorService retryWriteExecutor;
 
         /**
-         * @param tracker Tracker.
-         * @param writePageIds Write page ids.
-         * @param updStores Upd stores.
-         * @param doneFut Done future.
-         * @param totalPagesToWrite Total pages to write.
+         * Creates task for write pages
+         *
+         * @param tracker
+         * @param writePageIds Collection of page IDs to write.
+         * @param updStores
+         * @param doneFut
+         * @param totalPagesToWrite total pages to be written under this checkpoint
+         * @param beforePageWrite Action to be performed before every page write.
          * @param retryWriteExecutor Retry write executor.
          */
         private WriteCheckpointPages(
@@ -3800,6 +3875,7 @@ public class GridCacheDatabaseSharedManager extends IgniteCacheDatabaseSharedMan
             final ConcurrentLinkedHashMap<PageStore, LongAdder> updStores,
             final CountDownFuture doneFut,
             final int totalPagesToWrite,
+            final Runnable beforePageWrite,
             final ExecutorService retryWriteExecutor
         ) {
             this.tracker = tracker;
@@ -3807,6 +3883,7 @@ public class GridCacheDatabaseSharedManager extends IgniteCacheDatabaseSharedMan
             this.updStores = updStores;
             this.doneFut = doneFut;
             this.totalPagesToWrite = totalPagesToWrite;
+            this.beforePageWrite = beforePageWrite;
             this.retryWriteExecutor = retryWriteExecutor;
         }
 
@@ -3831,7 +3908,13 @@ public class GridCacheDatabaseSharedManager extends IgniteCacheDatabaseSharedMan
                     else {
                         // Submit current retry pages to the end of the queue to avoid starvation.
                         WriteCheckpointPages retryWritesTask = new WriteCheckpointPages(
-                            tracker, pagesToRetry, updStores, doneFut, totalPagesToWrite, retryWriteExecutor);
+                            tracker,
+                            pagesToRetry,
+                            updStores,
+                            doneFut,
+                            totalPagesToWrite,
+                            beforePageWrite,
+                            retryWriteExecutor);
 
                         retryWriteExecutor.submit(retryWritesTask);
                     }
@@ -3859,6 +3942,8 @@ public class GridCacheDatabaseSharedManager extends IgniteCacheDatabaseSharedMan
 
                 tmpWriteBuf.rewind();
 
+                beforePageWrite.run();
+
                 snapshotMgr.beforePageWrite(fullId);
 
                 int grpId = fullId.groupId();

http://git-wip-us.apache.org/repos/asf/ignite/blob/f59d29b9/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/file/FilePageStore.java
----------------------------------------------------------------------
diff --git a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/file/FilePageStore.java b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/file/FilePageStore.java
index d4d4716..4bb7513 100755
--- a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/file/FilePageStore.java
+++ b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/file/FilePageStore.java
@@ -648,7 +648,7 @@ public class FilePageStore implements PageStore {
                 fileIO.force();
         }
         catch (IOException e) {
-            throw new StorageException("Failed to fsync partition file [file=" + cfgFile.getAbsolutePath() + "]", e);
+            throw new StorageException("Failed to fsync partition file [file=" + cfgFile.getAbsolutePath() + ']', e);
         }
         finally {
             lock.writeLock().unlock();

http://git-wip-us.apache.org/repos/asf/ignite/blob/f59d29b9/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/wal/FileWriteAheadLogManager.java
----------------------------------------------------------------------
diff --git a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/wal/FileWriteAheadLogManager.java b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/wal/FileWriteAheadLogManager.java
index 907a311..f53c02f 100644
--- a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/wal/FileWriteAheadLogManager.java
+++ b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/wal/FileWriteAheadLogManager.java
@@ -1699,6 +1699,8 @@ public class FileWriteAheadLogManager extends GridCacheSharedManagerAdapter impl
 
         /** {@inheritDoc} */
         @Override protected void body() {
+            blockingSectionBegin();
+
             try {
                 allocateRemainingFiles();
             }
@@ -1714,13 +1716,24 @@ public class FileWriteAheadLogManager extends GridCacheSharedManagerAdapter impl
 
                 return;
             }
+            finally {
+                blockingSectionEnd();
+            }
 
             Throwable err = null;
 
             try {
                 synchronized (this) {
-                    while (curAbsWalIdx == -1 && !stopped)
-                        wait();
+                    while (curAbsWalIdx == -1 && !stopped) {
+                        blockingSectionBegin();
+
+                        try {
+                            wait();
+                        }
+                        finally {
+                            blockingSectionEnd();
+                        }
+                    }
 
                     // If the archive directory is empty, we can be sure that there were no WAL segments archived.
                     // This is ensured by the check in truncate() which will leave at least one file there
@@ -1734,8 +1747,16 @@ public class FileWriteAheadLogManager extends GridCacheSharedManagerAdapter impl
                         assert lastAbsArchivedIdx <= curAbsWalIdx : "lastArchived=" + lastAbsArchivedIdx +
                             ", current=" + curAbsWalIdx;
 
-                        while (lastAbsArchivedIdx >= curAbsWalIdx - 1 && !stopped)
-                            wait();
+                        while (lastAbsArchivedIdx >= curAbsWalIdx - 1 && !stopped) {
+                            blockingSectionBegin();
+
+                            try {
+                                wait();
+                            }
+                            finally {
+                                blockingSectionEnd();
+                            }
+                        }
 
                         toArchive = lastAbsArchivedIdx + 1;
                     }
@@ -1743,11 +1764,28 @@ public class FileWriteAheadLogManager extends GridCacheSharedManagerAdapter impl
                     if (stopped)
                         break;
 
-                    final SegmentArchiveResult res = archiveSegment(toArchive);
+                    SegmentArchiveResult res;
+
+                    blockingSectionBegin();
+
+                    try {
+                        res = archiveSegment(toArchive);
+                    }
+                    finally {
+                        blockingSectionEnd();
+                    }
 
                     synchronized (this) {
-                        while (locked.containsKey(toArchive) && !stopped)
-                            wait();
+                        while (locked.containsKey(toArchive) && !stopped) {
+                            blockingSectionBegin();
+
+                            try {
+                                wait();
+                            }
+                            finally {
+                                blockingSectionEnd();
+                            }
+                        }
 
                         // Then increase counter to allow rollover on clean working file
                         changeLastArchivedIndexAndNotifyWaiters(toArchive);
@@ -1762,6 +1800,8 @@ public class FileWriteAheadLogManager extends GridCacheSharedManagerAdapter impl
                                 res.getDstArchiveFile())
                         );
                     }
+
+                    onIdle();
                 }
             }
             catch (InterruptedException t) {
@@ -2263,7 +2303,14 @@ public class FileWriteAheadLogManager extends GridCacheSharedManagerAdapter impl
                     long segmentToDecompress = -1L;
 
                     try {
-                        segmentToDecompress = segmentsQueue.take();
+                        blockingSectionBegin();
+
+                        try {
+                            segmentToDecompress = segmentsQueue.take();
+                        }
+                        finally {
+                            blockingSectionEnd();
+                        }
 
                         if (isCancelled())
                             break;
@@ -2279,7 +2326,7 @@ public class FileWriteAheadLogManager extends GridCacheSharedManagerAdapter impl
                             zis.getNextEntry();
 
                             while (io.writeFully(arr, 0, zis.read(arr)) > 0)
-                                ;
+                                updateHeartbeat();
                         }
 
                         try {
@@ -2293,6 +2340,8 @@ public class FileWriteAheadLogManager extends GridCacheSharedManagerAdapter impl
                                 U.error(log, "Can't delete temporary unzipped segment [tmp=" + unzipTmp + "]");
                         }
 
+                        updateHeartbeat();
+
                         synchronized (this) {
                             decompressionFutures.remove(segmentToDecompress).onDone();
                         }
@@ -3320,11 +3369,23 @@ public class FileWriteAheadLogManager extends GridCacheSharedManagerAdapter impl
 
         /** {@inheritDoc} */
         @Override protected void body() {
+            Throwable err = null;
+
             try {
                 while (!isCancelled()) {
+                    onIdle();
+
                     while (waiters.isEmpty()) {
-                        if (!isCancelled())
-                            LockSupport.park();
+                        if (!isCancelled()) {
+                            blockingSectionBegin();
+
+                            try {
+                                LockSupport.park();
+                            }
+                            finally {
+                                blockingSectionEnd();
+                            }
+                        }
                         else {
                             unparkWaiters(Long.MAX_VALUE);
 
@@ -3339,6 +3400,8 @@ public class FileWriteAheadLogManager extends GridCacheSharedManagerAdapter impl
                             pos = val;
                     }
 
+                    updateHeartbeat();
+
                     if (pos == null)
                         continue;
                     else if (pos < UNCONDITIONAL_FLUSH) {
@@ -3363,6 +3426,8 @@ public class FileWriteAheadLogManager extends GridCacheSharedManagerAdapter impl
                         unparkWaiters(pos);
                     }
 
+                    updateHeartbeat();
+
                     List<SegmentedRingByteBuffer.ReadSegment> segs = currentHandle().buf.poll(pos);
 
                     if (segs == null) {
@@ -3374,6 +3439,8 @@ public class FileWriteAheadLogManager extends GridCacheSharedManagerAdapter impl
                     for (int i = 0; i < segs.size(); i++) {
                         SegmentedRingByteBuffer.ReadSegment seg = segs.get(i);
 
+                        updateHeartbeat();
+
                         try {
                             writeBuffer(seg.position(), seg.buffer());
                         }

http://git-wip-us.apache.org/repos/asf/ignite/blob/f59d29b9/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/wal/FsyncModeFileWriteAheadLogManager.java
----------------------------------------------------------------------
diff --git a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/wal/FsyncModeFileWriteAheadLogManager.java b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/wal/FsyncModeFileWriteAheadLogManager.java
index 6a816a5..3d0b8b9 100644
--- a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/wal/FsyncModeFileWriteAheadLogManager.java
+++ b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/wal/FsyncModeFileWriteAheadLogManager.java
@@ -1535,6 +1535,8 @@ public class FsyncModeFileWriteAheadLogManager extends GridCacheSharedManagerAda
 
         /** {@inheritDoc} */
         @Override protected void body() {
+            blockingSectionBegin();
+
             try {
                 allocateRemainingFiles();
             }
@@ -1550,13 +1552,24 @@ public class FsyncModeFileWriteAheadLogManager extends GridCacheSharedManagerAda
 
                 return;
             }
+            finally {
+                blockingSectionEnd();
+            }
 
             Throwable err = null;
 
             try {
                 synchronized (this) {
-                    while (curAbsWalIdx == -1 && !stopped)
-                        wait();
+                    while (curAbsWalIdx == -1 && !stopped) {
+                        blockingSectionBegin();
+
+                        try {
+                            wait();
+                        }
+                        finally {
+                            blockingSectionEnd();
+                        }
+                    }
 
                     // If the archive directory is empty, we can be sure that there were no WAL segments archived.
                     // This is ensured by the check in truncate() which will leave at least one file there
@@ -1570,8 +1583,16 @@ public class FsyncModeFileWriteAheadLogManager extends GridCacheSharedManagerAda
                         assert lastAbsArchivedIdx <= curAbsWalIdx : "lastArchived=" + lastAbsArchivedIdx +
                             ", current=" + curAbsWalIdx;
 
-                        while (lastAbsArchivedIdx >= curAbsWalIdx - 1 && !stopped)
-                            wait();
+                        while (lastAbsArchivedIdx >= curAbsWalIdx - 1 && !stopped) {
+                            blockingSectionBegin();
+
+                            try {
+                                wait();
+                            }
+                            finally {
+                                blockingSectionEnd();
+                            }
+                        }
 
                         toArchive = lastAbsArchivedIdx + 1;
                     }
@@ -1579,11 +1600,28 @@ public class FsyncModeFileWriteAheadLogManager extends GridCacheSharedManagerAda
                     if (stopped)
                         break;
 
-                    final SegmentArchiveResult res = archiveSegment(toArchive);
+                    SegmentArchiveResult res;
+
+                    blockingSectionBegin();
+
+                    try {
+                        res = archiveSegment(toArchive);
+                    }
+                    finally {
+                        blockingSectionEnd();
+                    }
 
                     synchronized (this) {
-                        while (locked.containsKey(toArchive) && !stopped)
-                            wait();
+                        while (locked.containsKey(toArchive) && !stopped) {
+                            blockingSectionBegin();
+
+                            try {
+                                wait();
+                            }
+                            finally {
+                                blockingSectionEnd();
+                            }
+                        }
 
                         changeLastArchivedIndexAndWakeupCompressor(toArchive);
 
@@ -1594,6 +1632,8 @@ public class FsyncModeFileWriteAheadLogManager extends GridCacheSharedManagerAda
                         evt.record(new WalSegmentArchivedEvent(cctx.discovery().localNode(),
                             res.getAbsIdx(), res.getDstArchiveFile()));
                     }
+
+                    onIdle();
                 }
             }
             catch (InterruptedException t) {
@@ -2073,7 +2113,14 @@ public class FsyncModeFileWriteAheadLogManager extends GridCacheSharedManagerAda
                     long segmentToDecompress = -1L;
 
                     try {
-                        segmentToDecompress = segmentsQueue.take();
+                        blockingSectionBegin();
+
+                        try {
+                            segmentToDecompress = segmentsQueue.take();
+                        }
+                        finally {
+                            blockingSectionEnd();
+                        }
 
                         if (isCancelled())
                             break;
@@ -2089,7 +2136,7 @@ public class FsyncModeFileWriteAheadLogManager extends GridCacheSharedManagerAda
                             zis.getNextEntry();
 
                             while (io.writeFully(arr, 0, zis.read(arr)) > 0)
-                                ;
+                                updateHeartbeat();
                         }
 
                         try {
@@ -2103,6 +2150,8 @@ public class FsyncModeFileWriteAheadLogManager extends GridCacheSharedManagerAda
                                 U.error(log, "Can't delete temporary unzipped segment [tmp=" + unzipTmp + ']');
                         }
 
+                        updateHeartbeat();
+
                         synchronized (this) {
                             decompressionFutures.remove(segmentToDecompress).onDone();
                         }

http://git-wip-us.apache.org/repos/asf/ignite/blob/f59d29b9/modules/core/src/main/java/org/apache/ignite/internal/processors/failure/FailureProcessor.java
----------------------------------------------------------------------
diff --git a/modules/core/src/main/java/org/apache/ignite/internal/processors/failure/FailureProcessor.java b/modules/core/src/main/java/org/apache/ignite/internal/processors/failure/FailureProcessor.java
index 722de18..b48eff1 100644
--- a/modules/core/src/main/java/org/apache/ignite/internal/processors/failure/FailureProcessor.java
+++ b/modules/core/src/main/java/org/apache/ignite/internal/processors/failure/FailureProcessor.java
@@ -17,6 +17,7 @@
 
 package org.apache.ignite.internal.processors.failure;
 
+import java.util.Collections;
 import org.apache.ignite.Ignite;
 import org.apache.ignite.IgniteCheckedException;
 import org.apache.ignite.IgniteSystemProperties;
@@ -29,7 +30,7 @@ import org.apache.ignite.internal.processors.GridProcessorAdapter;
 import org.apache.ignite.internal.util.typedef.X;
 import org.apache.ignite.internal.util.typedef.internal.U;
 
-import static org.apache.ignite.IgniteSystemProperties.IGNITE_DUMP_THREADS_ON_FAILURE;
+import static org.apache.ignite.failure.FailureType.SYSTEM_WORKER_BLOCKED;
 
 /**
  * General failure processing API
@@ -83,7 +84,11 @@ public class FailureProcessor extends GridProcessorAdapter {
      * @return Default {@link FailureHandler} implementation.
      */
     protected FailureHandler getDefaultFailureHandler() {
-        return new StopNodeOrHaltFailureHandler();
+        FailureHandler hnd = new StopNodeOrHaltFailureHandler();
+
+        hnd.setIgnoredFailureTypes(Collections.singleton(SYSTEM_WORKER_BLOCKED));
+
+        return hnd;
     }
 
     /**
@@ -116,7 +121,7 @@ public class FailureProcessor extends GridProcessorAdapter {
             return;
 
         U.error(ignite.log(), "Critical system error detected. Will be handled accordingly to configured handler " +
-            "[hnd=" + hnd.getClass() + ", failureCtx=" + failureCtx + ']', failureCtx.error());
+            "[hnd=" + hnd + ", failureCtx=" + failureCtx + ']', failureCtx.error());
 
         if (reserveBuf != null && X.hasCause(failureCtx.error(), OutOfMemoryError.class))
             reserveBuf = null;

http://git-wip-us.apache.org/repos/asf/ignite/blob/f59d29b9/modules/core/src/main/java/org/apache/ignite/internal/processors/platform/compute/PlatformCompute.java
----------------------------------------------------------------------
diff --git a/modules/core/src/main/java/org/apache/ignite/internal/processors/platform/compute/PlatformCompute.java b/modules/core/src/main/java/org/apache/ignite/internal/processors/platform/compute/PlatformCompute.java
index ed61021..19daf75 100644
--- a/modules/core/src/main/java/org/apache/ignite/internal/processors/platform/compute/PlatformCompute.java
+++ b/modules/core/src/main/java/org/apache/ignite/internal/processors/platform/compute/PlatformCompute.java
@@ -17,7 +17,12 @@
 
 package org.apache.ignite.internal.processors.platform.compute;
 
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.List;
+import java.util.UUID;
 import java.util.concurrent.Executor;
+import java.util.concurrent.TimeUnit;
 import org.apache.ignite.IgniteCheckedException;
 import org.apache.ignite.IgniteCompute;
 import org.apache.ignite.binary.BinaryObject;
@@ -37,12 +42,6 @@ import 
org.apache.ignite.internal.util.future.IgniteFutureImpl;
 import org.apache.ignite.lang.IgniteClosure;
 import org.apache.ignite.lang.IgniteInClosure;
 
-import java.util.ArrayList;
-import java.util.Collection;
-import java.util.List;
-import java.util.UUID;
-import java.util.concurrent.TimeUnit;
-
 import static 
org.apache.ignite.internal.processors.task.GridTaskThreadContextKey.TC_SUBGRID;
 
 /**

http://git-wip-us.apache.org/repos/asf/ignite/blob/f59d29b9/modules/core/src/main/java/org/apache/ignite/internal/processors/timeout/GridTimeoutProcessor.java
----------------------------------------------------------------------
diff --git 
a/modules/core/src/main/java/org/apache/ignite/internal/processors/timeout/GridTimeoutProcessor.java
 
b/modules/core/src/main/java/org/apache/ignite/internal/processors/timeout/GridTimeoutProcessor.java
index de9e8eb..405e321 100644
--- 
a/modules/core/src/main/java/org/apache/ignite/internal/processors/timeout/GridTimeoutProcessor.java
+++ 
b/modules/core/src/main/java/org/apache/ignite/internal/processors/timeout/GridTimeoutProcessor.java
@@ -200,8 +200,12 @@ public class GridTimeoutProcessor extends 
GridProcessorAdapter {
          *
          */
         TimeoutWorker() {
-            super(ctx.config().getIgniteInstanceName(), "grid-timeout-worker",
-                GridTimeoutProcessor.this.log, ctx.workersRegistry());
+            super(
+                ctx.config().getIgniteInstanceName(),
+                "grid-timeout-worker",
+                GridTimeoutProcessor.this.log,
+                ctx.workersRegistry()
+            );
         }
 
         /** {@inheritDoc} */
@@ -210,8 +214,12 @@ public class GridTimeoutProcessor extends 
GridProcessorAdapter {
 
             try {
                 while (!isCancelled()) {
+                    updateHeartbeat();
+
                     long now = U.currentTimeMillis();
 
+                    onIdle();
+
                     for (Iterator<GridTimeoutObject> iter = 
timeoutObjs.iterator(); iter.hasNext(); ) {
                         GridTimeoutObject timeoutObj = iter.next();
 
@@ -254,13 +262,29 @@ public class GridTimeoutProcessor extends 
GridProcessorAdapter {
                             if (first != null) {
                                 long waitTime = first.endTime() - 
U.currentTimeMillis();
 
-                                if (waitTime > 0)
-                                    mux.wait(waitTime);
+                                if (waitTime > 0) {
+                                    blockingSectionBegin();
+
+                                    try {
+                                        mux.wait(waitTime);
+                                    }
+                                    finally {
+                                        blockingSectionEnd();
+                                    }
+                                }
                                 else
                                     break;
                             }
-                            else
-                                mux.wait(5000);
+                            else {
+                                blockingSectionBegin();
+
+                                try {
+                                    mux.wait(5000);
+                                }
+                                finally {
+                                    blockingSectionEnd();
+                                }
+                            }
                         }
                     }
                 }

http://git-wip-us.apache.org/repos/asf/ignite/blob/f59d29b9/modules/core/src/main/java/org/apache/ignite/internal/util/IgniteUtils.java
----------------------------------------------------------------------
diff --git 
a/modules/core/src/main/java/org/apache/ignite/internal/util/IgniteUtils.java 
b/modules/core/src/main/java/org/apache/ignite/internal/util/IgniteUtils.java
index 2e1f27e..6320c9f 100755
--- 
a/modules/core/src/main/java/org/apache/ignite/internal/util/IgniteUtils.java
+++ 
b/modules/core/src/main/java/org/apache/ignite/internal/util/IgniteUtils.java
@@ -1349,6 +1349,22 @@ public abstract class IgniteUtils {
     }
 
     /**
+     * Dumps stack trace of the thread to the given log at warning level.
+     *
+     * @param t Thread to be dumped.
+     * @param log Logger.
+     */
+    public static void dumpThread(Thread t, @Nullable IgniteLogger log) {
+        ThreadMXBean mxBean = ManagementFactory.getThreadMXBean();
+
+        GridStringBuilder sb = new GridStringBuilder();
+
+        printThreadInfo(mxBean.getThreadInfo(t.getId()), sb, 
Collections.emptySet());
+
+        warn(log, sb.toString());
+    }
+
+    /**
      * Get deadlocks from the thread bean.
      * @param mxBean the bean
      * @return the set of deadlocked threads (may be empty Set, but never 
null).

http://git-wip-us.apache.org/repos/asf/ignite/blob/f59d29b9/modules/core/src/main/java/org/apache/ignite/internal/util/StripedExecutor.java
----------------------------------------------------------------------
diff --git 
a/modules/core/src/main/java/org/apache/ignite/internal/util/StripedExecutor.java
 
b/modules/core/src/main/java/org/apache/ignite/internal/util/StripedExecutor.java
index d89124f..0ddb398 100644
--- 
a/modules/core/src/main/java/org/apache/ignite/internal/util/StripedExecutor.java
+++ 
b/modules/core/src/main/java/org/apache/ignite/internal/util/StripedExecutor.java
@@ -67,7 +67,7 @@ public class StripedExecutor implements ExecutorService {
      * @param poolName Pool name.
      * @param log Logger.
      * @param errHnd Critical failure handler.
-     * @param gridWorkerLsnr listener to link with every stripe worker.
+     * @param gridWorkerLsnr Listener to link with every stripe worker.
      */
     public StripedExecutor(
         int cnt,
@@ -487,11 +487,20 @@ public class StripedExecutor implements ExecutorService {
                 Runnable cmd;
 
                 try {
-                    cmd = take();
+                    blockingSectionBegin();
+
+                    try {
+                        cmd = take();
+                    }
+                    finally {
+                        blockingSectionEnd();
+                    }
 
                     if (cmd != null) {
                         active = true;
 
+                        updateHeartbeat();
+
                         try {
                             cmd.run();
                         }
@@ -500,6 +509,8 @@ public class StripedExecutor implements ExecutorService {
                             completedCnt++;
                         }
                     }
+
+                    onIdle();
                 }
                 catch (InterruptedException ignored) {
                     Thread.currentThread().interrupt();

http://git-wip-us.apache.org/repos/asf/ignite/blob/f59d29b9/modules/core/src/main/java/org/apache/ignite/internal/util/future/GridFutureAdapter.java
----------------------------------------------------------------------
diff --git 
a/modules/core/src/main/java/org/apache/ignite/internal/util/future/GridFutureAdapter.java
 
b/modules/core/src/main/java/org/apache/ignite/internal/util/future/GridFutureAdapter.java
index fa5c4df..8302504 100644
--- 
a/modules/core/src/main/java/org/apache/ignite/internal/util/future/GridFutureAdapter.java
+++ 
b/modules/core/src/main/java/org/apache/ignite/internal/util/future/GridFutureAdapter.java
@@ -98,14 +98,15 @@ public class GridFutureAdapter<R> implements 
IgniteInternalFuture<R> {
     }
 
     /** */
-    private boolean ignoreInterrupts;
+    private volatile boolean ignoreInterrupts;
 
     /** */
     @GridToStringExclude
     private volatile Object state = INIT;
 
     /**
-     * Determines whether the future will ignore interrupts.
+     * Determines whether the future will ignore interrupts while waiting for 
result in {@code get()} methods.
+     * This call should <i>happen before</i> subsequent {@code get()} in order 
to have guaranteed effect.
      */
     public void ignoreInterrupts() {
         ignoreInterrupts = true;
@@ -156,7 +157,7 @@ public class GridFutureAdapter<R> implements 
IgniteInternalFuture<R> {
         A.ensure(timeout >= 0, "timeout cannot be negative: " + timeout);
         A.notNull(unit, "unit");
 
-        return get0(unit.toNanos(timeout));
+        return get0(ignoreInterrupts, unit.toNanos(timeout));
     }
 
     /**
@@ -197,12 +198,13 @@ public class GridFutureAdapter<R> implements 
IgniteInternalFuture<R> {
     }
 
     /**
+     * @param ignoreInterrupts Whether to ignore interrupts.
      * @param nanosTimeout Timeout (nanoseconds).
      * @return Result.
      * @throws IgniteFutureTimeoutCheckedException If timeout reached before 
computation completed.
      * @throws IgniteCheckedException If error occurred.
      */
-    @Nullable private R get0(long nanosTimeout) throws IgniteCheckedException {
+    @Nullable private R get0(boolean ignoreInterrupts, long nanosTimeout) 
throws IgniteCheckedException {
         if (isDone() || !registerWaiter(Thread.currentThread()))
             return resolve();
 

http://git-wip-us.apache.org/repos/asf/ignite/blob/f59d29b9/modules/core/src/main/java/org/apache/ignite/internal/util/nio/GridNioServer.java
----------------------------------------------------------------------
diff --git 
a/modules/core/src/main/java/org/apache/ignite/internal/util/nio/GridNioServer.java
 
b/modules/core/src/main/java/org/apache/ignite/internal/util/nio/GridNioServer.java
index 9678ae7..e4c96b4 100644
--- 
a/modules/core/src/main/java/org/apache/ignite/internal/util/nio/GridNioServer.java
+++ 
b/modules/core/src/main/java/org/apache/ignite/internal/util/nio/GridNioServer.java
@@ -241,7 +241,10 @@ public class GridNioServer<T> {
     /** */
     private final IgniteRunnable balancer;
 
-    /** */
+    /**
+     * Interval in milliseconds between consecutive {@link 
GridWorkerListener#onIdle(GridWorker)} calls
+     * in server workers.
+     */
     private final boolean readWriteSelectorsAssign;
 
     /**
@@ -1785,11 +1788,15 @@ public class GridNioServer<T> {
                 boolean reset = false;
 
                 while (!closed) {
+                    updateHeartbeat();
+
                     try {
                         if (reset)
                             createSelector();
 
                         bodyInternal();
+
+                        onIdle();
                     }
                     catch (IgniteCheckedException e) {
                         if (!Thread.currentThread().isInterrupted()) {
@@ -1924,7 +1931,11 @@ public class GridNioServer<T> {
                 while (!closed && selector.isOpen()) {
                     SessionChangeRequest req0;
 
+                    updateHeartbeat();
+
                     while ((req0 = changeReqs.poll()) != null) {
+                        updateHeartbeat();
+
                         switch (req0.operation()) {
                             case CONNECT: {
                                 NioOperationFuture fut = 
(NioOperationFuture)req0;
@@ -2100,6 +2111,8 @@ public class GridNioServer<T> {
 
                         if (res > 0) {
                             // Walk through the ready keys collection and 
process network events.
+                            updateHeartbeat();
+
                             if (selectedKeys == null)
                                 processSelectedKeys(selector.selectedKeys());
                             else
@@ -2129,6 +2142,8 @@ public class GridNioServer<T> {
                         if (!changeReqs.isEmpty())
                             continue;
 
+                        updateHeartbeat();
+
                         // Wake up every 2 seconds to check if closed.
                         if (selector.select(2000) > 0) {
                             // Walk through the ready keys collection and 
process network events.
@@ -2891,7 +2906,6 @@ public class GridNioServer<T> {
                     lsnr.onFailure(CRITICAL_ERROR, err);
                 else if (err != null)
                     lsnr.onFailure(SYSTEM_WORKER_TERMINATION, err);
-
             }
         }
 
@@ -2903,13 +2917,19 @@ public class GridNioServer<T> {
         private void accept() throws IgniteCheckedException {
             try {
                 while (!closed && selector.isOpen() && 
!Thread.currentThread().isInterrupted()) {
+                    updateHeartbeat();
+
                     // Wake up every 2 seconds to check if closed.
                     if (selector.select(2000) > 0)
                         // Walk through the ready keys collection and process 
date requests.
                         processSelectedKeys(selector.selectedKeys());
+                    else
+                        updateHeartbeat();
 
                     if (balancer != null)
                         balancer.run();
+
+                    onIdle();
                 }
             }
             // Ignore this exception as thread interruption is equal to 
'close' call.

http://git-wip-us.apache.org/repos/asf/ignite/blob/f59d29b9/modules/core/src/main/java/org/apache/ignite/internal/util/worker/GridWorker.java
----------------------------------------------------------------------
diff --git 
a/modules/core/src/main/java/org/apache/ignite/internal/util/worker/GridWorker.java
 
b/modules/core/src/main/java/org/apache/ignite/internal/util/worker/GridWorker.java
index 9ac416c..3d9163d 100644
--- 
a/modules/core/src/main/java/org/apache/ignite/internal/util/worker/GridWorker.java
+++ 
b/modules/core/src/main/java/org/apache/ignite/internal/util/worker/GridWorker.java
@@ -53,6 +53,9 @@ public abstract class GridWorker implements Runnable {
     /** Actual thread runner. */
     private volatile Thread runner;
 
+    /** Timestamp to be updated by this worker periodically to indicate it's 
up and running. */
+    private volatile long heartbeatTs;
+
     /** */
     private final Object mux = new Object();
 
@@ -66,14 +69,19 @@ public abstract class GridWorker implements Runnable {
      * @param log Grid logger to be used.
      * @param lsnr Listener for life-cycle events.
      */
-    protected GridWorker(String igniteInstanceName, String name, IgniteLogger 
log, @Nullable GridWorkerListener lsnr) {
+    protected GridWorker(
+        String igniteInstanceName,
+        String name,
+        IgniteLogger log,
+        @Nullable GridWorkerListener lsnr
+    ) {
         assert name != null;
         assert log != null;
 
         this.igniteInstanceName = igniteInstanceName;
         this.name = name;
-        this.lsnr = lsnr;
         this.log = log;
+        this.lsnr = lsnr;
     }
 
     /**
@@ -95,6 +103,8 @@ public abstract class GridWorker implements Runnable {
         // may depend on it being present.
         runner = Thread.currentThread();
 
+        updateHeartbeat();
+
         if (log.isDebugEnabled())
             log.debug("Grid runnable started: " + name);
 
@@ -255,6 +265,38 @@ public abstract class GridWorker implements Runnable {
         return finished;
     }
 
+    /** */
+    public long heartbeatTs() {
+        return heartbeatTs;
+    }
+
+    /** */
+    public void updateHeartbeat() {
+        heartbeatTs = U.currentTimeMillis();
+    }
+
+    /**
+     * Protects the worker from timeout penalties if subsequent instructions 
in the calling thread do not update
+     * heartbeat timestamp timely, e.g. due to blocking operations, up to the 
nearest {@link #blockingSectionEnd()}
+     * call. Nested calls are not supported.
+     */
+    public void blockingSectionBegin() {
+        heartbeatTs = Long.MAX_VALUE;
+    }
+
+    /**
+     * Closes the protection section previously opened by {@link 
#blockingSectionBegin()}.
+     */
+    public void blockingSectionEnd() {
+        updateHeartbeat();
+    }
+
+    /** Can be called from {@link #runner()} thread to perform idleness 
handling. */
+    protected void onIdle() {
+        if (lsnr != null)
+            lsnr.onIdle(this);
+    }
+
     /** {@inheritDoc} */
     @Override public String toString() {
         Thread runner = this.runner;
@@ -264,4 +306,4 @@ public abstract class GridWorker implements Runnable {
             "interrupted", (runner != null ? runner.isInterrupted() : 
"unknown"),
             "runner", (runner == null ? "null" : runner.getName()));
     }
-}
\ No newline at end of file
+}

http://git-wip-us.apache.org/repos/asf/ignite/blob/f59d29b9/modules/core/src/main/java/org/apache/ignite/internal/util/worker/GridWorkerListener.java
----------------------------------------------------------------------
diff --git 
a/modules/core/src/main/java/org/apache/ignite/internal/util/worker/GridWorkerListener.java
 
b/modules/core/src/main/java/org/apache/ignite/internal/util/worker/GridWorkerListener.java
index e4b39ac..db6bbb9 100644
--- 
a/modules/core/src/main/java/org/apache/ignite/internal/util/worker/GridWorkerListener.java
+++ 
b/modules/core/src/main/java/org/apache/ignite/internal/util/worker/GridWorkerListener.java
@@ -32,4 +32,9 @@ public interface GridWorkerListener extends EventListener {
      * @param w Stopped worker.
      */
     public void onStopped(GridWorker w);
+
+    /**
+     * @param w Idle worker.
+     */
+    public void onIdle(GridWorker w);
 }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/ignite/blob/f59d29b9/modules/core/src/main/java/org/apache/ignite/internal/util/worker/GridWorkerListenerAdapter.java
----------------------------------------------------------------------
diff --git 
a/modules/core/src/main/java/org/apache/ignite/internal/util/worker/GridWorkerListenerAdapter.java
 
b/modules/core/src/main/java/org/apache/ignite/internal/util/worker/GridWorkerListenerAdapter.java
index c6f688b..73a7886 100644
--- 
a/modules/core/src/main/java/org/apache/ignite/internal/util/worker/GridWorkerListenerAdapter.java
+++ 
b/modules/core/src/main/java/org/apache/ignite/internal/util/worker/GridWorkerListenerAdapter.java
@@ -23,11 +23,16 @@ package org.apache.ignite.internal.util.worker;
 public class GridWorkerListenerAdapter implements GridWorkerListener {
     /** {@inheritDoc} */
     @Override public void onStarted(GridWorker w) {
-        /* No-op. */
+        // No-op.
     }
 
     /** {@inheritDoc} */
     @Override public void onStopped(GridWorker w) {
-        /* No-op. */
+        // No-op.
+    }
+
+    /** {@inheritDoc} */
+    @Override public void onIdle(GridWorker w) {
+        // No-op.
     }
 }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/ignite/blob/f59d29b9/modules/core/src/main/java/org/apache/ignite/internal/worker/WorkersControlMXBeanImpl.java
----------------------------------------------------------------------
diff --git 
a/modules/core/src/main/java/org/apache/ignite/internal/worker/WorkersControlMXBeanImpl.java
 
b/modules/core/src/main/java/org/apache/ignite/internal/worker/WorkersControlMXBeanImpl.java
index 1f082b5..e6abe6e 100644
--- 
a/modules/core/src/main/java/org/apache/ignite/internal/worker/WorkersControlMXBeanImpl.java
+++ 
b/modules/core/src/main/java/org/apache/ignite/internal/worker/WorkersControlMXBeanImpl.java
@@ -67,6 +67,16 @@ public class WorkersControlMXBeanImpl implements 
WorkersControlMXBean {
     }
 
     /** {@inheritDoc} */
+    @Override public boolean getHealthMonitoringEnabled() {
+        return workerRegistry.livenessCheckEnabled();
+    }
+
+    /** {@inheritDoc} */
+    @Override public void setHealthMonitoringEnabled(boolean val) {
+        workerRegistry.livenessCheckEnabled(val);
+    }
+
+    /** {@inheritDoc} */
     @Override public boolean stopThreadByUniqueName(String name) {
         Thread[] threads = Thread.getAllStackTraces().keySet().stream()
             .filter(t -> Objects.equals(t.getName(), name))

Reply via email to