This is an automated email from the ASF dual-hosted git repository.

ilyak pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/ignite.git
The following commit(s) were added to refs/heads/master by this push: new 1a3fd11 IGNITE-13665 When system worker is blocked, output its stack trace - Fixes #8442. 1a3fd11 is described below commit 1a3fd112b02133892c7c95d4be607079ffa83211 Author: Ilya Kasnacheev <ilya.kasnach...@gmail.com> AuthorDate: Wed Nov 11 14:25:59 2020 +0300 IGNITE-13665 When system worker is blocked, output its stack trace - Fixes #8442. --- .../org/apache/ignite/internal/IgnitionEx.java | 14 ++-- .../apache/ignite/internal/util/IgniteUtils.java | 16 ----- .../ignite/internal/worker/WorkersRegistry.java | 2 - .../ignite/failure/SystemWorkersBlockingTest.java | 82 +++++++++++++--------- 4 files changed, 59 insertions(+), 55 deletions(-) diff --git a/modules/core/src/main/java/org/apache/ignite/internal/IgnitionEx.java b/modules/core/src/main/java/org/apache/ignite/internal/IgnitionEx.java index a02c5ea..dfdae46 100644 --- a/modules/core/src/main/java/org/apache/ignite/internal/IgnitionEx.java +++ b/modules/core/src/main/java/org/apache/ignite/internal/IgnitionEx.java @@ -1849,11 +1849,16 @@ public class IgnitionEx { WorkersRegistry workerRegistry = new WorkersRegistry( new IgniteBiInClosure<GridWorker, FailureType>() { - @Override public void apply(GridWorker deadWorker, FailureType failureType) { + @Override public void apply(GridWorker worker, FailureType failureType) { + IgniteException ex = new IgniteException(S.toString(GridWorker.class, worker)); + + Thread runner = worker.runner(); + + if (runner != null && runner != Thread.currentThread()) + ex.setStackTrace(runner.getStackTrace()); + if (grid != null) - grid.context().failure().process(new FailureContext( - failureType, - new IgniteException(S.toString(GridWorker.class, deadWorker)))); + grid.context().failure().process(new FailureContext(failureType, ex)); } }, IgniteSystemProperties.getLong(IGNITE_SYSTEM_WORKER_BLOCKED_TIMEOUT, @@ -1899,6 +1904,7 @@ public class IgnitionEx { // Note, that we do not pre-start threads here as class 
loading pool may // not be needed. validateThreadPoolSize(cfg.getPeerClassLoadingThreadPoolSize(), "peer class loading"); + p2pExecSvc = new IgniteThreadPoolExecutor( "p2p", cfg.getIgniteInstanceName(), diff --git a/modules/core/src/main/java/org/apache/ignite/internal/util/IgniteUtils.java b/modules/core/src/main/java/org/apache/ignite/internal/util/IgniteUtils.java index 4f1af38..b3b644d 100755 --- a/modules/core/src/main/java/org/apache/ignite/internal/util/IgniteUtils.java +++ b/modules/core/src/main/java/org/apache/ignite/internal/util/IgniteUtils.java @@ -1506,22 +1506,6 @@ public abstract class IgniteUtils { } /** - * Dumps stack trace of the thread to the given log at warning level. - * - * @param t Thread to be dumped. - * @param log Logger. - */ - public static void dumpThread(Thread t, @Nullable IgniteLogger log) { - ThreadMXBean mxBean = ManagementFactory.getThreadMXBean(); - - GridStringBuilder sb = new GridStringBuilder(); - - printThreadInfo(mxBean.getThreadInfo(t.getId()), sb, Collections.emptySet()); - - warn(log, sb.toString()); - } - - /** * Get deadlocks from the thread bean. * @param mxBean the bean * @return the set of deadlocked threads (may be empty Set, but never null). 
diff --git a/modules/core/src/main/java/org/apache/ignite/internal/worker/WorkersRegistry.java b/modules/core/src/main/java/org/apache/ignite/internal/worker/WorkersRegistry.java index 5829b3c..7af557d 100644 --- a/modules/core/src/main/java/org/apache/ignite/internal/worker/WorkersRegistry.java +++ b/modules/core/src/main/java/org/apache/ignite/internal/worker/WorkersRegistry.java @@ -229,8 +229,6 @@ public class WorkersRegistry implements GridWorkerListener { "[workerName=" + worker.name() + ", threadName=" + runner.getName() + ", blockedFor=" + heartbeatDelay / 1000 + "s]"); - U.dumpThread(worker.runner(), log); - workerFailedHnd.apply(worker, SYSTEM_WORKER_BLOCKED); } diff --git a/modules/core/src/test/java/org/apache/ignite/failure/SystemWorkersBlockingTest.java b/modules/core/src/test/java/org/apache/ignite/failure/SystemWorkersBlockingTest.java index 8455f87..ccfc507 100644 --- a/modules/core/src/test/java/org/apache/ignite/failure/SystemWorkersBlockingTest.java +++ b/modules/core/src/test/java/org/apache/ignite/failure/SystemWorkersBlockingTest.java @@ -17,9 +17,12 @@ package org.apache.ignite.failure; +import java.util.Arrays; import java.util.HashSet; import java.util.Set; import java.util.concurrent.CountDownLatch; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicReference; import java.util.concurrent.locks.LockSupport; @@ -30,8 +33,8 @@ import org.apache.ignite.internal.IgniteInterruptedCheckedException; import org.apache.ignite.internal.util.worker.GridWorker; import org.apache.ignite.internal.worker.WorkersRegistry; import org.apache.ignite.testframework.GridTestUtils; +import org.apache.ignite.testframework.junits.GridAbstractTest; import org.apache.ignite.testframework.junits.common.GridCommonAbstractTest; -import org.apache.ignite.thread.IgniteThread; import org.junit.Test; /** @@ -44,6 +47,12 @@ public class SystemWorkersBlockingTest 
extends GridCommonAbstractTest { /** Handler latch. */ private final CountDownLatch hndLatch = new CountDownLatch(1); + /** Blocking thread latch. */ + private final CountDownLatch blockLatch = new CountDownLatch(1); + + /** Worker executor. */ + private final ExecutorService workerExecutor = Executors.newSingleThreadExecutor(); + /** Reference to failure error. */ private final AtomicReference<Throwable> failureError = new AtomicReference<>(); @@ -81,6 +90,13 @@ public class SystemWorkersBlockingTest extends GridCommonAbstractTest { @Override protected void afterTest() throws Exception { super.afterTest(); + blockLatch.countDown(); + + if (workerExecutor.isTerminated()) { + workerExecutor.shutdownNow(); + workerExecutor.awaitTermination(2 * SYSTEM_WORKER_BLOCKED_TIMEOUT, TimeUnit.MILLISECONDS); + } + stopAllGrids(); } @@ -91,34 +107,23 @@ public class SystemWorkersBlockingTest extends GridCommonAbstractTest { public void testBlockingWorker() throws Exception { IgniteEx ignite = startGrid(0); - CountDownLatch blockLatch = new CountDownLatch(1); + GridWorker worker = new LatchingGridWorker(ignite); - GridWorker worker = new GridWorker(ignite.name(), "test-worker", log) { - @Override protected void body() throws InterruptedException { - blockLatch.await(); - } - }; + runWorker(worker); - IgniteThread runner = null; - try { - runner = runWorker(worker); + ignite.context().workersRegistry().register(worker); - ignite.context().workersRegistry().register(worker); + assertTrue(hndLatch.await(ignite.configuration().getFailureDetectionTimeout() * 2, + TimeUnit.MILLISECONDS)); - assertTrue(hndLatch.await(SYSTEM_WORKER_BLOCKED_TIMEOUT * 2, TimeUnit.MILLISECONDS)); + Throwable blockedExeption = failureError.get(); - Throwable err = failureError.get(); - - assertNotNull(err); - assertTrue(err.getMessage() != null && err.getMessage().contains("test-worker")); - } - finally { - if (runner != null) { - blockLatch.countDown(); + assertNotNull(blockedExeption); - 
runner.join(SYSTEM_WORKER_BLOCKED_TIMEOUT); - } - } + assertTrue(Arrays.stream(blockedExeption.getStackTrace()).anyMatch( + e -> CountDownLatch.class.getName().equals(e.getClassName()))); + assertTrue(Arrays.stream(blockedExeption.getStackTrace()).anyMatch( + e -> LatchingGridWorker.class.getName().equals(e.getClassName()))); } /** @@ -145,26 +150,37 @@ public class SystemWorkersBlockingTest extends GridCommonAbstractTest { } }; - IgniteThread runner = runWorker(worker); + runWorker(worker); Thread.sleep(2 * SYSTEM_WORKER_BLOCKED_TIMEOUT); - runner.interrupt(); + workerExecutor.shutdownNow(); - assertTrue(finishLatch.await(SYSTEM_WORKER_BLOCKED_TIMEOUT, TimeUnit.MILLISECONDS)); + assertTrue(workerExecutor.awaitTermination(SYSTEM_WORKER_BLOCKED_TIMEOUT, TimeUnit.MILLISECONDS)); } /** - * @param worker Grid worker to run. - * @return Thread, running worker. + * Run worker and wait for its initialization. + * + * @param worker GridWorker to run. + * @throws IgniteInterruptedCheckedException If wait is interrupted. */ - private IgniteThread runWorker(GridWorker worker) throws IgniteInterruptedCheckedException { - IgniteThread runner = new IgniteThread(worker); - - runner.start(); + private void runWorker(GridWorker worker) throws IgniteInterruptedCheckedException { + workerExecutor.execute(worker); GridTestUtils.waitForCondition(() -> worker.runner() != null, 100); + } - return runner; + /** */ + private class LatchingGridWorker extends GridWorker { + /** */ + public LatchingGridWorker(IgniteEx ignite) { + super(ignite.name(), "test-worker", GridAbstractTest.log); + } + + /** */ + @Override protected void body() throws InterruptedException { + blockLatch.await(); + } } }