Repository: ignite Updated Branches: refs/heads/master 2b7623acf -> af8cb6242
IGNITE-6171 Native facility to control excessive GC pauses Signed-off-by: Anton Vinogradov <[email protected]> Project: http://git-wip-us.apache.org/repos/asf/ignite/repo Commit: http://git-wip-us.apache.org/repos/asf/ignite/commit/af8cb624 Tree: http://git-wip-us.apache.org/repos/asf/ignite/tree/af8cb624 Diff: http://git-wip-us.apache.org/repos/asf/ignite/diff/af8cb624 Branch: refs/heads/master Commit: af8cb624220398e99954f892ffd12e072524066b Parents: 2b7623a Author: Dmitriy Sorokin <[email protected]> Authored: Wed Jan 31 18:11:07 2018 +0300 Committer: Anton Vinogradov <[email protected]> Committed: Wed Jan 31 18:11:07 2018 +0300 ---------------------------------------------------------------------- .../apache/ignite/IgniteSystemProperties.java | 12 ++ .../apache/ignite/internal/IgniteKernal.java | 21 ++- .../ignite/internal/LongJVMPauseDetector.java | 167 +++++++++++++++++++ .../org/apache/ignite/mxbean/IgniteMXBean.java | 25 +++ 4 files changed, 224 insertions(+), 1 deletion(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/ignite/blob/af8cb624/modules/core/src/main/java/org/apache/ignite/IgniteSystemProperties.java ---------------------------------------------------------------------- diff --git a/modules/core/src/main/java/org/apache/ignite/IgniteSystemProperties.java b/modules/core/src/main/java/org/apache/ignite/IgniteSystemProperties.java index 7761292..2b221a1 100644 --- a/modules/core/src/main/java/org/apache/ignite/IgniteSystemProperties.java +++ b/modules/core/src/main/java/org/apache/ignite/IgniteSystemProperties.java @@ -770,6 +770,18 @@ public final class IgniteSystemProperties { */ public static final String IGNITE_DATA_STORAGE_FOLDER_BY_CONSISTENT_ID = "IGNITE_DATA_STORAGE_FOLDER_BY_CONSISTENT_ID"; + /** Ignite JVM pause detector disabled. */ + public static final String IGNITE_JVM_PAUSE_DETECTOR_DISABLED = "IGNITE_JVM_PAUSE_DETECTOR_DISABLED"; + + /** Ignite JVM pause detector precision. */ + public static final String IGNITE_JVM_PAUSE_DETECTOR_PRECISION = "IGNITE_JVM_PAUSE_DETECTOR_PRECISION"; + + /** Ignite JVM pause detector threshold. */ + public static final String IGNITE_JVM_PAUSE_DETECTOR_THRESHOLD = "IGNITE_JVM_PAUSE_DETECTOR_THRESHOLD"; + + /** Ignite JVM pause detector last events count. */ + public static final String IGNITE_JVM_PAUSE_DETECTOR_LAST_EVENTS_COUNT = "IGNITE_JVM_PAUSE_DETECTOR_LAST_EVENTS_COUNT"; + /** * Default value is {@code false}. */ http://git-wip-us.apache.org/repos/asf/ignite/blob/af8cb624/modules/core/src/main/java/org/apache/ignite/internal/IgniteKernal.java ---------------------------------------------------------------------- diff --git a/modules/core/src/main/java/org/apache/ignite/internal/IgniteKernal.java b/modules/core/src/main/java/org/apache/ignite/internal/IgniteKernal.java index 3094963..e637a6b 100644 --- a/modules/core/src/main/java/org/apache/ignite/internal/IgniteKernal.java +++ b/modules/core/src/main/java/org/apache/ignite/internal/IgniteKernal.java @@ -130,8 +130,8 @@ import org.apache.ignite.internal.processors.cache.persistence.filename.PdsConsi import org.apache.ignite.internal.processors.cacheobject.IgniteCacheObjectProcessor; import org.apache.ignite.internal.processors.closure.GridClosureProcessor; import org.apache.ignite.internal.processors.cluster.ClusterProcessor; -import org.apache.ignite.internal.processors.cluster.IGridClusterStateProcessor; import org.apache.ignite.internal.processors.cluster.GridClusterStateProcessor; +import org.apache.ignite.internal.processors.cluster.IGridClusterStateProcessor; import org.apache.ignite.internal.processors.continuous.GridContinuousProcessor; import org.apache.ignite.internal.processors.datastreamer.DataStreamProcessor; import org.apache.ignite.internal.processors.datastructures.DataStructuresProcessor; @@ -288,6 +288,10 @@ public class IgniteKernal implements IgniteEx, IgniteMXBean, Externalizable { /** Force complete reconnect future. */ private static final Object STOP_RECONNECT = new Object(); + static { + LongJVMPauseDetector.start(); + } + /** */ @GridToStringExclude private GridKernalContextImpl ctx; @@ -460,6 +464,21 @@ public class IgniteKernal implements IgniteEx, IgniteMXBean, Externalizable { } /** {@inheritDoc} */ + @Override public long getLongJVMPausesCount() { + return LongJVMPauseDetector.longPausesCount(); + } + + /** {@inheritDoc} */ + @Override public long getLongJVMPausesTotalDuration() { + return LongJVMPauseDetector.longPausesTotalDuration(); + } + + /** {@inheritDoc} */ + @Override public Map<Long, Long> getLongJVMPauseLastEvents() { + return LongJVMPauseDetector.longPauseEvents(); + } + + /** {@inheritDoc} */ @Override public String getUpTimeFormatted() { return X.timeSpan2HMSM(U.currentTimeMillis() - startTime); } http://git-wip-us.apache.org/repos/asf/ignite/blob/af8cb624/modules/core/src/main/java/org/apache/ignite/internal/LongJVMPauseDetector.java ---------------------------------------------------------------------- diff --git a/modules/core/src/main/java/org/apache/ignite/internal/LongJVMPauseDetector.java b/modules/core/src/main/java/org/apache/ignite/internal/LongJVMPauseDetector.java new file mode 100644 index 0000000..c10b6f9 --- /dev/null +++ b/modules/core/src/main/java/org/apache/ignite/internal/LongJVMPauseDetector.java @@ -0,0 +1,167 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.ignite.internal; + +import java.util.Map; +import java.util.TreeMap; +import java.util.concurrent.atomic.AtomicReference; +import org.apache.ignite.IgniteLogger; +import org.apache.ignite.logger.java.JavaLogger; + +import static org.apache.ignite.IgniteSystemProperties.IGNITE_JVM_PAUSE_DETECTOR_DISABLED; +import static org.apache.ignite.IgniteSystemProperties.IGNITE_JVM_PAUSE_DETECTOR_LAST_EVENTS_COUNT; +import static org.apache.ignite.IgniteSystemProperties.IGNITE_JVM_PAUSE_DETECTOR_PRECISION; +import static org.apache.ignite.IgniteSystemProperties.IGNITE_JVM_PAUSE_DETECTOR_THRESHOLD; +import static org.apache.ignite.IgniteSystemProperties.getBoolean; +import static org.apache.ignite.IgniteSystemProperties.getInteger; + +/** + * Class for detection of long JVM pauses. + * It has a worker thread, which wakes up in cycle every {@code PRECISION} (default is 50) milliseconds, + * and monitors a time values between awakenings. If worker pause exceeds the expected value more than {@code THRESHOLD} + * default is 500), the difference is considered as JVM pause, most likely STW, and event of long JVM pause is registered. + * The values of {@code PRECISION}, {@code THRESHOLD} and {@code EVT_CNT} (event window size, default is 20) can be + * configured in system or environment properties IGNITE_JVM_PAUSE_DETECTOR_PRECISION, + * IGNITE_JVM_PAUSE_DETECTOR_THRESHOLD and IGNITE_JVM_PAUSE_DETECTOR_LAST_EVENTS_COUNT accordingly. + */ +class LongJVMPauseDetector { + /** Logger. */ + private static final IgniteLogger LOG = new JavaLogger(); + + /** Worker reference. */ + private static final AtomicReference<Thread> workerRef = new AtomicReference<>(); + + /** Precision. */ + private static final int PRECISION = getInteger(IGNITE_JVM_PAUSE_DETECTOR_PRECISION, 50); + + /** Threshold. */ + private static final int THRESHOLD = getInteger(IGNITE_JVM_PAUSE_DETECTOR_THRESHOLD, 500); + + /** Event count. */ + private static final int EVT_CNT = getInteger(IGNITE_JVM_PAUSE_DETECTOR_LAST_EVENTS_COUNT, 20); + + /** Long pause count. */ + private static long longPausesCnt; + + /** Long pause total duration. */ + private static long longPausesTotalDuration; + + /** Long pauses timestamps. */ + private static final long[] longPausesTimestamps = new long[EVT_CNT]; + + /** Long pauses durations. */ + private static final long[] longPausesDurations = new long[EVT_CNT]; + + /** + * Starts worker if not started yet. + */ + public static void start() { + if (getBoolean(IGNITE_JVM_PAUSE_DETECTOR_DISABLED, false)) { + if (LOG.isDebugEnabled()) + LOG.debug("JVM Pause Detector is disabled."); + + return; + } + + final Thread worker = new Thread("jvm-pause-detector-worker") { + private long prev = System.currentTimeMillis(); + + @Override public void run() { + if (LOG.isDebugEnabled()) + LOG.debug(getName() + " has been started."); + + while (true) { + try { + Thread.sleep(PRECISION); + + final long now = System.currentTimeMillis(); + final long pause = now - PRECISION - prev; + + prev = now; + + if (pause >= THRESHOLD) { + LOG.warning("Possible too long JVM pause: " + pause + " milliseconds."); + + synchronized (LongJVMPauseDetector.class) { + final int next = (int)(longPausesCnt % EVT_CNT); + + longPausesCnt++; + + longPausesTotalDuration += pause; + + longPausesTimestamps[next] = now; + + longPausesDurations[next] = pause; + } + } + } + catch (InterruptedException e) { + LOG.error(getName() + " has been interrupted", e); + + break; + } + } + } + }; + + if (!workerRef.compareAndSet(null, worker)) { + LOG.warning(LongJVMPauseDetector.class.getSimpleName() + " already started!"); + + return; + } + + worker.setDaemon(true); + worker.start(); + } + + /** + * Stops the worker if one is created and running. + */ + public static void stop() { + final Thread worker = workerRef.getAndSet(null); + + if (worker != null && worker.isAlive() && !worker.isInterrupted()) + worker.interrupt(); + } + + /** + * @return Long JVM pauses count. + */ + synchronized static long longPausesCount() { + return longPausesCnt; + } + + /** + * @return Long JVM pauses total duration. + */ + synchronized static long longPausesTotalDuration() { + return longPausesTotalDuration; + } + + /** + * @return Last long JVM pause events. + */ + synchronized static Map<Long, Long> longPauseEvents() { + final Map<Long, Long> evts = new TreeMap<>(); + + for (int i = 0; i < longPausesTimestamps.length && longPausesTimestamps[i] != 0; i++) + evts.put(longPausesTimestamps[i], longPausesDurations[i]); + + return evts; + } +} http://git-wip-us.apache.org/repos/asf/ignite/blob/af8cb624/modules/core/src/main/java/org/apache/ignite/mxbean/IgniteMXBean.java ---------------------------------------------------------------------- diff --git a/modules/core/src/main/java/org/apache/ignite/mxbean/IgniteMXBean.java b/modules/core/src/main/java/org/apache/ignite/mxbean/IgniteMXBean.java index 428d03c..308c148 100644 --- a/modules/core/src/main/java/org/apache/ignite/mxbean/IgniteMXBean.java +++ b/modules/core/src/main/java/org/apache/ignite/mxbean/IgniteMXBean.java @@ -18,6 +18,7 @@ package org.apache.ignite.mxbean; import java.util.List; +import java.util.Map; import java.util.UUID; import javax.management.JMException; @@ -96,6 +97,30 @@ public interface IgniteMXBean { public long getUpTime(); /** + * Gets long JVM pauses count. + * + * @return Long JVM pauses count. + */ + @MXBeanDescription("Long JVM pauses count.") + public long getLongJVMPausesCount(); + + /** + * Gets long JVM pauses total duration. + * + * @return Long JVM pauses total duration. + */ + @MXBeanDescription("Long JVM pauses total duration.") + public long getLongJVMPausesTotalDuration(); + + /** + * Gets long JVM pause last events. + * + * @return Long JVM pause last events. + */ + @MXBeanDescription("Long JVM pause last events.") + public Map<Long, Long> getLongJVMPauseLastEvents(); + + /** * Gets a list of formatted user-defined attributes added to this node. * <p> * Note that grid will add all System properties and environment properties
