Catch errors when the JVM pulls the rug out from GCInspector. Patch by Josh McKenzie; reviewed by jbellis for CASSANDRA-5345.
Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/51a1f232 Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/51a1f232 Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/51a1f232 Branch: refs/heads/trunk Commit: 51a1f232b42b4385df7f2aca42a97b4c5b02c077 Parents: 062addb Author: Jonathan Ellis <jbel...@apache.org> Authored: Fri Jul 18 18:13:33 2014 -0500 Committer: Jonathan Ellis <jbel...@apache.org> Committed: Fri Jul 18 18:13:33 2014 -0500 ---------------------------------------------------------------------- CHANGES.txt | 1 + .../apache/cassandra/service/GCInspector.java | 130 +++++++++++-------- 2 files changed, 79 insertions(+), 52 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/cassandra/blob/51a1f232/CHANGES.txt ---------------------------------------------------------------------- diff --git a/CHANGES.txt b/CHANGES.txt index d339309..d4b11d1 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,4 +1,5 @@ 2.0.10 + * Catch errors when the JVM pulls the rug out from GCInspector (CASSANDRA-5345) * (Windows) force range-based repair to non-sequential mode (CASSANDRA-7541) * Fix range merging when DES scores are zero (CASSANDRA-7535) * Warn when SSL certificates have expired (CASSANDRA-7528) http://git-wip-us.apache.org/repos/asf/cassandra/blob/51a1f232/src/java/org/apache/cassandra/service/GCInspector.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/cassandra/service/GCInspector.java b/src/java/org/apache/cassandra/service/GCInspector.java index 9961bf9..f03ec01 100644 --- a/src/java/org/apache/cassandra/service/GCInspector.java +++ b/src/java/org/apache/cassandra/service/GCInspector.java @@ -21,6 +21,7 @@ import java.lang.management.GarbageCollectorMXBean; import java.lang.management.ManagementFactory; import 
java.lang.management.MemoryMXBean; import java.lang.management.MemoryUsage; +import java.lang.reflect.UndeclaredThrowableException; import java.util.ArrayList; import java.util.HashMap; import java.util.List; @@ -49,8 +50,28 @@ public class GCInspector final List<GarbageCollectorMXBean> beans = new ArrayList<GarbageCollectorMXBean>(); final MemoryMXBean membean = ManagementFactory.getMemoryMXBean(); - public GCInspector() + public void start() { + buildMXBeanList(); + + // don't bother starting a thread that will do nothing. + if (beans.isEmpty()) + return; + + Runnable t = new Runnable() + { + public void run() + { + logGCResults(); + } + }; + StorageService.scheduledTasks.scheduleWithFixedDelay(t, INTERVAL_IN_MS, INTERVAL_IN_MS, TimeUnit.MILLISECONDS); + } + + private void buildMXBeanList() + { + beans.clear(); + MBeanServer server = ManagementFactory.getPlatformMBeanServer(); try { @@ -67,62 +88,67 @@ public class GCInspector } } - public void start() + private void logGCResults() { - // don't bother starting a thread that will do nothing. 
- if (beans.size() == 0) - return; - Runnable t = new Runnable() + boolean gcChanged = false; + try { - public void run() + for (GarbageCollectorMXBean gc : beans) { - logGCResults(); + if (!gc.isValid()) + { + gcChanged = true; + continue; + } + + Long previousTotal = gctimes.get(gc.getName()); + Long total = gc.getCollectionTime(); + if (previousTotal == null) + previousTotal = 0L; + if (previousTotal.equals(total)) + continue; + gctimes.put(gc.getName(), total); + Long duration = total - previousTotal; // may be zero for a really fast collection + + Long previousCount = gccounts.get(gc.getName()); + Long count = gc.getCollectionCount(); + + if (previousCount == null) + previousCount = 0L; + if (count.equals(previousCount)) + continue; + + gccounts.put(gc.getName(), count); + + MemoryUsage mu = membean.getHeapMemoryUsage(); + long memoryUsed = mu.getUsed(); + long memoryMax = mu.getMax(); + + String st = String.format("GC for %s: %s ms for %s collections, %s used; max is %s", + gc.getName(), duration, count - previousCount, memoryUsed, memoryMax); + long durationPerCollection = duration / (count - previousCount); + if (durationPerCollection > MIN_DURATION) + logger.info(st); + else if (logger.isDebugEnabled()) + logger.debug(st); + + if (durationPerCollection > MIN_DURATION_TPSTATS) + StatusLogger.log(); + + // if we just finished a full collection and we're still using a lot of memory, try to reduce the pressure + if (gc.getName().equals("ConcurrentMarkSweep")) + SSTableDeletingTask.rescheduleFailedTasks(); } - }; - StorageService.scheduledTasks.scheduleWithFixedDelay(t, INTERVAL_IN_MS, INTERVAL_IN_MS, TimeUnit.MILLISECONDS); - } - - private void logGCResults() - { - for (GarbageCollectorMXBean gc : beans) + } + catch (UndeclaredThrowableException e) { - Long previousTotal = gctimes.get(gc.getName()); - Long total = gc.getCollectionTime(); - if (previousTotal == null) - previousTotal = 0L; - if (previousTotal.equals(total)) - continue; - 
gctimes.put(gc.getName(), total); - Long duration = total - previousTotal; // may be zero for a really fast collection - - Long previousCount = gccounts.get(gc.getName()); - Long count = gc.getCollectionCount(); - - if (previousCount == null) - previousCount = 0L; - if (count.equals(previousCount)) - continue; - - gccounts.put(gc.getName(), count); - - MemoryUsage mu = membean.getHeapMemoryUsage(); - long memoryUsed = mu.getUsed(); - long memoryMax = mu.getMax(); - - String st = String.format("GC for %s: %s ms for %s collections, %s used; max is %s", - gc.getName(), duration, count - previousCount, memoryUsed, memoryMax); - long durationPerCollection = duration / (count - previousCount); - if (durationPerCollection > MIN_DURATION) - logger.info(st); - else if (logger.isDebugEnabled()) - logger.debug(st); - - if (durationPerCollection > MIN_DURATION_TPSTATS) - StatusLogger.log(); - - // if we just finished a full collection and we're still using a lot of memory, try to reduce the pressure - if (gc.getName().equals("ConcurrentMarkSweep")) - SSTableDeletingTask.rescheduleFailedTasks(); + // valid-ness may have changed out from under us, even though we check for it explicitly. + // if so, gc.getName() will throw UTE when reflection runs into InstanceNotFoundException. + // See CASSANDRA-5345 + gcChanged = true; } + + if (gcChanged) + buildMXBeanList(); } }