Author: cziegeler Date: Tue Aug 22 05:18:51 2017 New Revision: 1805720 URL: http://svn.apache.org/viewvc?rev=1805720&view=rev Log: SLING-5965 : Metrics and a Health-Check for Scheduler to detect long-running Quartz-Jobs
Modified: sling/trunk/bundles/commons/scheduler/pom.xml sling/trunk/bundles/commons/scheduler/src/main/java/org/apache/sling/commons/scheduler/impl/GaugesSupport.java sling/trunk/bundles/commons/scheduler/src/main/java/org/apache/sling/commons/scheduler/impl/QuartzJobExecutor.java sling/trunk/bundles/commons/scheduler/src/main/java/org/apache/sling/commons/scheduler/impl/QuartzScheduler.java Modified: sling/trunk/bundles/commons/scheduler/pom.xml URL: http://svn.apache.org/viewvc/sling/trunk/bundles/commons/scheduler/pom.xml?rev=1805720&r1=1805719&r2=1805720&view=diff ============================================================================== --- sling/trunk/bundles/commons/scheduler/pom.xml (original) +++ sling/trunk/bundles/commons/scheduler/pom.xml Tue Aug 22 05:18:51 2017 @@ -84,12 +84,14 @@ org.apache.sling.discovery;resolution:=optional, org.apache.sling.settings;resolution:=optional, org.apache.sling.hc.api;resolution:=optional, + org.apache.sling.hc.util;resolution:=optional, * </Import-Package> <DynamicImport-Package> org.apache.sling.discovery;version="[1.0,2)", org.apache.sling.settings;version="[1.0,2)", - org.apache.sling.hc.api + org.apache.sling.hc.api, + org.apache.sling.hc.util </DynamicImport-Package> <Embed-Dependency> quartz @@ -180,19 +182,16 @@ <scope>test</scope> </dependency> <dependency> - <groupId>org.apache.sling</groupId> - <artifactId>org.apache.sling.commons.metrics</artifactId> - <version>1.2.0</version> + <groupId>org.apache.sling</groupId> + <artifactId>org.apache.sling.hc.core</artifactId> + <version>1.0.6</version> + <scope>provided</scope> </dependency> <dependency> - <groupId>org.apache.sling</groupId> - <artifactId>org.apache.sling.hc.core</artifactId> - <version>1.0.6</version> - </dependency> - <dependency> - <groupId>org.apache.sling</groupId> - <artifactId>org.apache.sling.commons.osgi</artifactId> - <version>2.1.0</version> + <groupId>io.dropwizard.metrics</groupId> + <artifactId>metrics-core</artifactId> + <version>3.2.3</version> + <scope>provided</scope> </dependency> </dependencies> </project> Modified: sling/trunk/bundles/commons/scheduler/src/main/java/org/apache/sling/commons/scheduler/impl/GaugesSupport.java URL: http://svn.apache.org/viewvc/sling/trunk/bundles/commons/scheduler/src/main/java/org/apache/sling/commons/scheduler/impl/GaugesSupport.java?rev=1805720&r1=1805719&r2=1805720&view=diff ============================================================================== --- sling/trunk/bundles/commons/scheduler/src/main/java/org/apache/sling/commons/scheduler/impl/GaugesSupport.java (original) +++ sling/trunk/bundles/commons/scheduler/src/main/java/org/apache/sling/commons/scheduler/impl/GaugesSupport.java Tue Aug 22 05:18:51 2017 @@ -17,16 +17,13 @@ package org.apache.sling.commons.scheduler.impl; import java.util.Date; -import java.util.Dictionary; import java.util.HashMap; -import java.util.Hashtable; import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Map.Entry; import java.util.NoSuchElementException; -import org.apache.sling.commons.metrics.Gauge; import org.osgi.framework.BundleContext; import org.osgi.framework.Constants; import org.osgi.framework.ServiceRegistration; @@ -41,11 +38,13 @@ import org.quartz.SchedulerException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import com.codahale.metrics.Gauge; +import com.codahale.metrics.MetricRegistry; + @Component( property = { Constants.SERVICE_VENDOR + "=The Apache Software Foundation" - }, - immediate = true + } ) /** * This service creates gauges for getting how long the oldest running job is @@ -64,20 +63,20 @@ public class GaugesSupport { private final static String CLEANUP_JOB_NAME = "org.apache.sling.commons.scheduler.impl.GaugesSupport.CleanupJob"; + @Reference + private MetricRegistry metricRegistry; + @SuppressWarnings("rawtypes") private final class TemporaryGauge implements Gauge { - private final ServiceRegistration registration; private final JobExecutionContext jobExecutionContext; private final String gaugeName; private volatile boolean unregistered = false; - private TemporaryGauge(BundleContext ctx, JobExecutionContext jobExecutionContext, String gaugeName) { + private TemporaryGauge(final JobExecutionContext jobExecutionContext, final String gaugeName) { this.jobExecutionContext = jobExecutionContext; this.gaugeName = gaugeName; - Dictionary<String, String> p = new Hashtable<String, String>(); - p.put(Gauge.NAME, gaugeName); - registration = ctx.registerService(Gauge.class.getName(), TemporaryGauge.this, p); + metricRegistry.register(gaugeName, this); } private void unregister() { @@ -97,7 +96,7 @@ public class GaugesSupport { logger.debug("unregister: unregistering dangling temporary gauge for slow job : " + gaugeName); } } - registration.unregister(); + metricRegistry.remove(gaugeName); } @Override @@ -128,8 +127,8 @@ public class GaugesSupport { private final Logger logger = LoggerFactory.getLogger(getClass()); @SuppressWarnings("rawtypes") - private final Map<String, ServiceRegistration> gaugeRegistrations = new HashMap<String, ServiceRegistration>(); - private final Map<String, TemporaryGauge> temporaryGauges = new HashMap<String, TemporaryGauge>(); + private final Map<String, ServiceRegistration> gaugeRegistrations = new HashMap<>(); + private final Map<String, TemporaryGauge> temporaryGauges = new HashMap<>(); private volatile boolean active = true; @@ -147,7 +146,7 @@ public class GaugesSupport { active = true; // register the gauges - registerGauges(ctx); + registerGauges(); bundleId = ctx.getBundle().getBundleId(); try { @@ -178,7 +177,7 @@ public class GaugesSupport { final long start = System.currentTimeMillis(); final Map<String, TemporaryGauge> localTemporaryGauges; synchronized (temporaryGauges) { - localTemporaryGauges = new HashMap<String, TemporaryGauge>(temporaryGauges); + localTemporaryGauges = new HashMap<>(temporaryGauges); } final Iterator<TemporaryGauge> it = localTemporaryGauges.values().iterator(); while (it.hasNext()) { @@ -195,47 +194,45 @@ public class GaugesSupport { + ", at end : " + endCount + ", cleanup took : " + diff + "ms)"); } - private void registerGauges(BundleContext ctx) { - createGauge(ctx, configHolder, null, null, QuartzScheduler.METRICS_NAME_OLDEST_RUNNING_JOB_MILLIS); - createGauge(ctx, configHolder, configHolder.poolName(), null, + private void registerGauges() { + createGauge(configHolder, null, null, QuartzScheduler.METRICS_NAME_OLDEST_RUNNING_JOB_MILLIS); + createGauge(configHolder, configHolder.poolName(), null, QuartzScheduler.METRICS_NAME_OLDEST_RUNNING_JOB_MILLIS + ".tp." + configHolder.poolName()); if (configHolder.allowedPoolNames() != null) { for (String tpName : configHolder.allowedPoolNames()) { - createGauge(ctx, configHolder, tpName, null, + createGauge(configHolder, tpName, null, QuartzScheduler.METRICS_NAME_OLDEST_RUNNING_JOB_MILLIS + ".tp." + tpName); } } for (Map.Entry<String, String> entry : configHolder.getFilterSuffixes().entrySet()) { final String name = entry.getKey(); final String filterName = entry.getValue(); - createGauge(ctx, configHolder, null, filterName, + createGauge(configHolder, null, filterName, QuartzScheduler.METRICS_NAME_OLDEST_RUNNING_JOB_MILLIS + ".filter." + name); } } @SuppressWarnings("rawtypes") - private void createGauge(final BundleContext ctx, final ConfigHolder configHolder, final String tpName, - final String filterName, final String gaugeName) { - Dictionary<String, String> p = new Hashtable<String, String>(); - p.put(Gauge.NAME, gaugeName); + private void createGauge(final ConfigHolder configHolder, + final String tpName, + final String filterName, + final String gaugeName) { final Gauge gauge = new Gauge() { @Override public Long getValue() { if (!active) { return -1L; // disabled case } - return getOldestRunningJobMillis(configHolder, ctx, tpName, filterName); + return getOldestRunningJobMillis(configHolder, tpName, filterName); } }; logger.debug("createGauge: registering gauge : " + gaugeName); - ServiceRegistration reg = ctx.registerService(Gauge.class.getName(), gauge, p); - synchronized (this.gaugeRegistrations) { - gaugeRegistrations.put(gaugeName, reg); - } + this.metricRegistry.register(gaugeName, gauge); } - private Long getOldestRunningJobMillis(ConfigHolder configHolder, BundleContext ctx, String threadPoolNameOrNull, - String filterNameOrNull) { + private Long getOldestRunningJobMillis(final ConfigHolder configHolder, + final String threadPoolNameOrNull, + final String filterNameOrNull) { final QuartzScheduler localQuartzScheduler = quartzScheduler; if (localQuartzScheduler == null) { // could happen during deactivation @@ -252,7 +249,7 @@ public class GaugesSupport { // if a threadPoolName is set and no filter then we go by // threadPoolName final SchedulerProxy schedulerProxy = schedulers.get(threadPoolNameOrNull); - oldestDate = getOldestRunningJobDate(configHolder, ctx, schedulerProxy, null); + oldestDate = getOldestRunningJobDate(configHolder, schedulerProxy, null); } else { // if nothing is set we iterate over everything // if both threadPoolName and filter is set, filter has precedence @@ -260,7 +257,7 @@ public class GaugesSupport { for (Map.Entry<String, SchedulerProxy> entry : schedulers.entrySet()) { SchedulerProxy schedulerProxy = entry.getValue(); oldestDate = olderOf(oldestDate, - getOldestRunningJobDate(configHolder, ctx, schedulerProxy, filterNameOrNull)); + getOldestRunningJobDate(configHolder, schedulerProxy, filterNameOrNull)); } } if (oldestDate == null) { @@ -270,8 +267,9 @@ public class GaugesSupport { } } - private Date getOldestRunningJobDate(final ConfigHolder configHolder, final BundleContext ctx, - final SchedulerProxy schedulerProxy, final String filterNameOrNull) { + private Date getOldestRunningJobDate(final ConfigHolder configHolder, + final SchedulerProxy schedulerProxy, + final String filterNameOrNull) { if (schedulerProxy == null) { return null; } @@ -315,7 +313,7 @@ public class GaugesSupport { if (slowThresholdMillis > 0 && elapsedMillis > slowThresholdMillis) { // then create a gauge for this slow job in case there isn't one // yet - createTemporaryGauge(ctx, jobExecutionContext); + createTemporaryGauge(jobExecutionContext); } oldestDate = olderOf(oldestDate, fireTime); } @@ -334,7 +332,7 @@ public class GaugesSupport { } } - private void createTemporaryGauge(final BundleContext ctx, final JobExecutionContext jobExecutionContext) { + private void createTemporaryGauge(final JobExecutionContext jobExecutionContext) { final JobDataMap data = jobExecutionContext.getJobDetail().getJobDataMap(); final String jobName = data.getString(QuartzScheduler.DATA_MAP_NAME); final String gaugeName = QuartzScheduler.METRICS_NAME_OLDEST_RUNNING_JOB_MILLIS + ".slow." @@ -363,7 +361,7 @@ public class GaugesSupport { } logger.debug("createTemporaryGauge: creating temporary gauge for slow job : " + gaugeName); synchronized (this.temporaryGauges) { - temporaryGauges.put(gaugeName, new TemporaryGauge(ctx, jobExecutionContext, gaugeName)); + temporaryGauges.put(gaugeName, new TemporaryGauge(jobExecutionContext, gaugeName)); } } @@ -371,12 +369,12 @@ public class GaugesSupport { private void unregisterGauges() { final Map<String, ServiceRegistration> localGaugeRegistrations; synchronized (gaugeRegistrations) { - localGaugeRegistrations = new HashMap<String, ServiceRegistration>(gaugeRegistrations); + localGaugeRegistrations = new HashMap<>(gaugeRegistrations); gaugeRegistrations.clear(); } final Map<String, TemporaryGauge> localTemporaryGauges; synchronized (temporaryGauges) { - localTemporaryGauges = new HashMap<String, TemporaryGauge>(temporaryGauges); + localTemporaryGauges = new HashMap<>(temporaryGauges); } final Iterator<Entry<String, ServiceRegistration>> it = localGaugeRegistrations.entrySet().iterator(); Modified: sling/trunk/bundles/commons/scheduler/src/main/java/org/apache/sling/commons/scheduler/impl/QuartzJobExecutor.java URL: http://svn.apache.org/viewvc/sling/trunk/bundles/commons/scheduler/src/main/java/org/apache/sling/commons/scheduler/impl/QuartzJobExecutor.java?rev=1805720&r1=1805719&r2=1805720&view=diff ============================================================================== --- sling/trunk/bundles/commons/scheduler/src/main/java/org/apache/sling/commons/scheduler/impl/QuartzJobExecutor.java (original) +++ sling/trunk/bundles/commons/scheduler/src/main/java/org/apache/sling/commons/scheduler/impl/QuartzJobExecutor.java Tue Aug 22 05:18:51 2017 @@ -27,9 +27,6 @@ import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicReference; -import org.apache.sling.commons.metrics.Counter; -import org.apache.sling.commons.metrics.MetricsService; -import org.apache.sling.commons.metrics.Timer; import org.apache.sling.commons.scheduler.JobContext; import org.apache.sling.commons.scheduler.Scheduler; import org.quartz.Job; @@ -39,6 +36,10 @@ import org.quartz.JobExecutionException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import com.codahale.metrics.Counter; +import com.codahale.metrics.MetricRegistry; +import com.codahale.metrics.Timer; + /** * This component is responsible to launch a {@link org.apache.sling.commons.scheduler.Job} * or {@link Runnable} in a Quartz Scheduler. @@ -47,7 +48,7 @@ import org.slf4j.LoggerFactory; public class QuartzJobExecutor implements Job { static final int DEFAULT_SLOW_JOB_THRESHOLD_MILLIS = 1000; - + /** Is discovery available? */ public static final AtomicBoolean DISCOVERY_AVAILABLE = new AtomicBoolean(false); @@ -72,13 +73,13 @@ public class QuartzJobExecutor implement public final String providedName; public final String name; public final String[] runOn; - + // SLING-5965 : piggybacking metrics field onto JobDesc // to avoid having to create yet another object per job execution. // creating such an additional object would require a bit more JVM-GC. // but to keep JobDesc close to what it was originally intended for // ('describing a job') keeping everything additional private - private final MetricsService metricsService; + private final MetricRegistry metricRegistry; private final Counter runningJobsCounter; private final Counter overallRunningJobsCounter; private final Timer jobDurationTimer; @@ -91,21 +92,19 @@ public class QuartzJobExecutor implement this.name = (String) data.get(QuartzScheduler.DATA_MAP_NAME); this.providedName = (String)data.get(QuartzScheduler.DATA_MAP_PROVIDED_NAME); this.runOn = (String[])data.get(QuartzScheduler.DATA_MAP_RUN_ON); - + // initialize metrics fields final QuartzScheduler localQuartzScheduler = (QuartzScheduler) data.get(QuartzScheduler.DATA_MAP_QUARTZ_SCHEDULER); - MetricsService localMetricsService = null; + MetricRegistry localMetricsService = null; ConfigHolder localConfigHolder = null; if (localQuartzScheduler != null) { // shouldn't be null but for paranoia - localMetricsService = localQuartzScheduler.metricsService; + localMetricsService = localQuartzScheduler.metricsRegistry; localConfigHolder = localQuartzScheduler.configHolder; } - // localMetricsService might be null during deactivation - metricsService = localMetricsService == null ? MetricsService.NOOP : localMetricsService; // mainConfiguration might be null during deactivation slowThresholdMillis = localConfigHolder != null ? localConfigHolder.slowThresholdMillis() : DEFAULT_SLOW_JOB_THRESHOLD_MILLIS; - + String metricsSuffix = ""; final String filterName = MetricsHelper.deriveFilterName(localConfigHolder, job); if (filterName != null) { @@ -118,17 +117,29 @@ public class QuartzJobExecutor implement metricsSuffix = ".tp." + threadPoolName; } } - - runningJobsCounter = metricsService.counter(QuartzScheduler.METRICS_NAME_RUNNING_JOBS + metricsSuffix); - jobDurationTimer = metricsService.timer(QuartzScheduler.METRICS_NAME_TIMER + metricsSuffix); - overallRunningJobsCounter = metricsSuffix.length() == 0 ? null - : metricsService.counter(QuartzScheduler.METRICS_NAME_RUNNING_JOBS); + + if ( localMetricsService != null ) { + metricRegistry = localMetricsService; + runningJobsCounter = metricRegistry.counter(QuartzScheduler.METRICS_NAME_RUNNING_JOBS + metricsSuffix); + jobDurationTimer = metricRegistry.timer(QuartzScheduler.METRICS_NAME_TIMER + metricsSuffix); + overallRunningJobsCounter = metricsSuffix.length() == 0 ? null + : metricRegistry.counter(QuartzScheduler.METRICS_NAME_RUNNING_JOBS); + } else { + metricRegistry = null; + runningJobsCounter = null; + jobDurationTimer = null; + overallRunningJobsCounter = null; + } } - + private void measureJobStart() { // measure job start - if (overallRunningJobsCounter != null) overallRunningJobsCounter.increment(); - runningJobsCounter.increment(); + if (overallRunningJobsCounter != null) { + overallRunningJobsCounter.inc(); + } + if ( runningJobsCounter != null ) { + runningJobsCounter.inc(); + } jobStart = System.currentTimeMillis(); } @@ -137,9 +148,13 @@ public class QuartzJobExecutor implement // then measureJobStart was never invoked - hence not measuring anything return; } - - if (overallRunningJobsCounter != null) overallRunningJobsCounter.decrement(); - runningJobsCounter.decrement(); + + if (overallRunningJobsCounter != null) { + overallRunningJobsCounter.dec(); + } + if ( runningJobsCounter != null ) { + runningJobsCounter.dec(); + } final long elapsedMillis = System.currentTimeMillis() - jobStart; // depending on slowness either measure via a separate 'slow' or the normal timer // (and this triage can only be done by manual measuring) @@ -147,15 +162,19 @@ public class QuartzJobExecutor implement // if the job was slow we (only) add it to a separate '.slow.' timer // the idea being to not "pollute" the normal timer which would // get quite skewed metrics otherwise with slow jobs around - final String slowTimerName = QuartzScheduler.METRICS_NAME_TIMER + ".slow." - + MetricsHelper.asMetricsSuffix(this.name); - metricsService.timer(slowTimerName).update(elapsedMillis, TimeUnit.MILLISECONDS); + if ( metricRegistry != null ) { + final String slowTimerName = QuartzScheduler.METRICS_NAME_TIMER + ".slow." + + MetricsHelper.asMetricsSuffix(this.name); + metricRegistry.timer(slowTimerName).update(elapsedMillis, TimeUnit.MILLISECONDS); + } } else { // if the job was not slow, then measure it normally - jobDurationTimer.update(elapsedMillis, TimeUnit.MILLISECONDS); + if ( jobDurationTimer != null ) { + jobDurationTimer.update(elapsedMillis, TimeUnit.MILLISECONDS); + } } } - + public boolean isKnownJob() { return this.job != null && this.name != null; } Modified: sling/trunk/bundles/commons/scheduler/src/main/java/org/apache/sling/commons/scheduler/impl/QuartzScheduler.java URL: http://svn.apache.org/viewvc/sling/trunk/bundles/commons/scheduler/src/main/java/org/apache/sling/commons/scheduler/impl/QuartzScheduler.java?rev=1805720&r1=1805719&r2=1805720&view=diff ============================================================================== --- sling/trunk/bundles/commons/scheduler/src/main/java/org/apache/sling/commons/scheduler/impl/QuartzScheduler.java (original) +++ sling/trunk/bundles/commons/scheduler/src/main/java/org/apache/sling/commons/scheduler/impl/QuartzScheduler.java Tue Aug 22 05:18:51 2017 @@ -25,7 +25,6 @@ import java.util.NoSuchElementException; import java.util.Set; import java.util.UUID; -import org.apache.sling.commons.metrics.MetricsService; import org.apache.sling.commons.scheduler.Job; import org.apache.sling.commons.scheduler.ScheduleOptions; import org.apache.sling.commons.scheduler.Scheduler; @@ -54,6 +53,8 @@ import org.quartz.impl.matchers.GroupMat import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import com.codahale.metrics.MetricRegistry; + /** * The quartz based implementation of the scheduler. * @@ -112,7 +113,7 @@ public class QuartzScheduler implements private ThreadPoolManager threadPoolManager; @Reference - MetricsService metricsService; + MetricRegistry metricsRegistry; ConfigHolder configHolder;