Author: slaurent Date: Fri May 16 20:05:08 2014 New Revision: 1595331 URL: http://svn.apache.org/r1595331 Log: Fix https://issues.apache.org/bugzilla/show_bug.cgi?id=56526 Improved StuckThreadDetectionValve to allow to interrupt stuck threads.
Added: tomcat/trunk/test/org/apache/catalina/valves/TestStuckThreadDetectionValve.java Modified: tomcat/trunk/java/org/apache/catalina/valves/LocalStrings.properties tomcat/trunk/java/org/apache/catalina/valves/StuckThreadDetectionValve.java tomcat/trunk/webapps/docs/changelog.xml tomcat/trunk/webapps/docs/config/valve.xml Modified: tomcat/trunk/java/org/apache/catalina/valves/LocalStrings.properties URL: http://svn.apache.org/viewvc/tomcat/trunk/java/org/apache/catalina/valves/LocalStrings.properties?rev=1595331&r1=1595330&r2=1595331&view=diff ============================================================================== --- tomcat/trunk/java/org/apache/catalina/valves/LocalStrings.properties (original) +++ tomcat/trunk/java/org/apache/catalina/valves/LocalStrings.properties Fri May 16 20:05:08 2014 @@ -52,6 +52,7 @@ sslValve.invalidProvider=The SSL provide #Stuck thread detection Valve stuckThreadDetectionValve.notifyStuckThreadDetected=Thread "{0}" (id={6}) has been active for {1} milliseconds (since {2}) to serve the same request for {4} and may be stuck (configured threshold for this StuckThreadDetectionValve is {5} seconds). There is/are {3} thread(s) in total that are monitored by this Valve and may be stuck. stuckThreadDetectionValve.notifyStuckThreadCompleted=Thread "{0}" (id={3}) was previously reported to be stuck but has completed. It was active for approximately {1} milliseconds.{2,choice,0#|0< There is/are still {2} thread(s) that are monitored by this Valve and may be stuck.} +stuckThreadDetectionValve.notifyStuckThreadInterrupted=Thread "{0}" (id={5}) has been interrupted because it was active for {1} milliseconds (since {2}) to serve the same request for {3} and was probably stuck (configured interruption threshold for this StuckThreadDetectionValve is {4} seconds). # HTTP status reports # All status codes registered with IANA can be found at Modified: tomcat/trunk/java/org/apache/catalina/valves/StuckThreadDetectionValve.java URL: http://svn.apache.org/viewvc/tomcat/trunk/java/org/apache/catalina/valves/StuckThreadDetectionValve.java?rev=1595331&r1=1595330&r2=1595331&view=diff ============================================================================== --- tomcat/trunk/java/org/apache/catalina/valves/StuckThreadDetectionValve.java (original) +++ tomcat/trunk/java/org/apache/catalina/valves/StuckThreadDetectionValve.java Fri May 16 20:05:08 2014 @@ -23,7 +23,9 @@ import java.util.List; import java.util.Queue; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ConcurrentLinkedQueue; +import java.util.concurrent.Semaphore; import java.util.concurrent.atomic.AtomicInteger; +import java.util.concurrent.atomic.AtomicLong; import javax.servlet.ServletException; @@ -57,11 +59,21 @@ public class StuckThreadDetectionValve e private final AtomicInteger stuckCount = new AtomicInteger(0); /** + * Keeps count of the number of stuck threads that have been interruoted + */ + private AtomicLong interruptedThreadsCount = new AtomicLong(); + + /** * In seconds. Default 600 (10 minutes). */ private int threshold = 600; /** + * In seconds. Default is -1 to disable interruption. + */ + private int interruptThreadThreshold; + + /** * The only references we keep to actual running Thread objects are in * this Map (which is automatically cleaned in invoke()s finally clause). * That way, Threads can be GC'ed, eventhough the Valve still thinks they @@ -76,7 +88,7 @@ public class StuckThreadDetectionValve e new ConcurrentLinkedQueue<>(); /** - * Specify the threshold (in seconds) used when checking for stuck threads. + * Specifies the threshold (in seconds) used when checking for stuck threads. * If <=0, the detection is disabled. The default is 600 seconds. * * @param threshold @@ -95,6 +107,22 @@ public class StuckThreadDetectionValve e } + public int getInterruptThreadThreshold() { + return interruptThreadThreshold; + } + + /** + * Specifies the threshold (in seconds) before stuck threads are interrupted. + * If <=0, the interruption is disabled. The default is -1. + * If >=0, the value must actually be >= threshold. + * + * @param interruptThreadThreshold + * The new thread interruption threshold in seconds + */ + public void setInterruptThreadThreshold(int interruptThreadThreshold) { + this.interruptThreadThreshold = interruptThreadThreshold; + } + /** * Required to enable async support. */ @@ -173,7 +201,7 @@ public class StuckThreadDetectionValve e requestUrl.append(request.getQueryString()); } MonitoredThread monitoredThread = new MonitoredThread(Thread.currentThread(), - requestUrl.toString()); + requestUrl.toString(), interruptThreadThreshold > 0); activeThreads.put(key, monitoredThread); try { @@ -181,6 +209,9 @@ public class StuckThreadDetectionValve e } finally { activeThreads.remove(key); if (monitoredThread.markAsDone() == MonitoredThreadState.STUCK) { + if(monitoredThread.wasInterrupted()) { + interruptedThreadsCount.incrementAndGet(); + } completedStuckThreadsQueue.add( new CompletedStuckThread(monitoredThread.getThread(), monitoredThread.getActiveTimeInMillis())); @@ -192,7 +223,7 @@ public class StuckThreadDetectionValve e public void backgroundProcess() { super.backgroundProcess(); - long thresholdInMillis = threshold * 1000; + long thresholdInMillis = threshold * 1000L; // Check monitored threads, being careful that the request might have // completed by the time we examine it @@ -203,6 +234,9 @@ public class StuckThreadDetectionValve e int numStuckThreads = stuckCount.incrementAndGet(); notifyStuckThreadDetected(monitoredThread, activeTime, numStuckThreads); } + if(interruptThreadThreshold > 0 && activeTime >= interruptThreadThreshold*1000L) { + monitoredThread.interruptIfStuck(interruptThreadThreshold); + } } // Check if any threads previously reported as stuck, have finished. for (CompletedStuckThread completedStuckThread = completedStuckThreadsQueue.poll(); @@ -238,6 +272,11 @@ public class StuckThreadDetectionValve e return nameList.toArray(new String[nameList.size()]); } + public long getInterruptedThreadsCount() { + return interruptedThreadsCount.get(); + } + + private static class MonitoredThread { /** @@ -248,11 +287,27 @@ public class StuckThreadDetectionValve e private final long start; private final AtomicInteger state = new AtomicInteger( MonitoredThreadState.RUNNING.ordinal()); + /** + * Semaphore to synchronize the stuck thread with the background-process + * thread. It's not used if the interruption feature is not active. + */ + private final Semaphore interruptionSemaphore; + /** + * Set to true after the thread is interrupted. No need to make it + * volatile since it is accessed right after acquiring the semaphore. + */ + private boolean interrupted; - public MonitoredThread(Thread thread, String requestUri) { + public MonitoredThread(Thread thread, String requestUri, + boolean interruptible) { this.thread = thread; this.requestUri = requestUri; this.start = System.currentTimeMillis(); + if (interruptible) { + interruptionSemaphore = new Semaphore(1); + } else { + interruptionSemaphore = null; + } } public Thread getThread() { @@ -278,12 +333,63 @@ public class StuckThreadDetectionValve e public MonitoredThreadState markAsDone() { int val = this.state.getAndSet(MonitoredThreadState.DONE.ordinal()); - return MonitoredThreadState.values()[val]; + MonitoredThreadState threadState = MonitoredThreadState.values()[val]; + + if (threadState == MonitoredThreadState.STUCK + && interruptionSemaphore != null) { + try { + // use the semaphore to synchronize with the background thread + // which might try to interrupt this current thread. + // Otherwise, the current thread might be interrupted after + // going out from here, maybe already serving a new request + this.interruptionSemaphore.acquire(); + } catch (InterruptedException e) { + log.debug( + "thread interrupted after the request is finished, ignoring", + e); + } + // no need to release the semaphore, it will be GCed + } + //else the request went through before being marked as stuck, no need + //to sync agains the semaphore + return threadState; } boolean isMarkedAsStuck() { return this.state.get() == MonitoredThreadState.STUCK.ordinal(); } + + public boolean interruptIfStuck(long interruptThreadThreshold) { + if (!isMarkedAsStuck() || interruptionSemaphore == null + || !this.interruptionSemaphore.tryAcquire()) { + // if the semaphore is already acquired, it means that the + // request thread got unstuck before we interrupted it + return false; + } + try { + if (log.isWarnEnabled()) { + String msg = sm.getString( + "stuckThreadDetectionValve.notifyStuckThreadInterrupted", + this.getThread().getName(), + Long.valueOf(getActiveTimeInMillis()), + this.getStartTime(), this.getRequestUri(), + Long.valueOf(interruptThreadThreshold), + String.valueOf(this.getThread().getId())); + Throwable th = new Throwable(); + th.setStackTrace(this.getThread().getStackTrace()); + log.warn(msg, th); + } + this.thread.interrupt(); + } finally { + this.interrupted = true; + this.interruptionSemaphore.release(); + } + return true; + } + + public boolean wasInterrupted() { + return interrupted; + } } private static class CompletedStuckThread { Added: tomcat/trunk/test/org/apache/catalina/valves/TestStuckThreadDetectionValve.java URL: http://svn.apache.org/viewvc/tomcat/trunk/test/org/apache/catalina/valves/TestStuckThreadDetectionValve.java?rev=1595331&view=auto ============================================================================== --- tomcat/trunk/test/org/apache/catalina/valves/TestStuckThreadDetectionValve.java (added) +++ tomcat/trunk/test/org/apache/catalina/valves/TestStuckThreadDetectionValve.java Fri May 16 20:05:08 2014 @@ -0,0 +1,161 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.catalina.valves; + +import java.io.File; +import java.io.IOException; +import java.util.concurrent.TimeUnit; + +import javax.servlet.http.HttpServlet; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; + +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; + +import org.apache.catalina.Wrapper; +import org.apache.catalina.core.StandardContext; +import org.apache.catalina.startup.Tomcat; +import org.apache.catalina.startup.TomcatBaseTest; +import org.apache.tomcat.util.buf.ByteChunk; + +public class TestStuckThreadDetectionValve extends TomcatBaseTest { + private StandardContext context; + private Tomcat tomcat; + + @Before + public void setup() { + tomcat = getTomcatInstance(); + File docBase = new File(System.getProperty("java.io.tmpdir")); + context = (StandardContext) tomcat.addContext("", + docBase.getAbsolutePath()); + } + + @Test + public void testDetection() throws Exception { + // second, we test the actual effect of the flag on the startup + StuckingServlet stuckingServlet = new StuckingServlet(5000L); + Wrapper servlet = Tomcat.addServlet(context, "myservlet", + stuckingServlet); + servlet.addMapping("/myservlet"); + + StuckThreadDetectionValve valve = new StuckThreadDetectionValve(); + valve.setThreshold(2); + context.addValve(valve); + context.setBackgroundProcessorDelay(1); + tomcat.start(); + + Assert.assertEquals(0, valve.getStuckThreadIds().length); + + final ByteChunk result = new ByteChunk(); + Thread asyncThread = new Thread() { + @Override + public void run() { + try { + getUrl("http://localhost:" + getPort() + "/myservlet", + result, null); + } catch (IOException e) { + e.printStackTrace(); + } + } + + }; + asyncThread.start(); + + Thread.sleep(1000L); + Assert.assertEquals(0, valve.getStuckThreadIds().length); + + Thread.sleep(2000L); + Assert.assertEquals(1, valve.getStuckThreadIds().length); + + asyncThread.join(); + Assert.assertFalse(stuckingServlet.wasInterrupted); + Assert.assertTrue(result.toString().startsWith("OK")); + } + + @Test + public void testInterruption() throws Exception { + // second, we test the actual effect of the flag on the startup + StuckingServlet stuckingServlet = new StuckingServlet( + TimeUnit.SECONDS.toMillis(20L)); + Wrapper servlet = Tomcat.addServlet(context, "myservlet", + stuckingServlet); + servlet.addMapping("/myservlet"); + + StuckThreadDetectionValve valve = new StuckThreadDetectionValve(); + valve.setThreshold(2); + valve.setInterruptThreadThreshold(4); + context.addValve(valve); + context.setBackgroundProcessorDelay(1); + tomcat.start(); + + Assert.assertEquals(0, valve.getStuckThreadIds().length); + + final ByteChunk result = new ByteChunk(); + Thread asyncThread = new Thread() { + @Override + public void run() { + try { + getUrl("http://localhost:" + getPort() + "/myservlet", + result, null); + } catch (IOException e) { + e.printStackTrace(); + } + } + + }; + asyncThread.start(); + + Thread.sleep(1000L); + Assert.assertEquals(0, valve.getStuckThreadIds().length); + + Thread.sleep(3000L); + Assert.assertEquals(1, valve.getStuckThreadIds().length); + + Thread.sleep(3000L); + Assert.assertTrue(stuckingServlet.wasInterrupted); + Assert.assertEquals(0, valve.getStuckThreadIds().length); + + asyncThread.join(); + Assert.assertTrue(result.toString().startsWith("OK")); + } + + private class StuckingServlet extends HttpServlet { + + private static final long serialVersionUID = 1L; + private final long delay; + boolean wasInterrupted = false; + + StuckingServlet(long delay) { + this.delay = delay; + } + + @Override + protected void doGet(HttpServletRequest req, HttpServletResponse resp) + throws IOException { + try { + Thread.sleep(delay); + } catch (InterruptedException e) { + wasInterrupted = true; + } + resp.setContentType("text/plain"); + resp.getWriter().println("OK"); + } + + } +} Modified: tomcat/trunk/webapps/docs/changelog.xml URL: http://svn.apache.org/viewvc/tomcat/trunk/webapps/docs/changelog.xml?rev=1595331&r1=1595330&r2=1595331&view=diff ============================================================================== --- tomcat/trunk/webapps/docs/changelog.xml (original) +++ tomcat/trunk/webapps/docs/changelog.xml Fri May 16 20:05:08 2014 @@ -45,24 +45,29 @@ issues to not "pop up" wrt. others). --> <section name="Tomcat 8.0.8 (markt)"> - <subsection name="Catalina"> - <changelog> - <fix> - <bug>56536</bug>: Ensure that - <code>HttpSessionBindingListener.valueUnbound()</code> uses the correct - class loader when the <code>SingleSignOn</code> valve is used. (markt) - </fix> - </changelog> - </subsection> - <subsection name="Jasper"> - <changelog> - <fix> - <bug>56529</bug>: Avoid <code>NoSuchElementException</code> while handling - attributes with empty string value in custom tags. Patch provided by - Hariprasad Manchi. (violetagg) + <subsection name="Catalina"> + <changelog> + <fix> + <bug>56536</bug>: Ensure that + <code>HttpSessionBindingListener.valueUnbound()</code> uses the correct + class loader when the <code>SingleSignOn</code> valve is used. (markt) </fix> - </changelog> - </subsection> + <add> + <bug>56526</bug>: Improved the <code>StuckThreadDetectionValve</code> to + optionally interrupt stuck threads to attempt to unblock them. + (slaurent) + </add> + </changelog> + </subsection> + <subsection name="Jasper"> + <changelog> + <fix> + <bug>56529</bug>: Avoid <code>NoSuchElementException</code> while handling + attributes with empty string value in custom tags. Patch provided by + Hariprasad Manchi. (violetagg) + </fix> + </changelog> + </subsection> </section> <section name="Tomcat 8.0.7 (markt)" rtext="not released"> <subsection name="Catalina"> Modified: tomcat/trunk/webapps/docs/config/valve.xml URL: http://svn.apache.org/viewvc/tomcat/trunk/webapps/docs/config/valve.xml?rev=1595331&r1=1595330&r2=1595331&view=diff ============================================================================== --- tomcat/trunk/webapps/docs/config/valve.xml (original) +++ tomcat/trunk/webapps/docs/config/valve.xml Fri May 16 20:05:08 2014 @@ -1548,10 +1548,12 @@ <subsection name="Introduction"> - <p>This valve allows to detect requests that take a long time to process, which might - indicate that the thread that is processing it is stuck.</p> - <p>When such a request is detected, the current stack trace of its thread is written - to Tomcat log with a WARN level.</p> + <p>This valve allows to detect requests that take a long time to process, + which might indicate that the thread that is processing it is stuck. + Additionally it can optionally interrupt such threads to try and unblock + them.</p> + <p>When such a request is detected, the current stack trace of its thread is + written to Tomcat log with a WARN level.</p> <p>The IDs and names of the stuck threads are available through JMX in the <code>stuckThreadIds</code> and <code>stuckThreadNames</code> attributes. The IDs can be used with the standard Threading JVM MBean @@ -1576,9 +1578,20 @@ <attribute name="threshold" required="false"> <p>Minimum duration in seconds after which a thread is considered stuck. Default is 600 seconds. If set to 0, the detection is disabled.</p> - <p>Note: since the detection is done in the background thread of the Container - (Engine, Host or Context) declaring this Valve, the threshold should be higher - than the <code>backgroundProcessorDelay</code> of this Container.</p> + <p>Note: since the detection (and optional interruption) is done in the + background thread of the Container (Engine, Host or Context) declaring + this Valve, the threshold should be higher than the + <code>backgroundProcessorDelay</code> of this Container.</p> + </attribute> + + <attribute name="interruptThreadThreshold" required="false"> + <p>Minimum duration in seconds after which a stuck thread should be + interrupted to attempt to "free" it.</p> + <p>Note that there's no guarantee that the thread will get unstuck. + This usually works well for threads stuck on I/O or locks, but is + probably useless in case of infinite loops.</p> + <p>Default is -1 which disables the feature. To enable it, the value + must be greater or equal to <code>threshold</code>.</p> </attribute> </attributes> --------------------------------------------------------------------- To unsubscribe, e-mail: dev-unsubscr...@tomcat.apache.org For additional commands, e-mail: dev-h...@tomcat.apache.org