Author: slaurent
Date: Fri May 16 20:05:08 2014
New Revision: 1595331
URL: http://svn.apache.org/r1595331
Log:
Fix https://issues.apache.org/bugzilla/show_bug.cgi?id=56526
Improved StuckThreadDetectionValve to allow to interrupt stuck threads.
Added:
tomcat/trunk/test/org/apache/catalina/valves/TestStuckThreadDetectionValve.java
Modified:
tomcat/trunk/java/org/apache/catalina/valves/LocalStrings.properties
tomcat/trunk/java/org/apache/catalina/valves/StuckThreadDetectionValve.java
tomcat/trunk/webapps/docs/changelog.xml
tomcat/trunk/webapps/docs/config/valve.xml
Modified: tomcat/trunk/java/org/apache/catalina/valves/LocalStrings.properties
URL:
http://svn.apache.org/viewvc/tomcat/trunk/java/org/apache/catalina/valves/LocalStrings.properties?rev=1595331&r1=1595330&r2=1595331&view=diff
==============================================================================
--- tomcat/trunk/java/org/apache/catalina/valves/LocalStrings.properties
(original)
+++ tomcat/trunk/java/org/apache/catalina/valves/LocalStrings.properties Fri
May 16 20:05:08 2014
@@ -52,6 +52,7 @@ sslValve.invalidProvider=The SSL provide
#Stuck thread detection Valve
stuckThreadDetectionValve.notifyStuckThreadDetected=Thread "{0}" (id={6}) has
been active for {1} milliseconds (since {2}) to serve the same request for {4}
and may be stuck (configured threshold for this StuckThreadDetectionValve is
{5} seconds). There is/are {3} thread(s) in total that are monitored by this
Valve and may be stuck.
stuckThreadDetectionValve.notifyStuckThreadCompleted=Thread "{0}" (id={3}) was
previously reported to be stuck but has completed. It was active for
approximately {1} milliseconds.{2,choice,0#|0< There is/are still {2} thread(s)
that are monitored by this Valve and may be stuck.}
+stuckThreadDetectionValve.notifyStuckThreadInterrupted=Thread "{0}" (id={5})
has been interrupted because it was active for {1} milliseconds (since {2}) to
serve the same request for {3} and was probably stuck (configured interruption
threshold for this StuckThreadDetectionValve is {4} seconds).
# HTTP status reports
# All status codes registered with IANA can be found at
Modified:
tomcat/trunk/java/org/apache/catalina/valves/StuckThreadDetectionValve.java
URL:
http://svn.apache.org/viewvc/tomcat/trunk/java/org/apache/catalina/valves/StuckThreadDetectionValve.java?rev=1595331&r1=1595330&r2=1595331&view=diff
==============================================================================
--- tomcat/trunk/java/org/apache/catalina/valves/StuckThreadDetectionValve.java
(original)
+++ tomcat/trunk/java/org/apache/catalina/valves/StuckThreadDetectionValve.java
Fri May 16 20:05:08 2014
@@ -23,7 +23,9 @@ import java.util.List;
import java.util.Queue;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentLinkedQueue;
+import java.util.concurrent.Semaphore;
import java.util.concurrent.atomic.AtomicInteger;
+import java.util.concurrent.atomic.AtomicLong;
import javax.servlet.ServletException;
@@ -57,11 +59,21 @@ public class StuckThreadDetectionValve e
private final AtomicInteger stuckCount = new AtomicInteger(0);
/**
+ * Keeps count of the number of stuck threads that have been interruoted
+ */
+ private AtomicLong interruptedThreadsCount = new AtomicLong();
+
+ /**
* In seconds. Default 600 (10 minutes).
*/
private int threshold = 600;
/**
+ * In seconds. Default is -1 to disable interruption.
+ */
+ private int interruptThreadThreshold;
+
+ /**
* The only references we keep to actual running Thread objects are in
* this Map (which is automatically cleaned in invoke()s finally clause).
* That way, Threads can be GC'ed, eventhough the Valve still thinks they
@@ -76,7 +88,7 @@ public class StuckThreadDetectionValve e
new ConcurrentLinkedQueue<>();
/**
- * Specify the threshold (in seconds) used when checking for stuck threads.
+ * Specifies the threshold (in seconds) used when checking for stuck
threads.
* If <=0, the detection is disabled. The default is 600 seconds.
*
* @param threshold
@@ -95,6 +107,22 @@ public class StuckThreadDetectionValve e
}
+ public int getInterruptThreadThreshold() {
+ return interruptThreadThreshold;
+ }
+
+ /**
+ * Specifies the threshold (in seconds) before stuck threads are
interrupted.
+ * If <=0, the interruption is disabled. The default is -1.
+ * If >=0, the value must actually be >= threshold.
+ *
+ * @param interruptThreadThreshold
+ * The new thread interruption threshold in seconds
+ */
+ public void setInterruptThreadThreshold(int interruptThreadThreshold) {
+ this.interruptThreadThreshold = interruptThreadThreshold;
+ }
+
/**
* Required to enable async support.
*/
@@ -173,7 +201,7 @@ public class StuckThreadDetectionValve e
requestUrl.append(request.getQueryString());
}
MonitoredThread monitoredThread = new
MonitoredThread(Thread.currentThread(),
- requestUrl.toString());
+ requestUrl.toString(), interruptThreadThreshold > 0);
activeThreads.put(key, monitoredThread);
try {
@@ -181,6 +209,9 @@ public class StuckThreadDetectionValve e
} finally {
activeThreads.remove(key);
if (monitoredThread.markAsDone() == MonitoredThreadState.STUCK) {
+ if(monitoredThread.wasInterrupted()) {
+ interruptedThreadsCount.incrementAndGet();
+ }
completedStuckThreadsQueue.add(
new CompletedStuckThread(monitoredThread.getThread(),
monitoredThread.getActiveTimeInMillis()));
@@ -192,7 +223,7 @@ public class StuckThreadDetectionValve e
public void backgroundProcess() {
super.backgroundProcess();
- long thresholdInMillis = threshold * 1000;
+ long thresholdInMillis = threshold * 1000L;
// Check monitored threads, being careful that the request might have
// completed by the time we examine it
@@ -203,6 +234,9 @@ public class StuckThreadDetectionValve e
int numStuckThreads = stuckCount.incrementAndGet();
notifyStuckThreadDetected(monitoredThread, activeTime,
numStuckThreads);
}
+ if(interruptThreadThreshold > 0 && activeTime >=
interruptThreadThreshold*1000L) {
+ monitoredThread.interruptIfStuck(interruptThreadThreshold);
+ }
}
// Check if any threads previously reported as stuck, have finished.
for (CompletedStuckThread completedStuckThread =
completedStuckThreadsQueue.poll();
@@ -238,6 +272,11 @@ public class StuckThreadDetectionValve e
return nameList.toArray(new String[nameList.size()]);
}
+ public long getInterruptedThreadsCount() {
+ return interruptedThreadsCount.get();
+ }
+
+
private static class MonitoredThread {
/**
@@ -248,11 +287,27 @@ public class StuckThreadDetectionValve e
private final long start;
private final AtomicInteger state = new AtomicInteger(
MonitoredThreadState.RUNNING.ordinal());
+ /**
+ * Semaphore to synchronize the stuck thread with the
background-process
+ * thread. It's not used if the interruption feature is not active.
+ */
+ private final Semaphore interruptionSemaphore;
+ /**
+ * Set to true after the thread is interrupted. No need to make it
+ * volatile since it is accessed right after acquiring the semaphore.
+ */
+ private boolean interrupted;
- public MonitoredThread(Thread thread, String requestUri) {
+ public MonitoredThread(Thread thread, String requestUri,
+ boolean interruptible) {
this.thread = thread;
this.requestUri = requestUri;
this.start = System.currentTimeMillis();
+ if (interruptible) {
+ interruptionSemaphore = new Semaphore(1);
+ } else {
+ interruptionSemaphore = null;
+ }
}
public Thread getThread() {
@@ -278,12 +333,63 @@ public class StuckThreadDetectionValve e
public MonitoredThreadState markAsDone() {
int val =
this.state.getAndSet(MonitoredThreadState.DONE.ordinal());
- return MonitoredThreadState.values()[val];
+ MonitoredThreadState threadState =
MonitoredThreadState.values()[val];
+
+ if (threadState == MonitoredThreadState.STUCK
+ && interruptionSemaphore != null) {
+ try {
+ // use the semaphore to synchronize with the background
thread
+ // which might try to interrupt this current thread.
+ // Otherwise, the current thread might be interrupted after
+ // going out from here, maybe already serving a new request
+ this.interruptionSemaphore.acquire();
+ } catch (InterruptedException e) {
+ log.debug(
+ "thread interrupted after the request is finished,
ignoring",
+ e);
+ }
+ // no need to release the semaphore, it will be GCed
+ }
+ //else the request went through before being marked as stuck, no
need
+ //to sync agains the semaphore
+ return threadState;
}
boolean isMarkedAsStuck() {
return this.state.get() == MonitoredThreadState.STUCK.ordinal();
}
+
+ public boolean interruptIfStuck(long interruptThreadThreshold) {
+ if (!isMarkedAsStuck() || interruptionSemaphore == null
+ || !this.interruptionSemaphore.tryAcquire()) {
+ // if the semaphore is already acquired, it means that the
+ // request thread got unstuck before we interrupted it
+ return false;
+ }
+ try {
+ if (log.isWarnEnabled()) {
+ String msg = sm.getString(
+
"stuckThreadDetectionValve.notifyStuckThreadInterrupted",
+ this.getThread().getName(),
+ Long.valueOf(getActiveTimeInMillis()),
+ this.getStartTime(), this.getRequestUri(),
+ Long.valueOf(interruptThreadThreshold),
+ String.valueOf(this.getThread().getId()));
+ Throwable th = new Throwable();
+ th.setStackTrace(this.getThread().getStackTrace());
+ log.warn(msg, th);
+ }
+ this.thread.interrupt();
+ } finally {
+ this.interrupted = true;
+ this.interruptionSemaphore.release();
+ }
+ return true;
+ }
+
+ public boolean wasInterrupted() {
+ return interrupted;
+ }
}
private static class CompletedStuckThread {
Added:
tomcat/trunk/test/org/apache/catalina/valves/TestStuckThreadDetectionValve.java
URL:
http://svn.apache.org/viewvc/tomcat/trunk/test/org/apache/catalina/valves/TestStuckThreadDetectionValve.java?rev=1595331&view=auto
==============================================================================
---
tomcat/trunk/test/org/apache/catalina/valves/TestStuckThreadDetectionValve.java
(added)
+++
tomcat/trunk/test/org/apache/catalina/valves/TestStuckThreadDetectionValve.java
Fri May 16 20:05:08 2014
@@ -0,0 +1,161 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.catalina.valves;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.concurrent.TimeUnit;
+
+import javax.servlet.http.HttpServlet;
+import javax.servlet.http.HttpServletRequest;
+import javax.servlet.http.HttpServletResponse;
+
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+
+import org.apache.catalina.Wrapper;
+import org.apache.catalina.core.StandardContext;
+import org.apache.catalina.startup.Tomcat;
+import org.apache.catalina.startup.TomcatBaseTest;
+import org.apache.tomcat.util.buf.ByteChunk;
+
+public class TestStuckThreadDetectionValve extends TomcatBaseTest {
+ private StandardContext context;
+ private Tomcat tomcat;
+
+ @Before
+ public void setup() {
+ tomcat = getTomcatInstance();
+ File docBase = new File(System.getProperty("java.io.tmpdir"));
+ context = (StandardContext) tomcat.addContext("",
+ docBase.getAbsolutePath());
+ }
+
+ @Test
+ public void testDetection() throws Exception {
+ // second, we test the actual effect of the flag on the startup
+ StuckingServlet stuckingServlet = new StuckingServlet(5000L);
+ Wrapper servlet = Tomcat.addServlet(context, "myservlet",
+ stuckingServlet);
+ servlet.addMapping("/myservlet");
+
+ StuckThreadDetectionValve valve = new StuckThreadDetectionValve();
+ valve.setThreshold(2);
+ context.addValve(valve);
+ context.setBackgroundProcessorDelay(1);
+ tomcat.start();
+
+ Assert.assertEquals(0, valve.getStuckThreadIds().length);
+
+ final ByteChunk result = new ByteChunk();
+ Thread asyncThread = new Thread() {
+ @Override
+ public void run() {
+ try {
+ getUrl("http://localhost:" + getPort() + "/myservlet",
+ result, null);
+ } catch (IOException e) {
+ e.printStackTrace();
+ }
+ }
+
+ };
+ asyncThread.start();
+
+ Thread.sleep(1000L);
+ Assert.assertEquals(0, valve.getStuckThreadIds().length);
+
+ Thread.sleep(2000L);
+ Assert.assertEquals(1, valve.getStuckThreadIds().length);
+
+ asyncThread.join();
+ Assert.assertFalse(stuckingServlet.wasInterrupted);
+ Assert.assertTrue(result.toString().startsWith("OK"));
+ }
+
+ @Test
+ public void testInterruption() throws Exception {
+ // second, we test the actual effect of the flag on the startup
+ StuckingServlet stuckingServlet = new StuckingServlet(
+ TimeUnit.SECONDS.toMillis(20L));
+ Wrapper servlet = Tomcat.addServlet(context, "myservlet",
+ stuckingServlet);
+ servlet.addMapping("/myservlet");
+
+ StuckThreadDetectionValve valve = new StuckThreadDetectionValve();
+ valve.setThreshold(2);
+ valve.setInterruptThreadThreshold(4);
+ context.addValve(valve);
+ context.setBackgroundProcessorDelay(1);
+ tomcat.start();
+
+ Assert.assertEquals(0, valve.getStuckThreadIds().length);
+
+ final ByteChunk result = new ByteChunk();
+ Thread asyncThread = new Thread() {
+ @Override
+ public void run() {
+ try {
+ getUrl("http://localhost:" + getPort() + "/myservlet",
+ result, null);
+ } catch (IOException e) {
+ e.printStackTrace();
+ }
+ }
+
+ };
+ asyncThread.start();
+
+ Thread.sleep(1000L);
+ Assert.assertEquals(0, valve.getStuckThreadIds().length);
+
+ Thread.sleep(3000L);
+ Assert.assertEquals(1, valve.getStuckThreadIds().length);
+
+ Thread.sleep(3000L);
+ Assert.assertTrue(stuckingServlet.wasInterrupted);
+ Assert.assertEquals(0, valve.getStuckThreadIds().length);
+
+ asyncThread.join();
+ Assert.assertTrue(result.toString().startsWith("OK"));
+ }
+
+ private class StuckingServlet extends HttpServlet {
+
+ private static final long serialVersionUID = 1L;
+ private final long delay;
+ boolean wasInterrupted = false;
+
+ StuckingServlet(long delay) {
+ this.delay = delay;
+ }
+
+ @Override
+ protected void doGet(HttpServletRequest req, HttpServletResponse resp)
+ throws IOException {
+ try {
+ Thread.sleep(delay);
+ } catch (InterruptedException e) {
+ wasInterrupted = true;
+ }
+ resp.setContentType("text/plain");
+ resp.getWriter().println("OK");
+ }
+
+ }
+}
Modified: tomcat/trunk/webapps/docs/changelog.xml
URL:
http://svn.apache.org/viewvc/tomcat/trunk/webapps/docs/changelog.xml?rev=1595331&r1=1595330&r2=1595331&view=diff
==============================================================================
--- tomcat/trunk/webapps/docs/changelog.xml (original)
+++ tomcat/trunk/webapps/docs/changelog.xml Fri May 16 20:05:08 2014
@@ -45,24 +45,29 @@
issues to not "pop up" wrt. others).
-->
<section name="Tomcat 8.0.8 (markt)">
- <subsection name="Catalina">
- <changelog>
- <fix>
- <bug>56536</bug>: Ensure that
- <code>HttpSessionBindingListener.valueUnbound()</code> uses the correct
- class loader when the <code>SingleSignOn</code> valve is used. (markt)
- </fix>
- </changelog>
- </subsection>
- <subsection name="Jasper">
- <changelog>
- <fix>
- <bug>56529</bug>: Avoid <code>NoSuchElementException</code> while
handling
- attributes with empty string value in custom tags. Patch provided by
- Hariprasad Manchi. (violetagg)
+ <subsection name="Catalina">
+ <changelog>
+ <fix>
+ <bug>56536</bug>: Ensure that
+ <code>HttpSessionBindingListener.valueUnbound()</code> uses the correct
+ class loader when the <code>SingleSignOn</code> valve is used. (markt)
</fix>
- </changelog>
- </subsection>
+ <add>
+ <bug>56526</bug>: Improved the <code>StuckThreadDetectionValve</code>
to
+ optionally interrupt stuck threads to attempt to unblock them.
+ (slaurent)
+ </add>
+ </changelog>
+ </subsection>
+ <subsection name="Jasper">
+ <changelog>
+ <fix>
+ <bug>56529</bug>: Avoid <code>NoSuchElementException</code> while
handling
+ attributes with empty string value in custom tags. Patch provided by
+ Hariprasad Manchi. (violetagg)
+ </fix>
+ </changelog>
+ </subsection>
</section>
<section name="Tomcat 8.0.7 (markt)" rtext="not released">
<subsection name="Catalina">
Modified: tomcat/trunk/webapps/docs/config/valve.xml
URL:
http://svn.apache.org/viewvc/tomcat/trunk/webapps/docs/config/valve.xml?rev=1595331&r1=1595330&r2=1595331&view=diff
==============================================================================
--- tomcat/trunk/webapps/docs/config/valve.xml (original)
+++ tomcat/trunk/webapps/docs/config/valve.xml Fri May 16 20:05:08 2014
@@ -1548,10 +1548,12 @@
<subsection name="Introduction">
- <p>This valve allows to detect requests that take a long time to process,
which might
- indicate that the thread that is processing it is stuck.</p>
- <p>When such a request is detected, the current stack trace of its thread
is written
- to Tomcat log with a WARN level.</p>
+ <p>This valve allows to detect requests that take a long time to process,
+ which might indicate that the thread that is processing it is stuck.
+ Additionally it can optionally interrupt such threads to try and unblock
+ them.</p>
+ <p>When such a request is detected, the current stack trace of its thread
is
+ written to Tomcat log with a WARN level.</p>
<p>The IDs and names of the stuck threads are available through JMX in the
<code>stuckThreadIds</code> and <code>stuckThreadNames</code> attributes.
The IDs can be used with the standard Threading JVM MBean
@@ -1576,9 +1578,20 @@
<attribute name="threshold" required="false">
<p>Minimum duration in seconds after which a thread is considered
stuck.
Default is 600 seconds. If set to 0, the detection is disabled.</p>
- <p>Note: since the detection is done in the background thread of the
Container
- (Engine, Host or Context) declaring this Valve, the threshold should
be higher
- than the <code>backgroundProcessorDelay</code> of this Container.</p>
+ <p>Note: since the detection (and optional interruption) is done in the
+ background thread of the Container (Engine, Host or Context) declaring
+ this Valve, the threshold should be higher than the
+ <code>backgroundProcessorDelay</code> of this Container.</p>
+ </attribute>
+
+ <attribute name="interruptThreadThreshold" required="false">
+ <p>Minimum duration in seconds after which a stuck thread should be
+ interrupted to attempt to "free" it.</p>
+ <p>Note that there's no guarantee that the thread will get unstuck.
+ This usually works well for threads stuck on I/O or locks, but is
+ probably useless in case of infinite loops.</p>
+ <p>Default is -1 which disables the feature. To enable it, the value
+ must be greater or equal to <code>threshold</code>.</p>
</attribute>
</attributes>
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]