Author: markt
Date: Mon Apr 23 19:48:11 2018
New Revision: 1829934
URL: http://svn.apache.org/viewvc?rev=1829934&view=rev
Log:
Fix https://bz.apache.org/bugzilla/show_bug.cgi?id=62297
Enable the CrawlerSessionManagerValve to correctly handle bots that crawl
multiple hosts and/or web applications when the Valve is configured on a Host
or an Engine.
Modified:
tomcat/trunk/java/org/apache/catalina/valves/CrawlerSessionManagerValve.java
tomcat/trunk/test/org/apache/catalina/valves/TestCrawlerSessionManagerValve.java
tomcat/trunk/webapps/docs/changelog.xml
tomcat/trunk/webapps/docs/config/valve.xml
Modified:
tomcat/trunk/java/org/apache/catalina/valves/CrawlerSessionManagerValve.java
URL:
http://svn.apache.org/viewvc/tomcat/trunk/java/org/apache/catalina/valves/CrawlerSessionManagerValve.java?rev=1829934&r1=1829933&r2=1829934&view=diff
==============================================================================
---
tomcat/trunk/java/org/apache/catalina/valves/CrawlerSessionManagerValve.java
(original)
+++
tomcat/trunk/java/org/apache/catalina/valves/CrawlerSessionManagerValve.java
Mon Apr 23 19:48:11 2018
@@ -27,6 +27,8 @@ import javax.servlet.http.HttpSession;
import javax.servlet.http.HttpSessionBindingEvent;
import javax.servlet.http.HttpSessionBindingListener;
+import org.apache.catalina.Context;
+import org.apache.catalina.Host;
import org.apache.catalina.LifecycleException;
import org.apache.catalina.connector.Request;
import org.apache.catalina.connector.Response;
@@ -44,8 +46,8 @@ public class CrawlerSessionManagerValve
private static final Log log =
LogFactory.getLog(CrawlerSessionManagerValve.class);
- private final Map<String, String> clientIpSessionId = new
ConcurrentHashMap<>();
- private final Map<String, String> sessionIdClientIp = new
ConcurrentHashMap<>();
+ private final Map<String, String> clientIdSessionId = new
ConcurrentHashMap<>();
+ private final Map<String, String> sessionIdClientId = new
ConcurrentHashMap<>();
private String crawlerUserAgents = ".*[bB]ot.*|.*Yahoo!
Slurp.*|.*Feedfetcher-Google.*";
private Pattern uaPattern = null;
@@ -55,6 +57,10 @@ public class CrawlerSessionManagerValve
private int sessionInactiveInterval = 60;
+ private boolean isHostAware = true;
+
+ private boolean isContextAware = true;
+
/**
* Specifies a default constructor so async support can be configured.
@@ -134,7 +140,27 @@ public class CrawlerSessionManagerValve
public Map<String, String> getClientIpSessionId() {
- return clientIpSessionId;
+ return clientIdSessionId;
+ }
+
+
+ public boolean isHostAware() {
+ return isHostAware;
+ }
+
+
+ public void setHostAware(boolean isHostAware) {
+ this.isHostAware = isHostAware;
+ }
+
+
+ public boolean isContextAware() {
+ return isContextAware;
+ }
+
+
+ public void setContextAware(boolean isContextAware) {
+ this.isContextAware = isContextAware;
}
@@ -152,9 +178,10 @@ public class CrawlerSessionManagerValve
boolean isBot = false;
String sessionId = null;
String clientIp = request.getRemoteAddr();
+ String clientIdentifier = getClientIdentifier(request.getHost(),
request.getContext(), clientIp);
if (log.isDebugEnabled()) {
- log.debug(request.hashCode() + ": ClientIp=" + clientIp + ",
RequestedSessionId="
+ log.debug(request.hashCode() + ": ClientIdentifier=" +
clientIdentifier + ", RequestedSessionId="
+ request.getRequestedSessionId());
}
@@ -194,7 +221,7 @@ public class CrawlerSessionManagerValve
// If this is a bot, is the session ID known?
if (isBot) {
- sessionId = clientIpSessionId.get(clientIp);
+ sessionId = clientIdSessionId.get(clientIdentifier);
if (sessionId != null) {
request.setRequestedSessionId(sessionId);
if (log.isDebugEnabled()) {
@@ -211,8 +238,8 @@ public class CrawlerSessionManagerValve
// Has bot just created a session, if so make a note of it
HttpSession s = request.getSession(false);
if (s != null) {
- clientIpSessionId.put(clientIp, s.getId());
- sessionIdClientIp.put(s.getId(), clientIp);
+ clientIdSessionId.put(clientIdentifier, s.getId());
+ sessionIdClientId.put(s.getId(), clientIdentifier);
// #valueUnbound() will be called on session expiration
s.setAttribute(this.getClass().getName(), this);
s.setMaxInactiveInterval(sessionInactiveInterval);
@@ -231,11 +258,23 @@ public class CrawlerSessionManagerValve
}
+ private String getClientIdentifier(Host host, Context context, String
clientIp) {
+ StringBuilder result = new StringBuilder(clientIp);
+ if (isHostAware) {
+ result.append('-').append(host.getName());
+ }
+ if (isContextAware) {
+ result.append(context.getName());
+ }
+ return result.toString();
+ }
+
+
@Override
public void valueUnbound(HttpSessionBindingEvent event) {
- String clientIp = sessionIdClientIp.remove(event.getSession().getId());
- if (clientIp != null) {
- clientIpSessionId.remove(clientIp);
+ String clientIdentifier =
sessionIdClientId.remove(event.getSession().getId());
+ if (clientIdentifier != null) {
+ clientIdSessionId.remove(clientIdentifier);
}
}
}
Modified:
tomcat/trunk/test/org/apache/catalina/valves/TestCrawlerSessionManagerValve.java
URL:
http://svn.apache.org/viewvc/tomcat/trunk/test/org/apache/catalina/valves/TestCrawlerSessionManagerValve.java?rev=1829934&r1=1829933&r2=1829934&view=diff
==============================================================================
---
tomcat/trunk/test/org/apache/catalina/valves/TestCrawlerSessionManagerValve.java
(original)
+++
tomcat/trunk/test/org/apache/catalina/valves/TestCrawlerSessionManagerValve.java
Mon Apr 23 19:48:11 2018
@@ -16,12 +16,17 @@
*/
package org.apache.catalina.valves;
+import java.io.IOException;
+import java.util.Arrays;
import java.util.Collections;
+import javax.servlet.ServletException;
import javax.servlet.http.HttpSession;
import org.junit.Test;
+import org.apache.catalina.Context;
+import org.apache.catalina.Host;
import org.apache.catalina.Valve;
import org.apache.catalina.connector.Request;
import org.apache.catalina.connector.Response;
@@ -34,6 +39,7 @@ public class TestCrawlerSessionManagerVa
public void testCrawlerIpsPositive() throws Exception {
CrawlerSessionManagerValve valve = new CrawlerSessionManagerValve();
valve.setCrawlerIps("216\\.58\\.206\\.174");
+ valve.setCrawlerUserAgents(valve.getCrawlerUserAgents());
valve.setNext(EasyMock.createMock(Valve.class));
HttpSession session = createSessionExpectations(valve, true);
Request request = createRequestExpectations("216.58.206.174", session,
true);
@@ -49,6 +55,7 @@ public class TestCrawlerSessionManagerVa
public void testCrawlerIpsNegative() throws Exception {
CrawlerSessionManagerValve valve = new CrawlerSessionManagerValve();
valve.setCrawlerIps("216\\.58\\.206\\.174");
+ valve.setCrawlerUserAgents(valve.getCrawlerUserAgents());
valve.setNext(EasyMock.createMock(Valve.class));
HttpSession session = createSessionExpectations(valve, false);
Request request = createRequestExpectations("127.0.0.1", session,
false);
@@ -60,6 +67,32 @@ public class TestCrawlerSessionManagerVa
EasyMock.verify(request, session);
}
+ @Test
+ public void testCrawlerMultipleHostsHostAware() throws Exception {
+ CrawlerSessionManagerValve valve = new CrawlerSessionManagerValve();
+ valve.setCrawlerUserAgents(valve.getCrawlerUserAgents());
+ valve.setHostAware(true);
+ valve.setContextAware(true);
+ valve.setNext(EasyMock.createMock(Valve.class));
+
+ verifyCrawlingLocalhost(valve, "localhost");
+ verifyCrawlingLocalhost(valve, "example.invalid");
+ }
+
+
+ private void verifyCrawlingLocalhost(CrawlerSessionManagerValve valve,
String hostname)
+ throws IOException, ServletException {
+ HttpSession session = createSessionExpectations(valve, true);
+ Request request = createRequestExpectations("127.0.0.1", session,
true, hostname, "tomcatBot 1.0");
+
+ EasyMock.replay(request, session);
+
+ valve.invoke(request, EasyMock.createMock(Response.class));
+
+ EasyMock.verify(request, session);
+ }
+
+
private HttpSession createSessionExpectations(CrawlerSessionManagerValve
valve, boolean isBot) {
HttpSession session = EasyMock.createMock(HttpSession.class);
if (isBot) {
@@ -72,15 +105,36 @@ public class TestCrawlerSessionManagerVa
return session;
}
+
private Request createRequestExpectations(String ip, HttpSession session,
boolean isBot) {
+ return createRequestExpectations(ip, session, isBot, "localhost",
"something 1.0");
+ }
+
+ private Request createRequestExpectations(String ip, HttpSession session,
boolean isBot, String hostname, String userAgent) {
Request request = EasyMock.createMock(Request.class);
EasyMock.expect(request.getRemoteAddr()).andReturn(ip);
+
EasyMock.expect(request.getHost()).andReturn(simpleHostWithName(hostname));
+
EasyMock.expect(request.getContext()).andReturn(simpleContextWithName());
IExpectationSetters<HttpSession> setter =
EasyMock.expect(request.getSession(false))
.andReturn(null);
if (isBot) {
setter.andReturn(session);
}
-
EasyMock.expect(request.getHeaders("user-agent")).andReturn(Collections.emptyEnumeration());
+ EasyMock.expect(request.getHeaders("user-agent")).andAnswer(() ->
Collections.enumeration(Arrays.asList(userAgent)));
return request;
}
+
+ private Host simpleHostWithName(String hostname) {
+ Host host = EasyMock.createMock(Host.class);
+ EasyMock.expect(host.getName()).andReturn(hostname);
+ EasyMock.replay(host);
+ return host;
+ }
+
+ private Context simpleContextWithName() {
+ Context context = EasyMock.createMock(Context.class);
+ EasyMock.expect(context.getName()).andReturn("/examples");
+ EasyMock.replay(context);
+ return context;
+ }
}
Modified: tomcat/trunk/webapps/docs/changelog.xml
URL:
http://svn.apache.org/viewvc/tomcat/trunk/webapps/docs/changelog.xml?rev=1829934&r1=1829933&r2=1829934&view=diff
==============================================================================
--- tomcat/trunk/webapps/docs/changelog.xml (original)
+++ tomcat/trunk/webapps/docs/changelog.xml Mon Apr 23 19:48:11 2018
@@ -73,6 +73,11 @@
access Java 11 support to the annotation scanning code. (markt)
</add>
<fix>
+ <bug>62297</bug>: Enable the <code>CrawlerSessionManagerValve</code> to
+ correctly handle bots that crawl multiple hosts and/or web applications
+ when the Valve is configured on a Host or an Engine. (fschumacher)
+ </fix>
+ <fix>
<bug>62309</bug>: Fix a <code>SecurityException</code> when using
JASPIC
under a <code>SecurityManager</code> when authentication is not
mandatory. (markt)
Modified: tomcat/trunk/webapps/docs/config/valve.xml
URL:
http://svn.apache.org/viewvc/tomcat/trunk/webapps/docs/config/valve.xml?rev=1829934&r1=1829933&r2=1829934&view=diff
==============================================================================
--- tomcat/trunk/webapps/docs/config/valve.xml (original)
+++ tomcat/trunk/webapps/docs/config/valve.xml Mon Apr 23 19:48:11 2018
@@ -1820,6 +1820,13 @@
</p>
</attribute>
+ <attribute name="contextAware" required="false">
+ <p>Flag to use the context name together with the client IP to
+ identify the session to re-use. Can be combined with
<code>hostAware</code>.
+ Default value: <code>true</code>
+ </p>
+ </attribute>
+
<attribute name="crawlerIps" required="false">
<p>Regular expression (using <code>java.util.regex</code>) that client
IP is matched against to determine if a request is from a web crawler.
@@ -1833,6 +1840,13 @@
<code>.*[bB]ot.*|.*Yahoo! Slurp.*|.*Feedfetcher-Google.*</code> is
used.</p>
</attribute>
+ <attribute name="hostAware" required="false">
+ <p>Flag to use the configured host together with the client IP to
+ identify the session to re-use. Can be combined with
<code>contextAware</code>.
+ Default value: <code>true</code>
+ </p>
+ </attribute>
+
<attribute name="sessionInactiveInterval" required="false">
<p>The minimum time in seconds that the Crawler Session Manager Valve
should keep the mapping of client IP to session ID in memory without
any
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]