Author: olegk
Date: Sun Nov 9 11:00:27 2008
New Revision: 712532
URL: http://svn.apache.org/viewvc?rev=712532&view=rev
Log:
HttpClient implementation optimized specifically for web crawling
Added:
incubator/droids/trunk/droids-core/src/main/java/org/apache/droids/protocol/http/DroidsHttpClient.java
incubator/droids/trunk/droids-core/src/main/java/org/apache/droids/protocol/http/DroidsRequestRetryHandler.java
incubator/droids/trunk/droids-core/src/main/java/org/apache/droids/protocol/http/NoAuthHandler.java
Added:
incubator/droids/trunk/droids-core/src/main/java/org/apache/droids/protocol/http/DroidsHttpClient.java
URL:
http://svn.apache.org/viewvc/incubator/droids/trunk/droids-core/src/main/java/org/apache/droids/protocol/http/DroidsHttpClient.java?rev=712532&view=auto
==============================================================================
---
incubator/droids/trunk/droids-core/src/main/java/org/apache/droids/protocol/http/DroidsHttpClient.java
(added)
+++
incubator/droids/trunk/droids-core/src/main/java/org/apache/droids/protocol/http/DroidsHttpClient.java
Sun Nov 9 11:00:27 2008
@@ -0,0 +1,196 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.droids.protocol.http;
+
+import org.apache.http.ConnectionReuseStrategy;
+import org.apache.http.HttpVersion;
+import org.apache.http.auth.AuthSchemeRegistry;
+import org.apache.http.client.AuthenticationHandler;
+import org.apache.http.client.CookieStore;
+import org.apache.http.client.CredentialsProvider;
+import org.apache.http.client.HttpClient;
+import org.apache.http.client.HttpRequestRetryHandler;
+import org.apache.http.client.RedirectHandler;
+import org.apache.http.client.UserTokenHandler;
+import org.apache.http.client.protocol.RequestDefaultHeaders;
+import org.apache.http.conn.ClientConnectionManager;
+import org.apache.http.conn.ConnectionKeepAliveStrategy;
+import org.apache.http.conn.routing.HttpRoutePlanner;
+import org.apache.http.conn.scheme.PlainSocketFactory;
+import org.apache.http.conn.scheme.Scheme;
+import org.apache.http.conn.scheme.SchemeRegistry;
+import org.apache.http.conn.ssl.SSLSocketFactory;
+import org.apache.http.cookie.CookieSpecRegistry;
+import org.apache.http.impl.DefaultConnectionReuseStrategy;
+import org.apache.http.impl.client.AbstractHttpClient;
+import org.apache.http.impl.client.BasicCookieStore;
+import org.apache.http.impl.client.BasicCredentialsProvider;
+import org.apache.http.impl.client.DefaultConnectionKeepAliveStrategy;
+import org.apache.http.impl.client.DefaultRedirectHandler;
+import org.apache.http.impl.client.DefaultUserTokenHandler;
+import org.apache.http.impl.conn.ProxySelectorRoutePlanner;
+import org.apache.http.impl.conn.tsccm.ThreadSafeClientConnManager;
+import org.apache.http.params.BasicHttpParams;
+import org.apache.http.params.HttpParams;
+import org.apache.http.params.HttpProtocolParams;
+import org.apache.http.protocol.BasicHttpContext;
+import org.apache.http.protocol.BasicHttpProcessor;
+import org.apache.http.protocol.HTTP;
+import org.apache.http.protocol.HttpContext;
+import org.apache.http.protocol.HttpRequestExecutor;
+import org.apache.http.protocol.RequestConnControl;
+import org.apache.http.protocol.RequestContent;
+import org.apache.http.protocol.RequestExpectContinue;
+import org.apache.http.protocol.RequestTargetHost;
+import org.apache.http.protocol.RequestUserAgent;
+
+/**
+ * Version of {@link HttpClient} optimized specifically for web crawling.
+ * This class has support for HTTP state management and authentication
+ * disabled, based on the assumption that neither is needed for crawling.
+ */
+public class DroidsHttpClient extends AbstractHttpClient
+{
+
+  /**
+   * Creates the client, passing <code>null</code> for both arguments of the
+   * superclass constructor.
+   */
+  public DroidsHttpClient()
+  {
+    super(null, null);
+  }
+
+  /**
+   * Builds the default parameters: HTTP/1.1, the default content charset,
+   * expect-continue enabled and the "Apache-Droids/1.1" user agent.
+   */
+  @Override
+  protected HttpParams createHttpParams()
+  {
+    HttpParams defaults = new BasicHttpParams();
+    HttpProtocolParams.setVersion(defaults, HttpVersion.HTTP_1_1);
+    HttpProtocolParams.setContentCharset(defaults, HTTP.DEFAULT_CONTENT_CHARSET);
+    HttpProtocolParams.setUseExpectContinue(defaults, true);
+    HttpProtocolParams.setUserAgent(defaults, "Apache-Droids/1.1");
+    return defaults;
+  }
+
+  /**
+   * Builds the request interceptor chain. No cookie or authentication
+   * interceptors are registered.
+   */
+  @Override
+  protected BasicHttpProcessor createHttpProcessor()
+  {
+    BasicHttpProcessor processor = new BasicHttpProcessor();
+    processor.addInterceptor(new RequestDefaultHeaders());
+    // Required protocol interceptors
+    processor.addInterceptor(new RequestContent());
+    processor.addInterceptor(new RequestTargetHost());
+    // Recommended protocol interceptors
+    processor.addInterceptor(new RequestConnControl());
+    processor.addInterceptor(new RequestUserAgent());
+    processor.addInterceptor(new RequestExpectContinue());
+    return processor;
+  }
+
+  /**
+   * Creates a {@link ThreadSafeClientConnManager} that knows the "http"
+   * (port 80) and "https" (port 443) schemes.
+   */
+  @Override
+  protected ClientConnectionManager createClientConnectionManager()
+  {
+    SchemeRegistry schemes = new SchemeRegistry();
+    Scheme plain = new Scheme("http", PlainSocketFactory.getSocketFactory(), 80);
+    schemes.register(plain);
+    Scheme secure = new Scheme("https", SSLSocketFactory.getSocketFactory(), 443);
+    schemes.register(secure);
+    return new ThreadSafeClientConnManager(getParams(), schemes);
+  }
+
+  /** Uses the stock keep-alive strategy. */
+  @Override
+  protected ConnectionKeepAliveStrategy createConnectionKeepAliveStrategy()
+  {
+    return new DefaultConnectionKeepAliveStrategy();
+  }
+
+  /** Uses the stock connection reuse strategy. */
+  @Override
+  protected ConnectionReuseStrategy createConnectionReuseStrategy()
+  {
+    return new DefaultConnectionReuseStrategy();
+  }
+
+  /** Supplies a registry with no auth schemes registered. */
+  @Override
+  protected AuthSchemeRegistry createAuthSchemeRegistry()
+  {
+    // Deliberately left empty: there will be no auth support
+    return new AuthSchemeRegistry();
+  }
+
+  /** Supplies a registry with no cookie specs registered. */
+  @Override
+  protected CookieSpecRegistry createCookieSpecRegistry()
+  {
+    // Deliberately left empty: there will be no cookie support
+    return new CookieSpecRegistry();
+  }
+
+  /** Supplies a new {@link BasicCookieStore}. */
+  @Override
+  protected CookieStore createCookieStore()
+  {
+    return new BasicCookieStore();
+  }
+
+  /** Supplies a new {@link BasicCredentialsProvider}. */
+  @Override
+  protected CredentialsProvider createCredentialsProvider()
+  {
+    return new BasicCredentialsProvider();
+  }
+
+  /** Supplies a new {@link BasicHttpContext} with no attributes set. */
+  @Override
+  protected HttpContext createHttpContext()
+  {
+    return new BasicHttpContext();
+  }
+
+  /** Supplies a {@link DroidsRequestRetryHandler} with its default settings. */
+  @Override
+  protected HttpRequestRetryHandler createHttpRequestRetryHandler()
+  {
+    return new DroidsRequestRetryHandler();
+  }
+
+  /**
+   * Creates a {@link ProxySelectorRoutePlanner} from the connection manager's
+   * scheme registry, passing <code>null</code> as the proxy selector.
+   */
+  @Override
+  protected HttpRoutePlanner createHttpRoutePlanner()
+  {
+    SchemeRegistry schemes = getConnectionManager().getSchemeRegistry();
+    return new ProxySelectorRoutePlanner(schemes, null);
+  }
+
+  /** Proxy authentication is handled by a {@link NoAuthHandler}. */
+  @Override
+  protected AuthenticationHandler createProxyAuthenticationHandler()
+  {
+    return new NoAuthHandler();
+  }
+
+  /** Target authentication is handled by a {@link NoAuthHandler}. */
+  @Override
+  protected AuthenticationHandler createTargetAuthenticationHandler()
+  {
+    return new NoAuthHandler();
+  }
+
+  /** Uses the stock redirect handler. */
+  @Override
+  protected RedirectHandler createRedirectHandler()
+  {
+    return new DefaultRedirectHandler();
+  }
+
+  /** Supplies a new {@link HttpRequestExecutor}. */
+  @Override
+  protected HttpRequestExecutor createRequestExecutor()
+  {
+    return new HttpRequestExecutor();
+  }
+
+  /** Uses the stock user token handler. */
+  @Override
+  protected UserTokenHandler createUserTokenHandler()
+  {
+    return new DefaultUserTokenHandler();
+  }
+
+}
Added:
incubator/droids/trunk/droids-core/src/main/java/org/apache/droids/protocol/http/DroidsRequestRetryHandler.java
URL:
http://svn.apache.org/viewvc/incubator/droids/trunk/droids-core/src/main/java/org/apache/droids/protocol/http/DroidsRequestRetryHandler.java?rev=712532&view=auto
==============================================================================
---
incubator/droids/trunk/droids-core/src/main/java/org/apache/droids/protocol/http/DroidsRequestRetryHandler.java
(added)
+++
incubator/droids/trunk/droids-core/src/main/java/org/apache/droids/protocol/http/DroidsRequestRetryHandler.java
Sun Nov 9 11:00:27 2008
@@ -0,0 +1,83 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.droids.protocol.http;
+
+import java.io.IOException;
+import java.io.InterruptedIOException;
+import java.net.ConnectException;
+import java.net.UnknownHostException;
+
+import javax.net.ssl.SSLHandshakeException;
+
+import org.apache.http.NoHttpResponseException;
+import org.apache.http.client.HttpRequestRetryHandler;
+import org.apache.http.protocol.HttpContext;
+
+/**
+ * Decides whether a request that failed with an {@link IOException} should
+ * be executed again. Retries are refused once the execution count exceeds
+ * the configured limit, and for timeouts, unknown hosts, refused connections
+ * and SSL handshake failures; any other I/O failure is retried.
+ */
+class DroidsRequestRetryHandler implements HttpRequestRetryHandler
+{
+
+  /** Highest execution count for which a retry may still be granted. */
+  private final int retryCount;
+
+  /**
+   * @param retryCount requests whose execution count exceeds this value
+   *        are not retried
+   */
+  public DroidsRequestRetryHandler(int retryCount)
+  {
+    this.retryCount = retryCount;
+  }
+
+  /** Creates a handler with a retry count of 3. */
+  public DroidsRequestRetryHandler()
+  {
+    this(3);
+  }
+
+  /**
+   * Determines whether the failed request should be retried.
+   *
+   * @param exception the I/O failure that aborted the request
+   * @param executionCount execution count reported by the caller
+   * @param context the HTTP execution context
+   * @return <code>true</code> if the request should be retried
+   * @throws IllegalArgumentException if <code>exception</code> or
+   *         <code>context</code> is <code>null</code>
+   */
+  public boolean retryRequest(final IOException exception, int executionCount,
+      final HttpContext context)
+  {
+    if (exception == null) {
+      // Keep the message on one line: a string literal must not span lines
+      throw new IllegalArgumentException(
+          "Exception parameter may not be null");
+    }
+    if (context == null) {
+      throw new IllegalArgumentException("HTTP context may not be null");
+    }
+    if (executionCount > this.retryCount) {
+      // Do not retry if over max retry count
+      return false;
+    }
+    if (exception instanceof NoHttpResponseException) {
+      // Retry if the server dropped connection on us
+      return true;
+    }
+    if (exception instanceof InterruptedIOException) {
+      // Timeout
+      return false;
+    }
+    if (exception instanceof UnknownHostException) {
+      // Unknown host
+      return false;
+    }
+    if (exception instanceof ConnectException) {
+      // Connection refused
+      return false;
+    }
+    if (exception instanceof SSLHandshakeException) {
+      // SSL handshake exception
+      return false;
+    }
+    // otherwise retry
+    return true;
+  }
+
+}
Added:
incubator/droids/trunk/droids-core/src/main/java/org/apache/droids/protocol/http/NoAuthHandler.java
URL:
http://svn.apache.org/viewvc/incubator/droids/trunk/droids-core/src/main/java/org/apache/droids/protocol/http/NoAuthHandler.java?rev=712532&view=auto
==============================================================================
---
incubator/droids/trunk/droids-core/src/main/java/org/apache/droids/protocol/http/NoAuthHandler.java
(added)
+++
incubator/droids/trunk/droids-core/src/main/java/org/apache/droids/protocol/http/NoAuthHandler.java
Sun Nov 9 11:00:27 2008
@@ -0,0 +1,51 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.droids.protocol.http;
+
+import java.util.Collections;
+import java.util.Map;
+
+import org.apache.http.Header;
+import org.apache.http.HttpResponse;
+import org.apache.http.auth.AuthScheme;
+import org.apache.http.auth.AuthenticationException;
+import org.apache.http.auth.MalformedChallengeException;
+import org.apache.http.client.AuthenticationHandler;
+import org.apache.http.protocol.HttpContext;
+
+class NoAuthHandler implements AuthenticationHandler
+{
+
+ public Map<String, Header> getChallenges(HttpResponse response, HttpContext
context)
+ throws MalformedChallengeException
+ {
+ return Collections.emptyMap();
+ }
+
+ public boolean isAuthenticationRequested(HttpResponse response, HttpContext
context)
+ {
+ return false;
+ }
+
+ public AuthScheme selectScheme(Map<String, Header> challenges, HttpResponse
response, HttpContext context)
+ throws AuthenticationException
+ {
+ throw new AuthenticationException(
+ "Unable to respond to any of these challenges: " + challenges);
+ }
+
+}