Modified: nutch/branches/2.x/src/plugin/protocol-http/src/java/org/apache/nutch/protocol/http/HttpResponse.java URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/plugin/protocol-http/src/java/org/apache/nutch/protocol/http/HttpResponse.java?rev=1650447&r1=1650446&r2=1650447&view=diff ============================================================================== --- nutch/branches/2.x/src/plugin/protocol-http/src/java/org/apache/nutch/protocol/http/HttpResponse.java (original) +++ nutch/branches/2.x/src/plugin/protocol-http/src/java/org/apache/nutch/protocol/http/HttpResponse.java Fri Jan 9 06:34:33 2015 @@ -51,18 +51,17 @@ public class HttpResponse implements Res private final Metadata headers = new SpellCheckedMetadata(); protected enum Scheme { - HTTP, - HTTPS, + HTTP, HTTPS, } public HttpResponse(HttpBase http, URL url, WebPage page) - throws ProtocolException, IOException { + throws ProtocolException, IOException { this.http = http; this.url = url; Scheme scheme = null; - + if ("http".equals(url.getProtocol())) { scheme = Scheme.HTTP; } else if ("https".equals(url.getProtocol())) { @@ -90,50 +89,56 @@ public class HttpResponse implements Res } else { port = 443; } - portString= ""; + portString = ""; } else { - port= url.getPort(); - portString= ":" + port; + port = url.getPort(); + portString = ":" + port; } Socket socket = null; try { - socket = new Socket(); // create the socket + socket = new Socket(); // create the socket socket.setSoTimeout(http.getTimeout()); - // connect String sockHost = http.useProxy() ? http.getProxyHost() : host; int sockPort = http.useProxy() ? http.getProxyPort() : port; - InetSocketAddress sockAddr= new InetSocketAddress(sockHost, sockPort); + InetSocketAddress sockAddr = new InetSocketAddress(sockHost, sockPort); socket.connect(sockAddr, http.getTimeout()); - + if (scheme == Scheme.HTTPS) { - SSLSocketFactory factory = (SSLSocketFactory)SSLSocketFactory.getDefault(); - SSLSocket sslsocket = (SSLSocket)factory.createSocket(socket, sockHost, sockPort, true); + SSLSocketFactory factory = (SSLSocketFactory) SSLSocketFactory + .getDefault(); + SSLSocket sslsocket = (SSLSocket) factory.createSocket(socket, + sockHost, sockPort, true); sslsocket.setUseClientMode(true); - - // Get the protocols and ciphers supported by this JVM - Set<String> protocols = new HashSet<String>(Arrays.asList(sslsocket.getSupportedProtocols())); - Set<String> ciphers = new HashSet<String>(Arrays.asList(sslsocket.getSupportedCipherSuites())); - + + // Get the protocols and ciphers supported by this JVM + Set<String> protocols = new HashSet<String>(Arrays.asList(sslsocket + .getSupportedProtocols())); + Set<String> ciphers = new HashSet<String>(Arrays.asList(sslsocket + .getSupportedCipherSuites())); + // Intersect with preferred protocols and ciphers protocols.retainAll(http.getTlsPreferredProtocols()); ciphers.retainAll(http.getTlsPreferredCipherSuites()); - - sslsocket.setEnabledProtocols(protocols.toArray(new String[protocols.size()])); - sslsocket.setEnabledCipherSuites(ciphers.toArray(new String[ciphers.size()])); - + + sslsocket.setEnabledProtocols(protocols.toArray(new String[protocols + .size()])); + sslsocket.setEnabledCipherSuites(ciphers.toArray(new String[ciphers + .size()])); + sslsocket.startHandshake(); socket = sslsocket; } - + conf = http.getConf(); if (sockAddr != null && conf.getBoolean("store.ip.address", false) == true) { - String ipString = sockAddr.getAddress().getHostAddress(); //get the ip address + String ipString = sockAddr.getAddress().getHostAddress(); // get the ip + // address page.getMetadata().put(new Utf8("_ip_"), - ByteBuffer.wrap(ipString.getBytes())); + ByteBuffer.wrap(ipString.getBytes())); } // make request @@ -141,9 +146,9 @@ public class HttpResponse implements Res StringBuffer reqStr = new StringBuffer("GET "); if (http.useProxy()) { - reqStr.append(url.getProtocol()+"://"+host+portString+path); + reqStr.append(url.getProtocol() + "://" + host + portString + path); } else { - reqStr.append(path); + reqStr.append(path); } reqStr.append(" HTTP/1.0\r\n"); @@ -161,39 +166,40 @@ public class HttpResponse implements Res String userAgent = http.getUserAgent(); if ((userAgent == null) || (userAgent.length() == 0)) { - if (Http.LOG.isErrorEnabled()) { Http.LOG.error("User-agent is not set!"); } + if (Http.LOG.isErrorEnabled()) { + Http.LOG.error("User-agent is not set!"); + } } else { reqStr.append("User-Agent: "); reqStr.append(userAgent); reqStr.append("\r\n"); } -// if (page.isReadable(WebPage.Field.MODIFIED_TIME.getIndex())) { - reqStr.append("If-Modified-Since: " + - HttpDateFormat.toString(page.getModifiedTime())); - reqStr.append("\r\n"); -// } + // if (page.isReadable(WebPage.Field.MODIFIED_TIME.getIndex())) { + reqStr.append("If-Modified-Since: " + + HttpDateFormat.toString(page.getModifiedTime())); + reqStr.append("\r\n"); + // } reqStr.append("\r\n"); - byte[] reqBytes= reqStr.toString().getBytes(); + byte[] reqBytes = reqStr.toString().getBytes(); req.write(reqBytes); req.flush(); - PushbackInputStream in = // process response - new PushbackInputStream( - new BufferedInputStream(socket.getInputStream(), Http.BUFFER_SIZE), - Http.BUFFER_SIZE) ; + PushbackInputStream in = // process response + new PushbackInputStream(new BufferedInputStream(socket.getInputStream(), + Http.BUFFER_SIZE), Http.BUFFER_SIZE); StringBuffer line = new StringBuffer(); - boolean haveSeenNonContinueStatus= false; + boolean haveSeenNonContinueStatus = false; while (!haveSeenNonContinueStatus) { // parse status code line this.code = parseStatusLine(in, line); // parse headers parseHeaders(in, line); - haveSeenNonContinueStatus= code != 100; // 100 is "Continue" + haveSeenNonContinueStatus = code != 100; // 100 is "Continue" } String transferEncoding = getHeader(Response.TRANSFER_ENCODING); @@ -228,10 +234,10 @@ public class HttpResponse implements Res } - - /* ------------------------- * - * <implementation:Response> * - * ------------------------- */ + /* + * ------------------------- * <implementation:Response> * + * ------------------------- + */ public URL getUrl() { return url; @@ -253,15 +259,15 @@ public class HttpResponse implements Res return content; } - /* ------------------------- * - * <implementation:Response> * - * ------------------------- */ - + /* + * ------------------------- * <implementation:Response> * + * ------------------------- + */ - private void readPlainContent(InputStream in) - throws HttpException, IOException { + private void readPlainContent(InputStream in) throws HttpException, + IOException { - int contentLength = Integer.MAX_VALUE; // get content length + int contentLength = Integer.MAX_VALUE; // get content length String contentLengthString = headers.get(Response.CONTENT_LENGTH); if (contentLengthString != null) { contentLengthString = contentLengthString.trim(); @@ -269,12 +275,13 @@ public class HttpResponse implements Res if (!contentLengthString.isEmpty()) contentLength = Integer.parseInt(contentLengthString); } catch (NumberFormatException e) { - throw new HttpException("bad content length: "+contentLengthString); + throw new HttpException("bad content length: " + contentLengthString); } } - if (http.getMaxContent() >= 0 - && contentLength > http.getMaxContent()) // limit download size - contentLength = http.getMaxContent(); + if (http.getMaxContent() >= 0 && contentLength > http.getMaxContent()) // limit + // download + // size + contentLength = http.getMaxContent(); ByteArrayOutputStream out = new ByteArrayOutputStream(Http.BUFFER_SIZE); byte[] bytes = new byte[Http.BUFFER_SIZE]; @@ -384,38 +391,37 @@ public class HttpResponse implements Res parseHeaders(in, line); } - + private int parseStatusLine(PushbackInputStream in, StringBuffer line) - throws IOException, HttpException { + throws IOException, HttpException { readLine(in, line, false); int codeStart = line.indexOf(" "); - int codeEnd = line.indexOf(" ", codeStart+1); + int codeEnd = line.indexOf(" ", codeStart + 1); // handle lines with no plaintext result code, ie: // "HTTP/1.1 200" vs "HTTP/1.1 200 OK" if (codeEnd == -1) - codeEnd= line.length(); + codeEnd = line.length(); int code; try { - code= Integer.parseInt(line.substring(codeStart+1, codeEnd)); + code = Integer.parseInt(line.substring(codeStart + 1, codeEnd)); } catch (NumberFormatException e) { - throw new HttpException("bad status line '" + line - + "': " + e.getMessage(), e); + throw new HttpException("bad status line '" + line + "': " + + e.getMessage(), e); } return code; } + private void processHeaderLine(StringBuffer line) throws IOException, + HttpException { - private void processHeaderLine(StringBuffer line) - throws IOException, HttpException { - - int colonIndex = line.indexOf(":"); // key is up to colon + int colonIndex = line.indexOf(":"); // key is up to colon if (colonIndex == -1) { int i; - for (i= 0; i < line.length(); i++) + for (i = 0; i < line.length(); i++) if (!Character.isWhitespace(line.charAt(i))) break; if (i == line.length()) @@ -424,7 +430,7 @@ public class HttpResponse implements Res } String key = line.substring(0, colonIndex); - int valueStart = colonIndex+1; // skip whitespace + int valueStart = colonIndex + 1; // skip whitespace while (valueStart < line.length()) { int c = line.charAt(valueStart); if (c != ' ' && c != '\t') @@ -435,28 +441,27 @@ public class HttpResponse implements Res headers.set(key, value); } - // Adds headers to our headers Metadata private void parseHeaders(PushbackInputStream in, StringBuffer line) - throws IOException, HttpException { + throws IOException, HttpException { while (readLine(in, line, true) != 0) { // handle HTTP responses with missing blank line after headers int pos; - if ( ((pos= line.indexOf("<!DOCTYPE")) != -1) - || ((pos= line.indexOf("<HTML")) != -1) - || ((pos= line.indexOf("<html")) != -1) ) { + if (((pos = line.indexOf("<!DOCTYPE")) != -1) + || ((pos = line.indexOf("<HTML")) != -1) + || ((pos = line.indexOf("<html")) != -1)) { in.unread(line.substring(pos).getBytes("UTF-8")); line.setLength(pos); try { - //TODO: (CM) We don't know the header names here - //since we're just handling them generically. It would - //be nice to provide some sort of mapping function here - //for the returned header names to the standard metadata - //names in the ParseData class + // TODO: (CM) We don't know the header names here + // since we're just handling them generically. It would + // be nice to provide some sort of mapping function here + // for the returned header names to the standard metadata + // names in the ParseData class processHeaderLine(line); } catch (Exception e) { // fixme: @@ -470,29 +475,29 @@ public class HttpResponse implements Res } private static int readLine(PushbackInputStream in, StringBuffer line, - boolean allowContinuedLine) - throws IOException { + boolean allowContinuedLine) throws IOException { line.setLength(0); for (int c = in.read(); c != -1; c = in.read()) { switch (c) { - case '\r': - if (peek(in) == '\n') { - in.read(); - } - case '\n': - if (line.length() > 0) { - // at EOL -- check for continued line if the current - // (possibly continued) line wasn't blank - if (allowContinuedLine) - switch (peek(in)) { - case ' ' : case '\t': // line is continued - in.read(); - continue; - } - } - return line.length(); // else complete - default : - line.append((char)c); + case '\r': + if (peek(in) == '\n') { + in.read(); + } + case '\n': + if (line.length() > 0) { + // at EOL -- check for continued line if the current + // (possibly continued) line wasn't blank + if (allowContinuedLine) + switch (peek(in)) { + case ' ': + case '\t': // line is continued + in.read(); + continue; + } + } + return line.length(); // else complete + default: + line.append((char) c); } } throw new EOFException();
Modified: nutch/branches/2.x/src/plugin/protocol-http/src/test/org/apache/nutch/protocol/http/TestProtocolHttp.java URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/plugin/protocol-http/src/test/org/apache/nutch/protocol/http/TestProtocolHttp.java?rev=1650447&r1=1650446&r2=1650447&view=diff ============================================================================== --- nutch/branches/2.x/src/plugin/protocol-http/src/test/org/apache/nutch/protocol/http/TestProtocolHttp.java (original) +++ nutch/branches/2.x/src/plugin/protocol-http/src/test/org/apache/nutch/protocol/http/TestProtocolHttp.java Fri Jan 9 06:34:33 2015 @@ -34,7 +34,7 @@ import org.mortbay.jetty.servlet.Context import org.mortbay.jetty.servlet.ServletHolder; /** - * Test cases for protocol-http + * Test cases for protocol-http */ public class TestProtocolHttp { private static final String RES_DIR = System.getProperty("test.data", "."); @@ -44,7 +44,7 @@ public class TestProtocolHttp { private Context root; private Configuration conf; private int port; - + public void setUp(boolean redirection) throws Exception { this.conf = new Configuration(); this.conf.addResource("nutch-default.xml"); @@ -52,18 +52,18 @@ public class TestProtocolHttp { this.http = new Http(); this.http.setConf(conf); - + this.server = new Server(); - + if (redirection) { this.root = new Context(server, "/redirection", Context.SESSIONS); this.root.setAttribute("newContextURL", "/redirect"); - } - else { + } else { this.root = new Context(server, "/", Context.SESSIONS); } - ServletHolder sh = new ServletHolder(org.apache.jasper.servlet.JspServlet.class); + ServletHolder sh = new ServletHolder( + org.apache.jasper.servlet.JspServlet.class); this.root.addServlet(sh, "*.jsp"); this.root.setResourceBase(RES_DIR); } @@ -89,12 +89,14 @@ public class TestProtocolHttp { startServer(47500, true); fetchPage("/redirection", 302); } - + /** * Starts the Jetty server at a specified port and redirection parameter. * - * @param portno Port number. - * @param redirection whether redirection + * @param portno + * Port number. + * @param redirection + * whether redirection */ private void startServer(int portno, boolean redirection) throws Exception { port = portno; @@ -123,11 +125,13 @@ public class TestProtocolHttp { Response response = http.getResponse(url, p, true); ProtocolOutput out = http.getProtocolOutput(url.toString(), p); Content content = out.getContent(); - - assertEquals("HTTP Status Code for " + url, expectedCode, response.getCode()); - if (page.compareTo("/nonexists.html") != 0 - && page.compareTo("/brokenpage.jsp") != 0 - && page.compareTo("/redirection") != 0) - assertEquals("ContentType " + url, "application/xhtml+xml", content.getContentType()); + + assertEquals("HTTP Status Code for " + url, expectedCode, + response.getCode()); + if (page.compareTo("/nonexists.html") != 0 + && page.compareTo("/brokenpage.jsp") != 0 + && page.compareTo("/redirection") != 0) + assertEquals("ContentType " + url, "application/xhtml+xml", + content.getContentType()); } } Modified: nutch/branches/2.x/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/DummySSLProtocolSocketFactory.java URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/DummySSLProtocolSocketFactory.java?rev=1650447&r1=1650446&r2=1650447&view=diff ============================================================================== --- nutch/branches/2.x/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/DummySSLProtocolSocketFactory.java (original) +++ nutch/branches/2.x/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/DummySSLProtocolSocketFactory.java Fri Jan 9 06:34:33 2015 @@ -1,19 +1,19 @@ /* -* Licensed to the Apache Software Foundation (ASF) under one or more -* contributor license agreements. See the NOTICE file distributed with -* this work for additional information regarding copyright ownership. -* The ASF licenses this file to You under the Apache License, Version 2.0 -* (the "License"); you may not use this file except in compliance with -* the License. You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. -*/ + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ /* * Based on EasySSLProtocolSocketFactory from commons-httpclient: * @@ -41,10 +41,12 @@ import org.slf4j.LoggerFactory; import javax.net.ssl.SSLContext; import javax.net.ssl.TrustManager; -public class DummySSLProtocolSocketFactory implements SecureProtocolSocketFactory { +public class DummySSLProtocolSocketFactory implements + SecureProtocolSocketFactory { /** Logger object for this class. */ - private static final Logger LOG = LoggerFactory.getLogger(DummySSLProtocolSocketFactory.class); + private static final Logger LOG = LoggerFactory + .getLogger(DummySSLProtocolSocketFactory.class); private SSLContext sslcontext = null; @@ -58,10 +60,13 @@ public class DummySSLProtocolSocketFacto private static SSLContext createEasySSLContext() { try { SSLContext context = SSLContext.getInstance("SSL"); - context.init(null, new TrustManager[] { new DummyX509TrustManager(null) }, null); + context.init(null, + new TrustManager[] { new DummyX509TrustManager(null) }, null); return context; } catch (Exception e) { - if (LOG.isErrorEnabled()) { LOG.error(e.getMessage(), e); } + if (LOG.isErrorEnabled()) { + LOG.error(e.getMessage(), e); + } throw new HttpClientError(e.toString()); } } @@ -76,10 +81,11 @@ public class DummySSLProtocolSocketFacto /** * @see org.apache.commons.httpclient.protocol.SecureProtocolSocketFactory#createSocket(String,int,InetAddress,int) */ - public Socket createSocket(String host, int port, InetAddress clientHost, int clientPort) throws IOException, - UnknownHostException { + public Socket createSocket(String host, int port, InetAddress clientHost, + int clientPort) throws IOException, UnknownHostException { - return getSSLContext().getSocketFactory().createSocket(host, port, clientHost, clientPort); + return getSSLContext().getSocketFactory().createSocket(host, port, + clientHost, clientPort); } /** @@ -93,20 +99,28 @@ public class DummySSLProtocolSocketFacto * throws an {@link ConnectTimeoutException} * </p> * - * @param host the host name/IP - * @param port the port on the host - * @param localAddress the local host name/IP to bind the socket to - * @param localPort the port on the local machine - * @param params {@link HttpConnectionParams Http connection parameters} + * @param host + * the host name/IP + * @param port + * the port on the host + * @param localAddress + * the local host name/IP to bind the socket to + * @param localPort + * the port on the local machine + * @param params + * {@link HttpConnectionParams Http connection parameters} * * @return Socket a new socket * - * @throws IOException if an I/O error occurs while creating the socket - * @throws UnknownHostException if the IP address of the host cannot be - * determined + * @throws IOException + * if an I/O error occurs while creating the socket + * @throws UnknownHostException + * if the IP address of the host cannot be determined */ - public Socket createSocket(final String host, final int port, final InetAddress localAddress, final int localPort, - final HttpConnectionParams params) throws IOException, UnknownHostException, ConnectTimeoutException { + public Socket createSocket(final String host, final int port, + final InetAddress localAddress, final int localPort, + final HttpConnectionParams params) throws IOException, + UnknownHostException, ConnectTimeoutException { if (params == null) { throw new IllegalArgumentException("Parameters may not be null"); } @@ -115,27 +129,31 @@ public class DummySSLProtocolSocketFacto return createSocket(host, port, localAddress, localPort); } else { // To be eventually deprecated when migrated to Java 1.4 or above - return ControllerThreadSocketFactory.createSocket(this, host, port, localAddress, localPort, timeout); + return ControllerThreadSocketFactory.createSocket(this, host, port, + localAddress, localPort, timeout); } } /** * @see org.apache.commons.httpclient.protocol.SecureProtocolSocketFactory#createSocket(String,int) */ - public Socket createSocket(String host, int port) throws IOException, UnknownHostException { + public Socket createSocket(String host, int port) throws IOException, + UnknownHostException { return getSSLContext().getSocketFactory().createSocket(host, port); } /** * @see org.apache.commons.httpclient.protocol.SecureProtocolSocketFactory#createSocket(Socket,String,int,boolean) */ - public Socket createSocket(Socket socket, String host, int port, boolean autoClose) throws IOException, - UnknownHostException { - return getSSLContext().getSocketFactory().createSocket(socket, host, port, autoClose); + public Socket createSocket(Socket socket, String host, int port, + boolean autoClose) throws IOException, UnknownHostException { + return getSSLContext().getSocketFactory().createSocket(socket, host, port, + autoClose); } public boolean equals(Object obj) { - return ((obj != null) && obj.getClass().equals(DummySSLProtocolSocketFactory.class)); + return ((obj != null) && obj.getClass().equals( + DummySSLProtocolSocketFactory.class)); } public int hashCode() { Modified: nutch/branches/2.x/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/DummyX509TrustManager.java URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/DummyX509TrustManager.java?rev=1650447&r1=1650446&r2=1650447&view=diff ============================================================================== --- nutch/branches/2.x/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/DummyX509TrustManager.java (original) +++ nutch/branches/2.x/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/DummyX509TrustManager.java Fri Jan 9 06:34:33 2015 @@ -1,19 +1,19 @@ /** -* Licensed to the Apache Software Foundation (ASF) under one or more -* contributor license agreements. See the NOTICE file distributed with -* this work for additional information regarding copyright ownership. -* The ASF licenses this file to You under the Apache License, Version 2.0 -* (the "License"); you may not use this file except in compliance with -* the License. You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. -*/ + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ /* * Based on EasyX509TrustManager from commons-httpclient. */ @@ -30,53 +30,57 @@ import javax.net.ssl.TrustManagerFactory import javax.net.ssl.TrustManager; import javax.net.ssl.X509TrustManager; -public class DummyX509TrustManager implements X509TrustManager -{ - private X509TrustManager standardTrustManager = null; - - /** - * Constructor for DummyX509TrustManager. - */ - public DummyX509TrustManager(KeyStore keystore) throws NoSuchAlgorithmException, KeyStoreException { - super(); - String algo = TrustManagerFactory.getDefaultAlgorithm(); - TrustManagerFactory factory = TrustManagerFactory.getInstance(algo); - factory.init(keystore); - TrustManager[] trustmanagers = factory.getTrustManagers(); - if (trustmanagers.length == 0) { - throw new NoSuchAlgorithmException(algo + " trust manager not supported"); - } - this.standardTrustManager = (X509TrustManager)trustmanagers[0]; - } - - /** - * @see javax.net.ssl.X509TrustManager#checkClientTrusted(X509Certificate[], String) - */ - public boolean isClientTrusted(X509Certificate[] certificates) { - return true; - } - - /** - * @see javax.net.ssl.X509TrustManager#checkServerTrusted(X509Certificate[], String) - */ - public boolean isServerTrusted(X509Certificate[] certificates) { - return true; - } +public class DummyX509TrustManager implements X509TrustManager { + private X509TrustManager standardTrustManager = null; - /** - * @see javax.net.ssl.X509TrustManager#getAcceptedIssuers() - */ - public X509Certificate[] getAcceptedIssuers() { - return this.standardTrustManager.getAcceptedIssuers(); - } + /** + * Constructor for DummyX509TrustManager. + */ + public DummyX509TrustManager(KeyStore keystore) + throws NoSuchAlgorithmException, KeyStoreException { + super(); + String algo = TrustManagerFactory.getDefaultAlgorithm(); + TrustManagerFactory factory = TrustManagerFactory.getInstance(algo); + factory.init(keystore); + TrustManager[] trustmanagers = factory.getTrustManagers(); + if (trustmanagers.length == 0) { + throw new NoSuchAlgorithmException(algo + " trust manager not supported"); + } + this.standardTrustManager = (X509TrustManager) trustmanagers[0]; + } + + /** + * @see javax.net.ssl.X509TrustManager#checkClientTrusted(X509Certificate[], + * String) + */ + public boolean isClientTrusted(X509Certificate[] certificates) { + return true; + } + + /** + * @see javax.net.ssl.X509TrustManager#checkServerTrusted(X509Certificate[], + * String) + */ + public boolean isServerTrusted(X509Certificate[] certificates) { + return true; + } + + /** + * @see javax.net.ssl.X509TrustManager#getAcceptedIssuers() + */ + public X509Certificate[] getAcceptedIssuers() { + return this.standardTrustManager.getAcceptedIssuers(); + } + + public void checkClientTrusted(X509Certificate[] arg0, String arg1) + throws CertificateException { + // do nothing + + } + + public void checkServerTrusted(X509Certificate[] arg0, String arg1) + throws CertificateException { + // do nothing - public void checkClientTrusted(X509Certificate[] arg0, String arg1) throws CertificateException { - // do nothing - - } - - public void checkServerTrusted(X509Certificate[] arg0, String arg1) throws CertificateException { - // do nothing - - } + } } Modified: nutch/branches/2.x/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/Http.java URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/Http.java?rev=1650447&r1=1650446&r2=1650447&view=diff ============================================================================== --- nutch/branches/2.x/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/Http.java (original) +++ nutch/branches/2.x/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/Http.java Fri Jan 9 06:34:33 2015 @@ -67,395 +67,383 @@ import org.apache.nutch.util.NutchConfig */ public class Http extends HttpBase { - public static final Logger LOG = LoggerFactory.getLogger(Http.class); + public static final Logger LOG = LoggerFactory.getLogger(Http.class); - private static MultiThreadedHttpConnectionManager connectionManager = new MultiThreadedHttpConnectionManager(); + private static MultiThreadedHttpConnectionManager connectionManager = new MultiThreadedHttpConnectionManager(); - // Since the Configuration has not yet been set, - // then an unconfigured client is returned. - private static HttpClient client = new HttpClient(connectionManager); - private static String defaultUsername; - private static String defaultPassword; - private static String defaultRealm; - private static String defaultScheme; - private static String authFile; - private static String agentHost; - private static boolean authRulesRead = false; - private static Configuration conf; - - int maxThreadsTotal = 10; - - private String proxyUsername; - private String proxyPassword; - private String proxyRealm; - - private static final Collection<WebPage.Field> FIELDS = new HashSet<WebPage.Field>(); - - static { - FIELDS.add(WebPage.Field.MODIFIED_TIME); - FIELDS.add(WebPage.Field.HEADERS); - } - - @Override - public Collection<Field> getFields() { - return FIELDS; - } - - /** - * Returns the configured HTTP client. - * - * @return HTTP client - */ - static synchronized HttpClient getClient() { - return client; - } - - /** - * Constructs this plugin. - */ - public Http() { - super(LOG); - } - - /** - * Reads the configuration from the Nutch configuration files and sets the - * configuration. - * - * @param conf - * Configuration - */ - public void setConf(Configuration conf) { - super.setConf(conf); - Http.conf = conf; - this.maxThreadsTotal = conf.getInt("fetcher.threads.fetch", 10); - this.proxyUsername = conf.get("http.proxy.username", ""); - this.proxyPassword = conf.get("http.proxy.password", ""); - this.proxyRealm = conf.get("http.proxy.realm", ""); - agentHost = conf.get("http.agent.host", ""); - authFile = conf.get("http.auth.file", ""); - configureClient(); - try { - setCredentials(); - } catch (Exception ex) { - if (LOG.isErrorEnabled()) { - LOG.error("Could not read " + authFile + " : " - + ex.getMessage()); - } - } - } - - /** - * Main method. - * - * @param args - * Command line arguments - */ - public static void main(String[] args) throws Exception { - Http http = new Http(); - http.setConf(NutchConfiguration.create()); - main(http, args); - } - - /** - * Fetches the <code>url</code> with a configured HTTP client and gets the - * response. - * - * @param url - * URL to be fetched - * @param datum - * Crawl data - * @param redirect - * Follow redirects if and only if true - * @return HTTP response - */ - protected Response getResponse(URL url, WebPage page, boolean redirect) - throws ProtocolException, IOException { - resolveCredentials(url); - return new HttpResponse(this, url, page, redirect); - } - - /** - * Configures the HTTP client - */ - private void configureClient() { - - // Set up an HTTPS socket factory that accepts self-signed certs. - ProtocolSocketFactory factory = new SSLProtocolSocketFactory(); - Protocol https = new Protocol("https", factory, 443); - Protocol.registerProtocol("https", https); - - HttpConnectionManagerParams params = connectionManager.getParams(); - params.setConnectionTimeout(timeout); - params.setSoTimeout(timeout); - params.setSendBufferSize(BUFFER_SIZE); - params.setReceiveBufferSize(BUFFER_SIZE); - params.setMaxTotalConnections(maxThreadsTotal); - - //Also set max connections per host to maxThreadsTotal since all threads - //might be used to fetch from the same host - otherwise timeout errors can occur - params.setDefaultMaxConnectionsPerHost(maxThreadsTotal); - - // executeMethod(HttpMethod) seems to ignore the connection timeout on - // the connection manager. - // set it explicitly on the HttpClient. - client.getParams().setConnectionManagerTimeout(timeout); - - HostConfiguration hostConf = client.getHostConfiguration(); - ArrayList<Header> headers = new ArrayList<Header>(); - // Set the User Agent in the header - headers.add(new Header("User-Agent", userAgent)); - // prefer English - headers.add(new Header("Accept-Language", - "en-us,en-gb,en;q=0.7,*;q=0.3")); - // prefer UTF-8 - headers.add(new Header("Accept-Charset", - "utf-8,ISO-8859-1;q=0.7,*;q=0.7")); - // prefer understandable formats - headers.add(new Header( - "Accept", - "text/html,application/xml;q=0.9,application/xhtml+xml,text/xml;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5")); - // accept gzipped content - headers.add(new Header("Accept-Encoding", "x-gzip, gzip, deflate")); - hostConf.getParams().setParameter("http.default-headers", headers); - - // HTTP proxy server details - if (useProxy) { - hostConf.setProxy(proxyHost, proxyPort); - - if (proxyUsername.length() > 0) { - - AuthScope proxyAuthScope = getAuthScope(this.proxyHost, - this.proxyPort, this.proxyRealm); - - NTCredentials proxyCredentials = new NTCredentials( - this.proxyUsername, this.proxyPassword, Http.agentHost, - this.proxyRealm); - - client.getState().setProxyCredentials(proxyAuthScope, - proxyCredentials); - } - } - - } - - /** - * Reads authentication configuration file (defined as 'http.auth.file' in - * Nutch configuration file) and sets the credentials for the configured - * authentication scopes in the HTTP client object. - * - * @throws ParserConfigurationException - * If a document builder can not be created. - * @throws SAXException - * If any parsing error occurs. - * @throws IOException - * If any I/O error occurs. - */ - private static synchronized void setCredentials() - throws ParserConfigurationException, SAXException, IOException { - - if (authRulesRead) - return; - - authRulesRead = true; // Avoid re-attempting to read - - InputStream is = conf.getConfResourceAsInputStream(authFile); - if (is != null) { - Document doc = DocumentBuilderFactory.newInstance() - .newDocumentBuilder().parse(is); - - Element rootElement = doc.getDocumentElement(); - if (!"auth-configuration".equals(rootElement.getTagName())) { - if (LOG.isWarnEnabled()) - LOG.warn("Bad auth conf file: root element <" - + rootElement.getTagName() + "> found in " - + authFile + " - must be <auth-configuration>"); - } - - // For each set of credentials - NodeList credList = rootElement.getChildNodes(); - for (int i = 0; i < credList.getLength(); i++) { - Node credNode = credList.item(i); - if (!(credNode instanceof Element)) - continue; - - Element credElement = (Element) credNode; - if (!"credentials".equals(credElement.getTagName())) { - if (LOG.isWarnEnabled()) - LOG.warn("Bad auth conf file: Element <" - + credElement.getTagName() - + "> not recognized in " + authFile - + " - expected <credentials>"); - continue; - } - - String username = credElement.getAttribute("username"); - String password = credElement.getAttribute("password"); - - // For each authentication scope - NodeList scopeList = credElement.getChildNodes(); - for (int j = 0; j < scopeList.getLength(); j++) { - Node scopeNode = scopeList.item(j); - if (!(scopeNode instanceof Element)) - continue; - - Element scopeElement = (Element) scopeNode; - - if ("default".equals(scopeElement.getTagName())) { - - // Determine realm and scheme, if any - String realm = scopeElement.getAttribute("realm"); - String scheme = scopeElement.getAttribute("scheme"); - - // Set default credentials - defaultUsername = username; - defaultPassword = password; - defaultRealm = realm; - defaultScheme = scheme; - - if (LOG.isTraceEnabled()) { - LOG.trace("Credentials - username: " + username - + "; set as default" + " for realm: " - + realm + "; scheme: " + scheme); - } - - } else if ("authscope".equals(scopeElement.getTagName())) { - - // Determine authentication scope details - String host = scopeElement.getAttribute("host"); - int port = -1; // For setting port to AuthScope.ANY_PORT - try { - port = Integer.parseInt(scopeElement - .getAttribute("port")); - } catch (Exception ex) { - // do nothing, port is already set to any port - } - String realm = scopeElement.getAttribute("realm"); - String scheme = scopeElement.getAttribute("scheme"); - - // Set credentials for the determined scope - AuthScope authScope = getAuthScope(host, port, realm, - scheme); - NTCredentials credentials = new NTCredentials(username, - password, agentHost, realm); - - client.getState() - .setCredentials(authScope, credentials); - - if (LOG.isTraceEnabled()) { - LOG.trace("Credentials - username: " + username - + "; set for AuthScope - " + "host: " - + host + "; port: " + port + "; realm: " - + realm + "; scheme: " + scheme); - } - - } else { - if (LOG.isWarnEnabled()) - LOG.warn("Bad auth conf file: Element <" - + scopeElement.getTagName() - + "> not recognized in " + authFile - + " - expected <authscope>"); - } - } - is.close(); - } - } - } - - /** - * If credentials for the authentication scope determined from the specified - * <code>url</code> is not already set in the HTTP client, then this method - * sets the default credentials to fetch the specified <code>url</code>. If - * credentials are found for the authentication scope, the method returns - * without altering the client. - * - * @param url - * URL to be fetched - */ - private void resolveCredentials(URL url) { - - if (defaultUsername != null && defaultUsername.length() > 0) { - - int port = url.getPort(); - if (port == -1) { - if ("https".equals(url.getProtocol())) - port = 443; - else - port = 80; - } - - AuthScope scope = new AuthScope(url.getHost(), port); - - if (client.getState().getCredentials(scope) != null) { - if (LOG.isTraceEnabled()) - LOG.trace("Pre-configured credentials with scope - host: " - + url.getHost() + "; port: " + port - + "; found for url: " + url); - - // Credentials are already configured, so do nothing and return - return; - } - - if (LOG.isTraceEnabled()) - LOG.trace("Pre-configured credentials with scope - host: " - + url.getHost() + "; port: " + port - + "; not found for url: " + url); - - AuthScope serverAuthScope = getAuthScope(url.getHost(), port, - defaultRealm, defaultScheme); - - NTCredentials serverCredentials = new NTCredentials( - defaultUsername, defaultPassword, agentHost, defaultRealm); - - client.getState() - .setCredentials(serverAuthScope, serverCredentials); - } - } - - /** - * Returns an authentication scope for the specified <code>host</code>, - * <code>port</code>, <code>realm</code> and <code>scheme</code>. - * - * @param host - * Host name or address. - * @param port - * Port number. - * @param realm - * Authentication realm. - * @param scheme - * Authentication scheme. - */ - private static AuthScope getAuthScope(String host, int port, String realm, - String scheme) { - - if (host.length() == 0) - host = null; - - if (port < 0) - port = -1; - - if (realm.length() == 0) - realm = null; - - if (scheme.length() == 0) - scheme = null; - - return new AuthScope(host, port, realm, scheme); - } - - /** - * Returns an authentication scope for the specified <code>host</code>, - * <code>port</code> and <code>realm</code>. - * - * @param host - * Host name or address. - * @param port - * Port number. - * @param realm - * Authentication realm. - */ - private static AuthScope getAuthScope(String host, int port, String realm) { + // Since the Configuration has not yet been set, + // then an unconfigured client is returned. + private static HttpClient client = new HttpClient(connectionManager); + private static String defaultUsername; + private static String defaultPassword; + private static String defaultRealm; + private static String defaultScheme; + private static String authFile; + private static String agentHost; + private static boolean authRulesRead = false; + private static Configuration conf; + + int maxThreadsTotal = 10; + + private String proxyUsername; + private String proxyPassword; + private String proxyRealm; + + private static final Collection<WebPage.Field> FIELDS = new HashSet<WebPage.Field>(); + + static { + FIELDS.add(WebPage.Field.MODIFIED_TIME); + FIELDS.add(WebPage.Field.HEADERS); + } + + @Override + public Collection<Field> getFields() { + return FIELDS; + } + + /** + * Returns the configured HTTP client. + * + * @return HTTP client + */ + static synchronized HttpClient getClient() { + return client; + } + + /** + * Constructs this plugin. + */ + public Http() { + super(LOG); + } + + /** + * Reads the configuration from the Nutch configuration files and sets the + * configuration. + * + * @param conf + * Configuration + */ + public void setConf(Configuration conf) { + super.setConf(conf); + Http.conf = conf; + this.maxThreadsTotal = conf.getInt("fetcher.threads.fetch", 10); + this.proxyUsername = conf.get("http.proxy.username", ""); + this.proxyPassword = conf.get("http.proxy.password", ""); + this.proxyRealm = conf.get("http.proxy.realm", ""); + agentHost = conf.get("http.agent.host", ""); + authFile = conf.get("http.auth.file", ""); + configureClient(); + try { + setCredentials(); + } catch (Exception ex) { + if (LOG.isErrorEnabled()) { + LOG.error("Could not read " + authFile + " : " + ex.getMessage()); + } + } + } + + /** + * Main method. + * + * @param args + * Command line arguments + */ + public static void main(String[] args) throws Exception { + Http http = new Http(); + http.setConf(NutchConfiguration.create()); + main(http, args); + } + + /** + * Fetches the <code>url</code> with a configured HTTP client and gets the + * response. + * + * @param url + * URL to be fetched + * @param datum + * Crawl data + * @param redirect + * Follow redirects if and only if true + * @return HTTP response + */ + protected Response getResponse(URL url, WebPage page, boolean redirect) + throws ProtocolException, IOException { + resolveCredentials(url); + return new HttpResponse(this, url, page, redirect); + } + + /** + * Configures the HTTP client + */ + private void configureClient() { + + // Set up an HTTPS socket factory that accepts self-signed certs. + ProtocolSocketFactory factory = new SSLProtocolSocketFactory(); + Protocol https = new Protocol("https", factory, 443); + Protocol.registerProtocol("https", https); + + HttpConnectionManagerParams params = connectionManager.getParams(); + params.setConnectionTimeout(timeout); + params.setSoTimeout(timeout); + params.setSendBufferSize(BUFFER_SIZE); + params.setReceiveBufferSize(BUFFER_SIZE); + params.setMaxTotalConnections(maxThreadsTotal); + + // Also set max connections per host to maxThreadsTotal since all threads + // might be used to fetch from the same host - otherwise timeout errors can + // occur + params.setDefaultMaxConnectionsPerHost(maxThreadsTotal); + + // executeMethod(HttpMethod) seems to ignore the connection timeout on + // the connection manager. + // set it explicitly on the HttpClient. + client.getParams().setConnectionManagerTimeout(timeout); + + HostConfiguration hostConf = client.getHostConfiguration(); + ArrayList<Header> headers = new ArrayList<Header>(); + // Set the User Agent in the header + headers.add(new Header("User-Agent", userAgent)); + // prefer English + headers.add(new Header("Accept-Language", "en-us,en-gb,en;q=0.7,*;q=0.3")); + // prefer UTF-8 + headers.add(new Header("Accept-Charset", "utf-8,ISO-8859-1;q=0.7,*;q=0.7")); + // prefer understandable formats + headers + .add(new Header( + "Accept", + "text/html,application/xml;q=0.9,application/xhtml+xml,text/xml;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5")); + // accept gzipped content + headers.add(new Header("Accept-Encoding", "x-gzip, gzip, deflate")); + hostConf.getParams().setParameter("http.default-headers", headers); + + // HTTP proxy server details + if (useProxy) { + hostConf.setProxy(proxyHost, proxyPort); + + if (proxyUsername.length() > 0) { + + AuthScope proxyAuthScope = getAuthScope(this.proxyHost, this.proxyPort, + this.proxyRealm); + + NTCredentials proxyCredentials = new NTCredentials(this.proxyUsername, + this.proxyPassword, Http.agentHost, this.proxyRealm); + + client.getState().setProxyCredentials(proxyAuthScope, proxyCredentials); + } + } + + } + + /** + * Reads authentication configuration file (defined as 'http.auth.file' in + * Nutch configuration file) and sets the credentials for the configured + * authentication scopes in the HTTP client object. + * + * @throws ParserConfigurationException + * If a document builder can not be created. + * @throws SAXException + * If any parsing error occurs. + * @throws IOException + * If any I/O error occurs. + */ + private static synchronized void setCredentials() + throws ParserConfigurationException, SAXException, IOException { + + if (authRulesRead) + return; + + authRulesRead = true; // Avoid re-attempting to read + + InputStream is = conf.getConfResourceAsInputStream(authFile); + if (is != null) { + Document doc = DocumentBuilderFactory.newInstance().newDocumentBuilder() + .parse(is); + + Element rootElement = doc.getDocumentElement(); + if (!"auth-configuration".equals(rootElement.getTagName())) { + if (LOG.isWarnEnabled()) + LOG.warn("Bad auth conf file: root element <" + + rootElement.getTagName() + "> found in " + authFile + + " - must be <auth-configuration>"); + } + + // For each set of credentials + NodeList credList = rootElement.getChildNodes(); + for (int i = 0; i < credList.getLength(); i++) { + Node credNode = credList.item(i); + if (!(credNode instanceof Element)) + continue; + + Element credElement = (Element) credNode; + if (!"credentials".equals(credElement.getTagName())) { + if (LOG.isWarnEnabled()) + LOG.warn("Bad auth conf file: Element <" + credElement.getTagName() + + "> not recognized in " + authFile + + " - expected <credentials>"); + continue; + } + + String username = credElement.getAttribute("username"); + String password = credElement.getAttribute("password"); + + // For each authentication scope + NodeList scopeList = credElement.getChildNodes(); + for (int j = 0; j < scopeList.getLength(); j++) { + Node scopeNode = scopeList.item(j); + if (!(scopeNode instanceof Element)) + continue; + + Element scopeElement = (Element) scopeNode; + + if ("default".equals(scopeElement.getTagName())) { + + // Determine realm and scheme, if any + String realm = scopeElement.getAttribute("realm"); + String scheme = scopeElement.getAttribute("scheme"); + + // Set default credentials + defaultUsername = username; + defaultPassword = password; + defaultRealm = realm; + defaultScheme = scheme; + + if (LOG.isTraceEnabled()) { + LOG.trace("Credentials - username: " + username + + "; set as default" + " for realm: " + realm + "; scheme: " + + scheme); + } + + } else if ("authscope".equals(scopeElement.getTagName())) { + + // Determine authentication scope details + String host = scopeElement.getAttribute("host"); + int port = -1; // For setting port to AuthScope.ANY_PORT + try { + port = Integer.parseInt(scopeElement.getAttribute("port")); + } catch (Exception ex) { + // do nothing, port is already set to any port + } + String realm = scopeElement.getAttribute("realm"); + String scheme = scopeElement.getAttribute("scheme"); + + // Set credentials for the determined scope + AuthScope authScope = getAuthScope(host, port, realm, scheme); + NTCredentials credentials = new NTCredentials(username, password, + agentHost, realm); + + client.getState().setCredentials(authScope, credentials); + + if (LOG.isTraceEnabled()) { + LOG.trace("Credentials - username: " + username + + "; set for AuthScope - " + "host: " + host + "; port: " + + port + "; realm: " + realm + "; scheme: " + scheme); + } + + } else { + if (LOG.isWarnEnabled()) + LOG.warn("Bad auth conf file: Element <" + + scopeElement.getTagName() + "> not recognized in " + + authFile + " - expected <authscope>"); + } + } + is.close(); + } + } + } + + /** + * If credentials for the authentication scope determined from the specified + * <code>url</code> is not already set in the HTTP client, then this method + * sets the default credentials to fetch the specified <code>url</code>. If + * credentials are found for the authentication scope, the method returns + * without altering the client. + * + * @param url + * URL to be fetched + */ + private void resolveCredentials(URL url) { + + if (defaultUsername != null && defaultUsername.length() > 0) { + + int port = url.getPort(); + if (port == -1) { + if ("https".equals(url.getProtocol())) + port = 443; + else + port = 80; + } + + AuthScope scope = new AuthScope(url.getHost(), port); + + if (client.getState().getCredentials(scope) != null) { + if (LOG.isTraceEnabled()) + LOG.trace("Pre-configured credentials with scope - host: " + + url.getHost() + "; port: " + port + "; found for url: " + url); + + // Credentials are already configured, so do nothing and return + return; + } + + if (LOG.isTraceEnabled()) + LOG.trace("Pre-configured credentials with scope - host: " + + url.getHost() + "; port: " + port + "; not found for url: " + url); + + AuthScope serverAuthScope = getAuthScope(url.getHost(), port, + defaultRealm, defaultScheme); + + NTCredentials serverCredentials = new NTCredentials(defaultUsername, + defaultPassword, agentHost, defaultRealm); + + client.getState().setCredentials(serverAuthScope, serverCredentials); + } + } + + /** + * Returns an authentication scope for the specified <code>host</code>, + * <code>port</code>, <code>realm</code> and <code>scheme</code>. + * + * @param host + * Host name or address. + * @param port + * Port number. + * @param realm + * Authentication realm. + * @param scheme + * Authentication scheme. + */ + private static AuthScope getAuthScope(String host, int port, String realm, + String scheme) { + + if (host.length() == 0) + host = null; + + if (port < 0) + port = -1; + + if (realm.length() == 0) + realm = null; + + if (scheme.length() == 0) + scheme = null; + + return new AuthScope(host, port, realm, scheme); + } + + /** + * Returns an authentication scope for the specified <code>host</code>, + * <code>port</code> and <code>realm</code>. + * + * @param host + * Host name or address. + * @param port + * Port number. + * @param realm + * Authentication realm. + */ + private static AuthScope getAuthScope(String host, int port, String realm) { - return getAuthScope(host, port, realm, ""); - } + return getAuthScope(host, port, realm, ""); + } } Modified: nutch/branches/2.x/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpAuthentication.java URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpAuthentication.java?rev=1650447&r1=1650446&r2=1650447&view=diff ============================================================================== --- nutch/branches/2.x/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpAuthentication.java (original) +++ nutch/branches/2.x/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpAuthentication.java Fri Jan 9 06:34:33 2015 @@ -15,32 +15,31 @@ * limitations under the License. */ package org.apache.nutch.protocol.httpclient; - + import java.util.List; /** - * The base level of services required for Http Authentication - * + * The base level of services required for Http Authentication + * * @see HttpAuthenticationFactory * - * @author Matt Tencati + * @author Matt Tencati */ public interface HttpAuthentication { - /** - * Gets the credentials generated by the HttpAuthentication - * object. May return null. - * - * @return The credentials value - */ - public List getCredentials(); + /** + * Gets the credentials generated by the HttpAuthentication object. May return + * null. + * + * @return The credentials value + */ + public List getCredentials(); - /** - * Gets the realm used by the HttpAuthentication object during creation. - * - * @return The realm value - */ - public String getRealm(); + /** + * Gets the realm used by the HttpAuthentication object during creation. + * + * @return The realm value + */ + public String getRealm(); } - Modified: nutch/branches/2.x/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpAuthenticationException.java URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpAuthenticationException.java?rev=1650447&r1=1650446&r2=1650447&view=diff ============================================================================== --- nutch/branches/2.x/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpAuthenticationException.java (original) +++ nutch/branches/2.x/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpAuthenticationException.java Fri Jan 9 06:34:33 2015 @@ -26,40 +26,46 @@ public class HttpAuthenticationException private static final long serialVersionUID = 1L; - /** - * Constructs a new exception with null as its detail message. - */ - public HttpAuthenticationException() { - super(); - } - - /** - * Constructs a new exception with the specified detail message. - * - * @param message the detail message. The detail message is saved for later retrieval by the {@link Throwable#getMessage()} method. - */ - public HttpAuthenticationException(String message) { - super(message); - } - - /** - * Constructs a new exception with the specified message and cause. - * - * @param message the detail message. The detail message is saved for later retrieval by the {@link Throwable#getMessage()} method. - * @param cause the cause (use {@link #getCause()} to retrieve the cause) - */ - public HttpAuthenticationException(String message, Throwable cause) { - super(message, cause); - } - - /** - * Constructs a new exception with the specified cause and detail message from - * given clause if it is not null. - * - * @param cause the cause (use {@link #getCause()} to retrieve the cause) - */ - public HttpAuthenticationException(Throwable cause) { - super(cause); - } + /** + * Constructs a new exception with null as its detail message. + */ + public HttpAuthenticationException() { + super(); + } + + /** + * Constructs a new exception with the specified detail message. + * + * @param message + * the detail message. The detail message is saved for later + * retrieval by the {@link Throwable#getMessage()} method. + */ + public HttpAuthenticationException(String message) { + super(message); + } + + /** + * Constructs a new exception with the specified message and cause. + * + * @param message + * the detail message. The detail message is saved for later + * retrieval by the {@link Throwable#getMessage()} method. + * @param cause + * the cause (use {@link #getCause()} to retrieve the cause) + */ + public HttpAuthenticationException(String message, Throwable cause) { + super(message, cause); + } + + /** + * Constructs a new exception with the specified cause and detail message from + * given clause if it is not null. + * + * @param cause + * the cause (use {@link #getCause()} to retrieve the cause) + */ + public HttpAuthenticationException(Throwable cause) { + super(cause); + } } Modified: nutch/branches/2.x/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpAuthenticationFactory.java URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpAuthenticationFactory.java?rev=1650447&r1=1650446&r2=1650447&view=diff ============================================================================== --- nutch/branches/2.x/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpAuthenticationFactory.java (original) +++ nutch/branches/2.x/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpAuthenticationFactory.java Fri Jan 9 06:34:33 2015 @@ -34,12 +34,10 @@ import org.apache.hadoop.conf.Configurab // Nutch imports import org.apache.nutch.metadata.Metadata; - /** - * Provides the Http protocol implementation - * with the ability to authenticate when prompted. The goal is to provide - * multiple authentication types but for now just the {@link HttpBasicAuthentication} authentication - * type is provided. + * Provides the Http protocol implementation with the ability to authenticate + * when prompted. The goal is to provide multiple authentication types but for + * now just the {@link HttpBasicAuthentication} authentication type is provided. * * @see HttpBasicAuthentication * @see Http @@ -49,94 +47,96 @@ import org.apache.nutch.metadata.Metadat */ public class HttpAuthenticationFactory implements Configurable { - /** - * The HTTP Authentication (WWW-Authenticate) header which is returned - * by a webserver requiring authentication. - */ - public static final String WWW_AUTHENTICATE = "WWW-Authenticate"; - - public static final Logger LOG = LoggerFactory.getLogger(HttpAuthenticationFactory.class); - - private static Map<?, ?> auths = new TreeMap<Object, Object>(); - - private Configuration conf = null; - - - public HttpAuthenticationFactory(Configuration conf) { - setConf(conf); - } - - - /* ---------------------------------- * - * <implementation:Configurable> * - * ---------------------------------- */ - - public void setConf(Configuration conf) { - this.conf = conf; - //if (conf.getBoolean("http.auth.verbose", false)) { - // LOG.setLevel(Level.FINE); - //} else { - // LOG.setLevel(Level.WARNING); - //} - } - - public Configuration getConf() { - return conf; - } - - /* ---------------------------------- * - * <implementation:Configurable> * - * ---------------------------------- */ - - - @SuppressWarnings("unchecked") - public HttpAuthentication findAuthentication(Metadata header) { - - if (header == null) return null; - - try { - Collection challenge = null; - if (header instanceof Metadata) { - Object o = header.get(WWW_AUTHENTICATE); - if (o instanceof Collection) { - challenge = (Collection<?>) o; - } else { - challenge = new ArrayList<String>(); - challenge.add(o.toString()); - } - } else { - String challengeString = header.get(WWW_AUTHENTICATE); - if (challengeString != null) { - challenge = new ArrayList<Object>(); - challenge.add(challengeString); - } - } - if (challenge == null) { - if (LOG.isTraceEnabled()) { - LOG.trace("Authentication challenge is null"); - } - return null; - } - - Iterator<?> i = challenge.iterator(); - HttpAuthentication auth = null; - while (i.hasNext() && auth == null) { - String challengeString = (String)i.next(); - if (challengeString.equals("NTLM")) { - challengeString="Basic realm=techweb"; - } - - if (LOG.isTraceEnabled()) { - LOG.trace("Checking challengeString=" + challengeString); - } - auth = HttpBasicAuthentication.getAuthentication(challengeString, conf); - if (auth != null) return auth; - - //TODO Add additional Authentication lookups here - } - } catch (Exception e) { - LOG.error("Failed with following exception: ", e); - } + /** + * The HTTP Authentication (WWW-Authenticate) header which is returned by a + * webserver requiring authentication. + */ + public static final String WWW_AUTHENTICATE = "WWW-Authenticate"; + + public static final Logger LOG = LoggerFactory + .getLogger(HttpAuthenticationFactory.class); + + private static Map<?, ?> auths = new TreeMap<Object, Object>(); + + private Configuration conf = null; + + public HttpAuthenticationFactory(Configuration conf) { + setConf(conf); + } + + /* + * ---------------------------------- * <implementation:Configurable> * + * ---------------------------------- + */ + + public void setConf(Configuration conf) { + this.conf = conf; + // if (conf.getBoolean("http.auth.verbose", false)) { + // LOG.setLevel(Level.FINE); + // } else { + // LOG.setLevel(Level.WARNING); + // } + } + + public Configuration getConf() { + return conf; + } + + /* + * ---------------------------------- * <implementation:Configurable> * + * ---------------------------------- + */ + + @SuppressWarnings("unchecked") + public HttpAuthentication findAuthentication(Metadata header) { + + if (header == null) + return null; + + try { + Collection challenge = null; + if (header instanceof Metadata) { + Object o = header.get(WWW_AUTHENTICATE); + if (o instanceof Collection) { + challenge = (Collection<?>) o; + } else { + challenge = new ArrayList<String>(); + challenge.add(o.toString()); + } + } else { + String challengeString = header.get(WWW_AUTHENTICATE); + if (challengeString != null) { + challenge = new ArrayList<Object>(); + challenge.add(challengeString); + } + } + if (challenge == null) { + if (LOG.isTraceEnabled()) { + LOG.trace("Authentication challenge is null"); + } return null; + } + + Iterator<?> i = challenge.iterator(); + HttpAuthentication auth = null; + while (i.hasNext() && auth == null) { + String challengeString = (String) i.next(); + if (challengeString.equals("NTLM")) { + challengeString = "Basic realm=techweb"; + } + + if (LOG.isTraceEnabled()) { + LOG.trace("Checking challengeString=" + challengeString); + } + auth = HttpBasicAuthentication.getAuthentication(challengeString, conf); + if (auth != null) + return auth; + + // TODO Add additional Authentication lookups here + } + } catch (Exception e) { + LOG.error("Failed with following exception: ", e); } + return null; + } } Modified: nutch/branches/2.x/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpBasicAuthentication.java URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpBasicAuthentication.java?rev=1650447&r1=1650446&r2=1650447&view=diff ============================================================================== --- nutch/branches/2.x/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpBasicAuthentication.java (original) +++ nutch/branches/2.x/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpBasicAuthentication.java Fri Jan 9 06:34:33 2015 @@ -35,154 +35,163 @@ import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configurable; - /** - * Implementation of RFC 2617 Basic Authentication. Usernames and passwords are stored - * in standard Nutch configuration files using the following properties: - * http.auth.basic.<realm>.user - * http.auth.basic.<realm>.pass + * Implementation of RFC 2617 Basic Authentication. Usernames and passwords are + * stored in standard Nutch configuration files using the following properties: + * http.auth.basic.<realm>.user http.auth.basic.<realm>.pass */ -public class HttpBasicAuthentication implements HttpAuthentication, Configurable { +public class HttpBasicAuthentication implements HttpAuthentication, + Configurable { - public static final Logger LOG = LoggerFactory.getLogger(HttpBasicAuthentication.class); + public static final Logger LOG = LoggerFactory + .getLogger(HttpBasicAuthentication.class); - private static Pattern basic = Pattern.compile("[bB][aA][sS][iI][cC] [rR][eE][aA][lL][mM]=\"(\\w*)\""); - - private static Map<String, HttpBasicAuthentication> authMap = new TreeMap<String, HttpBasicAuthentication>(); - - private Configuration conf = null; - private String challenge = null; - private ArrayList<String> credentials = null; - private String realm = null; - - - /** - * Construct an HttpBasicAuthentication for the given challenge - * parameters. The challenge parameters are returned by the web - * server using a WWW-Authenticate header. This will typically be - * represented by single line of the form <code>WWW-Authenticate: Basic realm="myrealm"</code> - * - * @param challenge WWW-Authenticate header from web server - */ - protected HttpBasicAuthentication(String challenge, Configuration conf) throws HttpAuthenticationException { - - setConf(conf); - this.challenge = challenge; - credentials = new ArrayList<String>(); - - String username = this.conf.get("http.auth.basic." + challenge + ".user"); - String password = this.conf.get("http.auth.basic." + challenge + ".password"); - - if (LOG.isTraceEnabled()) { - LOG.trace("BasicAuthentication challenge is " + challenge); - LOG.trace("BasicAuthentication username=" + username); - LOG.trace("BasicAuthentication password=" + password); - } - - if (username == null) { - throw new HttpAuthenticationException("Username for " + challenge + " is null"); - } + private static Pattern basic = Pattern + .compile("[bB][aA][sS][iI][cC] [rR][eE][aA][lL][mM]=\"(\\w*)\""); - if (password == null) { - throw new HttpAuthenticationException("Password for " + challenge + " is null"); + private static Map<String, HttpBasicAuthentication> authMap = new TreeMap<String, HttpBasicAuthentication>(); + + private Configuration conf = null; + private String challenge = null; + private ArrayList<String> credentials = null; + private String realm = null; + + /** + * Construct an HttpBasicAuthentication for the given challenge parameters. + * The challenge parameters are returned by the web server using a + * WWW-Authenticate header. This will typically be represented by single line + * of the form <code>WWW-Authenticate: Basic realm="myrealm"</code> + * + * @param challenge + * WWW-Authenticate header from web server + */ + protected HttpBasicAuthentication(String challenge, Configuration conf) + throws HttpAuthenticationException { + + setConf(conf); + this.challenge = challenge; + credentials = new ArrayList<String>(); + + String username = this.conf.get("http.auth.basic." + challenge + ".user"); + String password = this.conf.get("http.auth.basic." + challenge + + ".password"); + + if (LOG.isTraceEnabled()) { + LOG.trace("BasicAuthentication challenge is " + challenge); + LOG.trace("BasicAuthentication username=" + username); + LOG.trace("BasicAuthentication password=" + password); + } + + if (username == null) { + throw new HttpAuthenticationException("Username for " + challenge + + " is null"); + } + + if (password == null) { + throw new HttpAuthenticationException("Password for " + challenge + + " is null"); + } + + byte[] credBytes = (username + ":" + password).getBytes(); + credentials.add("Authorization: Basic " + + new String(Base64.encodeBase64(credBytes))); + if (LOG.isTraceEnabled()) { + LOG.trace("Basic credentials: " + credentials); + } + } + + /* + * ---------------------------------- * <implementation:Configurable> * + * ---------------------------------- + */ + + public void setConf(Configuration conf) { + this.conf = conf; + // if (conf.getBoolean("http.auth.verbose", false)) { + // LOG.setLevel(Level.FINE); + // } else { + // LOG.setLevel(Level.WARNING); + // } + } + + public Configuration getConf() { + return this.conf; + } + + /* + * ---------------------------------- * <implementation:Configurable> * + * ---------------------------------- + */ + + /** + * Gets the Basic credentials generated by this HttpBasicAuthentication object + * + * @return Credentials in the form of + * <code>Authorization: Basic <Base64 encoded userid:password> + * + */ + public List<String> getCredentials() { + return credentials; + } + + /** + * Gets the realm attribute of the HttpBasicAuthentication object. This should + * have been supplied to the {@link #getAuthentication(String, Configuration)} + * static method + * + * @return The realm + */ + public String getRealm() { + return realm; + } + + /** + * This method is responsible for providing Basic authentication information. + * The method caches authentication information for each realm so that the + * required authentication information does not need to be regenerated for + * every request. + * + * @param challenge + * The challenge string provided by the webserver. This is the text + * which follows the WWW-Authenticate header, including the Basic + * tag. + * @return An HttpBasicAuthentication object or null if unable to generate + * appropriate credentials. + */ + public static HttpBasicAuthentication getAuthentication(String challenge, + Configuration conf) { + if (challenge == null) + return null; + Matcher basicMatcher = basic.matcher(challenge); + if (basicMatcher.matches()) { + String realm = basicMatcher.group(1); + Object auth = authMap.get(realm); + if (auth == null) { + HttpBasicAuthentication newAuth = null; + try { + newAuth = new HttpBasicAuthentication(realm, conf); + } catch (HttpAuthenticationException hae) { + if (LOG.isTraceEnabled()) { + LOG.trace("HttpBasicAuthentication failed for " + challenge); + } } - - byte[] credBytes = (username + ":" + password).getBytes(); - credentials.add("Authorization: Basic " + new String(Base64.encodeBase64(credBytes))); - if (LOG.isTraceEnabled()) { - LOG.trace("Basic credentials: " + credentials); - } - } - - - /* ---------------------------------- * - * <implementation:Configurable> * - * ---------------------------------- */ - - public void setConf(Configuration conf) { - this.conf = conf; - //if (conf.getBoolean("http.auth.verbose", false)) { - // LOG.setLevel(Level.FINE); - //} else { - // LOG.setLevel(Level.WARNING); - //} - } - - public Configuration getConf() { - return this.conf; - } - - /* ---------------------------------- * - * <implementation:Configurable> * - * ---------------------------------- */ - - - /** - * Gets the Basic credentials generated by this - * HttpBasicAuthentication object - * - * @return Credentials in the form of <code>Authorization: Basic <Base64 encoded userid:password> - * - */ - public List<String> getCredentials() { - return credentials; - } - - - /** - * Gets the realm attribute of the HttpBasicAuthentication object. - * This should have been supplied to the {@link #getAuthentication(String, Configuration)} - * static method - * - * @return The realm - */ - public String getRealm() { - return realm; - } - - /** - * This method is responsible for providing Basic authentication information. The - * method caches authentication information for each realm so that the required - * authentication information does not need to be regenerated for every request. - * - * @param challenge The challenge string provided by the webserver. This is the - * text which follows the WWW-Authenticate header, including the Basic tag. - * @return An HttpBasicAuthentication object or null - * if unable to generate appropriate credentials. - */ - public static HttpBasicAuthentication getAuthentication(String challenge, Configuration conf) { - if (challenge == null) return null; - Matcher basicMatcher = basic.matcher(challenge); - if (basicMatcher.matches()) { - String realm = basicMatcher.group(1); - Object auth = authMap.get(realm); - if (auth == null) { - HttpBasicAuthentication newAuth = null; - try { - newAuth = new HttpBasicAuthentication(realm, conf); - } catch (HttpAuthenticationException hae) { - if (LOG.isTraceEnabled()) { - LOG.trace("HttpBasicAuthentication failed for " + challenge); - } - } - authMap.put(realm, newAuth); - return newAuth; - } else { - return (HttpBasicAuthentication) auth; - } - } - return null; - } - - /** - * Provides a pattern which can be used by an outside resource to determine if - * this class can provide credentials based on simple header information. It does - * not calculate any information regarding realms or challenges. - * - * @return Returns a Pattern which will match a Basic WWW-Authenticate header. - */ - public static final Pattern getBasicPattern() { - return basic; - } + authMap.put(realm, newAuth); + return newAuth; + } else { + return (HttpBasicAuthentication) auth; + } + } + return null; + } + + /** + * Provides a pattern which can be used by an outside resource to determine if + * this class can provide credentials based on simple header information. It + * does not calculate any information regarding realms or challenges. + * + * @return Returns a Pattern which will match a Basic WWW-Authenticate header. + */ + public static final Pattern getBasicPattern() { + return basic; + } } - Modified: nutch/branches/2.x/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpResponse.java URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpResponse.java?rev=1650447&r1=1650446&r2=1650447&view=diff ============================================================================== --- nutch/branches/2.x/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpResponse.java (original) +++ nutch/branches/2.x/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpResponse.java Fri Jan 9 06:34:33 2015 @@ -41,7 +41,7 @@ import org.apache.nutch.storage.WebPage; /** * An HTTP response. - * + * * @author Susam Pal */ public class HttpResponse implements Response { @@ -53,18 +53,22 @@ public class HttpResponse implements Res /** * Fetches the given <code>url</code> and prepares HTTP response. - * - * @param http An instance of the implementation class - * of this plugin - * @param url URL to be fetched - * @param page WebPage - * @param followRedirects Whether to follow redirects; follows - * redirect if and only if this is true - * @return HTTP response - * @throws IOException When an error occurs + * + * @param http + * An instance of the implementation class of this plugin + * @param url + * URL to be fetched + * @param page + * WebPage + * @param followRedirects + * Whether to follow redirects; follows redirect if and only if this + * is true + * @return HTTP response + * @throws IOException + * When an error occurs */ - HttpResponse(Http http, URL url, WebPage page, - boolean followRedirects) throws IOException { + HttpResponse(Http http, URL url, WebPage page, boolean followRedirects) + throws IOException { // Prepare GET method for HTTP request this.url = url; @@ -99,7 +103,7 @@ public class HttpResponse implements Res for (int i = 0; i < heads.length; i++) { headers.set(heads[i].getName(), heads[i].getValue()); } - + // Limit download size int contentLength = Integer.MAX_VALUE; String contentLengthString = headers.get(Response.CONTENT_LENGTH); @@ -107,12 +111,10 @@ public class HttpResponse implements Res try { contentLength = Integer.parseInt(contentLengthString.trim()); } catch (NumberFormatException ex) { - throw new HttpException("bad content length: " + - contentLengthString); + throw new HttpException("bad content length: " + contentLengthString); } } - if (http.getMaxContent() >= 0 && - contentLength > http.getMaxContent()) { + if (http.getMaxContent() >= 0 && contentLength > http.getMaxContent()) { contentLength = http.getMaxContent(); } @@ -132,7 +134,8 @@ public class HttpResponse implements Res content = out.toByteArray(); } catch (Exception e) { - if (code == 200) throw new IOException(e.toString()); + if (code == 200) + throw new IOException(e.toString()); // for codes other than 200 OK, we are fine with empty content } finally { if (in != null) { @@ -140,16 +143,15 @@ public class HttpResponse implements Res } get.abort(); } - + StringBuilder fetchTrace = null; if (Http.LOG.isTraceEnabled()) { // Trace message - fetchTrace = new StringBuilder("url: " + url + - "; status code: " + code + - "; bytes received: " + content.length); + fetchTrace = new StringBuilder("url: " + url + "; status code: " + code + + "; bytes received: " + content.length); if (getHeader(Response.CONTENT_LENGTH) != null) - fetchTrace.append("; Content-Length: " + - getHeader(Response.CONTENT_LENGTH)); + fetchTrace.append("; Content-Length: " + + getHeader(Response.CONTENT_LENGTH)); if (getHeader(Response.LOCATION) != null) fetchTrace.append("; Location: " + getHeader(Response.LOCATION)); } @@ -159,8 +161,7 @@ public class HttpResponse implements Res String contentEncoding = headers.get(Response.CONTENT_ENCODING); if (contentEncoding != null && Http.LOG.isTraceEnabled()) fetchTrace.append("; Content-Encoding: " + contentEncoding); - if ("gzip".equals(contentEncoding) || - "x-gzip".equals(contentEncoding)) { + if ("gzip".equals(contentEncoding) || "x-gzip".equals(contentEncoding)) { content = http.processGzipEncoded(content, url); if (Http.LOG.isTraceEnabled()) fetchTrace.append("; extracted to " + content.length + " bytes"); @@ -170,14 +171,14 @@ public class HttpResponse implements Res fetchTrace.append("; extracted to " + content.length + " bytes"); } } - + // add headers in metadata to row - if (page.getHeaders() != null) { - page.getHeaders().clear(); - } - for (String key : headers.names()) { - page.getHeaders().put(new Utf8(key), new Utf8(headers.get(key))); - } + if (page.getHeaders() != null) { + page.getHeaders().clear(); + } + for (String key : headers.names()) { + page.getHeaders().put(new Utf8(key), new Utf8(headers.get(key))); + } // Logger trace message if (Http.LOG.isTraceEnabled()) { @@ -188,15 +189,15 @@ public class HttpResponse implements Res } } - - /* ------------------------- * - * <implementation:Response> * - * ------------------------- */ - + /* + * ------------------------- * <implementation:Response> * + * ------------------------- + */ + public URL getUrl() { return url; } - + public int getCode() { return code; } @@ -204,7 +205,7 @@ public class HttpResponse implements Res public String getHeader(String name) { return headers.get(name); } - + public Metadata getHeaders() { return headers; } @@ -213,8 +214,8 @@ public class HttpResponse implements Res return content; } - /* -------------------------- * - * </implementation:Response> * - * -------------------------- */ + /* + * -------------------------- * </implementation:Response> * + * -------------------------- + */ } -