This adds more than 4 MB of jars. Is an addition of this size really
necessary for this task?

davep

On Wed, Jun 18, 2008 at 1:20 AM,  <[EMAIL PROTECTED]> wrote:
> Author: etnu
> Date: Wed Jun 18 01:19:58 2008
> New Revision: 669108
>
> URL: http://svn.apache.org/viewvc?rev=669108&view=rev
> Log:
> Applied SHINDIG-391, which adds more robust character encoding detection for 
> HttpResponse using ICU, from Patrick Fairbank.
>
>
> Modified:
>    incubator/shindig/trunk/java/gadgets/pom.xml
>    
> incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/http/HttpResponse.java
>    
> incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/http/HttpResponseTest.java
>    incubator/shindig/trunk/pom.xml
>
> Modified: incubator/shindig/trunk/java/gadgets/pom.xml
> URL: 
> http://svn.apache.org/viewvc/incubator/shindig/trunk/java/gadgets/pom.xml?rev=669108&r1=669107&r2=669108&view=diff
> ==============================================================================
> --- incubator/shindig/trunk/java/gadgets/pom.xml (original)
> +++ incubator/shindig/trunk/java/gadgets/pom.xml Wed Jun 18 01:19:58 2008
> @@ -135,7 +135,7 @@
>       <artifactId>core</artifactId>
>       <scope>compile</scope>
>     </dependency>
> -               <dependency>
> +    <dependency>
>       <groupId>com.google.code.google-collections</groupId>
>       <artifactId>google-collect</artifactId>
>     </dependency>
> @@ -153,9 +153,14 @@
>       <artifactId>jetty</artifactId>
>       <scope>test</scope>
>     </dependency>
> -               <dependency>
> +    <dependency>
>       <groupId>rome</groupId>
>       <artifactId>rome</artifactId>
>     </dependency>
> +    <dependency>
> +      <groupId>com.ibm.icu</groupId>
> +      <artifactId>icu4j</artifactId>
> +      <scope>compile</scope>
> +    </dependency>
>   </dependencies>
>  </project>
>
> Modified: 
> incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/http/HttpResponse.java
> URL: 
> http://svn.apache.org/viewvc/incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/http/HttpResponse.java?rev=669108&r1=669107&r2=669108&view=diff
> ==============================================================================
> --- 
> incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/http/HttpResponse.java
>  (original)
> +++ 
> incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/http/HttpResponse.java
>  Wed Jun 18 01:19:58 2008
> @@ -19,6 +19,9 @@
>
>  import org.apache.shindig.common.util.DateUtil;
>
> +import com.ibm.icu.text.CharsetDetector;
> +import com.ibm.icu.text.CharsetMatch;
> +
>  import java.io.ByteArrayInputStream;
>  import java.io.InputStream;
>  import java.nio.ByteBuffer;
> @@ -28,8 +31,10 @@
>  import java.util.Collections;
>  import java.util.Date;
>  import java.util.HashMap;
> +import java.util.HashSet;
>  import java.util.List;
>  import java.util.Map;
> +import java.util.Set;
>  import java.util.TreeMap;
>  import java.util.concurrent.ConcurrentHashMap;
>
> @@ -42,6 +47,9 @@
>   public final static int SC_NOT_FOUND = 404;
>   public final static int SC_INTERNAL_SERVER_ERROR = 500;
>   public final static int SC_TIMEOUT = 504;
> +  private final static Set<String> BINARY_CONTENT_TYPES = new 
> HashSet<String>(Arrays.asList(
> +    "image/jpeg", "image/png", "image/gif", "image/jpg", 
> "application/x-shockwave-flash"
> +  ));
>
>   private final int httpStatusCode;
>   private static final String DEFAULT_ENCODING = "UTF-8";
> @@ -142,6 +150,9 @@
>     String contentType = getHeader("Content-Type");
>     if (contentType != null) {
>       String[] parts = contentType.split(";");
> +      if (BINARY_CONTENT_TYPES.contains(parts[0])) {
> +        return DEFAULT_ENCODING;
> +      }
>       if (parts.length == 2) {
>         int offset = parts[1].indexOf("charset=");
>         if (offset != -1) {
> @@ -149,7 +160,12 @@
>         }
>       }
>     }
> -    return DEFAULT_ENCODING;
> +
> +    // If the header doesn't specify the charset, try to determine it by 
> examining the content.
> +    CharsetDetector detector = new CharsetDetector();
> +    detector.setText(responseBytes);
> +    CharsetMatch match = detector.detect();
> +    return match.getName().toUpperCase();
>   }
>
>   public int getHttpStatusCode() {
>
> Modified: 
> incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/http/HttpResponseTest.java
> URL: 
> http://svn.apache.org/viewvc/incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/http/HttpResponseTest.java?rev=669108&r1=669107&r2=669108&view=diff
> ==============================================================================
> --- 
> incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/http/HttpResponseTest.java
>  (original)
> +++ 
> incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/http/HttpResponseTest.java
>  Wed Jun 18 01:19:58 2008
> @@ -25,12 +25,30 @@
>
>  import java.util.Arrays;
>  import java.util.HashMap;
> -import java.util.Iterator;
>  import java.util.LinkedList;
>  import java.util.List;
>  import java.util.Map;
>
>  public class HttpResponseTest extends TestCase {
> +  private final static byte[] UTF8_DATA = new byte[] {
> +    (byte)0xEF, (byte)0xBB, (byte)0xBF, 'h', 'e', 'l', 'l', 'o'
> +  };
> +  private final static String UTF8_STRING = "hello";
> +
> +  // A large string is needed for accurate charset detection.
> +  private final static byte[] LATIN1_DATA = new byte[] {
> +    'G', 'a', 'm', 'e', 's', ',', ' ', 'H', 'Q', ',', ' ', 'M', 'a', 'n', 
> 'g', (byte)0xE1, ',', ' ',
> +    'A', 'n', 'i', 'm', 'e', ' ', 'e', ' ', 't', 'u', 'd', 'o', ' ', 'q', 
> 'u', 'e', ' ', 'u', 'm',
> +    ' ', 'b', 'o', 'm', ' ', 'n', 'e', 'r', 'd', ' ', 'a', 'm', 'a'
> +  };
> +  private final static String LATIN1_STRING
> +      = "Games, HQ, Mang\u00E1, Anime e tudo que um bom nerd ama";
> +
> +  private final static byte[] BIG5_DATA = new byte[] {
> +    (byte)0xa7, (byte)0x41, (byte)0xa6, (byte)0x6e
> +  };
> +  private final static String BIG5_STRING = "\u4F60\u597D";
> +
>   private Map<String, List<String>> headers;
>
>   @Override
> @@ -56,32 +74,60 @@
>
>   public void testEncodingDetectionUtf8WithBom() throws Exception {
>     // Input is UTF-8 with BOM.
> -    byte[] data = new byte[] {
> -      (byte)0xEF, (byte)0xBB, (byte)0xBF, 'h', 'e', 'l', 'l', 'o'
> -    };
>     addHeader("Content-Type", "text/plain; charset=UTF-8");
> -    HttpResponse response = new HttpResponse(200, data, headers);
> -    assertEquals("hello", response.getResponseAsString());
> +    HttpResponse response = new HttpResponse(200, UTF8_DATA, headers);
> +    assertEquals(UTF8_STRING, response.getResponseAsString());
>   }
>
>   public void testEncodingDetectionLatin1() throws Exception {
>     // Input is a basic latin-1 string with 1 non-UTF8 compatible char.
> -    byte[] data = new byte[] {
> -      'h', (byte)0xE9, 'l', 'l', 'o'
> -    };
>     addHeader("Content-Type", "text/plain; charset=iso-8859-1");
> -    HttpResponse response = new HttpResponse(200, data, headers);
> -    assertEquals("h\u00E9llo", response.getResponseAsString());
> +    HttpResponse response = new HttpResponse(200, LATIN1_DATA, headers);
> +    assertEquals(LATIN1_STRING, response.getResponseAsString());
>   }
>
>   public void testEncodingDetectionBig5() throws Exception {
> -    byte[] data = new byte[] {
> -      (byte)0xa7, (byte)0x41, (byte)0xa6, (byte)0x6e
> -    };
>     addHeader("Content-Type", "text/plain; charset=BIG5");
> -    HttpResponse response = new HttpResponse(200, data, headers);
> -    String resp = response.getResponseAsString();
> -    assertEquals("\u4F60\u597D", response.getResponseAsString());
> +    HttpResponse response = new HttpResponse(200, BIG5_DATA, headers);
> +    assertEquals(BIG5_STRING, response.getResponseAsString());
> +  }
> +
> +  public void testEncodingDetectionUtf8WithBomNoCharsetSpecified() throws 
> Exception {
> +    addHeader("Content-Type", "text/plain");
> +    HttpResponse response = new HttpResponse(200, UTF8_DATA, headers);
> +    assertEquals("UTF-8", response.getEncoding().toUpperCase());
> +    assertEquals(UTF8_STRING, response.getResponseAsString());
> +  }
> +
> +  public void testEncodingDetectionLatin1NoCharsetSpecified() throws 
> Exception {
> +    addHeader("Content-Type", "text/plain;");
> +    HttpResponse response = new HttpResponse(200, LATIN1_DATA, headers);
> +    assertEquals("ISO-8859-1", response.getEncoding().toUpperCase());
> +    assertEquals(LATIN1_STRING, response.getResponseAsString());
> +  }
> +
> +  public void testEncodingDetectionUtf8WithBomNoContentHeader() throws 
> Exception {
> +    HttpResponse response = new HttpResponse(200, UTF8_DATA, headers);
> +    assertEquals("UTF-8", response.getEncoding().toUpperCase());
> +    assertEquals(UTF8_STRING, response.getResponseAsString());
> +  }
> +
> +  public void testEncodingDetectionLatin1NoContentHeader() throws Exception {
> +    HttpResponse response = new HttpResponse(200, LATIN1_DATA, headers);
> +    assertEquals("ISO-8859-1", response.getEncoding().toUpperCase());
> +    assertEquals(LATIN1_STRING, response.getResponseAsString());
> +  }
> +
> +  public void testGetEncodingForImageContentType() throws Exception {
> +    addHeader("Content-Type", "image/png; charset=iso-8859-1");
> +    HttpResponse response = new HttpResponse(200, LATIN1_DATA, headers);
> +    assertEquals("UTF-8", response.getEncoding().toUpperCase());
> +  }
> +
> +  public void testGetEncodingForFlashContentType() throws Exception {
> +    addHeader("Content-Type", "application/x-shockwave-flash; 
> charset=iso-8859-1");
> +    HttpResponse response = new HttpResponse(200, LATIN1_DATA, headers);
> +    assertEquals("UTF-8", response.getEncoding().toUpperCase());
>   }
>
>   public void testPreserveBinaryData() throws Exception {
> @@ -98,12 +144,14 @@
>     addHeader("Cache-Control", "no-cache");
>     HttpResponse response = new HttpResponse(200, new byte[0], headers);
>     assertTrue(response.isStrictNoCache());
> +    assertEquals(-1, response.getCacheExpiration());
>   }
>
>   public void testStrictPragmaNoCache() throws Exception {
>     addHeader("Pragma", "no-cache");
>     HttpResponse response = new HttpResponse(200, new byte[0], headers);
>     assertTrue(response.isStrictNoCache());
> +    assertEquals(-1, response.getCacheExpiration());
>   }
>
>   public void testStrictPragmaJunk() throws Exception {
> @@ -118,7 +166,17 @@
>     long time = ((System.currentTimeMillis() / 1000) * 1000) + 10000L;
>     addHeader("Expires", DateUtil.formatDate(time));
>     HttpResponse response = new HttpResponse(200, new byte[0], headers);
> -    assertEquals(time, response.getExpiration());
> +    assertEquals(time, response.getCacheExpiration());
> +  }
> +
> +  public void testMaxAge() throws Exception {
> +    int maxAge = 10;
> +    long expected = ((System.currentTimeMillis() / 1000) * 1000) + (maxAge * 
> 1000);
> +    addHeader("Cache-Control", "public, max-age=" + maxAge);
> +    HttpResponse response = new HttpResponse(200, new byte[0], headers);
> +    long expiration = response.getCacheExpiration();
> +    assertTrue("getExpiration is less than start time + maxAge", expiration 
> >= expected);
> +    assertTrue("getExpiration is too high.", expiration <= expected + 1000);
>   }
>
>   public void testNegativeCaching() {
>
> Modified: incubator/shindig/trunk/pom.xml
> URL: 
> http://svn.apache.org/viewvc/incubator/shindig/trunk/pom.xml?rev=669108&r1=669107&r2=669108&view=diff
> ==============================================================================
> --- incubator/shindig/trunk/pom.xml (original)
> +++ incubator/shindig/trunk/pom.xml Wed Jun 18 01:19:58 2008
> @@ -832,11 +832,16 @@
>         <artifactId>joda-time</artifactId>
>         <version>1.5.2</version>
>       </dependency>
> -                       <dependency>
> +      <dependency>
>         <groupId>rome</groupId>
>         <artifactId>rome</artifactId>
>         <version>0.9</version>
> -                 </dependency>
> +      </dependency>
> +      <dependency>
> +        <groupId>com.ibm.icu</groupId>
> +        <artifactId>icu4j</artifactId>
> +        <version>3.8</version>
> +      </dependency>
>     </dependencies>
>   </dependencyManagement>
>  </project>
>
>
>

Reply via email to