Author: etnu
Date: Wed Jun 18 01:19:58 2008
New Revision: 669108

URL: http://svn.apache.org/viewvc?rev=669108&view=rev
Log:
Applied SHINDIG-391, which adds more robust character encoding detection for 
HttpResponse using ICU, from Patrick Fairbank.


Modified:
    incubator/shindig/trunk/java/gadgets/pom.xml
    incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/http/HttpResponse.java
    incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/http/HttpResponseTest.java
    incubator/shindig/trunk/pom.xml

Modified: incubator/shindig/trunk/java/gadgets/pom.xml
URL: http://svn.apache.org/viewvc/incubator/shindig/trunk/java/gadgets/pom.xml?rev=669108&r1=669107&r2=669108&view=diff
==============================================================================
--- incubator/shindig/trunk/java/gadgets/pom.xml (original)
+++ incubator/shindig/trunk/java/gadgets/pom.xml Wed Jun 18 01:19:58 2008
@@ -135,7 +135,7 @@
       <artifactId>core</artifactId>
       <scope>compile</scope>
     </dependency>
-               <dependency>
+    <dependency>
       <groupId>com.google.code.google-collections</groupId>
       <artifactId>google-collect</artifactId>
     </dependency>
@@ -153,9 +153,14 @@
       <artifactId>jetty</artifactId>
       <scope>test</scope>
     </dependency>
-               <dependency>
+    <dependency>
       <groupId>rome</groupId>
       <artifactId>rome</artifactId>
     </dependency>
+    <dependency>
+      <groupId>com.ibm.icu</groupId>
+      <artifactId>icu4j</artifactId>
+      <scope>compile</scope>
+    </dependency>
   </dependencies>
 </project>

Modified: incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/http/HttpResponse.java
URL: http://svn.apache.org/viewvc/incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/http/HttpResponse.java?rev=669108&r1=669107&r2=669108&view=diff
==============================================================================
--- incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/http/HttpResponse.java (original)
+++ incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/http/HttpResponse.java Wed Jun 18 01:19:58 2008
@@ -19,6 +19,9 @@
 
 import org.apache.shindig.common.util.DateUtil;
 
+import com.ibm.icu.text.CharsetDetector;
+import com.ibm.icu.text.CharsetMatch;
+
 import java.io.ByteArrayInputStream;
 import java.io.InputStream;
 import java.nio.ByteBuffer;
@@ -28,8 +31,10 @@
 import java.util.Collections;
 import java.util.Date;
 import java.util.HashMap;
+import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
+import java.util.Set;
 import java.util.TreeMap;
 import java.util.concurrent.ConcurrentHashMap;
 
@@ -42,6 +47,9 @@
   public final static int SC_NOT_FOUND = 404;
   public final static int SC_INTERNAL_SERVER_ERROR = 500;
   public final static int SC_TIMEOUT = 504;
+  private final static Set<String> BINARY_CONTENT_TYPES = new HashSet<String>(Arrays.asList(
+    "image/jpeg", "image/png", "image/gif", "image/jpg", "application/x-shockwave-flash"
+  ));
 
   private final int httpStatusCode;
   private static final String DEFAULT_ENCODING = "UTF-8";
@@ -142,6 +150,9 @@
     String contentType = getHeader("Content-Type");
     if (contentType != null) {
       String[] parts = contentType.split(";");
+      if (BINARY_CONTENT_TYPES.contains(parts[0])) {
+        return DEFAULT_ENCODING;
+      }
       if (parts.length == 2) {
         int offset = parts[1].indexOf("charset=");
         if (offset != -1) {
@@ -149,7 +160,12 @@
         }
       }
     }
-    return DEFAULT_ENCODING;
+
+    // If the header doesn't specify the charset, try to determine it by examining the content.
+    CharsetDetector detector = new CharsetDetector();
+    detector.setText(responseBytes);
+    CharsetMatch match = detector.detect();
+    return match.getName().toUpperCase();
   }
 
   public int getHttpStatusCode() {

Modified: incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/http/HttpResponseTest.java
URL: http://svn.apache.org/viewvc/incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/http/HttpResponseTest.java?rev=669108&r1=669107&r2=669108&view=diff
==============================================================================
--- incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/http/HttpResponseTest.java (original)
+++ incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/http/HttpResponseTest.java Wed Jun 18 01:19:58 2008
@@ -25,12 +25,30 @@
 
 import java.util.Arrays;
 import java.util.HashMap;
-import java.util.Iterator;
 import java.util.LinkedList;
 import java.util.List;
 import java.util.Map;
 
 public class HttpResponseTest extends TestCase {
+  private final static byte[] UTF8_DATA = new byte[] {
+    (byte)0xEF, (byte)0xBB, (byte)0xBF, 'h', 'e', 'l', 'l', 'o'
+  };
+  private final static String UTF8_STRING = "hello";
+
+  // A large string is needed for accurate charset detection.
+  private final static byte[] LATIN1_DATA = new byte[] {
+    'G', 'a', 'm', 'e', 's', ',', ' ', 'H', 'Q', ',', ' ', 'M', 'a', 'n', 'g', (byte)0xE1, ',', ' ',
+    'A', 'n', 'i', 'm', 'e', ' ', 'e', ' ', 't', 'u', 'd', 'o', ' ', 'q', 'u', 'e', ' ', 'u', 'm',
+    ' ', 'b', 'o', 'm', ' ', 'n', 'e', 'r', 'd', ' ', 'a', 'm', 'a'
+  };
+  private final static String LATIN1_STRING
+      = "Games, HQ, Mang\u00E1, Anime e tudo que um bom nerd ama";
+
+  private final static byte[] BIG5_DATA = new byte[] {
+    (byte)0xa7, (byte)0x41, (byte)0xa6, (byte)0x6e
+  };
+  private final static String BIG5_STRING = "\u4F60\u597D";
+
   private Map<String, List<String>> headers;
 
   @Override
@@ -56,32 +74,60 @@
 
   public void testEncodingDetectionUtf8WithBom() throws Exception {
     // Input is UTF-8 with BOM.
-    byte[] data = new byte[] {
-      (byte)0xEF, (byte)0xBB, (byte)0xBF, 'h', 'e', 'l', 'l', 'o'
-    };
     addHeader("Content-Type", "text/plain; charset=UTF-8");
-    HttpResponse response = new HttpResponse(200, data, headers);
-    assertEquals("hello", response.getResponseAsString());
+    HttpResponse response = new HttpResponse(200, UTF8_DATA, headers);
+    assertEquals(UTF8_STRING, response.getResponseAsString());
   }
 
   public void testEncodingDetectionLatin1() throws Exception {
     // Input is a basic latin-1 string with 1 non-UTF8 compatible char.
-    byte[] data = new byte[] {
-      'h', (byte)0xE9, 'l', 'l', 'o'
-    };
     addHeader("Content-Type", "text/plain; charset=iso-8859-1");
-    HttpResponse response = new HttpResponse(200, data, headers);
-    assertEquals("h\u00E9llo", response.getResponseAsString());
+    HttpResponse response = new HttpResponse(200, LATIN1_DATA, headers);
+    assertEquals(LATIN1_STRING, response.getResponseAsString());
   }
 
   public void testEncodingDetectionBig5() throws Exception {
-    byte[] data = new byte[] {
-      (byte)0xa7, (byte)0x41, (byte)0xa6, (byte)0x6e
-    };
     addHeader("Content-Type", "text/plain; charset=BIG5");
-    HttpResponse response = new HttpResponse(200, data, headers);
-    String resp = response.getResponseAsString();
-    assertEquals("\u4F60\u597D", response.getResponseAsString());
+    HttpResponse response = new HttpResponse(200, BIG5_DATA, headers);
+    assertEquals(BIG5_STRING, response.getResponseAsString());
+  }
+
+  public void testEncodingDetectionUtf8WithBomNoCharsetSpecified() throws Exception {
+    addHeader("Content-Type", "text/plain");
+    HttpResponse response = new HttpResponse(200, UTF8_DATA, headers);
+    assertEquals("UTF-8", response.getEncoding().toUpperCase());
+    assertEquals(UTF8_STRING, response.getResponseAsString());
+  }
+
+  public void testEncodingDetectionLatin1NoCharsetSpecified() throws Exception {
+    addHeader("Content-Type", "text/plain;");
+    HttpResponse response = new HttpResponse(200, LATIN1_DATA, headers);
+    assertEquals("ISO-8859-1", response.getEncoding().toUpperCase());
+    assertEquals(LATIN1_STRING, response.getResponseAsString());
+  }
+
+  public void testEncodingDetectionUtf8WithBomNoContentHeader() throws Exception {
+    HttpResponse response = new HttpResponse(200, UTF8_DATA, headers);
+    assertEquals("UTF-8", response.getEncoding().toUpperCase());
+    assertEquals(UTF8_STRING, response.getResponseAsString());
+  }
+
+  public void testEncodingDetectionLatin1NoContentHeader() throws Exception {
+    HttpResponse response = new HttpResponse(200, LATIN1_DATA, headers);
+    assertEquals("ISO-8859-1", response.getEncoding().toUpperCase());
+    assertEquals(LATIN1_STRING, response.getResponseAsString());
+  }
+
+  public void testGetEncodingForImageContentType() throws Exception {
+    addHeader("Content-Type", "image/png; charset=iso-8859-1");
+    HttpResponse response = new HttpResponse(200, LATIN1_DATA, headers);
+    assertEquals("UTF-8", response.getEncoding().toUpperCase());
+  }
+
+  public void testGetEncodingForFlashContentType() throws Exception {
+    addHeader("Content-Type", "application/x-shockwave-flash; charset=iso-8859-1");
+    HttpResponse response = new HttpResponse(200, LATIN1_DATA, headers);
+    assertEquals("UTF-8", response.getEncoding().toUpperCase());
   }
 
   public void testPreserveBinaryData() throws Exception {
@@ -98,12 +144,14 @@
     addHeader("Cache-Control", "no-cache");
     HttpResponse response = new HttpResponse(200, new byte[0], headers);
     assertTrue(response.isStrictNoCache());
+    assertEquals(-1, response.getCacheExpiration());
   }
 
   public void testStrictPragmaNoCache() throws Exception {
     addHeader("Pragma", "no-cache");
     HttpResponse response = new HttpResponse(200, new byte[0], headers);
     assertTrue(response.isStrictNoCache());
+    assertEquals(-1, response.getCacheExpiration());
   }
 
   public void testStrictPragmaJunk() throws Exception {
@@ -118,7 +166,17 @@
     long time = ((System.currentTimeMillis() / 1000) * 1000) + 10000L;
     addHeader("Expires", DateUtil.formatDate(time));
     HttpResponse response = new HttpResponse(200, new byte[0], headers);
-    assertEquals(time, response.getExpiration());
+    assertEquals(time, response.getCacheExpiration());
+  }
+
+  public void testMaxAge() throws Exception {
+    int maxAge = 10;
+    long expected = ((System.currentTimeMillis() / 1000) * 1000) + (maxAge * 1000);
+    addHeader("Cache-Control", "public, max-age=" + maxAge);
+    HttpResponse response = new HttpResponse(200, new byte[0], headers);
+    long expiration = response.getCacheExpiration();
+    assertTrue("getExpiration is less than start time + maxAge", expiration >= expected);
+    assertTrue("getExpiration is too high.", expiration <= expected + 1000);
   }
 
   public void testNegativeCaching() {

Modified: incubator/shindig/trunk/pom.xml
URL: http://svn.apache.org/viewvc/incubator/shindig/trunk/pom.xml?rev=669108&r1=669107&r2=669108&view=diff
==============================================================================
--- incubator/shindig/trunk/pom.xml (original)
+++ incubator/shindig/trunk/pom.xml Wed Jun 18 01:19:58 2008
@@ -832,11 +832,16 @@
         <artifactId>joda-time</artifactId>
         <version>1.5.2</version>
       </dependency>
-                       <dependency>
+      <dependency>
         <groupId>rome</groupId>
         <artifactId>rome</artifactId>
         <version>0.9</version>
-                 </dependency>
+      </dependency>
+      <dependency>
+        <groupId>com.ibm.icu</groupId>
+        <artifactId>icu4j</artifactId>
+        <version>3.8</version>
+      </dependency>
     </dependencies>
   </dependencyManagement>
 </project>


Reply via email to