Author: etnu
Date: Wed Jun 18 01:19:58 2008
New Revision: 669108
URL: http://svn.apache.org/viewvc?rev=669108&view=rev
Log:
Applied SHINDIG-391, which adds more robust character encoding detection for
HttpResponse using ICU, from Patrick Fairbank.
Modified:
incubator/shindig/trunk/java/gadgets/pom.xml
incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/http/HttpResponse.java
incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/http/HttpResponseTest.java
incubator/shindig/trunk/pom.xml
Modified: incubator/shindig/trunk/java/gadgets/pom.xml
URL:
http://svn.apache.org/viewvc/incubator/shindig/trunk/java/gadgets/pom.xml?rev=669108&r1=669107&r2=669108&view=diff
==============================================================================
--- incubator/shindig/trunk/java/gadgets/pom.xml (original)
+++ incubator/shindig/trunk/java/gadgets/pom.xml Wed Jun 18 01:19:58 2008
@@ -135,7 +135,7 @@
<artifactId>core</artifactId>
<scope>compile</scope>
</dependency>
- <dependency>
+ <dependency>
<groupId>com.google.code.google-collections</groupId>
<artifactId>google-collect</artifactId>
</dependency>
@@ -153,9 +153,14 @@
<artifactId>jetty</artifactId>
<scope>test</scope>
</dependency>
- <dependency>
+ <dependency>
<groupId>rome</groupId>
<artifactId>rome</artifactId>
</dependency>
+ <dependency>
+ <groupId>com.ibm.icu</groupId>
+ <artifactId>icu4j</artifactId>
+ <scope>compile</scope>
+ </dependency>
</dependencies>
</project>
Modified:
incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/http/HttpResponse.java
URL:
http://svn.apache.org/viewvc/incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/http/HttpResponse.java?rev=669108&r1=669107&r2=669108&view=diff
==============================================================================
--- incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/http/HttpResponse.java (original)
+++ incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/http/HttpResponse.java Wed Jun 18 01:19:58 2008
@@ -19,6 +19,9 @@
import org.apache.shindig.common.util.DateUtil;
+import com.ibm.icu.text.CharsetDetector;
+import com.ibm.icu.text.CharsetMatch;
+
import java.io.ByteArrayInputStream;
import java.io.InputStream;
import java.nio.ByteBuffer;
@@ -28,8 +31,10 @@
import java.util.Collections;
import java.util.Date;
import java.util.HashMap;
+import java.util.HashSet;
import java.util.List;
import java.util.Map;
+import java.util.Set;
import java.util.TreeMap;
import java.util.concurrent.ConcurrentHashMap;
@@ -42,6 +47,9 @@
public final static int SC_NOT_FOUND = 404;
public final static int SC_INTERNAL_SERVER_ERROR = 500;
public final static int SC_TIMEOUT = 504;
+ private final static Set<String> BINARY_CONTENT_TYPES = new
HashSet<String>(Arrays.asList(
+ "image/jpeg", "image/png", "image/gif", "image/jpg",
"application/x-shockwave-flash"
+ ));
private final int httpStatusCode;
private static final String DEFAULT_ENCODING = "UTF-8";
@@ -142,6 +150,9 @@
String contentType = getHeader("Content-Type");
if (contentType != null) {
String[] parts = contentType.split(";");
+ if (BINARY_CONTENT_TYPES.contains(parts[0])) {
+ return DEFAULT_ENCODING;
+ }
if (parts.length == 2) {
int offset = parts[1].indexOf("charset=");
if (offset != -1) {
@@ -149,7 +160,12 @@
}
}
}
- return DEFAULT_ENCODING;
+
+ // If the header doesn't specify the charset, try to determine it by examining the content.
+ CharsetDetector detector = new CharsetDetector();
+ detector.setText(responseBytes);
+ CharsetMatch match = detector.detect();
+ return match.getName().toUpperCase();
}
public int getHttpStatusCode() {
Modified:
incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/http/HttpResponseTest.java
URL:
http://svn.apache.org/viewvc/incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/http/HttpResponseTest.java?rev=669108&r1=669107&r2=669108&view=diff
==============================================================================
--- incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/http/HttpResponseTest.java (original)
+++ incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/http/HttpResponseTest.java Wed Jun 18 01:19:58 2008
@@ -25,12 +25,30 @@
import java.util.Arrays;
import java.util.HashMap;
-import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
public class HttpResponseTest extends TestCase {
+ private final static byte[] UTF8_DATA = new byte[] {
+ (byte)0xEF, (byte)0xBB, (byte)0xBF, 'h', 'e', 'l', 'l', 'o'
+ };
+ private final static String UTF8_STRING = "hello";
+
+ // A large string is needed for accurate charset detection.
+ private final static byte[] LATIN1_DATA = new byte[] {
+ 'G', 'a', 'm', 'e', 's', ',', ' ', 'H', 'Q', ',', ' ', 'M', 'a', 'n', 'g',
(byte)0xE1, ',', ' ',
+ 'A', 'n', 'i', 'm', 'e', ' ', 'e', ' ', 't', 'u', 'd', 'o', ' ', 'q', 'u',
'e', ' ', 'u', 'm',
+ ' ', 'b', 'o', 'm', ' ', 'n', 'e', 'r', 'd', ' ', 'a', 'm', 'a'
+ };
+ private final static String LATIN1_STRING
+ = "Games, HQ, Mang\u00E1, Anime e tudo que um bom nerd ama";
+
+ private final static byte[] BIG5_DATA = new byte[] {
+ (byte)0xa7, (byte)0x41, (byte)0xa6, (byte)0x6e
+ };
+ private final static String BIG5_STRING = "\u4F60\u597D";
+
private Map<String, List<String>> headers;
@Override
@@ -56,32 +74,60 @@
public void testEncodingDetectionUtf8WithBom() throws Exception {
// Input is UTF-8 with BOM.
- byte[] data = new byte[] {
- (byte)0xEF, (byte)0xBB, (byte)0xBF, 'h', 'e', 'l', 'l', 'o'
- };
addHeader("Content-Type", "text/plain; charset=UTF-8");
- HttpResponse response = new HttpResponse(200, data, headers);
- assertEquals("hello", response.getResponseAsString());
+ HttpResponse response = new HttpResponse(200, UTF8_DATA, headers);
+ assertEquals(UTF8_STRING, response.getResponseAsString());
}
public void testEncodingDetectionLatin1() throws Exception {
// Input is a basic latin-1 string with 1 non-UTF8 compatible char.
- byte[] data = new byte[] {
- 'h', (byte)0xE9, 'l', 'l', 'o'
- };
addHeader("Content-Type", "text/plain; charset=iso-8859-1");
- HttpResponse response = new HttpResponse(200, data, headers);
- assertEquals("h\u00E9llo", response.getResponseAsString());
+ HttpResponse response = new HttpResponse(200, LATIN1_DATA, headers);
+ assertEquals(LATIN1_STRING, response.getResponseAsString());
}
public void testEncodingDetectionBig5() throws Exception {
- byte[] data = new byte[] {
- (byte)0xa7, (byte)0x41, (byte)0xa6, (byte)0x6e
- };
addHeader("Content-Type", "text/plain; charset=BIG5");
- HttpResponse response = new HttpResponse(200, data, headers);
- String resp = response.getResponseAsString();
- assertEquals("\u4F60\u597D", response.getResponseAsString());
+ HttpResponse response = new HttpResponse(200, BIG5_DATA, headers);
+ assertEquals(BIG5_STRING, response.getResponseAsString());
+ }
+
+ public void testEncodingDetectionUtf8WithBomNoCharsetSpecified() throws
Exception {
+ addHeader("Content-Type", "text/plain");
+ HttpResponse response = new HttpResponse(200, UTF8_DATA, headers);
+ assertEquals("UTF-8", response.getEncoding().toUpperCase());
+ assertEquals(UTF8_STRING, response.getResponseAsString());
+ }
+
+ public void testEncodingDetectionLatin1NoCharsetSpecified() throws Exception
{
+ addHeader("Content-Type", "text/plain;");
+ HttpResponse response = new HttpResponse(200, LATIN1_DATA, headers);
+ assertEquals("ISO-8859-1", response.getEncoding().toUpperCase());
+ assertEquals(LATIN1_STRING, response.getResponseAsString());
+ }
+
+ public void testEncodingDetectionUtf8WithBomNoContentHeader() throws
Exception {
+ HttpResponse response = new HttpResponse(200, UTF8_DATA, headers);
+ assertEquals("UTF-8", response.getEncoding().toUpperCase());
+ assertEquals(UTF8_STRING, response.getResponseAsString());
+ }
+
+ public void testEncodingDetectionLatin1NoContentHeader() throws Exception {
+ HttpResponse response = new HttpResponse(200, LATIN1_DATA, headers);
+ assertEquals("ISO-8859-1", response.getEncoding().toUpperCase());
+ assertEquals(LATIN1_STRING, response.getResponseAsString());
+ }
+
+ public void testGetEncodingForImageContentType() throws Exception {
+ addHeader("Content-Type", "image/png; charset=iso-8859-1");
+ HttpResponse response = new HttpResponse(200, LATIN1_DATA, headers);
+ assertEquals("UTF-8", response.getEncoding().toUpperCase());
+ }
+
+ public void testGetEncodingForFlashContentType() throws Exception {
+ addHeader("Content-Type", "application/x-shockwave-flash;
charset=iso-8859-1");
+ HttpResponse response = new HttpResponse(200, LATIN1_DATA, headers);
+ assertEquals("UTF-8", response.getEncoding().toUpperCase());
}
public void testPreserveBinaryData() throws Exception {
@@ -98,12 +144,14 @@
addHeader("Cache-Control", "no-cache");
HttpResponse response = new HttpResponse(200, new byte[0], headers);
assertTrue(response.isStrictNoCache());
+ assertEquals(-1, response.getCacheExpiration());
}
public void testStrictPragmaNoCache() throws Exception {
addHeader("Pragma", "no-cache");
HttpResponse response = new HttpResponse(200, new byte[0], headers);
assertTrue(response.isStrictNoCache());
+ assertEquals(-1, response.getCacheExpiration());
}
public void testStrictPragmaJunk() throws Exception {
@@ -118,7 +166,17 @@
long time = ((System.currentTimeMillis() / 1000) * 1000) + 10000L;
addHeader("Expires", DateUtil.formatDate(time));
HttpResponse response = new HttpResponse(200, new byte[0], headers);
- assertEquals(time, response.getExpiration());
+ assertEquals(time, response.getCacheExpiration());
+ }
+
+ public void testMaxAge() throws Exception {
+ int maxAge = 10;
+ long expected = ((System.currentTimeMillis() / 1000) * 1000) + (maxAge *
1000);
+ addHeader("Cache-Control", "public, max-age=" + maxAge);
+ HttpResponse response = new HttpResponse(200, new byte[0], headers);
+ long expiration = response.getCacheExpiration();
+ assertTrue("getExpiration is less than start time + maxAge", expiration >=
expected);
+ assertTrue("getExpiration is too high.", expiration <= expected + 1000);
}
public void testNegativeCaching() {
Modified: incubator/shindig/trunk/pom.xml
URL:
http://svn.apache.org/viewvc/incubator/shindig/trunk/pom.xml?rev=669108&r1=669107&r2=669108&view=diff
==============================================================================
--- incubator/shindig/trunk/pom.xml (original)
+++ incubator/shindig/trunk/pom.xml Wed Jun 18 01:19:58 2008
@@ -832,11 +832,16 @@
<artifactId>joda-time</artifactId>
<version>1.5.2</version>
</dependency>
- <dependency>
+ <dependency>
<groupId>rome</groupId>
<artifactId>rome</artifactId>
<version>0.9</version>
- </dependency>
+ </dependency>
+ <dependency>
+ <groupId>com.ibm.icu</groupId>
+ <artifactId>icu4j</artifactId>
+ <version>3.8</version>
+ </dependency>
</dependencies>
</dependencyManagement>
</project>