This adds 4+ MB of jars. Is an addition of this size really necessary for this task?
davep On Wed, Jun 18, 2008 at 1:20 AM, <[EMAIL PROTECTED]> wrote: > Author: etnu > Date: Wed Jun 18 01:19:58 2008 > New Revision: 669108 > > URL: http://svn.apache.org/viewvc?rev=669108&view=rev > Log: > Applied SHINDIG-391, which adds more robust character encoding detection for > HttpResponse using ICU, from Patrick Fairbank. > > > Modified: > incubator/shindig/trunk/java/gadgets/pom.xml > > incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/http/HttpResponse.java > > incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/http/HttpResponseTest.java > incubator/shindig/trunk/pom.xml > > Modified: incubator/shindig/trunk/java/gadgets/pom.xml > URL: > http://svn.apache.org/viewvc/incubator/shindig/trunk/java/gadgets/pom.xml?rev=669108&r1=669107&r2=669108&view=diff > ============================================================================== > --- incubator/shindig/trunk/java/gadgets/pom.xml (original) > +++ incubator/shindig/trunk/java/gadgets/pom.xml Wed Jun 18 01:19:58 2008 > @@ -135,7 +135,7 @@ > <artifactId>core</artifactId> > <scope>compile</scope> > </dependency> > - <dependency> > + <dependency> > <groupId>com.google.code.google-collections</groupId> > <artifactId>google-collect</artifactId> > </dependency> > @@ -153,9 +153,14 @@ > <artifactId>jetty</artifactId> > <scope>test</scope> > </dependency> > - <dependency> > + <dependency> > <groupId>rome</groupId> > <artifactId>rome</artifactId> > </dependency> > + <dependency> > + <groupId>com.ibm.icu</groupId> > + <artifactId>icu4j</artifactId> > + <scope>compile</scope> > + </dependency> > </dependencies> > </project> > > Modified: > incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/http/HttpResponse.java > URL: > http://svn.apache.org/viewvc/incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/http/HttpResponse.java?rev=669108&r1=669107&r2=669108&view=diff > 
============================================================================== > --- > incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/http/HttpResponse.java > (original) > +++ > incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/http/HttpResponse.java > Wed Jun 18 01:19:58 2008 > @@ -19,6 +19,9 @@ > > import org.apache.shindig.common.util.DateUtil; > > +import com.ibm.icu.text.CharsetDetector; > +import com.ibm.icu.text.CharsetMatch; > + > import java.io.ByteArrayInputStream; > import java.io.InputStream; > import java.nio.ByteBuffer; > @@ -28,8 +31,10 @@ > import java.util.Collections; > import java.util.Date; > import java.util.HashMap; > +import java.util.HashSet; > import java.util.List; > import java.util.Map; > +import java.util.Set; > import java.util.TreeMap; > import java.util.concurrent.ConcurrentHashMap; > > @@ -42,6 +47,9 @@ > public final static int SC_NOT_FOUND = 404; > public final static int SC_INTERNAL_SERVER_ERROR = 500; > public final static int SC_TIMEOUT = 504; > + private final static Set<String> BINARY_CONTENT_TYPES = new > HashSet<String>(Arrays.asList( > + "image/jpeg", "image/png", "image/gif", "image/jpg", > "application/x-shockwave-flash" > + )); > > private final int httpStatusCode; > private static final String DEFAULT_ENCODING = "UTF-8"; > @@ -142,6 +150,9 @@ > String contentType = getHeader("Content-Type"); > if (contentType != null) { > String[] parts = contentType.split(";"); > + if (BINARY_CONTENT_TYPES.contains(parts[0])) { > + return DEFAULT_ENCODING; > + } > if (parts.length == 2) { > int offset = parts[1].indexOf("charset="); > if (offset != -1) { > @@ -149,7 +160,12 @@ > } > } > } > - return DEFAULT_ENCODING; > + > + // If the header doesn't specify the charset, try to determine it by > examining the content. 
> + CharsetDetector detector = new CharsetDetector(); > + detector.setText(responseBytes); > + CharsetMatch match = detector.detect(); > + return match.getName().toUpperCase(); > } > > public int getHttpStatusCode() { > > Modified: > incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/http/HttpResponseTest.java > URL: > http://svn.apache.org/viewvc/incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/http/HttpResponseTest.java?rev=669108&r1=669107&r2=669108&view=diff > ============================================================================== > --- > incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/http/HttpResponseTest.java > (original) > +++ > incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/http/HttpResponseTest.java > Wed Jun 18 01:19:58 2008 > @@ -25,12 +25,30 @@ > > import java.util.Arrays; > import java.util.HashMap; > -import java.util.Iterator; > import java.util.LinkedList; > import java.util.List; > import java.util.Map; > > public class HttpResponseTest extends TestCase { > + private final static byte[] UTF8_DATA = new byte[] { > + (byte)0xEF, (byte)0xBB, (byte)0xBF, 'h', 'e', 'l', 'l', 'o' > + }; > + private final static String UTF8_STRING = "hello"; > + > + // A large string is needed for accurate charset detection. 
> + private final static byte[] LATIN1_DATA = new byte[] { > + 'G', 'a', 'm', 'e', 's', ',', ' ', 'H', 'Q', ',', ' ', 'M', 'a', 'n', > 'g', (byte)0xE1, ',', ' ', > + 'A', 'n', 'i', 'm', 'e', ' ', 'e', ' ', 't', 'u', 'd', 'o', ' ', 'q', > 'u', 'e', ' ', 'u', 'm', > + ' ', 'b', 'o', 'm', ' ', 'n', 'e', 'r', 'd', ' ', 'a', 'm', 'a' > + }; > + private final static String LATIN1_STRING > + = "Games, HQ, Mang\u00E1, Anime e tudo que um bom nerd ama"; > + > + private final static byte[] BIG5_DATA = new byte[] { > + (byte)0xa7, (byte)0x41, (byte)0xa6, (byte)0x6e > + }; > + private final static String BIG5_STRING = "\u4F60\u597D"; > + > private Map<String, List<String>> headers; > > @Override > @@ -56,32 +74,60 @@ > > public void testEncodingDetectionUtf8WithBom() throws Exception { > // Input is UTF-8 with BOM. > - byte[] data = new byte[] { > - (byte)0xEF, (byte)0xBB, (byte)0xBF, 'h', 'e', 'l', 'l', 'o' > - }; > addHeader("Content-Type", "text/plain; charset=UTF-8"); > - HttpResponse response = new HttpResponse(200, data, headers); > - assertEquals("hello", response.getResponseAsString()); > + HttpResponse response = new HttpResponse(200, UTF8_DATA, headers); > + assertEquals(UTF8_STRING, response.getResponseAsString()); > } > > public void testEncodingDetectionLatin1() throws Exception { > // Input is a basic latin-1 string with 1 non-UTF8 compatible char. 
> - byte[] data = new byte[] { > - 'h', (byte)0xE9, 'l', 'l', 'o' > - }; > addHeader("Content-Type", "text/plain; charset=iso-8859-1"); > - HttpResponse response = new HttpResponse(200, data, headers); > - assertEquals("h\u00E9llo", response.getResponseAsString()); > + HttpResponse response = new HttpResponse(200, LATIN1_DATA, headers); > + assertEquals(LATIN1_STRING, response.getResponseAsString()); > } > > public void testEncodingDetectionBig5() throws Exception { > - byte[] data = new byte[] { > - (byte)0xa7, (byte)0x41, (byte)0xa6, (byte)0x6e > - }; > addHeader("Content-Type", "text/plain; charset=BIG5"); > - HttpResponse response = new HttpResponse(200, data, headers); > - String resp = response.getResponseAsString(); > - assertEquals("\u4F60\u597D", response.getResponseAsString()); > + HttpResponse response = new HttpResponse(200, BIG5_DATA, headers); > + assertEquals(BIG5_STRING, response.getResponseAsString()); > + } > + > + public void testEncodingDetectionUtf8WithBomNoCharsetSpecified() throws > Exception { > + addHeader("Content-Type", "text/plain"); > + HttpResponse response = new HttpResponse(200, UTF8_DATA, headers); > + assertEquals("UTF-8", response.getEncoding().toUpperCase()); > + assertEquals(UTF8_STRING, response.getResponseAsString()); > + } > + > + public void testEncodingDetectionLatin1NoCharsetSpecified() throws > Exception { > + addHeader("Content-Type", "text/plain;"); > + HttpResponse response = new HttpResponse(200, LATIN1_DATA, headers); > + assertEquals("ISO-8859-1", response.getEncoding().toUpperCase()); > + assertEquals(LATIN1_STRING, response.getResponseAsString()); > + } > + > + public void testEncodingDetectionUtf8WithBomNoContentHeader() throws > Exception { > + HttpResponse response = new HttpResponse(200, UTF8_DATA, headers); > + assertEquals("UTF-8", response.getEncoding().toUpperCase()); > + assertEquals(UTF8_STRING, response.getResponseAsString()); > + } > + > + public void testEncodingDetectionLatin1NoContentHeader() throws 
Exception { > + HttpResponse response = new HttpResponse(200, LATIN1_DATA, headers); > + assertEquals("ISO-8859-1", response.getEncoding().toUpperCase()); > + assertEquals(LATIN1_STRING, response.getResponseAsString()); > + } > + > + public void testGetEncodingForImageContentType() throws Exception { > + addHeader("Content-Type", "image/png; charset=iso-8859-1"); > + HttpResponse response = new HttpResponse(200, LATIN1_DATA, headers); > + assertEquals("UTF-8", response.getEncoding().toUpperCase()); > + } > + > + public void testGetEncodingForFlashContentType() throws Exception { > + addHeader("Content-Type", "application/x-shockwave-flash; > charset=iso-8859-1"); > + HttpResponse response = new HttpResponse(200, LATIN1_DATA, headers); > + assertEquals("UTF-8", response.getEncoding().toUpperCase()); > } > > public void testPreserveBinaryData() throws Exception { > @@ -98,12 +144,14 @@ > addHeader("Cache-Control", "no-cache"); > HttpResponse response = new HttpResponse(200, new byte[0], headers); > assertTrue(response.isStrictNoCache()); > + assertEquals(-1, response.getCacheExpiration()); > } > > public void testStrictPragmaNoCache() throws Exception { > addHeader("Pragma", "no-cache"); > HttpResponse response = new HttpResponse(200, new byte[0], headers); > assertTrue(response.isStrictNoCache()); > + assertEquals(-1, response.getCacheExpiration()); > } > > public void testStrictPragmaJunk() throws Exception { > @@ -118,7 +166,17 @@ > long time = ((System.currentTimeMillis() / 1000) * 1000) + 10000L; > addHeader("Expires", DateUtil.formatDate(time)); > HttpResponse response = new HttpResponse(200, new byte[0], headers); > - assertEquals(time, response.getExpiration()); > + assertEquals(time, response.getCacheExpiration()); > + } > + > + public void testMaxAge() throws Exception { > + int maxAge = 10; > + long expected = ((System.currentTimeMillis() / 1000) * 1000) + (maxAge * > 1000); > + addHeader("Cache-Control", "public, max-age=" + maxAge); > + HttpResponse 
response = new HttpResponse(200, new byte[0], headers); > + long expiration = response.getCacheExpiration(); > + assertTrue("getExpiration is less than start time + maxAge", expiration > >= expected); > + assertTrue("getExpiration is too high.", expiration <= expected + 1000); > } > > public void testNegativeCaching() { > > Modified: incubator/shindig/trunk/pom.xml > URL: > http://svn.apache.org/viewvc/incubator/shindig/trunk/pom.xml?rev=669108&r1=669107&r2=669108&view=diff > ============================================================================== > --- incubator/shindig/trunk/pom.xml (original) > +++ incubator/shindig/trunk/pom.xml Wed Jun 18 01:19:58 2008 > @@ -832,11 +832,16 @@ > <artifactId>joda-time</artifactId> > <version>1.5.2</version> > </dependency> > - <dependency> > + <dependency> > <groupId>rome</groupId> > <artifactId>rome</artifactId> > <version>0.9</version> > - </dependency> > + </dependency> > + <dependency> > + <groupId>com.ibm.icu</groupId> > + <artifactId>icu4j</artifactId> > + <version>3.8</version> > + </dependency> > </dependencies> > </dependencyManagement> > </project> > > >

