Reviewers: xtof, rjrjr, jlabanca, jat, pdr,
Message:
Rewrite SafeUriHostedModeUtils#isValidUri without regexp to work around what
looks like a bug in the Sun/Oracle JVM (error not reproduced with
OpenJDK).
This is a follow-up to issue
http://gwt-code-reviews.appspot.com/1443813/ so I set the same
reviewers.
Description:
Rewrite SafeUriHostedModeUtils#isValidUri without regexp to work around what
looks like a bug in the Sun/Oracle JVM (error not reproduced with
OpenJDK).
Please review this at http://gwt-code-reviews.appspot.com/1449814/
Affected files:
M user/src/com/google/gwt/safehtml/shared/SafeUriHostedModeUtils.java
M user/test/com/google/gwt/safehtml/shared/GwtUriUtilsTest.java
Index: user/src/com/google/gwt/safehtml/shared/SafeUriHostedModeUtils.java
diff --git
a/user/src/com/google/gwt/safehtml/shared/SafeUriHostedModeUtils.java
b/user/src/com/google/gwt/safehtml/shared/SafeUriHostedModeUtils.java
index
7b3dbe2602fd6cca029f889a95de8ada75ca8188..242fb02af3e0eb6814152f48ffae300c3b9de31f
100644
--- a/user/src/com/google/gwt/safehtml/shared/SafeUriHostedModeUtils.java
+++ b/user/src/com/google/gwt/safehtml/shared/SafeUriHostedModeUtils.java
@@ -36,20 +36,16 @@ import java.net.URISyntaxException;
public class SafeUriHostedModeUtils {
/**
- * All valid Web Addresses, i.e. the href-ucschar production from RFC
3987bis.
+ * All discrete characters that are valid in Web Addresses, i.e. the href-ucschar
production from RFC
+ * 3987bis, excluding its character ranges.
*
* @see <a href="http://tools.ietf.org/html/rfc3986#section-2">RFC
3986</a>
* @see <a
href="http://tools.ietf.org/html/draft-ietf-iri-3987bis-05#section-7.2">RFC
3987bis Web Addresses</a>
*/
- static final String HREF_UCSCHAR = "("
- + "["
- + ":/?#\\[\\]@!$&'()*+,;=" // reserved
- + "a-zA-Z0-9\\-._~" // iunreserved
- + " <>\"{}|\\\\^`\u0000-\u001F\u001F-\uD7FF\uE000-\uFFFD" //
href-ucschar
- + "]"
- + "|"
- + "[\uD800-\uDBFF][\uDC00-\uDFFF]" // surrogate pairs
- + ")*";
+ static final String HREF_DISCRETE_UCSCHAR =
+ ":/?#[]@!$&'()*+,;=" // reserved
+ + "-._~" // iunreserved
+ + " <>\"{}|\\^`"; // href-ucschar
/**
* Name of system property that if set, enables checks in server-side
code
@@ -104,11 +100,38 @@ public class SafeUriHostedModeUtils {
}
private static boolean isValidUri(String uri) {
- // TODO(xtof): The regex appears to cause stack overflows in some
cases.
- // Investigate and re-enable.
- // if (!uri.matches(HREF_UCSCHAR)) {
- // return false;
- // }
+ int len = uri.length();
+ int i = 0;
+ while (i < len) {
+ int codePoint = uri.codePointAt(i);
+ i += Character.charCount(codePoint);
+ if (!Character.isValidCodePoint(codePoint)) {
+ return false;
+ }
+ if (Character.isSupplementaryCodePoint(codePoint)) {
+ continue;
+ }
+ // from now on, we know codePoint is in the Basic Multilingual Plane
+ // (i.e. it can be cast to 'char' without loss of information)
+ // Let's take advantage of this to detect unpaired surrogates
+ if (Character.isHighSurrogate((char) codePoint) ||
Character.isLowSurrogate((char) codePoint)) {
+ return false;
+ }
+ if (HREF_DISCRETE_UCSCHAR.indexOf(codePoint) >= 0) {
+ continue;
+ }
+ // iunreserved ranges
+ if (('a' <= codePoint && codePoint <= 'z') || ('A' <= codePoint &&
codePoint <= 'Z') || ('0' <= codePoint && codePoint<= '9')) {
+ continue;
+ }
+ // href-ucschar ranges
+ if ((0 <= codePoint && codePoint <= 0x1F) || (0x7F <= codePoint &&
codePoint <= 0xD7FF) || (0xE000 <= codePoint && codePoint <= 0xFFFD)) {
+ continue;
+ }
+ // unknown char (neither whitelisted nor explicitly blacklisted)
+ return false;
+ }
+
/*
* pre-process to turn href-ucschars into ucschars, and encode to URI.
*
Index: user/test/com/google/gwt/safehtml/shared/GwtUriUtilsTest.java
diff --git a/user/test/com/google/gwt/safehtml/shared/GwtUriUtilsTest.java
b/user/test/com/google/gwt/safehtml/shared/GwtUriUtilsTest.java
index
f76c66f2df4a380d0122cdefe970e363d96d3504..5025ec0cceb2732f458126c9299868a22c511613
100644
--- a/user/test/com/google/gwt/safehtml/shared/GwtUriUtilsTest.java
+++ b/user/test/com/google/gwt/safehtml/shared/GwtUriUtilsTest.java
@@ -26,9 +26,25 @@ public class GwtUriUtilsTest extends GWTTestCase {
private static final String JAVASCRIPT_URL
= "javascript:alert('BOOM!');";
private static final String MAILTO_URL = "mailto:f...@example.com";
private static final String CONSTANT_URL =
-
"http://gwt.google.com/samples/Showcase/Showcase.html?locale=fr#!CwCheckBox";
+
"http://gwt.google.com/samples/Showcase/Showcase.html?locale=fr#!CwCheckBox";
private static final String EMPTY_GIF_DATA_URL =
-
"data:image/gif;base64,R0lGODlhAQABAPABAP///wAAACH5BAEKAAAALAAAAAABAAEAAAICRAEAOw==";
+
"data:image/gif;base64,R0lGODlhAQABAPABAP///wAAACH5BAEKAAAALAAAAAABAAEAAAICRAEAOw==";
+ private static final String LONG_DATA_URL =
+
"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAcAAAAHCAMAAADzjKfhAAAAGXRFWHRTb2Z0d2FyZQ"
+
+ "BBZG9iZSBJbWFnZVJlYWR5ccllPAAAAwBQTFRFZmZm////AgICAwMDBAQEBQUFBgYGBwcHCAgICQkJCgoKCwsL"
+
+ "DAwMDQ0NDg4ODw8PEBAQEREREhISExMTFBQUFRUVFhYWFxcXGBgYGRkZGhoaGxsbHBwcHR0dHh4eHx8fICAgIS"
+
+ "EhIiIiIyMjJCQkJSUlJiYmJycnKCgoKSkpKioqKysrLCwsLS0tLi4uLy8vMDAwMTExMjIyMzMzNDQ0NTU1NjY2"
+
+ "Nzc3ODg4OTk5Ojo6Ozs7PDw8PT09Pj4+Pz8/QEBAQUFBQkJCQ0NDRERERUVFRkZGR0dHSEhISUlJSkpKS0tLTE"
+
+ "xMTU1NTk5OT09PUFBQUVFRUlJSU1NTVFRUVVVVVlZWV1dXWFhYWVlZWlpaW1tbXFxcXV1dXl5eX19fYGBgYWFh"
+
+ "YmJiY2NjZGRkZWVlZmZmZ2dnaGhoaWlpampqa2trbGxsbW1tbm5ub29vcHBwcXFxcnJyc3NzdHR0dXV1dnZ2d3"
+
+ "d3eHh4eXl5enp6e3t7fHx8fX19fn5+f39/gICAgYGBgoKCg4ODhISEhYWFhoaGh4eHiIiIiYmJioqKi4uLjIyM"
+
+ "jY2Njo6Oj4+PkJCQkZGRkpKSk5OTlJSUlZWVlpaWl5eXmJiYmZmZmpqam5ubnJycnZ2dnp6en5+foKCgoaGhoq"
+
+ "Kio6OjpKSkpaWlpqamp6enqKioqampqqqqq6urrKysra2trq6ur6+vsLCwsbGxsrKys7OztLS0tbW1tra2t7e3"
+
+ "uLi4ubm5urq6u7u7vLy8vb29vr6+v7+/wMDAwcHBwsLCw8PDxMTExcXFxsbGx8fHyMjIycnJysrKy8vLzMzMzc"
+
+ "3Nzs7Oz8/P0NDQ0dHR0tLS09PT1NTU1dXV1tbW19fX2NjY2dnZ2tra29vb3Nzc3d3d3t7e39/f4ODg4eHh4uLi"
+
+ "4+Pj5OTk5eXl5ubm5+fn6Ojo6enp6urq6+vr7Ozs7e3t7u7u7+/v8PDw8fHx8vLy8/Pz9PT09fX19vb29/f3+P"
+
+ "j4+fn5+vr6+/v7/Pz8/f39/v7+////AADF2QAAAAJ0Uk5T/wDltzBKAAAAH0lEQVR42mJghAAGGJ0GAQyMYAok"
+ + "DqLA8mlI6gACDAC8pAaCn/ezogAAAABJRU5ErkJggg==";
public void testEncode_noEscape() {
StringBuilder sb = new StringBuilder(UriUtils.DONT_NEED_ENCODING);
@@ -80,6 +96,7 @@ public class GwtUriUtilsTest extends GWTTestCase {
assertEquals(CONSTANT_URL,
UriUtils.fromTrustedString(CONSTANT_URL).asString());
assertEquals(MAILTO_URL,
UriUtils.fromTrustedString(MAILTO_URL).asString());
assertEquals(EMPTY_GIF_DATA_URL,
UriUtils.fromTrustedString(EMPTY_GIF_DATA_URL).asString());
+ assertEquals(LONG_DATA_URL,
UriUtils.fromTrustedString(LONG_DATA_URL).asString());
assertEquals(JAVASCRIPT_URL,
UriUtils.fromTrustedString(JAVASCRIPT_URL).asString());
if (GWT.isClient()) {
assertEquals(GWT.getModuleBaseURL(),
--
http://groups.google.com/group/Google-Web-Toolkit-Contributors