Reviewers: xtof, rjrjr, jlabanca, jat, pdr

Message:
Rewrite SafeUriHostedModeUtils#isValidUri without a regexp to work around
what looks like a bug in the Sun/Oracle JVM (the error could not be
reproduced with OpenJDK).

This is a follow-up to issue
http://gwt-code-reviews.appspot.com/1443813/ so I set the same
reviewers.

Description:
Rewrite SafeUriHostedModeUtils#isValidUri without a regexp to work around
what looks like a bug in the Sun/Oracle JVM (the error could not be
reproduced with OpenJDK).

Please review this at http://gwt-code-reviews.appspot.com/1449814/

Affected files:
  M user/src/com/google/gwt/safehtml/shared/SafeUriHostedModeUtils.java
  M user/test/com/google/gwt/safehtml/shared/GwtUriUtilsTest.java


Index: user/src/com/google/gwt/safehtml/shared/SafeUriHostedModeUtils.java
diff --git a/user/src/com/google/gwt/safehtml/shared/SafeUriHostedModeUtils.java b/user/src/com/google/gwt/safehtml/shared/SafeUriHostedModeUtils.java
index 7b3dbe2602fd6cca029f889a95de8ada75ca8188..242fb02af3e0eb6814152f48ffae300c3b9de31f 100644
--- a/user/src/com/google/gwt/safehtml/shared/SafeUriHostedModeUtils.java
+++ b/user/src/com/google/gwt/safehtml/shared/SafeUriHostedModeUtils.java
@@ -36,20 +36,16 @@ import java.net.URISyntaxException;
 public class SafeUriHostedModeUtils {

   /**
-   * All valid Web Addresses, i.e. the href-ucschar production from RFC 3987bis.
+   * All valid Web Addresses discrete characters, i.e. the href-ucschar production from RFC
+   * 3987bis, with the exception of ranges.
    *
    * @see <a href="http://tools.ietf.org/html/rfc3986#section-2">RFC 3986</a>
    * @see <a href="http://tools.ietf.org/html/draft-ietf-iri-3987bis-05#section-7.2">RFC 3987bis Web Addresses</a>
    */
-  static final String HREF_UCSCHAR = "("
-    + "["
-    + ":/?#\\[\\]@!$&'()*+,;=" // reserved
-    + "a-zA-Z0-9\\-._~" // iunreserved
-    + " <>\"{}|\\\\^`\u0000-\u001F\u001F-\uD7FF\uE000-\uFFFD" // href-ucschar
-    + "]"
-    + "|"
-    + "[\uD800-\uDBFF][\uDC00-\uDFFF]" // surrogate pairs
-    + ")*";
+  static final String HREF_DISCRETE_UCSCHAR =
+      ":/?#[]@!$&'()*+,;=" // reserved
+      + "-._~"             // iunreserved
+      + " <>\"{}|\\^`";    // href-ucschar

   /**
    * Name of system property that if set, enables checks in server-side code
@@ -104,11 +100,38 @@ public class SafeUriHostedModeUtils {
   }

   private static boolean isValidUri(String uri) {
-    // TODO(xtof): The regex appears to cause stack overflows in some cases.
-    // Investigate and re-enable.
-    // if (!uri.matches(HREF_UCSCHAR)) {
-    //   return false;
-    // }
+    int len = uri.length();
+    int i = 0;
+    while (i < len) {
+      int codePoint = uri.codePointAt(i);
+      i += Character.charCount(codePoint);
+      if (!Character.isValidCodePoint(codePoint)) {
+        return false;
+      }
+      if (Character.isSupplementaryCodePoint(codePoint)) {
+        continue;
+      }
+      // from now on, we know codePoint is in the Basic Multilingual Plane
+      // (i.e. it can be cast to 'char' without loss of information)
+      // Let's take advantage of this to detect unpaired surrogates
+      if (Character.isHighSurrogate((char) codePoint) || Character.isLowSurrogate((char) codePoint)) {
+        return false;
+      }
+      if (HREF_DISCRETE_UCSCHAR.indexOf(codePoint) >= 0) {
+        continue;
+      }
+      // iunreserved ranges
+      if (('a' <= codePoint && codePoint <= 'z') || ('A' <= codePoint && codePoint <= 'Z') || ('0' <= codePoint && codePoint<= '9')) {
+        continue;
+      }
+      // href-ucschar ranges
+      if ((0 <= codePoint && codePoint <= 0x1F) || (0x7F <= codePoint && codePoint <= 0xD7FF) || (0xE000 <= codePoint && codePoint <= 0xFFFD)) {
+        continue;
+      }
+      // unknown char (neither whitelisted nor explicitly blacklisted)
+      return false;
+    }
+
     /*
      * pre-process to turn href-ucschars into ucschars, and encode to URI.
      *
Index: user/test/com/google/gwt/safehtml/shared/GwtUriUtilsTest.java
diff --git a/user/test/com/google/gwt/safehtml/shared/GwtUriUtilsTest.java b/user/test/com/google/gwt/safehtml/shared/GwtUriUtilsTest.java
index f76c66f2df4a380d0122cdefe970e363d96d3504..5025ec0cceb2732f458126c9299868a22c511613 100644
--- a/user/test/com/google/gwt/safehtml/shared/GwtUriUtilsTest.java
+++ b/user/test/com/google/gwt/safehtml/shared/GwtUriUtilsTest.java
@@ -26,9 +26,25 @@ public class GwtUriUtilsTest extends GWTTestCase {
   private static final String JAVASCRIPT_URL = "javascript:alert('BOOM!');";
   private static final String MAILTO_URL = "mailto:f...@example.com";
   private static final String CONSTANT_URL =
-    "http://gwt.google.com/samples/Showcase/Showcase.html?locale=fr#!CwCheckBox";
+      "http://gwt.google.com/samples/Showcase/Showcase.html?locale=fr#!CwCheckBox";
   private static final String EMPTY_GIF_DATA_URL =
-    
"";
+      
"";
+  private static final String LONG_DATA_URL =
+      
""
+          + "BBZG9iZSBJbWFnZVJlYWR5ccllPAAAAwBQTFRFZmZm////AgICAwMDBAQEBQUFBgYGBwcHCAgICQkJCgoKCwsL"
+          + "DAwMDQ0NDg4ODw8PEBAQEREREhISExMTFBQUFRUVFhYWFxcXGBgYGRkZGhoaGxsbHBwcHR0dHh4eHx8fICAgIS"
+          + "EhIiIiIyMjJCQkJSUlJiYmJycnKCgoKSkpKioqKysrLCwsLS0tLi4uLy8vMDAwMTExMjIyMzMzNDQ0NTU1NjY2"
+          + "Nzc3ODg4OTk5Ojo6Ozs7PDw8PT09Pj4+Pz8/QEBAQUFBQkJCQ0NDRERERUVFRkZGR0dHSEhISUlJSkpKS0tLTE"
+          + "xMTU1NTk5OT09PUFBQUVFRUlJSU1NTVFRUVVVVVlZWV1dXWFhYWVlZWlpaW1tbXFxcXV1dXl5eX19fYGBgYWFh"
+          + "YmJiY2NjZGRkZWVlZmZmZ2dnaGhoaWlpampqa2trbGxsbW1tbm5ub29vcHBwcXFxcnJyc3NzdHR0dXV1dnZ2d3"
+          + "d3eHh4eXl5enp6e3t7fHx8fX19fn5+f39/gICAgYGBgoKCg4ODhISEhYWFhoaGh4eHiIiIiYmJioqKi4uLjIyM"
+          + "jY2Njo6Oj4+PkJCQkZGRkpKSk5OTlJSUlZWVlpaWl5eXmJiYmZmZmpqam5ubnJycnZ2dnp6en5+foKCgoaGhoq"
+          + "Kio6OjpKSkpaWlpqamp6enqKioqampqqqqq6urrKysra2trq6ur6+vsLCwsbGxsrKys7OztLS0tbW1tra2t7e3"
+          + "uLi4ubm5urq6u7u7vLy8vb29vr6+v7+/wMDAwcHBwsLCw8PDxMTExcXFxsbGx8fHyMjIycnJysrKy8vLzMzMzc"
+          + "3Nzs7Oz8/P0NDQ0dHR0tLS09PT1NTU1dXV1tbW19fX2NjY2dnZ2tra29vb3Nzc3d3d3t7e39/f4ODg4eHh4uLi"
+          + "4+Pj5OTk5eXl5ubm5+fn6Ojo6enp6urq6+vr7Ozs7e3t7u7u7+/v8PDw8fHx8vLy8/Pz9PT09fX19vb29/f3+P"
+          + "j4+fn5+vr6+/v7/Pz8/f39/v7+////AADF2QAAAAJ0Uk5T/wDltzBKAAAAH0lEQVR42mJghAAGGJ0GAQyMYAok"
+          + "DqLA8mlI6gACDAC8pAaCn/ezogAAAABJRU5ErkJggg==";

   public void testEncode_noEscape() {
     StringBuilder sb = new StringBuilder(UriUtils.DONT_NEED_ENCODING);
@@ -80,6 +96,7 @@ public class GwtUriUtilsTest extends GWTTestCase {
     assertEquals(CONSTANT_URL, UriUtils.fromTrustedString(CONSTANT_URL).asString());
     assertEquals(MAILTO_URL, UriUtils.fromTrustedString(MAILTO_URL).asString());
     assertEquals(EMPTY_GIF_DATA_URL, UriUtils.fromTrustedString(EMPTY_GIF_DATA_URL).asString());
+    assertEquals(LONG_DATA_URL, UriUtils.fromTrustedString(LONG_DATA_URL).asString());
     assertEquals(JAVASCRIPT_URL, UriUtils.fromTrustedString(JAVASCRIPT_URL).asString());
     if (GWT.isClient()) {
       assertEquals(GWT.getModuleBaseURL(),


--
http://groups.google.com/group/Google-Web-Toolkit-Contributors

Reply via email to