Title: [206942] trunk
Revision
206942
Author
achristen...@apple.com
Date
2016-10-07 16:06:32 -0700 (Fri, 07 Oct 2016)

Log Message

Non-special URL fragments should percent-encode non-ASCII characters
https://bugs.webkit.org/show_bug.cgi?id=163153

Reviewed by Tim Horton.

Source/WebCore:

This is needed to keep compatibility with data URLs with non-ASCII characters after a '#'
which works in Chrome, Firefox, and Safari, while maintaining compatibility with Chrome, IE, and Edge
which keep non-ASCII characters in the fragments of special URLs.
This was proposed to the spec in https://github.com/whatwg/url/issues/150

Covered by new API tests.

* platform/URLParser.cpp:
(WebCore::URLParser::syntaxViolation):
Removed assertion because we now have fragments that need percent encoding but are all ASCII.
(WebCore::URLParser::fragmentSyntaxViolation):
(WebCore::URLParser::parse):

Tools:

* TestWebKitAPI/Tests/WebCore/URLParser.cpp:
(TestWebKitAPI::TEST_F):

Modified Paths

Diff

Modified: trunk/Source/WebCore/ChangeLog (206941 => 206942)


--- trunk/Source/WebCore/ChangeLog	2016-10-07 22:59:22 UTC (rev 206941)
+++ trunk/Source/WebCore/ChangeLog	2016-10-07 23:06:32 UTC (rev 206942)
@@ -1,3 +1,23 @@
+2016-10-07  Alex Christensen  <achristen...@webkit.org>
+
+        Non-special URL fragments should percent-encode non-ASCII characters
+        https://bugs.webkit.org/show_bug.cgi?id=163153
+
+        Reviewed by Tim Horton.
+
+        This is needed to keep compatibility with data URLs with non-ASCII characters after a '#'
+        which works in Chrome, Firefox, and Safari, while maintaining compatibility with Chrome, IE, and Edge
+        which keep non-ASCII characters in the fragments of special URLs.
+        This was proposed to the spec in https://github.com/whatwg/url/issues/150
+
+        Covered by new API tests.
+
+        * platform/URLParser.cpp:
+        (WebCore::URLParser::syntaxViolation):
+        Removed assertion because we now have fragments that need percent encoding but are all ASCII.
+        (WebCore::URLParser::fragmentSyntaxViolation):
+        (WebCore::URLParser::parse):
+
 2016-10-07  Brent Fulgham  <bfulg...@apple.com>
 
         EventHandler functions that need to guarantee event handler lifetime need to use Ref<Frame>

Modified: trunk/Source/WebCore/platform/URLParser.cpp (206941 => 206942)


--- trunk/Source/WebCore/platform/URLParser.cpp	2016-10-07 22:59:22 UTC (rev 206941)
+++ trunk/Source/WebCore/platform/URLParser.cpp	2016-10-07 23:06:32 UTC (rev 206942)
@@ -1019,7 +1019,6 @@
     
     ASSERT(m_asciiBuffer.isEmpty());
     ASSERT(m_unicodeFragmentBuffer.isEmpty());
-    ASSERT_WITH_MESSAGE(!m_url.m_queryEnd, "syntaxViolation should not be used in the fragment, which might contain non-ASCII code points when serialized");
     size_t codeUnitsToCopy = iterator.codeUnitsSince(reinterpret_cast<const CharacterType*>(m_inputBegin));
     RELEASE_ASSERT(codeUnitsToCopy <= m_inputString.length());
     m_asciiBuffer.reserveCapacity(m_inputString.length());
@@ -1032,10 +1031,10 @@
 template<typename CharacterType>
 void URLParser::fragmentSyntaxViolation(const CodePointIterator<CharacterType>& iterator)
 {
+    ASSERT(m_didSeeUnicodeFragmentCodePoint);
     if (m_didSeeSyntaxViolation)
         return;
     m_didSeeSyntaxViolation = true;
-    m_didSeeUnicodeFragmentCodePoint = true;
 
     ASSERT(m_asciiBuffer.isEmpty());
     ASSERT(m_unicodeFragmentBuffer.isEmpty());
@@ -1814,22 +1813,32 @@
         case State::Fragment:
             do {
                 URL_PARSER_LOG("State Fragment");
-                if (!m_didSeeUnicodeFragmentCodePoint && isASCII(*c))
-                    appendToASCIIBuffer(*c);
-                else {
-                    m_didSeeUnicodeFragmentCodePoint = true;
-                    if (UNLIKELY(m_didSeeSyntaxViolation))
-                        appendCodePoint(m_unicodeFragmentBuffer, *c);
-                    else {
-                        ASSERT(m_asciiBuffer.isEmpty());
-                        ASSERT(m_unicodeFragmentBuffer.isEmpty());
-                    }
+                if (!c.atEnd() && isTabOrNewline(*c)) {
+                    if (m_didSeeUnicodeFragmentCodePoint)
+                        fragmentSyntaxViolation(c);
+                    else
+                        syntaxViolation(c);
+                    ++c;
+                    continue;
                 }
+                if (!m_didSeeUnicodeFragmentCodePoint && isASCII(*c)) {
+                    if (m_urlIsSpecial)
+                        appendToASCIIBuffer(*c);
+                    else
+                        utf8PercentEncode<isInSimpleEncodeSet>(c);
+                } else {
+                    if (m_urlIsSpecial) {
+                        m_didSeeUnicodeFragmentCodePoint = true;
+                        if (UNLIKELY(m_didSeeSyntaxViolation))
+                            appendCodePoint(m_unicodeFragmentBuffer, *c);
+                        else {
+                            ASSERT(m_asciiBuffer.isEmpty());
+                            ASSERT(m_unicodeFragmentBuffer.isEmpty());
+                        }
+                    } else
+                        utf8PercentEncode<isInSimpleEncodeSet>(c);
+                }
                 ++c;
-                while (UNLIKELY(!c.atEnd() && isTabOrNewline(*c))) {
-                    fragmentSyntaxViolation(c);
-                    ++c;
-                }
             } while (!c.atEnd());
             break;
         }

Modified: trunk/Tools/ChangeLog (206941 => 206942)


--- trunk/Tools/ChangeLog	2016-10-07 22:59:22 UTC (rev 206941)
+++ trunk/Tools/ChangeLog	2016-10-07 23:06:32 UTC (rev 206942)
@@ -1,3 +1,13 @@
+2016-10-07  Alex Christensen  <achristen...@webkit.org>
+
+        Non-special URL fragments should percent-encode non-ASCII characters
+        https://bugs.webkit.org/show_bug.cgi?id=163153
+
+        Reviewed by Tim Horton.
+
+        * TestWebKitAPI/Tests/WebCore/URLParser.cpp:
+        (TestWebKitAPI::TEST_F):
+
 2016-10-07  Jonathan Bedard  <jbed...@apple.com>
 
         Build fix for “Move functionality common to Darwin ports into a base class”

Modified: trunk/Tools/TestWebKitAPI/Tests/WebCore/URLParser.cpp (206941 => 206942)


--- trunk/Tools/TestWebKitAPI/Tests/WebCore/URLParser.cpp	2016-10-07 22:59:22 UTC (rev 206941)
+++ trunk/Tools/TestWebKitAPI/Tests/WebCore/URLParser.cpp	2016-10-07 23:06:32 UTC (rev 206942)
@@ -329,6 +329,8 @@
     checkURL("foo:#", {"foo", "", "", "", 0, "", "", "", "foo:#"});
     checkURL("A://", {"a", "", "", "", 0, "//", "", "", "a://"});
     checkURL("aA://", {"aa", "", "", "", 0, "//", "", "", "aa://"});
+    checkURL(utf16String(u"foo://host/#ПП\u0007 a</"), {"foo", "", "", "host", 0, "/", "", "%D0%9F%D0%9F%07 a</", "foo://host/#%D0%9F%D0%9F%07 a</"});
+    checkURL(utf16String(u"foo://host/#\u0007 a</"), {"foo", "", "", "host", 0, "/", "", "%07 a</", "foo://host/#%07 a</"});
 
     // This disagrees with the web platform test for http://:@www.example.com but agrees with Chrome and URL::parse,
     // and Firefox fails the web platform test differently. Maybe the web platform test ought to be changed.
@@ -1058,6 +1060,12 @@
     checkURLDifferences("file://:0/path",
         {"", "", "", "", 0, "", "", "", "file://:0/path"},
         {"file", "", "", "", 0, "/path", "", "", "file://:0/path"});
+    checkURLDifferences(utf16String(u"http://host/#ПП\u0007 a</"),
+        {"http", "", "", "host", 0, "/", "", utf16String(u"ПП\u0007 a</"), utf16String(u"http://host/#ПП\u0007 a</")},
+        {"http", "", "", "host", 0, "/", "", "%D0%9F%D0%9F%07 a</", "http://host/#%D0%9F%D0%9F%07 a</"});
+    checkURLDifferences(utf16String(u"http://host/#\u0007 a</"),
+        {"http", "", "", "host", 0, "/", "", "\a a</", "http://host/#\a a</"},
+        {"http", "", "", "host", 0, "/", "", "%07 a</", "http://host/#%07 a</"});
 }
     
 static void shouldFail(const String& urlString)
_______________________________________________
webkit-changes mailing list
webkit-changes@lists.webkit.org
https://lists.webkit.org/mailman/listinfo/webkit-changes

Reply via email to