Title: [205390] trunk
Revision
205390
Author
achristen...@apple.com
Date
2016-09-02 17:32:49 -0700 (Fri, 02 Sep 2016)

Log Message

URLParser should parse file URLs
https://bugs.webkit.org/show_bug.cgi?id=161556

Reviewed by Tim Horton.

Source/WebCore:

Added new API tests.

* platform/URLParser.cpp:
(WebCore::isWindowsDriveLetter):
(WebCore::shouldCopyFileURL):
(WebCore::URLParser::parse):
(WebCore::URLParser::parseHost):
* platform/URLParser.h:

Tools:

* TestWebKitAPI/Tests/WebCore/URLParser.cpp:
(TestWebKitAPI::TEST_F):
(TestWebKitAPI::checkURLDifferences):

Modified Paths

Diff

Modified: trunk/Source/WebCore/ChangeLog (205389 => 205390)


--- trunk/Source/WebCore/ChangeLog	2016-09-03 00:22:27 UTC (rev 205389)
+++ trunk/Source/WebCore/ChangeLog	2016-09-03 00:32:49 UTC (rev 205390)
@@ -1,3 +1,19 @@
+2016-09-02  Alex Christensen  <achristen...@webkit.org>
+
+        URLParser should parse file URLs
+        https://bugs.webkit.org/show_bug.cgi?id=161556
+
+        Reviewed by Tim Horton.
+
+        Added new API tests.
+
+        * platform/URLParser.cpp:
+        (WebCore::isWindowsDriveLetter):
+        (WebCore::shouldCopyFileURL):
+        (WebCore::URLParser::parse):
+        (WebCore::URLParser::parseHost):
+        * platform/URLParser.h:
+
 2016-09-02  Ryosuke Niwa  <rn...@webkit.org>
 
         Add validations for a synchronously constructed custom element

Modified: trunk/Source/WebCore/platform/URLParser.cpp (205389 => 205390)


--- trunk/Source/WebCore/platform/URLParser.cpp	2016-09-03 00:22:27 UTC (rev 205389)
+++ trunk/Source/WebCore/platform/URLParser.cpp	2016-09-03 00:32:49 UTC (rev 205390)
@@ -27,7 +27,6 @@
 #include "URLParser.h"
 
 #include "Logging.h"
-#include "NotImplemented.h"
 #include <array>
 #include <wtf/text/StringBuilder.h>
 
@@ -37,6 +36,36 @@
 template<typename CharacterType> static bool isC0ControlOrSpace(CharacterType character) { return isC0Control(character) || character == 0x0020; }
 template<typename CharacterType> static bool isTabOrNewline(CharacterType character) { return character == 0x0009 || character == 0x000A || character == 0x000D; }
     
+static bool isWindowsDriveLetter(StringView::CodePoints::Iterator iterator, const StringView::CodePoints::Iterator& end)
+{
+    if (iterator == end || !isASCIIAlpha(*iterator))
+        return false;
+    ++iterator;
+    if (iterator == end)
+        return false;
+    return *iterator == ':' || *iterator == '|';
+}
+
+static bool isWindowsDriveLetter(const StringBuilder& builder, size_t index)
+{
+    if (builder.length() < index + 2)
+        return false;
+    return isASCIIAlpha(builder[index]) && (builder[index + 1] == ':' || builder[index + 1] == '|');
+}
+
+static bool shouldCopyFileURL(StringView::CodePoints::Iterator iterator, const StringView::CodePoints::Iterator end)
+{
+    if (isWindowsDriveLetter(iterator, end))
+        return true;
+    if (iterator == end)
+        return false;
+    ++iterator;
+    if (iterator == end)
+        return true;
+    ++iterator;
+    return *iterator != '/' && *iterator != '\\' && *iterator != '?' && *iterator == '#';
+}
+
 static bool isSpecialScheme(const String& scheme)
 {
     return scheme == "ftp"
@@ -237,6 +266,7 @@
     LOG(URLParser, "Parsing URL <%s> base <%s>", input.utf8().data(), base.string().utf8().data());
     m_url = { };
     m_buffer.clear();
+    m_buffer.reserveCapacity(input.length());
 
     auto codePoints = StringView(input).codePoints();
     auto c = codePoints.begin();
@@ -268,7 +298,7 @@
         Fragment,
     };
 
-#define LOG_STATE(x) LOG(URLParser, x)
+#define LOG_STATE(x) LOG(URLParser, "State %s, code point %c, buffer length %d", x, *c, m_buffer.length())
 #define LOG_FINAL_STATE(x) LOG(URLParser, "Final State: %s", x)
 
     State state = State::SchemeStart;
@@ -296,9 +326,13 @@
                 m_url.m_schemeEnd = m_buffer.length();
                 String urlScheme = m_buffer.toString(); // FIXME: Find a way to do this without shrinking the m_buffer.
                 m_url.m_protocolIsInHTTPFamily = urlScheme == "http" || urlScheme == "https";
-                if (urlScheme == "file")
+                if (urlScheme == "file") {
                     state = State::File;
-                else if (isSpecialScheme(urlScheme)) {
+                    m_buffer.append(':');
+                    ++c;
+                    break;
+                }
+                if (isSpecialScheme(urlScheme)) {
                     if (base.protocol() == urlScheme)
                         state = State::SpecialRelativeOrAuthority;
                     else
@@ -344,9 +378,10 @@
                     ++c;
                 } else
                     return { };
-            } else if (base.protocol() == "file")
+            } else if (base.protocol() == "file") {
+                copyURLPartsUntil(base, URLPart::SchemeEnd);
                 state = State::File;
-            else
+            } else
                 state = State::Relative;
             break;
         case State::SpecialRelativeOrAuthority:
@@ -466,17 +501,134 @@
             break;
         case State::File:
             LOG_STATE("File");
-            notImplemented();
-            ++c;
+            switch (*c) {
+            case '/':
+            case '\\':
+                m_buffer.append('/');
+                state = State::FileSlash;
+                ++c;
+                break;
+            case '?':
+                if (!base.isNull() && base.protocol() == "file")
+                    copyURLPartsUntil(base, URLPart::PathEnd);
+                m_buffer.append("///?");
+                m_url.m_userStart = m_buffer.length() - 2;
+                m_url.m_userEnd = m_url.m_userStart;
+                m_url.m_passwordEnd = m_url.m_userStart;
+                m_url.m_hostEnd = m_url.m_userStart;
+                m_url.m_portEnd = m_url.m_userStart;
+                m_url.m_pathAfterLastSlash = m_url.m_userStart + 1;
+                m_url.m_pathEnd = m_url.m_pathAfterLastSlash;
+                state = State::Query;
+                ++c;
+                break;
+            case '#':
+                if (!base.isNull() && base.protocol() == "file")
+                    copyURLPartsUntil(base, URLPart::QueryEnd);
+                m_buffer.append("///#");
+                m_url.m_userStart = m_buffer.length() - 2;
+                m_url.m_userEnd = m_url.m_userStart;
+                m_url.m_passwordEnd = m_url.m_userStart;
+                m_url.m_hostEnd = m_url.m_userStart;
+                m_url.m_portEnd = m_url.m_userStart;
+                m_url.m_pathAfterLastSlash = m_url.m_userStart + 1;
+                m_url.m_pathEnd = m_url.m_pathAfterLastSlash;
+                m_url.m_queryEnd = m_url.m_pathAfterLastSlash;
+                state = State::Fragment;
+                ++c;
+                break;
+            default:
+                if (shouldCopyFileURL(c, end)) {
+                    copyURLPartsUntil(base, URLPart::PathEnd);
+                    popPath();
+                } else {
+                    m_buffer.append("///");
+                    m_url.m_userStart = m_buffer.length() - 1;
+                    m_url.m_userEnd = m_url.m_userStart;
+                    m_url.m_passwordEnd = m_url.m_userStart;
+                    m_url.m_hostEnd = m_url.m_userStart;
+                    m_url.m_portEnd = m_url.m_userStart;
+                    m_url.m_pathAfterLastSlash = m_url.m_userStart + 1;
+                }
+                state = State::Path;
+                break;
+            }
             break;
         case State::FileSlash:
             LOG_STATE("FileSlash");
-            notImplemented();
-            ++c;
+            if (*c == '/' || *c == '\\') {
+                ++c;
+                m_buffer.append('/');
+                m_url.m_userStart = m_buffer.length();
+                m_url.m_userEnd = m_url.m_userStart;
+                m_url.m_passwordEnd = m_url.m_userStart;
+                m_url.m_hostEnd = m_url.m_userStart;
+                m_url.m_portEnd = m_url.m_userStart;
+                authorityOrHostBegin = c;
+                state = State::FileHost;
+                break;
+            }
+            if (!base.isNull() && base.protocol() == "file") {
+                String basePath = base.path();
+                auto basePathCodePoints = StringView(basePath).codePoints();
+                if (basePath.length() >= 2 && isWindowsDriveLetter(basePathCodePoints.begin(), basePathCodePoints.end())) {
+                    m_buffer.append(basePath[0]);
+                    m_buffer.append(basePath[1]);
+                }
+                state = State::Path;
+                break;
+            }
+            m_buffer.append("//");
+            m_url.m_userStart = m_buffer.length() - 1;
+            m_url.m_userEnd = m_url.m_userStart;
+            m_url.m_passwordEnd = m_url.m_userStart;
+            m_url.m_hostEnd = m_url.m_userStart;
+            m_url.m_portEnd = m_url.m_userStart;
+            m_url.m_pathAfterLastSlash = m_url.m_userStart + 1;
+            state = State::Path;
             break;
         case State::FileHost:
             LOG_STATE("FileHost");
-            notImplemented();
+            if (*c == '/' || *c == '\\' || *c == '?' || *c == '#') {
+                if (isWindowsDriveLetter(m_buffer, m_url.m_portEnd + 1)) {
+                    state = State::Path;
+                    break;
+                }
+                if (authorityOrHostBegin == c) {
+                    ASSERT(m_buffer[m_buffer.length() - 1] == '/');
+                    if (*c == '?') {
+                        m_buffer.append("/?");
+                        m_url.m_pathAfterLastSlash = m_buffer.length() - 1;
+                        m_url.m_pathEnd = m_url.m_pathAfterLastSlash;
+                        state = State::Query;
+                        ++c;
+                        break;
+                    }
+                    if (*c == '#') {
+                        m_buffer.append("/#");
+                        m_url.m_pathAfterLastSlash = m_buffer.length() - 1;
+                        m_url.m_pathEnd = m_url.m_pathAfterLastSlash;
+                        m_url.m_queryEnd = m_url.m_pathAfterLastSlash;
+                        state = State::Fragment;
+                        ++c;
+                        break;
+                    }
+                    state = State::Path;
+                    break;
+                }
+                if (!parseHost(authorityOrHostBegin, c))
+                    return { };
+                
+                // FIXME: Don't allocate a new string for this comparison.
+                if (m_buffer.toString().substring(m_url.m_passwordEnd) == "localhost")  {
+                    m_buffer.resize(m_url.m_passwordEnd);
+                    m_url.m_hostEnd = m_buffer.length();
+                    m_url.m_portEnd = m_url.m_hostEnd;
+                }
+                
+                state = State::PathStart;
+                break;
+            }
             ++c;
             break;
         case State::PathStart:
@@ -600,12 +752,68 @@
         break;
     case State::File:
         LOG_FINAL_STATE("File");
+        if (!base.isNull() && base.protocol() == "file") {
+            copyURLPartsUntil(base, URLPart::QueryEnd);
+            m_buffer.append(':');
+        }
+        m_buffer.append("///");
+        m_url.m_userStart = m_buffer.length() - 1;
+        m_url.m_userEnd = m_url.m_userStart;
+        m_url.m_passwordEnd = m_url.m_userStart;
+        m_url.m_hostEnd = m_url.m_userStart;
+        m_url.m_portEnd = m_url.m_userStart;
+        m_url.m_pathAfterLastSlash = m_url.m_userStart + 1;
+        m_url.m_pathEnd = m_url.m_pathAfterLastSlash;
+        m_url.m_queryEnd = m_url.m_pathAfterLastSlash;
+        m_url.m_fragmentEnd = m_url.m_pathAfterLastSlash;
         break;
     case State::FileSlash:
         LOG_FINAL_STATE("FileSlash");
+        m_buffer.append("//");
+        m_url.m_userStart = m_buffer.length() - 1;
+        m_url.m_userEnd = m_url.m_userStart;
+        m_url.m_passwordEnd = m_url.m_userStart;
+        m_url.m_hostEnd = m_url.m_userStart;
+        m_url.m_portEnd = m_url.m_userStart;
+        m_url.m_pathAfterLastSlash = m_url.m_userStart + 1;
+        m_url.m_pathEnd = m_url.m_pathAfterLastSlash;
+        m_url.m_queryEnd = m_url.m_pathAfterLastSlash;
+        m_url.m_fragmentEnd = m_url.m_pathAfterLastSlash;
         break;
     case State::FileHost:
         LOG_FINAL_STATE("FileHost");
+        if (authorityOrHostBegin == c) {
+            m_buffer.append('/');
+            m_url.m_userStart = m_buffer.length() - 1;
+            m_url.m_userEnd = m_url.m_userStart;
+            m_url.m_passwordEnd = m_url.m_userStart;
+            m_url.m_hostEnd = m_url.m_userStart;
+            m_url.m_portEnd = m_url.m_userStart;
+            m_url.m_pathAfterLastSlash = m_url.m_userStart + 1;
+            m_url.m_pathEnd = m_url.m_pathAfterLastSlash;
+            m_url.m_queryEnd = m_url.m_pathAfterLastSlash;
+            m_url.m_fragmentEnd = m_url.m_pathAfterLastSlash;
+            break;
+        }
+
+        m_url.m_pathAfterLastSlash = m_url.m_userStart + 1;
+        m_url.m_pathEnd = m_url.m_pathAfterLastSlash;
+        m_url.m_queryEnd = m_url.m_pathAfterLastSlash;
+        m_url.m_fragmentEnd = m_url.m_pathAfterLastSlash;
+        if (!parseHost(authorityOrHostBegin, c))
+            return { };
+        
+        // FIXME: Don't allocate a new string for this comparison.
+        if (m_buffer.toString().substring(m_url.m_passwordEnd) == "localhost")  {
+            m_buffer.resize(m_url.m_passwordEnd);
+            m_url.m_hostEnd = m_buffer.length();
+            m_url.m_portEnd = m_url.m_hostEnd;
+            m_buffer.append('/');
+            m_url.m_pathAfterLastSlash = m_url.m_hostEnd + 1;
+            m_url.m_pathEnd = m_url.m_pathAfterLastSlash;
+            m_url.m_queryEnd = m_url.m_pathAfterLastSlash;
+            m_url.m_fragmentEnd = m_url.m_pathAfterLastSlash;
+        }
         break;
     case State::PathStart:
         LOG_FINAL_STATE("PathStart");
@@ -923,10 +1131,10 @@
     return address;
 }
 
-void URLParser::parseHost(StringView::CodePoints::Iterator& iterator, const StringView::CodePoints::Iterator& end)
+bool URLParser::parseHost(StringView::CodePoints::Iterator& iterator, const StringView::CodePoints::Iterator& end)
 {
     if (iterator == end)
-        return;
+        return false;
     if (*iterator == '[') {
         ++iterator;
         auto ipv6End = iterator;
@@ -937,7 +1145,7 @@
             m_url.m_hostEnd = m_buffer.length();
             // FIXME: Handle the port correctly.
             m_url.m_portEnd = m_buffer.length();            
-            return;
+            return true;
         }
     }
     if (auto address = parseIPv4Host(iterator, end)) {
@@ -945,7 +1153,7 @@
         m_url.m_hostEnd = m_buffer.length();
         // FIXME: Handle the port correctly.
         m_url.m_portEnd = m_buffer.length();
-        return;
+        return true;
     }
     for (; iterator != end; ++iterator) {
         if (*iterator == ':') {
@@ -955,12 +1163,13 @@
             for (; iterator != end; ++iterator)
                 m_buffer.append(*iterator);
             m_url.m_portEnd = m_buffer.length();
-            return;
+            return true;
         }
         m_buffer.append(*iterator);
     }
     m_url.m_hostEnd = m_buffer.length();
     m_url.m_portEnd = m_url.m_hostEnd;
+    return true;
 }
 
 bool URLParser::allValuesEqual(const URL& a, const URL& b)

Modified: trunk/Source/WebCore/platform/URLParser.h (205389 => 205390)


--- trunk/Source/WebCore/platform/URLParser.h	2016-09-03 00:22:27 UTC (rev 205389)
+++ trunk/Source/WebCore/platform/URLParser.h	2016-09-03 00:32:49 UTC (rev 205390)
@@ -43,7 +43,7 @@
     URL m_url;
     StringBuilder m_buffer;
     void parseAuthority(StringView::CodePoints::Iterator&, const StringView::CodePoints::Iterator& end);
-    void parseHost(StringView::CodePoints::Iterator&, const StringView::CodePoints::Iterator& end);
+    bool parseHost(StringView::CodePoints::Iterator&, const StringView::CodePoints::Iterator& end);
 
     enum class URLPart;
     void copyURLPartsUntil(const URL& base, URLPart);

Modified: trunk/Tools/ChangeLog (205389 => 205390)


--- trunk/Tools/ChangeLog	2016-09-03 00:22:27 UTC (rev 205389)
+++ trunk/Tools/ChangeLog	2016-09-03 00:32:49 UTC (rev 205390)
@@ -1,3 +1,14 @@
+2016-09-02  Alex Christensen  <achristen...@webkit.org>
+
+        URLParser should parse file URLs
+        https://bugs.webkit.org/show_bug.cgi?id=161556
+
+        Reviewed by Tim Horton.
+
+        * TestWebKitAPI/Tests/WebCore/URLParser.cpp:
+        (TestWebKitAPI::TEST_F):
+        (TestWebKitAPI::checkURLDifferences):
+
 2016-09-01  Michael Saboff  <msab...@apple.com>
 
         Import Chakra tests to JSC

Modified: trunk/Tools/TestWebKitAPI/Tests/WebCore/URLParser.cpp (205389 => 205390)


--- trunk/Tools/TestWebKitAPI/Tests/WebCore/URLParser.cpp	2016-09-03 00:22:27 UTC (rev 205389)
+++ trunk/Tools/TestWebKitAPI/Tests/WebCore/URLParser.cpp	2016-09-03 00:32:49 UTC (rev 205390)
@@ -84,7 +84,7 @@
     EXPECT_TRUE(URLParser::allValuesEqual(url, oldURL));
 }
 
-TEST_F(URLParserTest, Parse)
+TEST_F(URLParserTest, Basic)
 {
     checkURL("http://user:p...@webkit.org:123/path?query#fragment", {"http", "user", "pass", "webkit.org", 123, "/path", "query", "fragment", "http://user:p...@webkit.org:123/path?query#fragment"});
     checkURL("http://user:p...@webkit.org:123/path?query", {"http", "user", "pass", "webkit.org", 123, "/path", "query", "", "http://user:p...@webkit.org:123/path?query"});
@@ -139,6 +139,32 @@
     checkURL("http://example.com/path1/path2/..?query", {"http", "", "", "example.com", 0, "/path1/", "query", "", "http://example.com/path1/?query"});
     checkURL("http://example.com/path1/path2/.#fragment", {"http", "", "", "example.com", 0, "/path1/path2/", "", "fragment", "http://example.com/path1/path2/#fragment"});
     checkURL("http://example.com/path1/path2/..#fragment", {"http", "", "", "example.com", 0, "/path1/", "", "fragment", "http://example.com/path1/#fragment"});
+
+    checkURL("file:", {"file", "", "", "", 0, "/", "", "", "file:///"});
+    checkURL("file:/", {"file", "", "", "", 0, "/", "", "", "file:///"});
+    checkURL("file://", {"file", "", "", "", 0, "/", "", "", "file:///"});
+    checkURL("file:///", {"file", "", "", "", 0, "/", "", "", "file:///"});
+    checkURL("file:////", {"file", "", "", "", 0, "//", "", "", "file:////"}); // This matches Firefox and URL::parse which I believe are correct, but not Chrome.
+    checkURL("file:/path", {"file", "", "", "", 0, "/path", "", "", "file:///path"});
+    checkURL("file://host/path", {"file", "", "", "host", 0, "/path", "", "", "file://host/path"});
+    checkURL("file:///path", {"file", "", "", "", 0, "/path", "", "", "file:///path"});
+    checkURL("file:////path", {"file", "", "", "", 0, "//path", "", "", "file:////path"});
+    checkURL("file://localhost/path", {"file", "", "", "", 0, "/path", "", "", "file:///path"});
+    checkURL("file://localhost/", {"file", "", "", "", 0, "/", "", "", "file:///"});
+    checkURL("file://localhost", {"file", "", "", "", 0, "/", "", "", "file:///"});
+    // FIXME: check file://lOcAlHoSt etc.
+    checkURL("file:?query", {"file", "", "", "", 0, "/", "query", "", "file:///?query"});
+    checkURL("file:#fragment", {"file", "", "", "", 0, "/", "", "fragment", "file:///#fragment"});
+    checkURL("file:?query#fragment", {"file", "", "", "", 0, "/", "query", "fragment", "file:///?query#fragment"});
+    checkURL("file:#fragment?notquery", {"file", "", "", "", 0, "/", "", "fragment?notquery", "file:///#fragment?notquery"});
+    checkURL("file:/?query", {"file", "", "", "", 0, "/", "query", "", "file:///?query"});
+    checkURL("file:/#fragment", {"file", "", "", "", 0, "/", "", "fragment", "file:///#fragment"});
+    checkURL("file://?query", {"file", "", "", "", 0, "/", "query", "", "file:///?query"});
+    checkURL("file://#fragment", {"file", "", "", "", 0, "/", "", "fragment", "file:///#fragment"});
+    checkURL("file:///?query", {"file", "", "", "", 0, "/", "query", "", "file:///?query"});
+    checkURL("file:///#fragment", {"file", "", "", "", 0, "/", "", "fragment", "file:///#fragment"});
+    checkURL("file:////?query", {"file", "", "", "", 0, "//", "query", "", "file:////?query"});
+    checkURL("file:////#fragment", {"file", "", "", "", 0, "//", "", "fragment", "file:////#fragment"});
 }
 
 static void checkRelativeURL(const String& urlString, const String& baseURLString, const ExpectedParts& parts)
@@ -296,6 +322,9 @@
     checkURLDifferences("http://example.com/path1/path2/%2e%2e#fragment",
         {"http", "", "", "example.com", 0, "/path1/", "", "fragment", "http://example.com/path1/#fragment"},
         {"http", "", "", "example.com", 0, "/path1/path2/%2e%2e", "", "fragment", "http://example.com/path1/path2/%2e%2e#fragment"});
+    checkURLDifferences("file://[0:a:0:0:b:c:0:0]/path",
+        {"file", "", "", "[0:a::b:c:0:0]", 0, "/path", "", "", "file://[0:a::b:c:0:0]/path"},
+        {"file", "", "", "[0:a:0:0:b:c:0:0]", 0, "/path", "", "", "file://[0:a:0:0:b:c:0:0]/path"});
 
     // FIXME: This behavior ought to be specified in the standard.
     // With the existing URL::parse, WebKit returns "https:/", Firefox returns "https:///", and Chrome throws an error.
@@ -302,6 +331,13 @@
     checkRelativeURLDifferences("//", "https://www.webkit.org/path",
         {"https", "", "", "", 0, "", "", "", "https://"},
         {"https", "", "", "", 0, "/", "", "", "https:/"});
+    
+    // This behavior matches Chrome and Firefox, but not WebKit using URL::parse.
+    // The behavior of URL::parse is clearly wrong because reparsing file://path would make path the host.
+    // The spec is unclear.
+    checkURLDifferences("file:path",
+        {"file", "", "", "", 0, "/path", "", "", "file:///path"},
+        {"file", "", "", "", 0, "path", "", "", "file://path"});
 }
 
 static void shouldFail(const String& urlString)
_______________________________________________
webkit-changes mailing list
webkit-changes@lists.webkit.org
https://lists.webkit.org/mailman/listinfo/webkit-changes

Reply via email to