Title: [204417] trunk
Revision
204417
Author
achristen...@apple.com
Date
2016-08-12 13:03:49 -0700 (Fri, 12 Aug 2016)

Log Message

Initial URLParser implementation
https://bugs.webkit.org/show_bug.cgi?id=160811

Reviewed by Brady Eidson.

Source/WebCore:

There are a lot of missing parts, but it works in one case, so I test that one case.

* platform/URLParser.cpp:
(WebCore::isC0Control):
(WebCore::isC0ControlOrSpace):
(WebCore::isTabOrNewline):
(WebCore::isASCIIDigit):
(WebCore::isASCIIAlpha):
(WebCore::isASCIIAlphanumeric):
(WebCore::isSpecialScheme):
(WebCore::URLParser::parse):

Tools:

* TestWebKitAPI/Tests/WebCore/URLParser.cpp:
(TestWebKitAPI::eq):
(TestWebKitAPI::checkURL):
(TestWebKitAPI::TEST_F):

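Modified Paths

trunk/Source/WebCore/ChangeLog
trunk/Source/WebCore/platform/URLParser.cpp
trunk/Tools/ChangeLog
trunk/Tools/TestWebKitAPI/Tests/WebCore/URLParser.cpp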

Diff

Modified: trunk/Source/WebCore/ChangeLog (204416 => 204417)


--- trunk/Source/WebCore/ChangeLog	2016-08-12 18:53:51 UTC (rev 204416)
+++ trunk/Source/WebCore/ChangeLog	2016-08-12 20:03:49 UTC (rev 204417)
@@ -1,3 +1,22 @@
+2016-08-12  Alex Christensen  <achristen...@webkit.org>
+
+        Initial URLParser implementation
+        https://bugs.webkit.org/show_bug.cgi?id=160811
+
+        Reviewed by Brady Eidson.
+
+        There are a lot of missing parts, but it works in one case, so I test that one case.
+
+        * platform/URLParser.cpp:
+        (WebCore::isC0Control):
+        (WebCore::isC0ControlOrSpace):
+        (WebCore::isTabOrNewline):
+        (WebCore::isASCIIDigit):
+        (WebCore::isASCIIAlpha):
+        (WebCore::isASCIIAlphanumeric):
+        (WebCore::isSpecialScheme):
+        (WebCore::URLParser::parse):
+
 2016-08-12  Commit Queue  <commit-qu...@webkit.org>
 
         Unreviewed, rolling out r204404.

Modified: trunk/Source/WebCore/platform/URLParser.cpp (204416 => 204417)


--- trunk/Source/WebCore/platform/URLParser.cpp	2016-08-12 18:53:51 UTC (rev 204416)
+++ trunk/Source/WebCore/platform/URLParser.cpp	2016-08-12 20:03:49 UTC (rev 204417)
@@ -25,12 +25,301 @@
 
 #include "config.h"
 #include "URLParser.h"
+#include "NotImplemented.h"
 
+#include <wtf/text/StringBuilder.h>
+
 namespace WebCore {
 
-Optional<URL> URLParser::parse(const String&, const URL&, const TextEncoding&)
+// 1. Infrastructure
+static bool isC0Control(const StringView::CodePoints::Iterator& c) { return *c <= 0x001F; }
+static bool isC0ControlOrSpace(const StringView::CodePoints::Iterator& c) { return isC0Control(c) || *c == 0x0020; }
+static bool isTabOrNewline(const StringView::CodePoints::Iterator& c) { return *c == 0x0009 || *c == 0x000A || *c == 0x000D; }
+static bool isASCIIDigit(const StringView::CodePoints::Iterator& c) { return *c >= 0x0030  && *c <= 0x0039; }
+static bool isASCIIAlpha(const StringView::CodePoints::Iterator& c) { return (*c >= 0x0041 && *c <= 0x005A) || (*c >= 0x0061 && *c <= 0x007A); }
+static bool isASCIIAlphanumeric(const StringView::CodePoints::Iterator& c) { return isASCIIDigit(c) || isASCIIAlpha(c); }
+    
+// 4. URLs
+static bool isSpecialScheme(const String& scheme)
 {
-    return Nullopt;
+    return scheme == "ftp"
+        || scheme == "file"
+        || scheme == "gopher"
+        || scheme == "http"
+        || scheme == "https"
+        || scheme == "ws"
+        || scheme == "wss";
 }
+
+Optional<URL> URLParser::parse(const String& input, const URL& base, const TextEncoding&)
+{
+    URL url;
     
+    auto codePoints = StringView(input).codePoints();
+    auto c = codePoints.begin();
+    auto end = codePoints.end();
+    StringBuilder buffer;
+    while (isC0ControlOrSpace(c))
+        ++c;
+    
+    enum class State : uint8_t {
+        SchemeStart,
+        Scheme,
+        SchemeEndCheckForSlashes, // Scheme state steps 2. 8.
+        NoScheme,
+        SpecialRelativeOrAuthority,
+        PathOrAuthority,
+        Relative,
+        RelativeSlash,
+        SpecialAuthoritySlashes,
+        SpecialAuthorityIgnoreSlashes,
+        Authority,
+        Host,
+        Hostname,
+        Port,
+        File,
+        FileSlash,
+        FileHost,
+        PathStart,
+        Path,
+        CannotBeABaseURLPath,
+        Query,
+        Fragment,
+    };
+
+#define LOG_STATE(x)
+
+    State state = State::SchemeStart;
+    while (c != end) {
+        if (isTabOrNewline(c)) {
+            ++c;
+            continue;
+        }
+
+        switch (state) {
+        case State::SchemeStart:
+            LOG_STATE("SchemeStart");
+            if (isASCIIAlpha(c)) {
+                buffer.append(toASCIILower(*c));
+                state = State::Scheme;
+            } else
+                state = State::NoScheme;
+            ++c;
+            break;
+        case State::Scheme:
+            LOG_STATE("Scheme");
+            if (isASCIIAlphanumeric(c) || *c == '+' || *c == '-' || *c == '.')
+                buffer.append(toASCIILower(*c));
+            else if (*c == ':') {
+                url.m_schemeEnd = buffer.length();
+                String urlScheme = buffer.toString(); // FIXME: Find a way to do this without shrinking the buffer.
+                if (urlScheme == "file")
+                    state = State::File;
+                else if (isSpecialScheme(urlScheme)) {
+                    if (base.protocol() == urlScheme)
+                        state = State::SpecialRelativeOrAuthority;
+                    else
+                        state = State::SpecialAuthoritySlashes;
+                } else
+                    state = State::SchemeEndCheckForSlashes;
+                buffer.append(':');
+            } else {
+                buffer.clear();
+                state = State::NoScheme;
+                // FIXME: Find a way to start over here.
+                notImplemented();
+                continue;
+            }
+            ++c;
+            break;
+        case State::SchemeEndCheckForSlashes:
+            LOG_STATE("SchemeEndCheckForSlashes");
+            if (*c == '/') {
+                state = State::PathOrAuthority;
+                ++c;
+            } else
+                state = State::CannotBeABaseURLPath;
+            break;
+        case State::NoScheme:
+            LOG_STATE("NoScheme");
+            notImplemented();
+            ++c;
+            break;
+        case State::SpecialRelativeOrAuthority:
+            LOG_STATE("SpecialRelativeOrAuthority");
+            if (*c == '/') {
+                ++c;
+                if (c == end)
+                    return Nullopt;
+                if (*c == '/') {
+                    state = State::SpecialAuthorityIgnoreSlashes;
+                    ++c;
+                } else
+                    notImplemented();
+            } else
+                state = State::Relative;
+            break;
+        case State::PathOrAuthority:
+            LOG_STATE("PathOrAuthority");
+            notImplemented();
+            ++c;
+            break;
+        case State::Relative:
+            LOG_STATE("Relative");
+            notImplemented();
+            ++c;
+            break;
+        case State::RelativeSlash:
+            LOG_STATE("RelativeSlash");
+            notImplemented();
+            ++c;
+            break;
+        case State::SpecialAuthoritySlashes:
+            LOG_STATE("SpecialAuthoritySlashes");
+            if (*c == '/') {
+                ++c;
+                if (c == end)
+                    return Nullopt;
+                buffer.append('/');
+                if (*c == '/') {
+                    buffer.append('/');
+                    state = State::SpecialAuthorityIgnoreSlashes;
+                    ++c;
+                    break;
+                }
+                notImplemented();
+            } else
+                notImplemented();
+            ++c;
+            break;
+        case State::SpecialAuthorityIgnoreSlashes:
+            LOG_STATE("SpecialAuthorityIgnoreSlashes");
+            if (*c != '/' && *c != '\\') {
+                state = State::Authority;
+                break;
+            }
+            notImplemented();
+            ++c;
+            break;
+        case State::Authority:
+            LOG_STATE("Authority");
+            if (!url.m_userStart)
+                url.m_userStart = buffer.length();
+            if (*c == '@') {
+                url.m_passwordEnd = buffer.length();
+                buffer.append('@');
+                state = State::Host;
+                notImplemented();
+            } else if (*c == ':') {
+                url.m_userEnd = buffer.length();
+                buffer.append(*c);
+            } else {
+                if (*c == '/' || *c == '?' || *c == '#') {
+                    url.m_passwordEnd = buffer.length();
+                    state = State::Host;
+                }
+                buffer.append(*c);
+            }
+            ++c;
+            break;
+        case State::Host:
+        case State::Hostname:
+            LOG_STATE("Host/Hostname");
+            if (*c == ':') {
+                url.m_hostEnd = buffer.length();
+                buffer.append(':');
+                state = State::Port;
+            } else if (*c == '/' || *c == '?' || *c == '#') {
+                url.m_hostEnd = buffer.length();
+                state = State::Path;
+                continue;
+            } else
+                buffer.append(*c);
+            ++c;
+            break;
+        case State::Port:
+            LOG_STATE("Port");
+            if (isASCIIDigit(c)) {
+                buffer.append(*c);
+            } else if (*c == '/' || *c == '?' || *c == '#') {
+                url.m_portEnd = buffer.length();
+                state = State::PathStart;
+                continue;
+            } else
+                return Nullopt;
+            ++c;
+            break;
+        case State::File:
+            LOG_STATE("File");
+            notImplemented();
+            ++c;
+            break;
+        case State::FileSlash:
+            LOG_STATE("FileSlash");
+            notImplemented();
+            ++c;
+            break;
+        case State::FileHost:
+            LOG_STATE("FileHost");
+            notImplemented();
+            ++c;
+            break;
+        case State::PathStart:
+            LOG_STATE("PathStart");
+            state = State::Path;
+            continue;
+        case State::Path:
+            LOG_STATE("Path");
+            if (*c == '/') {
+                ++c;
+                if (c == end)
+                    return Nullopt;
+                if (*c == '.') {
+                    ++c;
+                    if (c == end)
+                        return Nullopt;
+                    if (*c == '.')
+                        notImplemented();
+                    notImplemented();
+                }
+                buffer.append('/');
+            } else if (*c == '?') {
+                url.m_pathEnd = buffer.length();
+                state = State::Query;
+                continue;
+            } else if (*c == '#') {
+                url.m_pathEnd = buffer.length();
+                state = State::Fragment;
+                continue;
+            }
+            // FIXME: Percent encode c
+            buffer.append(*c);
+            ++c;
+            break;
+        case State::CannotBeABaseURLPath:
+            LOG_STATE("CannotBeABaseURLPath");
+            notImplemented();
+            ++c;
+            break;
+        case State::Query:
+            LOG_STATE("Query");
+            if (*c == '#') {
+                url.m_queryEnd = buffer.length();
+                state = State::Fragment;
+                continue;
+            }
+            buffer.append(*c);
+            ++c;
+            break;
+        case State::Fragment:
+            LOG_STATE("Fragment");
+            buffer.append(*c);
+            ++c;
+            break;
+        }
+    }
+    url.m_string = buffer.toString();
+    return url;
+}
+    
 } // namespace WebCore

Modified: trunk/Tools/ChangeLog (204416 => 204417)


--- trunk/Tools/ChangeLog	2016-08-12 18:53:51 UTC (rev 204416)
+++ trunk/Tools/ChangeLog	2016-08-12 20:03:49 UTC (rev 204417)
@@ -1,3 +1,15 @@
+2016-08-12  Alex Christensen  <achristen...@webkit.org>
+
+        Initial URLParser implementation
+        https://bugs.webkit.org/show_bug.cgi?id=160811
+
+        Reviewed by Brady Eidson.
+
+        * TestWebKitAPI/Tests/WebCore/URLParser.cpp:
+        (TestWebKitAPI::eq):
+        (TestWebKitAPI::checkURL):
+        (TestWebKitAPI::TEST_F):
+
 2016-08-12  Brady Eidson  <beid...@apple.com>
 
         Fix the 32-bit Mac build after:

Modified: trunk/Tools/TestWebKitAPI/Tests/WebCore/URLParser.cpp (204416 => 204417)


--- trunk/Tools/TestWebKitAPI/Tests/WebCore/URLParser.cpp	2016-08-12 18:53:51 UTC (rev 204416)
+++ trunk/Tools/TestWebKitAPI/Tests/WebCore/URLParser.cpp	2016-08-12 20:03:49 UTC (rev 204417)
@@ -38,9 +38,34 @@
     }
 };
 
+struct ExpectedParts {
+    String protocol;
+    String user;
+    String password;
+    String host;
+    unsigned short port;
+    String path;
+    String query;
+    String fragment;
+};
+    
+static void eq(const String& s1, const String& s2) { EXPECT_STREQ(s1.utf8().data(), s2.utf8().data()); }
+static void checkURL(const URL& url, const ExpectedParts& parts)
+{
+    eq(url.protocol(), parts.protocol);
+    eq(url.user(), parts.user);
+    eq(url.pass(), parts.password);
+    eq(url.host(), parts.host);
+    EXPECT_EQ(url.port(), parts.port);
+    eq(url.path(), parts.path);
+    eq(url.query(), parts.query);
+    eq(url.fragmentIdentifier(), parts.fragment);
+}
+
 TEST_F(URLParserTest, Parse)
 {
-    EXPECT_TRUE(WebCore::URLParser::parse("invalid") == Nullopt);
+    auto url = WebCore::URLParser::parse("http://user:pass@webkit.org:123/path?query#fragment");
+    checkURL(url.value(), {"http", "user", "pass", "webkit.org", 123, "/path", "query", "fragment"});
 }
 
 } // namespace TestWebKitAPI
_______________________________________________
webkit-changes mailing list
webkit-changes@lists.webkit.org
https://lists.webkit.org/mailman/listinfo/webkit-changes

Reply via email to