- Revision
- 204417
- Author
- achristen...@apple.com
- Date
- 2016-08-12 13:03:49 -0700 (Fri, 12 Aug 2016)
Log Message
Initial URLParser implementation
https://bugs.webkit.org/show_bug.cgi?id=160811
Reviewed by Brady Eidson.
Source/WebCore:
There are a lot of missing parts, but it works in one case, so I test that one case.
* platform/URLParser.cpp:
(WebCore::isC0Control):
(WebCore::isC0ControlOrSpace):
(WebCore::isTabOrNewline):
(WebCore::isASCIIDigit):
(WebCore::isASCIIAlpha):
(WebCore::isASCIIAlphanumeric):
(WebCore::isSpecialScheme):
(WebCore::URLParser::parse):
Tools:
* TestWebKitAPI/Tests/WebCore/URLParser.cpp:
(TestWebKitAPI::eq):
(TestWebKitAPI::checkURL):
(TestWebKitAPI::TEST_F):
Modified Paths
Diff
Modified: trunk/Source/WebCore/ChangeLog (204416 => 204417)
--- trunk/Source/WebCore/ChangeLog 2016-08-12 18:53:51 UTC (rev 204416)
+++ trunk/Source/WebCore/ChangeLog 2016-08-12 20:03:49 UTC (rev 204417)
@@ -1,3 +1,22 @@
+2016-08-12 Alex Christensen <achristen...@webkit.org>
+
+ Initial URLParser implementation
+ https://bugs.webkit.org/show_bug.cgi?id=160811
+
+ Reviewed by Brady Eidson.
+
+ There are a lot of missing parts, but it works in one case, so I test that one case.
+
+ * platform/URLParser.cpp:
+ (WebCore::isC0Control):
+ (WebCore::isC0ControlOrSpace):
+ (WebCore::isTabOrNewline):
+ (WebCore::isASCIIDigit):
+ (WebCore::isASCIIAlpha):
+ (WebCore::isASCIIAlphanumeric):
+ (WebCore::isSpecialScheme):
+ (WebCore::URLParser::parse):
+
2016-08-12 Commit Queue <commit-qu...@webkit.org>
Unreviewed, rolling out r204404.
Modified: trunk/Source/WebCore/platform/URLParser.cpp (204416 => 204417)
--- trunk/Source/WebCore/platform/URLParser.cpp 2016-08-12 18:53:51 UTC (rev 204416)
+++ trunk/Source/WebCore/platform/URLParser.cpp 2016-08-12 20:03:49 UTC (rev 204417)
@@ -25,12 +25,301 @@
#include "config.h"
#include "URLParser.h"
+#include "NotImplemented.h"
+#include <wtf/text/StringBuilder.h>
+
namespace WebCore {
-Optional<URL> URLParser::parse(const String&, const URL&, const TextEncoding&)
+// 1. Infrastructure
+// True when the code point under |c| is at most U+001F.
+static bool isC0Control(const StringView::CodePoints::Iterator& c)
+{
+    const auto codePoint = *c;
+    return codePoint <= 0x001F;
+}
+// True when the code point under |c| satisfies isC0Control() or is U+0020.
+static bool isC0ControlOrSpace(const StringView::CodePoints::Iterator& c)
+{
+    if (isC0Control(c))
+        return true;
+    return *c == 0x0020;
+}
+// True when the code point under |c| is U+0009, U+000A or U+000D.
+static bool isTabOrNewline(const StringView::CodePoints::Iterator& c)
+{
+    switch (*c) {
+    case 0x0009:
+    case 0x000A:
+    case 0x000D:
+        return true;
+    default:
+        return false;
+    }
+}
+// True when the code point under |c| lies in U+0030..U+0039 ('0'..'9').
+static bool isASCIIDigit(const StringView::CodePoints::Iterator& c)
+{
+    const auto codePoint = *c;
+    return 0x0030 <= codePoint && codePoint <= 0x0039;
+}
+// True when the code point under |c| lies in U+0041..U+005A ('A'..'Z') or U+0061..U+007A ('a'..'z').
+static bool isASCIIAlpha(const StringView::CodePoints::Iterator& c)
+{
+    const auto codePoint = *c;
+    const bool isUpperCase = codePoint >= 0x0041 && codePoint <= 0x005A;
+    const bool isLowerCase = codePoint >= 0x0061 && codePoint <= 0x007A;
+    return isUpperCase || isLowerCase;
+}
+// True when the code point under |c| satisfies isASCIIDigit() or isASCIIAlpha().
+static bool isASCIIAlphanumeric(const StringView::CodePoints::Iterator& c)
+{
+    if (isASCIIDigit(c))
+        return true;
+    return isASCIIAlpha(c);
+}
+
+// 4. URLs
+// Returns true when |scheme| equals one of the schemes listed in the table below.
+static bool isSpecialScheme(const String& scheme)
{
- return Nullopt;
+    static const char* const specialSchemes[] = { "ftp", "file", "gopher", "http", "https", "ws", "wss" };
+    for (const char* candidate : specialSchemes) {
+        if (scheme == candidate)
+            return true;
+    }
+    return false;
}
+
+// Parses |input| into a URL with a hand-written state machine that walks the input's code points.
+//
+// Leading code points at or below U+0020 are skipped before parsing starts, and tab/newline
+// code points (U+0009, U+000A, U+000D) are skipped wherever they occur. Accepted code points are
+// accumulated in a StringBuilder that becomes url.m_string, and the buffer offsets of component
+// boundaries (m_schemeEnd, m_userStart, m_userEnd, m_passwordEnd, m_hostEnd, m_portEnd,
+// m_pathEnd, m_queryEnd) are recorded on the URL as the corresponding delimiters are seen.
+//
+// |base| is only consulted to compare its protocol with a special scheme found in |input|.
+// The TextEncoding argument is currently unused.
+//
+// Returns Nullopt when the input ends immediately after a '/' in the SpecialRelativeOrAuthority,
+// SpecialAuthoritySlashes or Path states, immediately after "/." in the Path state, or when the
+// Port state sees a code point that is neither an ASCII digit nor one of '/', '?', '#'.
+// Otherwise returns the URL built so far. Many states are still stubs that call
+// notImplemented() and skip the current code point.
+Optional<URL> URLParser::parse(const String& input, const URL& base, const TextEncoding&)
+{
+    URL url;
+    auto codePoints = StringView(input).codePoints();
+    auto c = codePoints.begin();
+    auto end = codePoints.end();
+    StringBuilder buffer;
+    // Skip leading C0 control and space code points. The bound check is required: for empty or
+    // all-whitespace input the iterator would otherwise be dereferenced at |end|.
+    while (c != end && isC0ControlOrSpace(c))
+        ++c;
+
+    enum class State : uint8_t {
+        SchemeStart,
+        Scheme,
+        SchemeEndCheckForSlashes, // Scheme state steps 2. 8.
+        NoScheme,
+        SpecialRelativeOrAuthority,
+        PathOrAuthority,
+        Relative,
+        RelativeSlash,
+        SpecialAuthoritySlashes,
+        SpecialAuthorityIgnoreSlashes,
+        Authority,
+        Host,
+        Hostname,
+        Port,
+        File,
+        FileSlash,
+        FileHost,
+        PathStart,
+        Path,
+        CannotBeABaseURLPath,
+        Query,
+        Fragment,
+    };
+
+// Expands to nothing; the LOG_STATE() calls below only mark where each state is entered.
+#define LOG_STATE(x)
+
+    State state = State::SchemeStart;
+    while (c != end) {
+        // Tabs and newlines are dropped regardless of the current state.
+        if (isTabOrNewline(c)) {
+            ++c;
+            continue;
+        }
+
+        // Convention inside the switch: `break` after `++c` consumes the code point, while
+        // `continue` (or `break` without `++c`) re-examines the same code point in the new state.
+        switch (state) {
+        case State::SchemeStart:
+            LOG_STATE("SchemeStart");
+            if (isASCIIAlpha(c)) {
+                buffer.append(toASCIILower(*c));
+                state = State::Scheme;
+            } else
+                state = State::NoScheme;
+            ++c;
+            break;
+        case State::Scheme:
+            LOG_STATE("Scheme");
+            if (isASCIIAlphanumeric(c) || *c == '+' || *c == '-' || *c == '.')
+                buffer.append(toASCIILower(*c));
+            else if (*c == ':') {
+                // The scheme is complete; the buffer holds only the lowercased scheme so far.
+                url.m_schemeEnd = buffer.length();
+                String urlScheme = buffer.toString(); // FIXME: Find a way to do this without shrinking the buffer.
+                if (urlScheme == "file")
+                    state = State::File;
+                else if (isSpecialScheme(urlScheme)) {
+                    if (base.protocol() == urlScheme)
+                        state = State::SpecialRelativeOrAuthority;
+                    else
+                        state = State::SpecialAuthoritySlashes;
+                } else
+                    state = State::SchemeEndCheckForSlashes;
+                buffer.append(':');
+            } else {
+                buffer.clear();
+                state = State::NoScheme;
+                // FIXME: Find a way to start over here.
+                notImplemented();
+                continue;
+            }
+            ++c;
+            break;
+        case State::SchemeEndCheckForSlashes:
+            LOG_STATE("SchemeEndCheckForSlashes");
+            if (*c == '/') {
+                state = State::PathOrAuthority;
+                ++c;
+            } else
+                state = State::CannotBeABaseURLPath;
+            break;
+        case State::NoScheme:
+            LOG_STATE("NoScheme");
+            notImplemented();
+            ++c;
+            break;
+        case State::SpecialRelativeOrAuthority:
+            LOG_STATE("SpecialRelativeOrAuthority");
+            if (*c == '/') {
+                ++c;
+                if (c == end)
+                    return Nullopt;
+                if (*c == '/') {
+                    state = State::SpecialAuthorityIgnoreSlashes;
+                    ++c;
+                } else
+                    notImplemented();
+            } else
+                state = State::Relative;
+            break;
+        case State::PathOrAuthority:
+            LOG_STATE("PathOrAuthority");
+            notImplemented();
+            ++c;
+            break;
+        case State::Relative:
+            LOG_STATE("Relative");
+            notImplemented();
+            ++c;
+            break;
+        case State::RelativeSlash:
+            LOG_STATE("RelativeSlash");
+            notImplemented();
+            ++c;
+            break;
+        case State::SpecialAuthoritySlashes:
+            LOG_STATE("SpecialAuthoritySlashes");
+            if (*c == '/') {
+                ++c;
+                if (c == end)
+                    return Nullopt;
+                buffer.append('/');
+                if (*c == '/') {
+                    // Both slashes of "//" were seen; copy them and move on to the authority.
+                    buffer.append('/');
+                    state = State::SpecialAuthorityIgnoreSlashes;
+                    ++c;
+                    break;
+                }
+                notImplemented();
+            } else
+                notImplemented();
+            ++c;
+            break;
+        case State::SpecialAuthorityIgnoreSlashes:
+            LOG_STATE("SpecialAuthorityIgnoreSlashes");
+            if (*c != '/' && *c != '\\') {
+                // First code point that is not a slash: hand it to the Authority state unconsumed.
+                state = State::Authority;
+                break;
+            }
+            notImplemented();
+            ++c;
+            break;
+        case State::Authority:
+            LOG_STATE("Authority");
+            // Record where the user name starts the first time this state runs.
+            if (!url.m_userStart)
+                url.m_userStart = buffer.length();
+            if (*c == '@') {
+                url.m_passwordEnd = buffer.length();
+                buffer.append('@');
+                state = State::Host;
+                notImplemented();
+            } else if (*c == ':') {
+                url.m_userEnd = buffer.length();
+                buffer.append(*c);
+            } else {
+                if (*c == '/' || *c == '?' || *c == '#') {
+                    url.m_passwordEnd = buffer.length();
+                    state = State::Host;
+                }
+                buffer.append(*c);
+            }
+            ++c;
+            break;
+        case State::Host:
+        case State::Hostname:
+            LOG_STATE("Host/Hostname");
+            if (*c == ':') {
+                url.m_hostEnd = buffer.length();
+                buffer.append(':');
+                state = State::Port;
+            } else if (*c == '/' || *c == '?' || *c == '#') {
+                url.m_hostEnd = buffer.length();
+                state = State::Path;
+                continue;
+            } else
+                buffer.append(*c);
+            ++c;
+            break;
+        case State::Port:
+            LOG_STATE("Port");
+            if (isASCIIDigit(c)) {
+                buffer.append(*c);
+            } else if (*c == '/' || *c == '?' || *c == '#') {
+                url.m_portEnd = buffer.length();
+                state = State::PathStart;
+                continue;
+            } else
+                return Nullopt;
+            ++c;
+            break;
+        case State::File:
+            LOG_STATE("File");
+            notImplemented();
+            ++c;
+            break;
+        case State::FileSlash:
+            LOG_STATE("FileSlash");
+            notImplemented();
+            ++c;
+            break;
+        case State::FileHost:
+            LOG_STATE("FileHost");
+            notImplemented();
+            ++c;
+            break;
+        case State::PathStart:
+            LOG_STATE("PathStart");
+            state = State::Path;
+            continue;
+        case State::Path:
+            LOG_STATE("Path");
+            if (*c == '/') {
+                ++c;
+                if (c == end)
+                    return Nullopt;
+                if (*c == '.') {
+                    ++c;
+                    if (c == end)
+                        return Nullopt;
+                    if (*c == '.')
+                        notImplemented();
+                    notImplemented();
+                }
+                buffer.append('/');
+            } else if (*c == '?') {
+                url.m_pathEnd = buffer.length();
+                state = State::Query;
+                continue;
+            } else if (*c == '#') {
+                url.m_pathEnd = buffer.length();
+                state = State::Fragment;
+                continue;
+            }
+            // FIXME: Percent encode c
+            buffer.append(*c);
+            ++c;
+            break;
+        case State::CannotBeABaseURLPath:
+            LOG_STATE("CannotBeABaseURLPath");
+            notImplemented();
+            ++c;
+            break;
+        case State::Query:
+            LOG_STATE("Query");
+            if (*c == '#') {
+                url.m_queryEnd = buffer.length();
+                state = State::Fragment;
+                continue;
+            }
+            buffer.append(*c);
+            ++c;
+            break;
+        case State::Fragment:
+            LOG_STATE("Fragment");
+            buffer.append(*c);
+            ++c;
+            break;
+        }
+    }
+    url.m_string = buffer.toString();
+    return url;
+}
+
} // namespace WebCore
Modified: trunk/Tools/ChangeLog (204416 => 204417)
--- trunk/Tools/ChangeLog 2016-08-12 18:53:51 UTC (rev 204416)
+++ trunk/Tools/ChangeLog 2016-08-12 20:03:49 UTC (rev 204417)
@@ -1,3 +1,15 @@
+2016-08-12 Alex Christensen <achristen...@webkit.org>
+
+ Initial URLParser implementation
+ https://bugs.webkit.org/show_bug.cgi?id=160811
+
+ Reviewed by Brady Eidson.
+
+ * TestWebKitAPI/Tests/WebCore/URLParser.cpp:
+ (TestWebKitAPI::eq):
+ (TestWebKitAPI::checkURL):
+ (TestWebKitAPI::TEST_F):
+
2016-08-12 Brady Eidson <beid...@apple.com>
Fix the 32-bit Mac build after:
Modified: trunk/Tools/TestWebKitAPI/Tests/WebCore/URLParser.cpp (204416 => 204417)
--- trunk/Tools/TestWebKitAPI/Tests/WebCore/URLParser.cpp 2016-08-12 18:53:51 UTC (rev 204416)
+++ trunk/Tools/TestWebKitAPI/Tests/WebCore/URLParser.cpp 2016-08-12 20:03:49 UTC (rev 204417)
@@ -38,9 +38,34 @@
}
};
+struct ExpectedParts { // Expected URL components, in the order checkURL() compares them.
+ String protocol; // compared with url.protocol()
+ String user; // compared with url.user()
+ String password; // compared with url.pass()
+ String host; // compared with url.host()
+ unsigned short port; // compared with url.port()
+ String path; // compared with url.path()
+ String query; // compared with url.query()
+ String fragment; // compared with url.fragmentIdentifier()
+};
+
+// Compares the UTF-8 encodings of |s1| and |s2| as C strings and reports a mismatch
+// through EXPECT_STREQ.
+static void eq(const String& s1, const String& s2)
+{
+    EXPECT_STREQ(
+        s1.utf8().data(),
+        s2.utf8().data());
+}
+static void checkURL(const URL& url, const ExpectedParts& parts) // Checks each accessor of |url| against the matching field of |parts|.
+{
+ eq(url.protocol(), parts.protocol);
+ eq(url.user(), parts.user);
+ eq(url.pass(), parts.password);
+ eq(url.host(), parts.host);
+ EXPECT_EQ(url.port(), parts.port); // the port is the only part compared directly rather than through eq()
+ eq(url.path(), parts.path);
+ eq(url.query(), parts.query);
+ eq(url.fragmentIdentifier(), parts.fragment);
+}
+
TEST_F(URLParserTest, Parse)
{
- EXPECT_TRUE(WebCore::URLParser::parse("invalid") == Nullopt);
+    // Parse one URL that carries every component checkURL() inspects. The one-argument call
+    // form mirrors the call this change replaces.
+    auto url = WebCore::URLParser::parse("http://user:pass@webkit.org:123/path?query#fragment");
+    // Stop on a failed parse so value() is never called on an empty Optional.
+    ASSERT_FALSE(url == Nullopt);
+    checkURL(url.value(), {"http", "user", "pass", "webkit.org", 123, "/path", "query", "fragment"});
}
} // namespace TestWebKitAPI