Modified: trunk/Source/WebCore/platform/URLParser.cpp (206647 => 206648)
--- trunk/Source/WebCore/platform/URLParser.cpp 2016-09-30 18:44:16 UTC (rev 206647)
+++ trunk/Source/WebCore/platform/URLParser.cpp 2016-09-30 18:46:28 UTC (rev 206648)
@@ -494,6 +494,7 @@
void URLParser::percentEncodeByte(uint8_t byte)
{
+ ASSERT(m_didSeeSyntaxViolation);
appendToASCIIBuffer('%');
appendToASCIIBuffer(upperNibbleToASCIIHexDigit(byte));
appendToASCIIBuffer(lowerNibbleToASCIIHexDigit(byte));
@@ -562,15 +563,39 @@
appendToASCIIBuffer(byte);
}
}
-
-void URLParser::encodeQuery(const Vector<UChar>& source, const TextEncoding& encoding)
+
+template<typename CharacterType>
+void URLParser::encodeQuery(const Vector<UChar>& source, const TextEncoding& encoding, CodePointIterator<CharacterType> iterator)
{
// FIXME: It is unclear in the spec what to do when encoding fails. The behavior should be specified and tested.
CString encoded = encoding.encode(StringView(source.data(), source.size()), URLEncodedEntitiesForUnencodables);
const char* data = ""
size_t length = encoded.length();
- for (size_t i = 0; i < length; ++i) {
+
+ if (!length == !iterator.atEnd()) {
+ syntaxViolation(iterator);
+ return;
+ }
+
+ size_t i = 0;
+ for (; i < length; ++i) {
+ ASSERT(!iterator.atEnd());
uint8_t byte = data[i];
+ if (UNLIKELY(byte != *iterator)) {
+ syntaxViolation(iterator);
+ break;
+ }
+ if (UNLIKELY(shouldPercentEncodeQueryByte(byte))) {
+ syntaxViolation(iterator);
+ break;
+ }
+ appendToASCIIBuffer(byte);
+ ++iterator;
+ }
+ ASSERT((i == length) == iterator.atEnd());
+ for (; i < length; ++i) {
+ ASSERT(m_didSeeSyntaxViolation);
+ uint8_t byte = data[i];
if (shouldPercentEncodeQueryByte(byte))
percentEncodeByte(byte);
else
@@ -1103,6 +1128,7 @@
}
CodePointIterator<CharacterType> c(input, input + endIndex);
CodePointIterator<CharacterType> authorityOrHostBegin;
+ CodePointIterator<CharacterType> queryBegin;
while (UNLIKELY(!c.atEnd() && isC0ControlOrSpace(*c))) {
syntaxViolation(c);
++c;
@@ -1127,7 +1153,8 @@
PathStart,
Path,
CannotBeABaseURLPath,
- Query,
+ UTF8Query,
+ NonUTF8Query,
Fragment,
};
@@ -1288,8 +1315,13 @@
case '?':
copyURLPartsUntil(base, URLPart::PathEnd, c);
appendToASCIIBuffer('?');
- state = State::Query;
++c;
+ if (isUTF8Encoding)
+ state = State::UTF8Query;
+ else {
+ queryBegin = c;
+ state = State::NonUTF8Query;
+ }
break;
case '#':
copyURLPartsUntil(base, URLPart::QueryEnd, c);
@@ -1431,6 +1463,13 @@
if (base.isValid() && base.protocolIs("file"))
copyURLPartsUntil(base, URLPart::PathEnd, c);
appendToASCIIBuffer("///?", 4);
+ ++c;
+ if (isUTF8Encoding)
+ state = State::UTF8Query;
+ else {
+ queryBegin = c;
+ state = State::NonUTF8Query;
+ }
m_url.m_userStart = currentPosition(c) - 2;
m_url.m_userEnd = m_url.m_userStart;
m_url.m_passwordEnd = m_url.m_userStart;
@@ -1438,8 +1477,6 @@
m_url.m_portEnd = m_url.m_userStart;
m_url.m_pathAfterLastSlash = m_url.m_userStart + 1;
m_url.m_pathEnd = m_url.m_pathAfterLastSlash;
- state = State::Query;
- ++c;
break;
case '#':
syntaxViolation(c);
@@ -1533,9 +1570,14 @@
syntaxViolation(c);
appendToASCIIBuffer("/?", 2);
++c;
+ if (isUTF8Encoding)
+ state = State::UTF8Query;
+ else {
+ queryBegin = c;
+ state = State::NonUTF8Query;
+ }
m_url.m_pathAfterLastSlash = currentPosition(c) - 1;
m_url.m_pathEnd = m_url.m_pathAfterLastSlash;
- state = State::Query;
break;
}
if (UNLIKELY(*c == '#')) {
@@ -1601,7 +1643,14 @@
}
if (*c == '?') {
m_url.m_pathEnd = currentPosition(c);
- state = State::Query;
+ appendToASCIIBuffer('?');
+ ++c;
+ if (isUTF8Encoding)
+ state = State::UTF8Query;
+ else {
+ queryBegin = c;
+ state = State::NonUTF8Query;
+ }
break;
}
if (*c == '#') {
@@ -1628,7 +1677,14 @@
LOG_STATE("CannotBeABaseURLPath");
if (*c == '?') {
m_url.m_pathEnd = currentPosition(c);
- state = State::Query;
+ appendToASCIIBuffer('?');
+ ++c;
+ if (isUTF8Encoding)
+ state = State::UTF8Query;
+ else {
+ queryBegin = c;
+ state = State::NonUTF8Query;
+ }
} else if (*c == '#') {
m_url.m_pathEnd = currentPosition(c);
m_url.m_queryEnd = m_url.m_pathEnd;
@@ -1642,11 +1698,10 @@
++c;
}
break;
- case State::Query:
- LOG_STATE("Query");
+ case State::UTF8Query:
+ LOG_STATE("UTF8Query");
+ ASSERT(queryBegin == CodePointIterator<CharacterType>());
if (*c == '#') {
- if (!isUTF8Encoding)
- encodeQuery(queryBuffer, encoding);
m_url.m_queryEnd = currentPosition(c);
state = State::Fragment;
break;
@@ -1657,6 +1712,20 @@
appendCodePoint(queryBuffer, *c);
++c;
break;
+ case State::NonUTF8Query:
+ do {
+ LOG_STATE("NonUTF8Query");
+ ASSERT(queryBegin != CodePointIterator<CharacterType>());
+ if (*c == '#') {
+ encodeQuery(queryBuffer, encoding, CodePointIterator<CharacterType>(queryBegin, c));
+ m_url.m_queryEnd = currentPosition(c);
+ state = State::Fragment;
+ break;
+ }
+ appendCodePoint(queryBuffer, *c);
+ advance(c, queryBegin);
+ } while (!c.atEnd());
+ break;
case State::Fragment:
do {
LOG(URLParser, "State Fragment");
@@ -1859,13 +1928,19 @@
m_url.m_queryEnd = m_url.m_pathEnd;
m_url.m_fragmentEnd = m_url.m_pathEnd;
break;
- case State::Query:
- LOG_FINAL_STATE("Query");
- if (!isUTF8Encoding)
- encodeQuery(queryBuffer, encoding);
+ case State::UTF8Query:
+ LOG_FINAL_STATE("UTF8Query");
+ ASSERT(queryBegin == CodePointIterator<CharacterType>());
m_url.m_queryEnd = currentPosition(c);
m_url.m_fragmentEnd = m_url.m_queryEnd;
break;
+ case State::NonUTF8Query:
+ LOG_FINAL_STATE("NonUTF8Query");
+ ASSERT(queryBegin != CodePointIterator<CharacterType>());
+ encodeQuery(queryBuffer, encoding, CodePointIterator<CharacterType>(queryBegin, c));
+ m_url.m_queryEnd = currentPosition(c);
+ m_url.m_fragmentEnd = m_url.m_queryEnd;
+ break;
case State::Fragment:
{
LOG_FINAL_STATE("Fragment");
Modified: trunk/Tools/TestWebKitAPI/Tests/WebCore/URLParser.cpp (206647 => 206648)
--- trunk/Tools/TestWebKitAPI/Tests/WebCore/URLParser.cpp 2016-09-30 18:44:16 UTC (rev 206647)
+++ trunk/Tools/TestWebKitAPI/Tests/WebCore/URLParser.cpp 2016-09-30 18:46:28 UTC (rev 206648)
@@ -961,7 +961,7 @@
{"http", "", "", "w", 0, "/", "%ED%A0%80", "", "http://w/?%ED%A0%80"});
}
-static void checkURL(const String& urlString, const TextEncoding& encoding, const ExpectedParts& parts)
+static void checkURL(const String& urlString, const TextEncoding& encoding, const ExpectedParts& parts, bool checkTabs = true)
{
URLParser parser(urlString, { }, encoding);
auto url = ""
@@ -975,13 +975,32 @@
EXPECT_TRUE(eq(parts.fragment, url.fragmentIdentifier()));
EXPECT_TRUE(eq(parts.string, url.string()));
- // FIXME: check tabs here like we do for checkURL and checkRelativeURL.
+ if (checkTabs) {
+ for (size_t i = 0; i < urlString.length(); ++i) {
+ String urlStringWithTab = makeString(urlString.substring(0, i), "\t", urlString.substring(i));
+ ExpectedParts invalidPartsWithTab = {"", "", "", "", 0, "" , "", "", urlStringWithTab};
+ checkURL(urlStringWithTab, encoding, parts.isInvalid() ? invalidPartsWithTab : parts, false);
+ }
+ }
}
TEST_F(URLParserTest, QueryEncoding)
{
- checkURL(utf16String(u"http://host?ß😍#ß😍"), UTF8Encoding(), {"http", "", "", "host", 0, "/", "%C3%9F%F0%9F%98%8D", utf16String(u"ß😍"), utf16String(u"http://host/?%C3%9F%F0%9F%98%8D#ß😍")});
- // FIXME: Add tests with other encodings.
+ checkURL(utf16String(u"http://host?ß😍#ß😍"), UTF8Encoding(), {"http", "", "", "host", 0, "/", "%C3%9F%F0%9F%98%8D", utf16String(u"ß😍"), utf16String(u"http://host/?%C3%9F%F0%9F%98%8D#ß😍")}, false);
+ checkURL(utf16String(u"http://host?ß😍#ß😍"), UTF8Encoding(), {"http", "", "", "host", 0, "/", "%C3%9F%F0%9F%98%8D", utf16String(u"ß😍"), utf16String(u"http://host/?%C3%9F%F0%9F%98%8D#ß😍")}, false);
+
+ TextEncoding latin1(String("latin1"));
+ checkURL("http://host/?query with%20spaces", latin1, {"http", "", "", "host", 0, "/", "query%20with%20spaces", "", "http://host/?query%20with%20spaces"});
+ checkURL("http://host/?query", latin1, {"http", "", "", "host", 0, "/", "query", "", "http://host/?query"});
+ checkURL("http://host/?\tquery", latin1, {"http", "", "", "host", 0, "/", "query", "", "http://host/?query"});
+ checkURL("http://host/?q\tuery", latin1, {"http", "", "", "host", 0, "/", "query", "", "http://host/?query"});
+ checkURL("http://host/?query with SpAcEs#fragment", latin1, {"http", "", "", "host", 0, "/", "query%20with%20SpAcEs", "fragment", "http://host/?query%20with%20SpAcEs#fragment"});
+
+ TextEncoding unrecognized(String("unrecognized invalid encoding name"));
+ checkURL("http://host/?query", unrecognized, {"http", "", "", "host", 0, "/", "", "", "http://host/?"});
+ checkURL("http://host/?", unrecognized, {"http", "", "", "host", 0, "/", "", "", "http://host/?"});
+
+ // FIXME: Add more tests with other encodings and things like non-ascii characters, emoji and unmatched surrogate pairs.
}
} // namespace TestWebKitAPI