Modified: trunk/Source/WebCore/platform/URLParser.cpp (206176 => 206177)
--- trunk/Source/WebCore/platform/URLParser.cpp 2016-09-20 21:43:26 UTC (rev 206176)
+++ trunk/Source/WebCore/platform/URLParser.cpp 2016-09-20 21:50:30 UTC (rev 206177)
@@ -30,10 +30,6 @@
#include <array>
#include <unicode/uidna.h>
#include <unicode/utypes.h>
-#include <wtf/HashMap.h>
-#include <wtf/NeverDestroyed.h>
-#include <wtf/text/StringBuilder.h>
-#include <wtf/text/StringHash.h>
namespace WebCore {
@@ -115,6 +111,17 @@
m_begin += i;
return *this;
}
+
+static void appendCodePoint(Vector<UChar>& destination, UChar32 codePoint)
+{
+ if (U_IS_BMP(codePoint)) {
+ destination.append(static_cast<UChar>(codePoint));
+ return;
+ }
+ destination.reserveCapacity(destination.size() + 2);
+ destination.uncheckedAppend(U16_LEAD(codePoint));
+ destination.uncheckedAppend(U16_TRAIL(codePoint));
+}
enum URLCharacterClass {
UserInfo = 0x1,
@@ -504,10 +511,10 @@
}
}
-inline static void encodeQuery(const StringBuilder& source, Vector<LChar>& destination, const TextEncoding& encoding)
+inline static void encodeQuery(const Vector<UChar>& source, Vector<LChar>& destination, const TextEncoding& encoding)
{
// FIXME: It is unclear in the spec what to do when encoding fails. The behavior should be specified and tested.
- CString encoded = encoding.encode(source.toStringPreserveCapacity(), URLEncodedEntitiesForUnencodables);
+ CString encoded = encoding.encode(StringView(source.data(), source.size()), URLEncodedEntitiesForUnencodables);
const char* data = encoded.data();
size_t length = encoded.length();
for (size_t i = 0; i < length; ++i) {
@@ -912,7 +919,7 @@
return parse<serialized>(input.characters8(), input.length(), { }, UTF8Encoding());
return parse<serialized>(input.characters16(), input.length(), { }, UTF8Encoding());
}
-
+
template<bool serialized, typename CharacterType>
URL URLParser::parse(const CharacterType* input, const unsigned length, const URL& base, const TextEncoding& encoding)
{
@@ -923,7 +930,7 @@
m_asciiBuffer.reserveCapacity(length);
bool isUTF8Encoding = encoding == UTF8Encoding();
- StringBuilder queryBuffer;
+ Vector<UChar> queryBuffer;
unsigned endIndex = length;
while (endIndex && isC0ControlOrSpace(input[endIndex - 1]))
@@ -1408,7 +1415,7 @@
if (isUTF8Encoding)
utf8PercentEncodeQuery<serialized>(*c, m_asciiBuffer);
else
- queryBuffer.append(*c);
+ appendCodePoint(queryBuffer, *c);
++c;
break;
case State::Fragment:
@@ -1416,7 +1423,7 @@
if (m_unicodeFragmentBuffer.isEmpty() && isASCII(*c))
m_asciiBuffer.append(*c);
else
- m_unicodeFragmentBuffer.append(*c);
+ appendCodePoint(m_unicodeFragmentBuffer, *c);
++c;
break;
}
@@ -1926,25 +1933,27 @@
return address;
}
-// FIXME: This should return a CString.
-inline static String percentDecode(const LChar* input, size_t length)
+const size_t defaultInlineBufferSize = 2048;
+
+inline static Vector<LChar, defaultInlineBufferSize> percentDecode(const LChar* input, size_t length)
{
- StringBuilder output;
+ Vector<LChar, defaultInlineBufferSize> output;
+ output.reserveInitialCapacity(length);
for (size_t i = 0; i < length; ++i) {
uint8_t byte = input[i];
if (byte != '%')
- output.append(byte);
+ output.uncheckedAppend(byte);
else if (i < length - 2) {
if (isASCIIHexDigit(input[i + 1]) && isASCIIHexDigit(input[i + 2])) {
- output.append(toASCIIHexValue(input[i + 1], input[i + 2]));
+ output.uncheckedAppend(toASCIIHexValue(input[i + 1], input[i + 2]));
i += 2;
} else
- output.append(byte);
+ output.uncheckedAppend(byte);
} else
- output.append(byte);
+ output.uncheckedAppend(byte);
}
- return output.toStringPreserveCapacity();
+ return output;
}
inline static bool containsOnlyASCII(const String& string)
@@ -1954,22 +1963,26 @@
return charactersAreAllASCII(string.characters16(), string.length());
}
-inline static Optional<String> domainToASCII(const String& domain)
+inline static Optional<Vector<LChar, defaultInlineBufferSize>> domainToASCII(const String& domain)
{
- const unsigned hostnameBufferLength = 2048;
-
+ Vector<LChar, defaultInlineBufferSize> ascii;
if (containsOnlyASCII(domain)) {
- if (domain.is8Bit())
- return domain.convertToASCIILowercase();
- Vector<LChar, hostnameBufferLength> buffer;
size_t length = domain.length();
- buffer.reserveInitialCapacity(length);
- for (size_t i = 0; i < length; ++i)
- buffer.append(toASCIILower(domain[i]));
- return String(buffer.data(), length);
+ if (domain.is8Bit()) {
+ const LChar* characters = domain.characters8();
+ ascii.reserveInitialCapacity(length);
+ for (size_t i = 0; i < length; ++i)
+ ascii.uncheckedAppend(toASCIILower(characters[i]));
+ } else {
+ const UChar* characters = domain.characters16();
+ ascii.reserveInitialCapacity(length);
+ for (size_t i = 0; i < length; ++i)
+ ascii.uncheckedAppend(toASCIILower(characters[i]));
+ }
+ return ascii;
}
- UChar hostnameBuffer[hostnameBufferLength];
+ UChar hostnameBuffer[defaultInlineBufferSize];
UErrorCode error = U_ZERO_ERROR;
#if COMPILER(GCC) || COMPILER(CLANG)
@@ -1977,18 +1990,19 @@
#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
#endif
// FIXME: This should use uidna_openUTS46 / uidna_close instead
- int32_t numCharactersConverted = uidna_IDNToASCII(StringView(domain).upconvertedCharacters(), domain.length(), hostnameBuffer, hostnameBufferLength, UIDNA_ALLOW_UNASSIGNED, nullptr, &error);
+ int32_t numCharactersConverted = uidna_IDNToASCII(StringView(domain).upconvertedCharacters(), domain.length(), hostnameBuffer, defaultInlineBufferSize, UIDNA_ALLOW_UNASSIGNED, nullptr, &error);
#if COMPILER(GCC) || COMPILER(CLANG)
#pragma GCC diagnostic pop
#endif
+ ASSERT(numCharactersConverted <= static_cast<int32_t>(defaultInlineBufferSize));
if (error == U_ZERO_ERROR) {
- LChar buffer[hostnameBufferLength];
for (int32_t i = 0; i < numCharactersConverted; ++i) {
ASSERT(isASCII(hostnameBuffer[i]));
- buffer[i] = hostnameBuffer[i];
+ ASSERT(!isASCIIUpper(hostnameBuffer[i]));
}
- return String(buffer, numCharactersConverted);
+ ascii.append(hostnameBuffer, numCharactersConverted);
+ return ascii;
}
// FIXME: Check for U_BUFFER_OVERFLOW_ERROR and retry with an allocated buffer.
@@ -1995,12 +2009,10 @@
return Nullopt;
}
-inline static bool hasInvalidDomainCharacter(const String& asciiDomain)
+inline static bool hasInvalidDomainCharacter(const Vector<LChar, defaultInlineBufferSize>& asciiDomain)
{
- RELEASE_ASSERT(asciiDomain.is8Bit());
- const LChar* characters = asciiDomain.characters8();
- for (size_t i = 0; i < asciiDomain.length(); ++i) {
- if (isInvalidDomainCharacter(characters[i]))
+ for (size_t i = 0; i < asciiDomain.size(); ++i) {
+ if (isInvalidDomainCharacter(asciiDomain[i]))
return true;
}
return false;
@@ -2095,9 +2107,8 @@
m_url.m_portEnd = m_asciiBuffer.size();
return true;
}
-
- // FIXME: We probably don't need to make so many buffers and String copies.
- StringBuilder utf8Encoded;
+
+ Vector<LChar, defaultInlineBufferSize> utf8Encoded;
for (; !iterator.atEnd(); ++iterator) {
if (!serialized && isTabOrNewline(*iterator))
continue;
@@ -2111,18 +2122,15 @@
// FIXME: Check error.
utf8Encoded.append(buffer, offset);
}
- RELEASE_ASSERT(utf8Encoded.is8Bit());
- String percentDecoded = percentDecode(utf8Encoded.characters8(), utf8Encoded.length());
- RELEASE_ASSERT(percentDecoded.is8Bit());
- String domain = String::fromUTF8(percentDecoded.characters8(), percentDecoded.length());
+ Vector<LChar, defaultInlineBufferSize> percentDecoded = percentDecode(utf8Encoded.data(), utf8Encoded.size());
+ String domain = String::fromUTF8(percentDecoded.data(), percentDecoded.size());
auto asciiDomain = domainToASCII(domain);
if (!asciiDomain || hasInvalidDomainCharacter(asciiDomain.value()))
return false;
- String& asciiDomainValue = asciiDomain.value();
- RELEASE_ASSERT(asciiDomainValue.is8Bit());
- const LChar* asciiDomainCharacters = asciiDomainValue.characters8();
+ Vector<LChar, defaultInlineBufferSize>& asciiDomainValue = asciiDomain.value();
+ const LChar* asciiDomainCharacters = asciiDomainValue.data();
- if (auto address = parseIPv4Host(CodePointIterator<LChar>(asciiDomainCharacters, asciiDomainCharacters + asciiDomainValue.length()))) {
+ if (auto address = parseIPv4Host(CodePointIterator<LChar>(asciiDomainValue.begin(), asciiDomainValue.end()))) {
serializeIPv4(address.value(), m_asciiBuffer);
m_url.m_hostEnd = m_asciiBuffer.size();
if (iterator.atEnd()) {
@@ -2133,7 +2141,7 @@
return parsePort<serialized>(iterator);
}
- m_asciiBuffer.append(asciiDomainCharacters, asciiDomainValue.length());
+ m_asciiBuffer.append(asciiDomainCharacters, asciiDomainValue.size());
m_url.m_hostEnd = m_asciiBuffer.size();
if (!iterator.atEnd()) {
ASSERT(*iterator == ':');
@@ -2150,8 +2158,7 @@
if (utf8.isNull())
return Nullopt;
auto percentDecoded = percentDecode(reinterpret_cast<const LChar*>(utf8.data()), utf8.length());
- RELEASE_ASSERT(percentDecoded.is8Bit());
- return String::fromUTF8(percentDecoded.characters8(), percentDecoded.length());
+ return String::fromUTF8(percentDecoded.data(), percentDecoded.size());
}
auto URLParser::parseURLEncodedForm(StringView input) -> URLEncodedForm
Modified: trunk/Tools/TestWebKitAPI/Tests/WebCore/URLParser.cpp (206176 => 206177)
--- trunk/Tools/TestWebKitAPI/Tests/WebCore/URLParser.cpp 2016-09-20 21:43:26 UTC (rev 206176)
+++ trunk/Tools/TestWebKitAPI/Tests/WebCore/URLParser.cpp 2016-09-20 21:50:30 UTC (rev 206177)
@@ -26,6 +26,7 @@
#include "config.h"
#include <WebCore/URLParser.h>
#include <wtf/MainThread.h>
+#include <wtf/text/StringBuilder.h>
using namespace WebCore;
@@ -558,6 +559,9 @@
checkRelativeURLDifferences("foo://", "http://example.org/foo/bar",
{"foo", "", "", "", 0, "/", "", "", "foo:///"},
{"foo", "", "", "", 0, "//", "", "", "foo://"});
+ checkURLDifferences(wideString(L"http://host?ß😍#ß😍"),
+ {"http", "", "", "host", 0, "/", "%C3%9F%F0%9F%98%8D", wideString(L"ß😍"), wideString(L"http://host/?%C3%9F%F0%9F%98%8D#ß😍")},
+ {"http", "", "", "host", 0, "/", "%C3%9F%F0%9F%98%8D", "%C3%9F%F0%9F%98%8D", "http://host/?%C3%9F%F0%9F%98%8D#%C3%9F%F0%9F%98%8D"});
// This matches the spec and web platform tests, but not Chrome, Firefox, or URL::parse.
checkRelativeURLDifferences("notspecial:/", "about:blank",
@@ -649,6 +653,10 @@
checkURLDifferences("unknown://host:81",
{"unknown", "", "", "host", 81, "/", "", "", "unknown://host:81/"},
{"unknown", "", "", "host", 81, "", "", "", "unknown://host:81"});
+ checkURLDifferences("http://%48OsT",
+ {"http", "", "", "host", 0, "/", "", "", "http://host/"},
+ {"http", "", "", "%48ost", 0, "/", "", "", "http://%48ost/"});
+
}
static void shouldFail(const String& urlString)
@@ -713,4 +721,25 @@
checkRelativeURL("notspecial:", "http://example.org/foo/bar", {"notspecial", "", "", "", 0, "", "", "", "notspecial:"});
}
+static void checkURL(const String& urlString, const TextEncoding& encoding, const ExpectedParts& parts)
+{
+ URLParser parser;
+ auto url = parser.parse(urlString, { }, encoding);
+ EXPECT_TRUE(eq(parts.protocol, url.protocol()));
+ EXPECT_TRUE(eq(parts.user, url.user()));
+ EXPECT_TRUE(eq(parts.password, url.pass()));
+ EXPECT_TRUE(eq(parts.host, url.host()));
+ EXPECT_EQ(parts.port, url.port());
+ EXPECT_TRUE(eq(parts.path, url.path()));
+ EXPECT_TRUE(eq(parts.query, url.query()));
+ EXPECT_TRUE(eq(parts.fragment, url.fragmentIdentifier()));
+ EXPECT_TRUE(eq(parts.string, url.string()));
+}
+
+TEST_F(URLParserTest, QueryEncoding)
+{
+ checkURL(wideString(L"http://host?ß😍#ß😍"), UTF8Encoding(), {"http", "", "", "host", 0, "/", "%C3%9F%F0%9F%98%8D", wideString(L"ß😍"), wideString(L"http://host/?%C3%9F%F0%9F%98%8D#ß😍")});
+ // FIXME: Add tests with other encodings.
+}
+
} // namespace TestWebKitAPI