Modified: trunk/Source/WebCore/platform/URLParser.cpp (206456 => 206457)
--- trunk/Source/WebCore/platform/URLParser.cpp 2016-09-27 19:49:38 UTC (rev 206456)
+++ trunk/Source/WebCore/platform/URLParser.cpp 2016-09-27 20:07:15 UTC (rev 206457)
@@ -410,11 +410,11 @@
static bool shouldPercentEncodeQueryByte(uint8_t byte) { return characterClassTable[byte] & QueryPercent; }
template<typename CharacterType>
-void URLParser::incrementIteratorSkippingTabAndNewLine(CodePointIterator<CharacterType>& iterator)
+void URLParser::advance(CodePointIterator<CharacterType>& iterator, const CodePointIterator<CharacterType>& iteratorForSyntaxViolationPosition)
{
++iterator;
- while (!iterator.atEnd() && isTabOrNewline(*iterator)) {
- syntaxError(iterator);
+ while (UNLIKELY(!iterator.atEnd() && isTabOrNewline(*iterator))) {
+ syntaxViolation(iteratorForSyntaxViolationPosition);
++iterator;
}
}
@@ -424,13 +424,13 @@
{
if (iterator.atEnd() || !isASCIIAlpha(*iterator))
return false;
- incrementIteratorSkippingTabAndNewLine(iterator);
+ advance(iterator);
if (iterator.atEnd())
return false;
if (*iterator == ':')
return true;
- if (*iterator == '|') {
- syntaxError(iterator);
+ if (UNLIKELY(*iterator == '|')) {
+ syntaxViolation(iterator);
return true;
}
return false;
@@ -447,7 +447,7 @@
{
ASSERT(m_unicodeFragmentBuffer.isEmpty());
ASSERT(isASCII(codePoint));
- if (m_seenSyntaxError)
+ if (UNLIKELY(m_didSeeSyntaxViolation))
m_asciiBuffer.append(codePoint);
}
@@ -454,7 +454,7 @@
void URLParser::appendToASCIIBuffer(const char* characters, size_t length)
{
ASSERT(m_unicodeFragmentBuffer.isEmpty());
- if (m_seenSyntaxError)
+ if (UNLIKELY(m_didSeeSyntaxViolation))
m_asciiBuffer.append(characters, length);
}
@@ -463,11 +463,11 @@
{
if (isWindowsDriveLetter(iterator)) {
appendToASCIIBuffer(*iterator);
- incrementIteratorSkippingTabAndNewLine(iterator);
+ advance(iterator);
ASSERT(!iterator.atEnd());
ASSERT(*iterator == ':' || *iterator == '|');
appendToASCIIBuffer(':');
- incrementIteratorSkippingTabAndNewLine(iterator);
+ advance(iterator);
}
}
@@ -478,10 +478,10 @@
return true;
if (iterator.atEnd())
return false;
- incrementIteratorSkippingTabAndNewLine(iterator);
+ advance(iterator);
if (iterator.atEnd())
return true;
- incrementIteratorSkippingTabAndNewLine(iterator);
+ advance(iterator);
if (iterator.atEnd())
return true;
return !isSlashQuestionOrHash(*iterator);
@@ -504,17 +504,21 @@
const char replacementCharacterUTF8PercentEncoded[10] = "%EF%BF%BD";
const size_t replacementCharacterUTF8PercentEncodedLength = sizeof(replacementCharacterUTF8PercentEncoded) - 1;
-template<bool(*isInCodeSet)(UChar32)>
-void URLParser::utf8PercentEncode(UChar32 codePoint)
+template<bool(*isInCodeSet)(UChar32), typename CharacterType>
+void URLParser::utf8PercentEncode(const CodePointIterator<CharacterType>& iterator)
{
- if (isASCII(codePoint)) {
- if (isInCodeSet(codePoint))
+ ASSERT(!iterator.atEnd());
+ UChar32 codePoint = *iterator;
+ if (LIKELY(isASCII(codePoint))) {
+ if (UNLIKELY(isInCodeSet(codePoint))) {
+ syntaxViolation(iterator);
percentEncodeByte(codePoint);
- else
+ } else
appendToASCIIBuffer(codePoint);
return;
}
ASSERT_WITH_MESSAGE(isInCodeSet(codePoint), "isInCodeSet should always return true for non-ASCII characters");
+ syntaxViolation(iterator);
if (!U_IS_UNICODE_CHAR(codePoint)) {
appendToASCIIBuffer(replacementCharacterUTF8PercentEncoded, replacementCharacterUTF8PercentEncodedLength);
@@ -528,17 +532,22 @@
percentEncodeByte(buffer[i]);
}
-
-void URLParser::utf8QueryEncode(UChar32 codePoint)
+template<typename CharacterType>
+void URLParser::utf8QueryEncode(const CodePointIterator<CharacterType>& iterator)
{
- if (isASCII(codePoint)) {
- if (shouldPercentEncodeQueryByte(codePoint))
+ ASSERT(!iterator.atEnd());
+ UChar32 codePoint = *iterator;
+ if (LIKELY(isASCII(codePoint))) {
+ if (UNLIKELY(shouldPercentEncodeQueryByte(codePoint))) {
+ syntaxViolation(iterator);
percentEncodeByte(codePoint);
- else
+ } else
appendToASCIIBuffer(codePoint);
return;
}
+ syntaxViolation(iterator);
+
if (!U_IS_UNICODE_CHAR(codePoint)) {
appendToASCIIBuffer(replacementCharacterUTF8PercentEncoded, replacementCharacterUTF8PercentEncodedLength);
return;
@@ -748,8 +757,11 @@
}
}
-void URLParser::copyURLPartsUntil(const URL& base, URLPart part)
+template<typename CharacterType>
+void URLParser::copyURLPartsUntil(const URL& base, URLPart part, const CodePointIterator<CharacterType>& iterator)
{
+ syntaxViolation(iterator);
+
m_asciiBuffer.clear();
m_unicodeFragmentBuffer.clear();
if (part == URLPart::FragmentEnd) {
@@ -933,11 +945,47 @@
}
template<typename CharacterType>
-void URLParser::syntaxError(const CodePointIterator<CharacterType>&)
+void URLParser::syntaxViolation(const CodePointIterator<CharacterType>& iterator)
{
- // FIXME: Implement.
+ if (m_didSeeSyntaxViolation)
+ return;
+ m_didSeeSyntaxViolation = true;
+
+ ASSERT(m_asciiBuffer.isEmpty());
+ ASSERT(m_unicodeFragmentBuffer.isEmpty());
+ ASSERT_WITH_MESSAGE(!m_url.m_queryEnd, "syntaxViolation should not be used in the fragment, which might contain non-ASCII code points when serialized");
+ size_t codeUnitsToCopy = iterator.codeUnitsSince(reinterpret_cast<const CharacterType*>(m_inputBegin));
+ RELEASE_ASSERT(codeUnitsToCopy <= m_inputString.length());
+ m_asciiBuffer.reserveCapacity(m_inputString.length());
+ for (size_t i = 0; i < codeUnitsToCopy; ++i) {
+ ASSERT(isASCII(m_inputString[i]));
+ m_asciiBuffer.uncheckedAppend(m_inputString[i]);
+ }
}
+template<typename CharacterType>
+void URLParser::fragmentSyntaxViolation(const CodePointIterator<CharacterType>& iterator)
+{
+ if (m_didSeeSyntaxViolation)
+ return;
+ m_didSeeSyntaxViolation = true;
+
+ ASSERT(m_asciiBuffer.isEmpty());
+ ASSERT(m_unicodeFragmentBuffer.isEmpty());
+ size_t codeUnitsToCopy = iterator.codeUnitsSince(reinterpret_cast<const CharacterType*>(m_inputBegin));
+ size_t asciiCodeUnitsToCopy = m_url.m_queryEnd;
+ size_t unicodeCodeUnitsToCopy = codeUnitsToCopy - asciiCodeUnitsToCopy;
+ RELEASE_ASSERT(codeUnitsToCopy <= m_inputString.length());
+ m_asciiBuffer.reserveCapacity(asciiCodeUnitsToCopy);
+ for (size_t i = 0; i < asciiCodeUnitsToCopy; ++i) {
+ ASSERT(isASCII(m_inputString[i]));
+ m_asciiBuffer.uncheckedAppend(m_inputString[i]);
+ }
+ m_unicodeFragmentBuffer.reserveCapacity(m_inputString.length() - asciiCodeUnitsToCopy);
+ for (size_t i = asciiCodeUnitsToCopy; i < asciiCodeUnitsToCopy + unicodeCodeUnitsToCopy; ++i)
+ m_unicodeFragmentBuffer.uncheckedAppend(m_inputString[i]);
+}
+
void URLParser::failure()
{
m_url.invalidate();
@@ -944,11 +992,23 @@
m_url.m_string = m_inputString;
}
+StringView URLParser::parsedDataView(size_t start, size_t length)
+{
+ if (UNLIKELY(m_didSeeSyntaxViolation)) {
+ ASSERT(start + length <= m_asciiBuffer.size());
+ return StringView(m_asciiBuffer.data() + start, length);
+ }
+ ASSERT(start + length <= m_inputString.length());
+ return StringView(m_inputString).substring(start, length);
+}
+
template<typename CharacterType>
size_t URLParser::currentPosition(const CodePointIterator<CharacterType>& iterator)
{
- if (m_seenSyntaxError)
+ if (UNLIKELY(m_didSeeSyntaxViolation)) {
+ ASSERT(m_unicodeFragmentBuffer.isEmpty());
return m_asciiBuffer.size();
+ }
return iterator.codeUnitsSince(reinterpret_cast<const CharacterType*>(m_inputBegin));
}
@@ -956,8 +1016,11 @@
URLParser::URLParser(const String& input, const URL& base, const TextEncoding& encoding)
: m_inputString(input)
{
- if (input.isNull())
+ if (input.isNull()) {
+ if (base.isValid() && !base.m_cannotBeABaseURL)
+ m_url = base;
return;
+ }
if (input.is8Bit()) {
m_inputBegin = input.characters8();
@@ -966,6 +1029,9 @@
m_inputBegin = input.characters16();
parse(input.characters16(), input.length(), base, encoding);
}
+ ASSERT(!m_url.m_isValid
+ || m_didSeeSyntaxViolation == (m_url.string() != input)
+ || (input.isEmpty() && m_url.m_string == base.m_string));
}
template<typename CharacterType>
@@ -975,18 +1041,21 @@
m_url = { };
ASSERT(m_asciiBuffer.isEmpty());
ASSERT(m_unicodeFragmentBuffer.isEmpty());
- m_asciiBuffer.reserveInitialCapacity(length);
bool isUTF8Encoding = encoding == UTF8Encoding();
Vector<UChar> queryBuffer;
unsigned endIndex = length;
- while (endIndex && isC0ControlOrSpace(input[endIndex - 1]))
+ while (UNLIKELY(endIndex && isC0ControlOrSpace(input[endIndex - 1]))) {
+ syntaxViolation(CodePointIterator<CharacterType>(input, input));
endIndex--;
+ }
CodePointIterator<CharacterType> c(input, input + endIndex);
CodePointIterator<CharacterType> authorityOrHostBegin;
- while (!c.atEnd() && isC0ControlOrSpace(*c))
+ while (UNLIKELY(!c.atEnd() && isC0ControlOrSpace(*c))) {
+ syntaxViolation(c);
++c;
+ }
auto beginAfterControlAndSpace = c;
enum class State : uint8_t {
@@ -1011,13 +1080,13 @@
Fragment,
};
-#define LOG_STATE(x) LOG(URLParser, "State %s, code point %c, asciiBuffer size %zu", x, *c, currentPosition(c))
+#define LOG_STATE(x) LOG(URLParser, "State %s, code point %c, parsed data <%s> size %zu", x, *c, parsedDataView(0, currentPosition(c)).utf8().data(), currentPosition(c))
#define LOG_FINAL_STATE(x) LOG(URLParser, "Final State: %s", x)
State state = State::SchemeStart;
while (!c.atEnd()) {
- if (isTabOrNewline(*c)) {
- syntaxError(c);
+ if (UNLIKELY(isTabOrNewline(*c))) {
+ syntaxViolation(c);
++c;
continue;
}
@@ -1026,8 +1095,10 @@
case State::SchemeStart:
LOG_STATE("SchemeStart");
if (isASCIIAlpha(*c)) {
+ if (UNLIKELY(isASCIIUpper(*c)))
+ syntaxViolation(c);
appendToASCIIBuffer(toASCIILower(*c));
- incrementIteratorSkippingTabAndNewLine(c);
+ advance(c);
if (c.atEnd()) {
m_asciiBuffer.clear();
state = State::NoScheme;
@@ -1039,36 +1110,40 @@
break;
case State::Scheme:
LOG_STATE("Scheme");
- if (isValidSchemeCharacter(*c))
+ if (isValidSchemeCharacter(*c)) {
+ if (UNLIKELY(isASCIIUpper(*c)))
+ syntaxViolation(c);
appendToASCIIBuffer(toASCIILower(*c));
- else if (*c == ':') {
+ } else if (*c == ':') {
m_url.m_schemeEnd = currentPosition(c);
- StringView urlScheme = StringView(m_asciiBuffer.data(), m_url.m_schemeEnd);
+ StringView urlScheme = parsedDataView(0, m_url.m_schemeEnd);
m_url.m_protocolIsInHTTPFamily = urlScheme == "http" || urlScheme == "https";
+ appendToASCIIBuffer(':');
if (urlScheme == "file") {
m_urlIsSpecial = true;
state = State::File;
- appendToASCIIBuffer(':');
++c;
break;
}
- appendToASCIIBuffer(':');
if (isSpecialScheme(urlScheme)) {
m_urlIsSpecial = true;
- if (base.protocolIs(m_asciiBuffer.data(), currentPosition(c) - 1))
+ if (base.protocolIs(urlScheme))
state = State::SpecialRelativeOrAuthority;
else
state = State::SpecialAuthoritySlashes;
+ ++c;
} else {
auto maybeSlash = c;
- incrementIteratorSkippingTabAndNewLine(maybeSlash);
+ advance(maybeSlash);
if (!maybeSlash.atEnd() && *maybeSlash == '/') {
appendToASCIIBuffer('/');
- m_url.m_userStart = currentPosition(c);
+ c = maybeSlash;
state = State::PathOrAuthority;
- c = maybeSlash;
ASSERT(*c == '/');
+ ++c;
+ m_url.m_userStart = currentPosition(c);
} else {
+ ++c;
m_url.m_userStart = currentPosition(c);
m_url.m_userEnd = m_url.m_userStart;
m_url.m_passwordEnd = m_url.m_userStart;
@@ -1079,7 +1154,6 @@
state = State::CannotBeABaseURLPath;
}
}
- ++c;
break;
} else {
m_asciiBuffer.clear();
@@ -1087,7 +1161,7 @@
c = beginAfterControlAndSpace;
break;
}
- incrementIteratorSkippingTabAndNewLine(c);
+ advance(c);
if (c.atEnd()) {
m_asciiBuffer.clear();
state = State::NoScheme;
@@ -1101,7 +1175,7 @@
return;
}
if (base.m_cannotBeABaseURL && *c == '#') {
- copyURLPartsUntil(base, URLPart::QueryEnd);
+ copyURLPartsUntil(base, URLPart::QueryEnd, c);
state = State::Fragment;
appendToASCIIBuffer('#');
++c;
@@ -1111,7 +1185,7 @@
state = State::Relative;
break;
}
- copyURLPartsUntil(base, URLPart::SchemeEnd);
+ copyURLPartsUntil(base, URLPart::SchemeEnd, c);
appendToASCIIBuffer(':');
state = State::File;
break;
@@ -1119,7 +1193,7 @@
LOG_STATE("SpecialRelativeOrAuthority");
if (*c == '/') {
appendToASCIIBuffer('/');
- incrementIteratorSkippingTabAndNewLine(c);
+ advance(c);
if (c.atEnd()) {
failure();
return;
@@ -1137,12 +1211,12 @@
LOG_STATE("PathOrAuthority");
if (*c == '/') {
appendToASCIIBuffer('/');
- m_url.m_userStart = currentPosition(c);
state = State::AuthorityOrHost;
++c;
+ m_url.m_userStart = currentPosition(c);
authorityOrHostBegin = c;
} else {
- ASSERT(m_asciiBuffer.last() == '/');
+ ASSERT(parsedDataView(currentPosition(c) - 1, 1) == "/");
m_url.m_userStart = currentPosition(c) - 1;
m_url.m_userEnd = m_url.m_userStart;
m_url.m_passwordEnd = m_url.m_userStart;
@@ -1161,19 +1235,19 @@
++c;
break;
case '?':
- copyURLPartsUntil(base, URLPart::PathEnd);
+ copyURLPartsUntil(base, URLPart::PathEnd, c);
appendToASCIIBuffer('?');
state = State::Query;
++c;
break;
case '#':
- copyURLPartsUntil(base, URLPart::QueryEnd);
+ copyURLPartsUntil(base, URLPart::QueryEnd, c);
appendToASCIIBuffer('#');
state = State::Fragment;
++c;
break;
default:
- copyURLPartsUntil(base, URLPart::PathAfterLastSlash);
+ copyURLPartsUntil(base, URLPart::PathAfterLastSlash, c);
state = State::Path;
break;
}
@@ -1182,11 +1256,11 @@
LOG_STATE("RelativeSlash");
if (*c == '/' || *c == '\\') {
++c;
- copyURLPartsUntil(base, URLPart::SchemeEnd);
+ copyURLPartsUntil(base, URLPart::SchemeEnd, c);
appendToASCIIBuffer("://", 3);
state = State::SpecialAuthorityIgnoreSlashes;
} else {
- copyURLPartsUntil(base, URLPart::PortEnd);
+ copyURLPartsUntil(base, URLPart::PortEnd, c);
appendToASCIIBuffer('/');
m_url.m_pathAfterLastSlash = base.m_portEnd + 1;
state = State::Path;
@@ -1194,11 +1268,23 @@
break;
case State::SpecialAuthoritySlashes:
LOG_STATE("SpecialAuthoritySlashes");
- appendToASCIIBuffer("//", 2);
- if (*c == '/' || *c == '\\') {
- incrementIteratorSkippingTabAndNewLine(c);
- if (!c.atEnd() && (*c == '/' || *c == '\\'))
+ if (LIKELY(*c == '/' || *c == '\\')) {
+ if (UNLIKELY(*c == '\\'))
+ syntaxViolation(c);
+ appendToASCIIBuffer('/');
+ advance(c);
+ if (LIKELY(!c.atEnd() && (*c == '/' || *c == '\\'))) {
+ if (UNLIKELY(*c == '\\'))
+ syntaxViolation(c);
++c;
+ appendToASCIIBuffer('/');
+ } else {
+ syntaxViolation(c);
+ appendToASCIIBuffer('/');
+ }
+ } else {
+ syntaxViolation(c);
+ appendToASCIIBuffer("//", 2);
}
state = State::SpecialAuthorityIgnoreSlashes;
break;
@@ -1213,8 +1299,8 @@
authorityOrHostBegin = c;
break;
case State::AuthorityOrHost:
- LOG_STATE("AuthorityOrHost");
- {
+ do {
+ LOG_STATE("AuthorityOrHost");
if (*c == '@') {
auto lastAt = c;
auto findLastAt = c;
@@ -1225,7 +1311,7 @@
}
parseAuthority(CodePointIterator<CharacterType>(authorityOrHostBegin, lastAt));
c = lastAt;
- incrementIteratorSkippingTabAndNewLine(c);
+ advance(c);
authorityOrHostBegin = c;
state = State::Host;
m_hostHasPercentOrNonASCII = false;
@@ -1233,13 +1319,14 @@
}
bool isSlash = *c == '/' || (m_urlIsSpecial && *c == '\\');
if (isSlash || *c == '?' || *c == '#') {
- m_url.m_userEnd = currentPosition(c);
+ m_url.m_userEnd = currentPosition(authorityOrHostBegin);
m_url.m_passwordEnd = m_url.m_userEnd;
if (!parseHostAndPort(CodePointIterator<CharacterType>(authorityOrHostBegin, c))) {
failure();
return;
}
- if (!isSlash) {
+ if (UNLIKELY(!isSlash)) {
+ syntaxViolation(c);
appendToASCIIBuffer('/');
m_url.m_pathAfterLastSlash = currentPosition(c);
}
@@ -1249,7 +1336,7 @@
if (isPercentOrNonASCII(*c))
m_hostHasPercentOrNonASCII = true;
++c;
- }
+ } while (!c.atEnd());
break;
case State::Host:
LOG_STATE("Host");
@@ -1268,15 +1355,18 @@
case State::File:
LOG_STATE("File");
switch (*c) {
+ case '\\':
+ syntaxViolation(c);
+ FALLTHROUGH;
case '/':
- case '\\':
appendToASCIIBuffer('/');
state = State::FileSlash;
++c;
break;
case '?':
+ syntaxViolation(c);
if (base.isValid() && base.protocolIs("file"))
- copyURLPartsUntil(base, URLPart::PathEnd);
+ copyURLPartsUntil(base, URLPart::PathEnd, c);
appendToASCIIBuffer("///?", 4);
m_url.m_userStart = currentPosition(c) - 2;
m_url.m_userEnd = m_url.m_userStart;
@@ -1289,8 +1379,9 @@
++c;
break;
case '#':
+ syntaxViolation(c);
if (base.isValid() && base.protocolIs("file"))
- copyURLPartsUntil(base, URLPart::QueryEnd);
+ copyURLPartsUntil(base, URLPart::QueryEnd, c);
appendToASCIIBuffer("///#", 4);
m_url.m_userStart = currentPosition(c) - 2;
m_url.m_userEnd = m_url.m_userStart;
@@ -1304,8 +1395,9 @@
++c;
break;
default:
+ syntaxViolation(c);
if (base.isValid() && base.protocolIs("file") && shouldCopyFileURL(c))
- copyURLPartsUntil(base, URLPart::PathAfterLastSlash);
+ copyURLPartsUntil(base, URLPart::PathAfterLastSlash, c);
else {
appendToASCIIBuffer("///", 3);
m_url.m_userStart = currentPosition(c) - 1;
@@ -1322,7 +1414,9 @@
break;
case State::FileSlash:
LOG_STATE("FileSlash");
- if (*c == '/' || *c == '\\') {
+ if (LIKELY(*c == '/' || *c == '\\')) {
+ if (UNLIKELY(*c == '\\'))
+ syntaxViolation(c);
++c;
appendToASCIIBuffer('/');
m_url.m_userStart = currentPosition(c);
@@ -1347,6 +1441,7 @@
}
}
}
+ syntaxViolation(c);
appendToASCIIBuffer("//", 2);
m_url.m_userStart = currentPosition(c) - 1;
m_url.m_userEnd = m_url.m_userStart;
@@ -1365,22 +1460,24 @@
break;
}
if (authorityOrHostBegin == c) {
- ASSERT(m_asciiBuffer[currentPosition(c) - 1] == '/');
- if (*c == '?') {
+ ASSERT(parsedDataView(currentPosition(c) - 1, 1) == "/");
+ if (UNLIKELY(*c == '?')) {
+ syntaxViolation(c);
appendToASCIIBuffer("/?", 2);
+ ++c;
m_url.m_pathAfterLastSlash = currentPosition(c) - 1;
m_url.m_pathEnd = m_url.m_pathAfterLastSlash;
state = State::Query;
- ++c;
break;
}
- if (*c == '#') {
+ if (UNLIKELY(*c == '#')) {
+ syntaxViolation(c);
appendToASCIIBuffer("/#", 2);
+ ++c;
m_url.m_pathAfterLastSlash = currentPosition(c) - 1;
m_url.m_pathEnd = m_url.m_pathAfterLastSlash;
m_url.m_queryEnd = m_url.m_pathAfterLastSlash;
state = State::Fragment;
- ++c;
break;
}
state = State::Path;
@@ -1390,8 +1487,8 @@
failure();
return;
}
-
- if (StringView(m_asciiBuffer.data() + m_url.m_passwordEnd, currentPosition(c) - m_url.m_passwordEnd) == "localhost") {
+ if (UNLIKELY(equalLettersIgnoringASCIICase(parsedDataView(m_url.m_passwordEnd, currentPosition(c) - m_url.m_passwordEnd), "localhost"))) {
+ syntaxViolation(c);
m_asciiBuffer.shrink(m_url.m_passwordEnd);
m_url.m_hostEnd = currentPosition(c);
m_url.m_portEnd = m_url.m_hostEnd;
@@ -1413,18 +1510,22 @@
case State::Path:
LOG_STATE("Path");
if (*c == '/' || (m_urlIsSpecial && *c == '\\')) {
+ if (UNLIKELY(m_urlIsSpecial && *c == '\\'))
+ syntaxViolation(c);
appendToASCIIBuffer('/');
+ ++c;
m_url.m_pathAfterLastSlash = currentPosition(c);
- ++c;
break;
}
- if (currentPosition(c) && m_asciiBuffer[currentPosition(c) - 1] == '/') {
- if (isDoubleDotPathSegment(c)) {
+ if (UNLIKELY(currentPosition(c) && parsedDataView(currentPosition(c) - 1, 1) == "/")) {
+ if (UNLIKELY(isDoubleDotPathSegment(c))) {
+ syntaxViolation(c);
consumeDoubleDotPathSegment(c);
popPath();
break;
}
- if (m_asciiBuffer[currentPosition(c) - 1] == '/' && isSingleDotPathSegment(c)) {
+ if (UNLIKELY(isSingleDotPathSegment(c))) {
+ syntaxViolation(c);
consumeSingleDotPathSegment(c);
break;
}
@@ -1441,6 +1542,8 @@
break;
}
if (isPercentEncodedDot(c)) {
+ if (UNLIKELY(*c != '.'))
+ syntaxViolation(c);
appendToASCIIBuffer('.');
ASSERT(*c == '%');
++c;
@@ -1450,7 +1553,7 @@
++c;
break;
}
- utf8PercentEncode<isInDefaultEncodeSet>(*c);
+ utf8PercentEncode<isInDefaultEncodeSet>(c);
++c;
break;
case State::CannotBeABaseURLPath:
@@ -1464,10 +1567,10 @@
state = State::Fragment;
} else if (*c == '/') {
appendToASCIIBuffer('/');
+ ++c;
m_url.m_pathAfterLastSlash = currentPosition(c);
- ++c;
} else {
- utf8PercentEncode<isInSimpleEncodeSet>(*c);
+ utf8PercentEncode<isInSimpleEncodeSet>(c);
++c;
}
break;
@@ -1481,18 +1584,31 @@
break;
}
if (isUTF8Encoding)
- utf8QueryEncode(*c);
+ utf8QueryEncode(c);
else
appendCodePoint(queryBuffer, *c);
++c;
break;
case State::Fragment:
- LOG_STATE("Fragment");
- if (m_unicodeFragmentBuffer.isEmpty() && isASCII(*c))
- appendToASCIIBuffer(*c);
- else
- appendCodePoint(m_unicodeFragmentBuffer, *c);
- ++c;
+ do {
+ LOG(URLParser, "State Fragment");
+ if (!m_didSeeUnicodeFragmentCodePoint && isASCII(*c))
+ appendToASCIIBuffer(*c);
+ else {
+ m_didSeeUnicodeFragmentCodePoint = true;
+ if (UNLIKELY(m_didSeeSyntaxViolation))
+ appendCodePoint(m_unicodeFragmentBuffer, *c);
+ else {
+ ASSERT(m_asciiBuffer.isEmpty());
+ ASSERT(m_unicodeFragmentBuffer.isEmpty());
+ }
+ }
+ ++c;
+ while (UNLIKELY(!c.atEnd() && isTabOrNewline(*c))) {
+ fragmentSyntaxViolation(c);
+ ++c;
+ }
+ } while (!c.atEnd());
break;
}
}
@@ -1500,7 +1616,7 @@
switch (state) {
case State::SchemeStart:
LOG_FINAL_STATE("SchemeStart");
- if (!currentPosition(c) && base.isValid()) {
+ if (!currentPosition(c) && base.isValid() && !base.m_cannotBeABaseURL) {
m_url = base;
return;
}
@@ -1515,7 +1631,7 @@
RELEASE_ASSERT_NOT_REACHED();
case State::SpecialRelativeOrAuthority:
LOG_FINAL_STATE("SpecialRelativeOrAuthority");
- copyURLPartsUntil(base, URLPart::QueryEnd);
+ copyURLPartsUntil(base, URLPart::QueryEnd, c);
m_url.m_fragmentEnd = m_url.m_queryEnd;
break;
case State::PathOrAuthority:
@@ -1522,7 +1638,7 @@
LOG_FINAL_STATE("PathOrAuthority");
ASSERT(m_url.m_userStart);
ASSERT(m_url.m_userStart == currentPosition(c));
- ASSERT(m_asciiBuffer.last() == '/');
+ ASSERT(parsedDataView(currentPosition(c) - 1, 1) == "/");
m_url.m_userStart--;
m_url.m_userEnd = m_url.m_userStart;
m_url.m_passwordEnd = m_url.m_userStart;
@@ -1535,11 +1651,11 @@
break;
case State::Relative:
LOG_FINAL_STATE("Relative");
- copyURLPartsUntil(base, URLPart::FragmentEnd);
+ copyURLPartsUntil(base, URLPart::FragmentEnd, c);
break;
case State::RelativeSlash:
LOG_FINAL_STATE("RelativeSlash");
- copyURLPartsUntil(base, URLPart::PortEnd);
+ copyURLPartsUntil(base, URLPart::PortEnd, c);
appendToASCIIBuffer('/');
m_url.m_pathAfterLastSlash = base.m_portEnd + 1;
m_url.m_pathEnd = m_url.m_pathAfterLastSlash;
@@ -1565,7 +1681,7 @@
break;
case State::AuthorityOrHost:
LOG_FINAL_STATE("AuthorityOrHost");
- m_url.m_userEnd = currentPosition(c);
+ m_url.m_userEnd = currentPosition(authorityOrHostBegin);
m_url.m_passwordEnd = m_url.m_userEnd;
if (authorityOrHostBegin.atEnd()) {
m_url.m_hostEnd = m_url.m_userEnd;
@@ -1574,6 +1690,7 @@
failure();
return;
}
+ syntaxViolation(c);
appendToASCIIBuffer('/');
m_url.m_pathEnd = m_url.m_portEnd + 1;
m_url.m_pathAfterLastSlash = m_url.m_pathEnd;
@@ -1586,6 +1703,7 @@
failure();
return;
}
+ syntaxViolation(c);
appendToASCIIBuffer('/');
m_url.m_pathEnd = m_url.m_portEnd + 1;
m_url.m_pathAfterLastSlash = m_url.m_pathEnd;
@@ -1595,9 +1713,10 @@
case State::File:
LOG_FINAL_STATE("File");
if (base.isValid() && base.protocolIs("file")) {
- copyURLPartsUntil(base, URLPart::QueryEnd);
+ copyURLPartsUntil(base, URLPart::QueryEnd, c);
appendToASCIIBuffer(':');
}
+ syntaxViolation(c);
appendToASCIIBuffer("///", 3);
m_url.m_userStart = currentPosition(c) - 1;
m_url.m_userEnd = m_url.m_userStart;
@@ -1611,8 +1730,9 @@
break;
case State::FileSlash:
LOG_FINAL_STATE("FileSlash");
+ syntaxViolation(c);
+ m_url.m_userStart = currentPosition(c) + 1;
appendToASCIIBuffer("//", 2);
- m_url.m_userStart = currentPosition(c) - 1;
m_url.m_userEnd = m_url.m_userStart;
m_url.m_passwordEnd = m_url.m_userStart;
m_url.m_hostEnd = m_url.m_userStart;
@@ -1625,6 +1745,7 @@
case State::FileHost:
LOG_FINAL_STATE("FileHost");
if (authorityOrHostBegin == c) {
+ syntaxViolation(c);
appendToASCIIBuffer('/');
m_url.m_userStart = currentPosition(c) - 1;
m_url.m_userEnd = m_url.m_userStart;
@@ -1643,7 +1764,8 @@
return;
}
- if (StringView(m_asciiBuffer.data() + m_url.m_passwordEnd, currentPosition(c) - m_url.m_passwordEnd) == "localhost") {
+ syntaxViolation(c);
+ if (equalLettersIgnoringASCIICase(parsedDataView(m_url.m_passwordEnd, currentPosition(c) - m_url.m_passwordEnd), "localhost")) {
m_asciiBuffer.shrink(m_url.m_passwordEnd);
m_url.m_hostEnd = currentPosition(c);
m_url.m_portEnd = m_url.m_hostEnd;
@@ -1677,20 +1799,24 @@
m_url.m_fragmentEnd = m_url.m_queryEnd;
break;
case State::Fragment:
- LOG_FINAL_STATE("Fragment");
- m_url.m_fragmentEnd = currentPosition(c) + m_unicodeFragmentBuffer.size();
- break;
+ {
+ LOG_FINAL_STATE("Fragment");
+ size_t length = m_didSeeSyntaxViolation ? m_asciiBuffer.size() + m_unicodeFragmentBuffer.size() : c.codeUnitsSince(reinterpret_cast<const CharacterType*>(m_inputBegin));
+ m_url.m_fragmentEnd = length;
+ break;
+ }
}
- if (!m_seenSyntaxError) {
+ if (LIKELY(!m_didSeeSyntaxViolation)) {
m_url.m_string = m_inputString;
ASSERT(m_asciiBuffer.isEmpty());
ASSERT(m_unicodeFragmentBuffer.isEmpty());
- } else if (m_unicodeFragmentBuffer.isEmpty())
+ } else if (!m_didSeeUnicodeFragmentCodePoint) {
+ ASSERT(m_unicodeFragmentBuffer.isEmpty());
m_url.m_string = String::adopt(WTFMove(m_asciiBuffer));
- else {
+ } else {
Vector<UChar> buffer;
- buffer.reserveInitialCapacity(currentPosition(c) + m_unicodeFragmentBuffer.size());
+ buffer.reserveInitialCapacity(m_asciiBuffer.size() + m_unicodeFragmentBuffer.size());
buffer.appendVector(m_asciiBuffer);
buffer.appendVector(m_unicodeFragmentBuffer);
m_url.m_string = String::adopt(WTFMove(buffer));
@@ -1703,16 +1829,20 @@
template<typename CharacterType>
void URLParser::parseAuthority(CodePointIterator<CharacterType> iterator)
{
- if (iterator.atEnd()) {
+ if (UNLIKELY(iterator.atEnd())) {
+ syntaxViolation(iterator);
m_url.m_userEnd = currentPosition(iterator);
m_url.m_passwordEnd = m_url.m_userEnd;
return;
}
- for (; !iterator.atEnd(); ++iterator) {
+ auto authorityOrHostBegin = iterator;
+ for (; !iterator.atEnd(); advance(iterator)) {
if (*iterator == ':') {
- ++iterator;
m_url.m_userEnd = currentPosition(iterator);
- if (iterator.atEnd()) {
+ auto iteratorAtColon = iterator;
+ advance(iterator, authorityOrHostBegin);
+ if (UNLIKELY(iterator.atEnd())) {
+ syntaxViolation(iteratorAtColon);
m_url.m_passwordEnd = m_url.m_userEnd;
if (m_url.m_userEnd > m_url.m_userStart)
appendToASCIIBuffer('@');
@@ -1721,10 +1851,10 @@
appendToASCIIBuffer(':');
break;
}
- utf8PercentEncode<isInUserInfoEncodeSet>(*iterator);
+ utf8PercentEncode<isInUserInfoEncodeSet>(iterator);
}
- for (; !iterator.atEnd(); ++iterator)
- utf8PercentEncode<isInUserInfoEncodeSet>(*iterator);
+ for (; !iterator.atEnd(); advance(iterator))
+ utf8PercentEncode<isInUserInfoEncodeSet>(iterator);
m_url.m_passwordEnd = currentPosition(iterator);
if (!m_url.m_userEnd)
m_url.m_userEnd = m_url.m_passwordEnd;
@@ -1824,7 +1954,7 @@
}
template<typename CharacterType>
-inline static Optional<uint32_t> parseIPv4Number(CodePointIterator<CharacterType>& iterator)
+Optional<uint32_t> URLParser::parseIPv4Number(CodePointIterator<CharacterType>& iterator, const CodePointIterator<CharacterType>& iteratorForSyntaxViolationPosition)
{
// FIXME: Check for overflow.
enum class State : uint8_t {
@@ -1843,7 +1973,7 @@
}
switch (state) {
case State::UnknownBase:
- if (*iterator == '0') {
+ if (UNLIKELY(*iterator == '0')) {
++iterator;
state = State::OctalOrHex;
break;
@@ -1851,6 +1981,7 @@
state = State::Decimal;
break;
case State::OctalOrHex:
+ syntaxViolation(iteratorForSyntaxViolationPosition);
if (*iterator == 'x' || *iterator == 'X') {
++iterator;
state = State::Hex;
@@ -1866,6 +1997,7 @@
++iterator;
break;
case State::Octal:
+ ASSERT(m_didSeeSyntaxViolation);
if (*iterator < '0' || *iterator > '7')
return Nullopt;
value *= 8;
@@ -1873,6 +2005,7 @@
++iterator;
break;
case State::Hex:
+ ASSERT(m_didSeeSyntaxViolation);
if (!isASCIIHexDigit(*iterator))
return Nullopt;
value *= 16;
@@ -1894,12 +2027,14 @@
template<typename CharacterType>
Optional<URLParser::IPv4Address> URLParser::parseIPv4Host(CodePointIterator<CharacterType> iterator)
{
+ auto hostBegin = iterator;
+
Vector<uint32_t, 4> items;
items.reserveInitialCapacity(4);
while (!iterator.atEnd()) {
if (items.size() >= 4)
return Nullopt;
- if (auto item = parseIPv4Number(iterator))
+ if (auto item = parseIPv4Number(iterator, hostBegin))
items.append(item.value());
else
return Nullopt;
@@ -1918,6 +2053,10 @@
if (item > 255)
return Nullopt;
}
+
+ if (UNLIKELY(items.size() != 4))
+ syntaxViolation(hostBegin);
+
IPv4Address ipv4 = items.takeLast();
for (size_t counter = 0; counter < items.size(); ++counter)
ipv4 += items[counter] * pow256(3 - counter);
@@ -1927,6 +2066,9 @@
template<typename CharacterType>
Optional<URLParser::IPv6Address> URLParser::parseIPv6Host(CodePointIterator<CharacterType> c)
{
+ ASSERT(*c == '[');
+ auto hostBegin = c;
+ advance(c, hostBegin);
if (c.atEnd())
return Nullopt;
@@ -1935,12 +2077,12 @@
Optional<size_t> compressPointer;
if (*c == ':') {
- ++c;
+ advance(c, hostBegin);
if (c.atEnd())
return Nullopt;
if (*c != ':')
return Nullopt;
- ++c;
+ advance(c, hostBegin);
++piecePointer;
compressPointer = piecePointer;
}
@@ -1951,26 +2093,30 @@
if (*c == ':') {
if (compressPointer)
return Nullopt;
- ++c;
+ advance(c, hostBegin);
++piecePointer;
compressPointer = piecePointer;
continue;
}
uint16_t value = 0;
- for (size_t length = 0; length < 4; length++) {
+ size_t length = 0;
+ for (; length < 4; length++) {
if (c.atEnd())
break;
if (!isASCIIHexDigit(*c))
break;
value = value * 0x10 + toASCIIHexValue(*c);
- ++c;
+ advance(c, hostBegin);
}
+ if (UNLIKELY(length > 1 && !value))
+ syntaxViolation(hostBegin);
+
address[piecePointer++] = value;
if (c.atEnd())
break;
if (*c != ':')
return Nullopt;
- ++c;
+ advance(c, hostBegin);
}
if (!c.atEnd()) {
@@ -1989,7 +2135,7 @@
return Nullopt;
else
value = value.value() * 10 + number;
- ++c;
+ advance(c, hostBegin);
if (c.atEnd())
return Nullopt;
if (value.value() > 255)
@@ -2001,7 +2147,7 @@
if (dotsSeen == 1 || dotsSeen == 3)
piecePointer++;
if (!c.atEnd())
- ++c;
+ advance(c, hostBegin);
if (dotsSeen == 3 && !c.atEnd())
return Nullopt;
dotsSeen++;
@@ -2014,6 +2160,13 @@
std::swap(address[piecePointer--], address[compressPointer.value() + swaps-- - 1]);
} else if (piecePointer != 8)
return Nullopt;
+
+ Optional<size_t> possibleCompressPointer = findLongestZeroSequence(address);
+ if (possibleCompressPointer)
+ possibleCompressPointer.value()++;
+ if (UNLIKELY(compressPointer != possibleCompressPointer))
+ syntaxViolation(hostBegin);
+
return address;
}
@@ -2105,15 +2258,20 @@
template<typename CharacterType>
bool URLParser::parsePort(CodePointIterator<CharacterType>& iterator)
{
+ ASSERT(*iterator == ':');
+ auto colonIterator = iterator;
+ advance(iterator, colonIterator);
uint32_t port = 0;
- if (iterator.atEnd()) {
- m_url.m_portEnd = currentPosition(iterator);
+ if (UNLIKELY(iterator.atEnd())) {
+ m_url.m_portEnd = currentPosition(colonIterator);
+ syntaxViolation(colonIterator);
return true;
}
- appendToASCIIBuffer(':');
for (; !iterator.atEnd(); ++iterator) {
- if (isTabOrNewline(*iterator))
+ if (UNLIKELY(isTabOrNewline(*iterator))) {
+ syntaxViolation(colonIterator);
continue;
+ }
if (isASCIIDigit(*iterator)) {
port = port * 10 + *iterator - '0';
if (port > std::numeric_limits<uint16_t>::max())
@@ -2122,10 +2280,10 @@
return false;
}
- if (isDefaultPort(StringView(m_asciiBuffer.data(), m_url.m_schemeEnd), port)) {
- ASSERT(m_asciiBuffer.last() == ':');
- m_asciiBuffer.shrink(currentPosition(iterator) - 1);
- } else {
+ if (UNLIKELY(isDefaultPort(parsedDataView(0, m_url.m_schemeEnd), port)))
+ syntaxViolation(colonIterator);
+ else {
+ appendToASCIIBuffer(':');
ASSERT(port <= std::numeric_limits<uint16_t>::max());
appendNumberToASCIIBuffer<uint16_t>(static_cast<uint16_t>(port));
}
@@ -2140,27 +2298,27 @@
if (iterator.atEnd())
return false;
if (*iterator == '[') {
- ++iterator;
auto ipv6End = iterator;
while (!ipv6End.atEnd() && *ipv6End != ']')
++ipv6End;
if (auto address = parseIPv6Host(CodePointIterator<CharacterType>(iterator, ipv6End))) {
serializeIPv6(address.value());
- m_url.m_hostEnd = currentPosition(iterator);
if (!ipv6End.atEnd()) {
- ++ipv6End;
+ advance(ipv6End);
if (!ipv6End.atEnd() && *ipv6End == ':') {
- ++ipv6End;
+ m_url.m_hostEnd = currentPosition(ipv6End);
return parsePort(ipv6End);
}
- m_url.m_portEnd = currentPosition(iterator);
+ m_url.m_hostEnd = currentPosition(ipv6End);
+ m_url.m_portEnd = m_url.m_hostEnd;
return true;
}
+ m_url.m_hostEnd = currentPosition(ipv6End);
return true;
}
}
-
- if (!m_hostHasPercentOrNonASCII) {
+
+ if (LIKELY(!m_hostHasPercentOrNonASCII)) {
auto hostIterator = iterator;
for (; !iterator.atEnd(); ++iterator) {
if (isTabOrNewline(*iterator))
@@ -2177,23 +2335,25 @@
m_url.m_portEnd = currentPosition(iterator);
return true;
}
- ++iterator;
return parsePort(iterator);
}
for (; hostIterator != iterator; ++hostIterator) {
- if (!isTabOrNewline(*hostIterator))
+ if (LIKELY(!isTabOrNewline(*hostIterator))) {
+ if (UNLIKELY(isASCIIUpper(*hostIterator)))
+ syntaxViolation(hostIterator);
appendToASCIIBuffer(toASCIILower(*hostIterator));
+ } else
+ syntaxViolation(hostIterator);
}
m_url.m_hostEnd = currentPosition(iterator);
- if (!hostIterator.atEnd()) {
- ASSERT(*hostIterator == ':');
- incrementIteratorSkippingTabAndNewLine(hostIterator);
+ if (!hostIterator.atEnd())
return parsePort(hostIterator);
- }
m_url.m_portEnd = currentPosition(iterator);
return true;
}
+ syntaxViolation(iterator);
+
Vector<LChar, defaultInlineBufferSize> utf8Encoded;
for (; !iterator.atEnd(); ++iterator) {
if (isTabOrNewline(*iterator))
@@ -2223,17 +2383,13 @@
m_url.m_portEnd = currentPosition(iterator);
return true;
}
- ++iterator;
return parsePort(iterator);
}
appendToASCIIBuffer(asciiDomainCharacters, asciiDomainValue.size());
m_url.m_hostEnd = currentPosition(iterator);
- if (!iterator.atEnd()) {
- ASSERT(*iterator == ':');
- incrementIteratorSkippingTabAndNewLine(iterator);
+ if (!iterator.atEnd())
return parsePort(iterator);
- }
m_url.m_portEnd = currentPosition(iterator);
return true;
}
@@ -2281,7 +2437,7 @@
|| (byte >= 0x30 && byte <= 0x39)
|| (byte >= 0x41 && byte <= 0x5A)
|| byte == 0x5F
- || (byte >= 0x61 && byte <= 0x7A))
+ || (byte >= 0x61 && byte <= 0x7A)) // FIXME: Put these in the characterClassTable to avoid branches.
output.append(byte);
else
percentEncodeByte(byte, output);
@@ -2349,7 +2505,7 @@
}
bool URLParser::internalValuesConsistent(const URL& url)
-{
+{
return url.m_schemeEnd <= url.m_userStart
&& url.m_userStart <= url.m_userEnd
&& url.m_userEnd <= url.m_passwordEnd
Modified: trunk/Tools/TestWebKitAPI/Tests/WebCore/URLParser.cpp (206456 => 206457)
--- trunk/Tools/TestWebKitAPI/Tests/WebCore/URLParser.cpp 2016-09-27 19:49:38 UTC (rev 206456)
+++ trunk/Tools/TestWebKitAPI/Tests/WebCore/URLParser.cpp 2016-09-27 20:07:15 UTC (rev 206457)
@@ -119,6 +119,15 @@
checkURL("http://[0:f::f:f:0:0]", {"http", "", "", "[0:f::f:f:0:0]", 0, "/", "", "", "http://[0:f::f:f:0:0]/"});
checkURL("http://[0:f:0:0:f::]", {"http", "", "", "[0:f:0:0:f::]", 0, "/", "", "", "http://[0:f:0:0:f::]/"});
checkURL("http://[::f:0:0:f:0:0]", {"http", "", "", "[::f:0:0:f:0:0]", 0, "/", "", "", "http://[::f:0:0:f:0:0]/"});
+ checkURL("http://[0:f:0:0:f::]:", {"http", "", "", "[0:f:0:0:f::]", 0, "/", "", "", "http://[0:f:0:0:f::]/"});
+ checkURL("http://[0:f:0:0:f::]:\t", {"http", "", "", "[0:f:0:0:f::]", 0, "/", "", "", "http://[0:f:0:0:f::]/"});
+ checkURL("http://[0:f:0:0:f::]\t:", {"http", "", "", "[0:f:0:0:f::]", 0, "/", "", "", "http://[0:f:0:0:f::]/"});
+ checkURL("http://\t[::f:0:0:f:0:0]", {"http", "", "", "[::f:0:0:f:0:0]", 0, "/", "", "", "http://[::f:0:0:f:0:0]/"});
+ checkURL("http://[\t::f:0:0:f:0:0]", {"http", "", "", "[::f:0:0:f:0:0]", 0, "/", "", "", "http://[::f:0:0:f:0:0]/"});
+ checkURL("http://[:\t:f:0:0:f:0:0]", {"http", "", "", "[::f:0:0:f:0:0]", 0, "/", "", "", "http://[::f:0:0:f:0:0]/"});
+ checkURL("http://[::\tf:0:0:f:0:0]", {"http", "", "", "[::f:0:0:f:0:0]", 0, "/", "", "", "http://[::f:0:0:f:0:0]/"});
+ checkURL("http://[::f\t:0:0:f:0:0]", {"http", "", "", "[::f:0:0:f:0:0]", 0, "/", "", "", "http://[::f:0:0:f:0:0]/"});
+ checkURL("http://[::f:\t0:0:f:0:0]", {"http", "", "", "[::f:0:0:f:0:0]", 0, "/", "", "", "http://[::f:0:0:f:0:0]/"});
checkURL("http://example.com/path1/path2/.", {"http", "", "", "example.com", 0, "/path1/path2/", "", "", "http://example.com/path1/path2/"});
checkURL("http://example.com/path1/path2/..", {"http", "", "", "example.com", 0, "/path1/", "", "", "http://example.com/path1/"});
checkURL("http://example.com/path1/path2/./path3", {"http", "", "", "example.com", 0, "/path1/path2/path3", "", "", "http://example.com/path1/path2/path3"});
@@ -191,12 +200,23 @@
checkURL("http://host/a%20B", {"http", "", "", "host", 0, "/a%20B", "", "", "http://host/a%20B"});
checkURL("http://host?q=@ <>!#fragment", {"http", "", "", "host", 0, "/", "q=@%20%3C%3E!", "fragment", "http://host/?q=@%20%3C%3E!#fragment"});
checkURL("http://user:@host", {"http", "user", "", "host", 0, "/", "", "", "http://user@host/"});
+ checkURL("http://user:@\thost", {"http", "user", "", "host", 0, "/", "", "", "http://user@host/"});
+ checkURL("http://user\t:@host", {"http", "user", "", "host", 0, "/", "", "", "http://user@host/"});
+ checkURL("http://use\tr:@host", {"http", "user", "", "host", 0, "/", "", "", "http://user@host/"});
checkURL("http://127.0.0.1:10100/path", {"http", "", "", "127.0.0.1", 10100, "/path", "", "", "http://127.0.0.1:10100/path"});
checkURL("http://127.0.0.1:/path", {"http", "", "", "127.0.0.1", 0, "/path", "", "", "http://127.0.0.1/path"});
+ checkURL("http://127.0.0.1\t:/path", {"http", "", "", "127.0.0.1", 0, "/path", "", "", "http://127.0.0.1/path"});
+ checkURL("http://127.0.0.1:\t/path", {"http", "", "", "127.0.0.1", 0, "/path", "", "", "http://127.0.0.1/path"});
+ checkURL("http://127.0.0.1:/\tpath", {"http", "", "", "127.0.0.1", 0, "/path", "", "", "http://127.0.0.1/path"});
checkURL("http://127.0.0.1:123", {"http", "", "", "127.0.0.1", 123, "/", "", "", "http://127.0.0.1:123/"});
checkURL("http://127.0.0.1:", {"http", "", "", "127.0.0.1", 0, "/", "", "", "http://127.0.0.1/"});
checkURL("http://[0:f::f:f:0:0]:123/path", {"http", "", "", "[0:f::f:f:0:0]", 123, "/path", "", "", "http://[0:f::f:f:0:0]:123/path"});
checkURL("http://[0:f::f:f:0:0]:123", {"http", "", "", "[0:f::f:f:0:0]", 123, "/", "", "", "http://[0:f::f:f:0:0]:123/"});
+ checkURL("http://[0:f:0:0:f:\t:]:123", {"http", "", "", "[0:f:0:0:f::]", 123, "/", "", "", "http://[0:f:0:0:f::]:123/"});
+ checkURL("http://[0:f:0:0:f::\t]:123", {"http", "", "", "[0:f:0:0:f::]", 123, "/", "", "", "http://[0:f:0:0:f::]:123/"});
+ checkURL("http://[0:f:0:0:f::]\t:123", {"http", "", "", "[0:f:0:0:f::]", 123, "/", "", "", "http://[0:f:0:0:f::]:123/"});
+ checkURL("http://[0:f:0:0:f::]:\t123", {"http", "", "", "[0:f:0:0:f::]", 123, "/", "", "", "http://[0:f:0:0:f::]:123/"});
+ checkURL("http://[0:f:0:0:f::]:1\t23", {"http", "", "", "[0:f:0:0:f::]", 123, "/", "", "", "http://[0:f:0:0:f::]:123/"});
checkURL("http://[0:f::f:f:0:0]:/path", {"http", "", "", "[0:f::f:f:0:0]", 0, "/path", "", "", "http://[0:f::f:f:0:0]/path"});
checkURL("http://[0:f::f:f:0:0]:", {"http", "", "", "[0:f::f:f:0:0]", 0, "/", "", "", "http://[0:f::f:f:0:0]/"});
checkURL("http://host:10100/path", {"http", "", "", "host", 10100, "/path", "", "", "http://host:10100/path"});
@@ -210,6 +230,9 @@
checkURL("sc:/pa/", {"sc", "", "", "", 0, "/pa/", "", "", "sc:/pa/"});
checkURL("notspecial:/notuser:notpassword@nothost", {"notspecial", "", "", "", 0, "/notuser:notpassword@nothost", "", "", "notspecial:/notuser:notpassword@nothost"});
checkURL("sc://pa/", {"sc", "", "", "pa", 0, "/", "", "", "sc://pa/"});
+ checkURL("sc://\tpa/", {"sc", "", "", "pa", 0, "/", "", "", "sc://pa/"});
+ checkURL("sc:/\t/pa/", {"sc", "", "", "pa", 0, "/", "", "", "sc://pa/"});
+ checkURL("sc:\t//pa/", {"sc", "", "", "pa", 0, "/", "", "", "sc://pa/"});
checkURL("http://host \a ", {"http", "", "", "host", 0, "/", "", "", "http://host/"});
checkURL("notspecial:/a", {"notspecial", "", "", "", 0, "/a", "", "", "notspecial:/a"});
checkURL("notspecial:", {"notspecial", "", "", "", 0, "", "", "", "notspecial:"});
@@ -217,11 +240,14 @@
checkURL("http://256/", {"http", "", "", "256", 0, "/", "", "", "http://256/"});
checkURL("http://256./", {"http", "", "", "256.", 0, "/", "", "", "http://256./"});
checkURL("http://123.256/", {"http", "", "", "123.256", 0, "/", "", "", "http://123.256/"});
+ checkURL("http://123\t.256/", {"http", "", "", "123.256", 0, "/", "", "", "http://123.256/"});
+ checkURL("http://123.\t256/", {"http", "", "", "123.256", 0, "/", "", "", "http://123.256/"});
checkURL("notspecial:/a", {"notspecial", "", "", "", 0, "/a", "", "", "notspecial:/a"});
checkURL("notspecial:", {"notspecial", "", "", "", 0, "", "", "", "notspecial:"});
checkURL("notspecial:/", {"notspecial", "", "", "", 0, "/", "", "", "notspecial:/"});
checkURL("data:image/png;base64,encoded-data-follows-here", {"data", "", "", "", 0, "image/png;base64,encoded-data-follows-here", "", "", "data:image/png;base64,encoded-data-follows-here"});
checkURL("data:image/png;base64,encoded/data-with-slash", {"data", "", "", "", 0, "image/png;base64,encoded/data-with-slash", "", "", "data:image/png;base64,encoded/data-with-slash"});
+ checkURL("about:~", {"about", "", "", "", 0, "~", "", "", "about:~"});
// This disagrees with the web platform test for http://:@www.example.com but agrees with Chrome and URL::parse,
// and Firefox fails the web platform test differently. Maybe the web platform test ought to be changed.
@@ -287,6 +313,8 @@
checkRelativeURL("\\@", "http://example.org/foo/bar", {"http", "", "", "example.org", 0, "/@", "", "", "http://example.org/@"});
checkRelativeURL("/path3", "http://user@example.org/path1/path2", {"http", "user", "", "example.org", 0, "/path3", "", "", "http://user@example.org/path3"});
checkRelativeURL("", "http://example.org/foo/bar", {"http", "", "", "example.org", 0, "/foo/bar", "", "", "http://example.org/foo/bar"});
+ checkRelativeURL("\t", "http://example.org/foo/bar", {"http", "", "", "example.org", 0, "/foo/bar", "", "", "http://example.org/foo/bar"});
+ checkRelativeURL(" ", "http://example.org/foo/bar", {"http", "", "", "example.org", 0, "/foo/bar", "", "", "http://example.org/foo/bar"});
checkRelativeURL(" \a \t\n", "http://example.org/foo/bar", {"http", "", "", "example.org", 0, "/foo/bar", "", "", "http://example.org/foo/bar"});
checkRelativeURL(":foo.com\\", "http://example.org/foo/bar", {"http", "", "", "example.org", 0, "/foo/:foo.com/", "", "", "http://example.org/foo/:foo.com/"});
checkRelativeURL("http:/example.com/", "about:blank", {"http", "", "", "example.com", 0, "/", "", "", "http://example.com/"});
@@ -314,6 +342,7 @@
checkRelativeURL("notspecial:/", "http://host", {"notspecial", "", "", "", 0, "/", "", "", "notspecial:/"});
checkRelativeURL("foo:/", "http://example.org/foo/bar", {"foo", "", "", "", 0, "/", "", "", "foo:/"});
checkRelativeURL("://:0/", "http://webkit.org/", {"http", "", "", "webkit.org", 0, "/://:0/", "", "", "http://webkit.org/://:0/"});
+ checkRelativeURL(String(), "http://webkit.org/", {"http", "", "", "webkit.org", 0, "/", "", "", "http://webkit.org/"});
// The checking of slashes in SpecialAuthoritySlashes needed to get this to pass contradicts what is in the spec,
// but it is included in the web platform tests.
@@ -590,6 +619,27 @@
checkURLDifferences(utf16String(u"http://host/path#💩\t💩"),
{"http", "", "", "host", 0, "/path", "", utf16String(u"💩💩"), utf16String(u"http://host/path#💩💩")},
{"http", "", "", "host", 0, "/path", "", "%F0%9F%92%A9%F0%9F%92%A9", "http://host/path#%F0%9F%92%A9%F0%9F%92%A9"});
+ checkURLDifferences("http://%48OsT",
+ {"http", "", "", "host", 0, "/", "", "", "http://host/"},
+ {"http", "", "", "%48ost", 0, "/", "", "", "http://%48ost/"});
+ checkURLDifferences("http://h%4FsT",
+ {"http", "", "", "host", 0, "/", "", "", "http://host/"},
+ {"http", "", "", "h%4fst", 0, "/", "", "", "http://h%4fst/"});
+ checkURLDifferences("http://h%4fsT",
+ {"http", "", "", "host", 0, "/", "", "", "http://host/"},
+ {"http", "", "", "h%4fst", 0, "/", "", "", "http://h%4fst/"});
+ checkURLDifferences("http://h%6fsT",
+ {"http", "", "", "host", 0, "/", "", "", "http://host/"},
+ {"http", "", "", "h%6fst", 0, "/", "", "", "http://h%6fst/"});
+ checkURLDifferences("http://host/`",
+ {"http", "", "", "host", 0, "/%60", "", "", "http://host/%60"},
+ {"http", "", "", "host", 0, "/`", "", "", "http://host/`"});
+ checkURLDifferences("aA://",
+ {"aa", "", "", "", 0, "/", "", "", "aa:///"},
+ {"aa", "", "", "", 0, "//", "", "", "aa://"});
+ checkURLDifferences("A://",
+ {"a", "", "", "", 0, "/", "", "", "a:///"},
+ {"a", "", "", "", 0, "//", "", "", "a://"});
}
TEST_F(URLParserTest, DefaultPort)
@@ -596,6 +646,15 @@
{
checkURL("FtP://host:21/", {"ftp", "", "", "host", 0, "/", "", "", "ftp://host/"});
checkURL("ftp://host:21/", {"ftp", "", "", "host", 0, "/", "", "", "ftp://host/"});
+ checkURL("f\ttp://host:21/", {"ftp", "", "", "host", 0, "/", "", "", "ftp://host/"});
+ checkURL("f\ttp://host\t:21/", {"ftp", "", "", "host", 0, "/", "", "", "ftp://host/"});
+ checkURL("f\ttp://host:\t21/", {"ftp", "", "", "host", 0, "/", "", "", "ftp://host/"});
+ checkURL("f\ttp://host:2\t1/", {"ftp", "", "", "host", 0, "/", "", "", "ftp://host/"});
+ checkURL("f\ttp://host:21\t/", {"ftp", "", "", "host", 0, "/", "", "", "ftp://host/"});
+ checkURL("ftp://host\t:21/", {"ftp", "", "", "host", 0, "/", "", "", "ftp://host/"});
+ checkURL("ftp://host:\t21/", {"ftp", "", "", "host", 0, "/", "", "", "ftp://host/"});
+ checkURL("ftp://host:2\t1/", {"ftp", "", "", "host", 0, "/", "", "", "ftp://host/"});
+ checkURL("ftp://host:21\t/", {"ftp", "", "", "host", 0, "/", "", "", "ftp://host/"});
checkURL("ftp://host:22/", {"ftp", "", "", "host", 22, "/", "", "", "ftp://host:22/"});
checkURLDifferences("ftp://host:21",
{"ftp", "", "", "host", 0, "/", "", "", "ftp://host/"},
@@ -670,12 +729,6 @@
checkURLDifferences("unknown://host:81",
{"unknown", "", "", "host", 81, "/", "", "", "unknown://host:81/"},
{"unknown", "", "", "host", 81, "", "", "", "unknown://host:81"});
- checkURLDifferences("http://%48OsT",
- {"http", "", "", "host", 0, "/", "", "", "http://host/"},
- {"http", "", "", "%48ost", 0, "/", "", "", "http://%48ost/"});
- checkURLDifferences("http://host/`",
- {"http", "", "", "host", 0, "/%60", "", "", "http://host/%60"},
- {"http", "", "", "host", 0, "/`", "", "", "http://host/`"});
}
static void shouldFail(const String& urlString)
@@ -694,6 +747,8 @@
shouldFail(" \a ");
shouldFail("");
shouldFail(String());
+ shouldFail("", "about:blank");
+ shouldFail(String(), "about:blank");
shouldFail("http://127.0.0.1:abc");
shouldFail("http://host:abc");
shouldFail("http://a:@", "about:blank");
@@ -723,6 +778,7 @@
shouldFail("://:0/");
shouldFail("://:0/", "");
shouldFail("://:0/", "about:blank");
+ shouldFail("about~");
}
// These are in the spec but not in the web platform tests.