Diff
Modified: trunk/JSTests/ChangeLog (259025 => 259026)
--- trunk/JSTests/ChangeLog 2020-03-26 00:32:00 UTC (rev 259025)
+++ trunk/JSTests/ChangeLog 2020-03-26 01:24:05 UTC (rev 259026)
@@ -1,5 +1,14 @@
2020-03-25 Alexey Shvayka <shvaikal...@gmail.com>
+ Invalid numeric and named references should be early syntax errors
+ https://bugs.webkit.org/show_bug.cgi?id=178175
+
+ Reviewed by Ross Kirsling.
+
+ * test262/expectations.yaml: Mark 44 test cases as passing.
+
+2020-03-25 Alexey Shvayka <shvaikal...@gmail.com>
+
\b escapes inside character classes should be valid in Unicode patterns
https://bugs.webkit.org/show_bug.cgi?id=209528
Modified: trunk/JSTests/test262/expectations.yaml (259025 => 259026)
--- trunk/JSTests/test262/expectations.yaml 2020-03-26 00:32:00 UTC (rev 259025)
+++ trunk/JSTests/test262/expectations.yaml 2020-03-26 01:24:05 UTC (rev 259026)
@@ -1711,9 +1711,6 @@
test/built-ins/RegExp/quantifier-integer-limit.js:
default: 'SyntaxError: Invalid regular _expression_: number too large in {} quantifier'
strict mode: 'SyntaxError: Invalid regular _expression_: number too large in {} quantifier'
-test/built-ins/RegExp/unicode_restricted_identity_escape_alpha.js:
- default: "Test262Error: IdentityEscape in AtomEscape: 'k' Expected a SyntaxError to be thrown but no exception was thrown at all"
- strict mode: "Test262Error: IdentityEscape in AtomEscape: 'k' Expected a SyntaxError to be thrown but no exception was thrown at all"
test/built-ins/RegExp/unicode_restricted_octal_escape.js:
default: 'Test262Error: RegExp("[\1]", "u"): Expected a SyntaxError to be thrown but no exception was thrown at all'
strict mode: 'Test262Error: RegExp("[\1]", "u"): Expected a SyntaxError to be thrown but no exception was thrown at all'
@@ -3264,72 +3261,9 @@
test/language/identifiers/start-unicode-13.0.0.js:
default: "SyntaxError: Invalid character '\\u08be'"
strict mode: "SyntaxError: Invalid character '\\u08be'"
-test/language/literals/regexp/named-groups/invalid-dangling-groupname-2-u.js:
- default: 'Test262: This statement should not be evaluated.'
- strict mode: 'Test262: This statement should not be evaluated.'
-test/language/literals/regexp/named-groups/invalid-dangling-groupname-2.js:
- default: 'Test262: This statement should not be evaluated.'
- strict mode: 'Test262: This statement should not be evaluated.'
-test/language/literals/regexp/named-groups/invalid-dangling-groupname-3-u.js:
- default: 'Test262: This statement should not be evaluated.'
- strict mode: 'Test262: This statement should not be evaluated.'
-test/language/literals/regexp/named-groups/invalid-dangling-groupname-3.js:
- default: 'Test262: This statement should not be evaluated.'
- strict mode: 'Test262: This statement should not be evaluated.'
-test/language/literals/regexp/named-groups/invalid-dangling-groupname-4-u.js:
- default: 'Test262: This statement should not be evaluated.'
- strict mode: 'Test262: This statement should not be evaluated.'
-test/language/literals/regexp/named-groups/invalid-dangling-groupname-4.js:
- default: 'Test262: This statement should not be evaluated.'
- strict mode: 'Test262: This statement should not be evaluated.'
-test/language/literals/regexp/named-groups/invalid-dangling-groupname-5.js:
- default: 'Test262: This statement should not be evaluated.'
- strict mode: 'Test262: This statement should not be evaluated.'
-test/language/literals/regexp/named-groups/invalid-dangling-groupname-u.js:
- default: 'Test262: This statement should not be evaluated.'
- strict mode: 'Test262: This statement should not be evaluated.'
-test/language/literals/regexp/named-groups/invalid-dangling-groupname-without-group-u.js:
- default: 'Test262: This statement should not be evaluated.'
- strict mode: 'Test262: This statement should not be evaluated.'
-test/language/literals/regexp/named-groups/invalid-dangling-groupname.js:
- default: 'Test262: This statement should not be evaluated.'
- strict mode: 'Test262: This statement should not be evaluated.'
-test/language/literals/regexp/named-groups/invalid-incomplete-groupname-2.js:
- default: 'Test262: This statement should not be evaluated.'
- strict mode: 'Test262: This statement should not be evaluated.'
-test/language/literals/regexp/named-groups/invalid-incomplete-groupname-3.js:
- default: 'Test262: This statement should not be evaluated.'
- strict mode: 'Test262: This statement should not be evaluated.'
-test/language/literals/regexp/named-groups/invalid-incomplete-groupname-4.js:
- default: 'Test262: This statement should not be evaluated.'
- strict mode: 'Test262: This statement should not be evaluated.'
-test/language/literals/regexp/named-groups/invalid-incomplete-groupname-5.js:
- default: 'Test262: This statement should not be evaluated.'
- strict mode: 'Test262: This statement should not be evaluated.'
-test/language/literals/regexp/named-groups/invalid-incomplete-groupname-6.js:
- default: 'Test262: This statement should not be evaluated.'
- strict mode: 'Test262: This statement should not be evaluated.'
-test/language/literals/regexp/named-groups/invalid-incomplete-groupname-u.js:
- default: 'Test262: This statement should not be evaluated.'
- strict mode: 'Test262: This statement should not be evaluated.'
-test/language/literals/regexp/named-groups/invalid-incomplete-groupname-without-group-3-u.js:
- default: 'Test262: This statement should not be evaluated.'
- strict mode: 'Test262: This statement should not be evaluated.'
-test/language/literals/regexp/named-groups/invalid-incomplete-groupname.js:
- default: 'Test262: This statement should not be evaluated.'
- strict mode: 'Test262: This statement should not be evaluated.'
test/language/literals/regexp/u-astral-char-class-invert.js:
default: 'Test262Error: Expected SameValue(«�», «null») to be true'
strict mode: 'Test262Error: Expected SameValue(«�», «null») to be true'
-test/language/literals/regexp/u-dec-esc.js:
- default: 'Test262: This statement should not be evaluated.'
- strict mode: 'Test262: This statement should not be evaluated.'
-test/language/literals/regexp/u-invalid-legacy-octal-escape.js:
- default: 'Test262: This statement should not be evaluated.'
- strict mode: 'Test262: This statement should not be evaluated.'
-test/language/literals/regexp/u-invalid-oob-decimal-escape.js:
- default: 'Test262: This statement should not be evaluated.'
- strict mode: 'Test262: This statement should not be evaluated.'
test/language/module-code/eval-rqstd-once.js:
module: "SyntaxError: Unexpected identifier 'as'. Expected 'from' before exported module name."
test/language/module-code/eval-rqstd-order.js:
Modified: trunk/LayoutTests/ChangeLog (259025 => 259026)
--- trunk/LayoutTests/ChangeLog 2020-03-26 00:32:00 UTC (rev 259025)
+++ trunk/LayoutTests/ChangeLog 2020-03-26 01:24:05 UTC (rev 259026)
@@ -1,3 +1,13 @@
+2020-03-25 Alexey Shvayka <shvaikal...@gmail.com>
+
+ Invalid numeric and named references should be early syntax errors
+ https://bugs.webkit.org/show_bug.cgi?id=178175
+
+ Reviewed by Ross Kirsling.
+
+ * js/regexp-named-capture-groups-expected.txt:
+ * js/script-tests/regexp-named-capture-groups.js:
+
2020-03-25 Pinki Gyanchandani <pgyanchand...@apple.com>
CanvasRenderingContext2D.putImageData() should not process neutered ImageData
Modified: trunk/LayoutTests/js/regexp-named-capture-groups-expected.txt (259025 => 259026)
--- trunk/LayoutTests/js/regexp-named-capture-groups-expected.txt 2020-03-26 00:32:00 UTC (rev 259025)
+++ trunk/LayoutTests/js/regexp-named-capture-groups-expected.txt 2020-03-26 01:24:05 UTC (rev 259026)
@@ -66,7 +66,8 @@
PASS "1122332211".match(/\k<ones>\k<twos>\k<threes>(?<ones>1*)(?<twos>2*)(?<threes>3*)\k<threes>\k<twos>\k<ones>/) is ["1122332211", "11", "22", "3"]
PASS "1122332211".match(/\k<ones>\k<twos>\k<threes>(?<ones>1*)(?<twos>2*)(?<threes>3*)\k<threes>\k<twos>\k<ones>/u) is ["1122332211", "11", "22", "3"]
PASS "\k<z>XzzX".match(/\k<z>X(z*)X/) is ["k<z>XzzX", "zz"]
-PASS "\k<z>XzzX".match(/\k<z>X(z*)X/u) threw exception SyntaxError: Invalid regular _expression_: invalid backreference for Unicode pattern.
+PASS "\k<z>XzzX".match(/\k<z>X(z*)X/u) threw exception SyntaxError: Invalid regular _expression_: invalid \k<> named backreference.
+PASS /\k<xxx(?<a>y)(/ threw exception SyntaxError: Invalid regular _expression_: invalid \k<> named backreference.
PASS successfullyParsed is true
TEST COMPLETE
Modified: trunk/LayoutTests/js/script-tests/regexp-named-capture-groups.js (259025 => 259026)
--- trunk/LayoutTests/js/script-tests/regexp-named-capture-groups.js 2020-03-26 00:32:00 UTC (rev 259025)
+++ trunk/LayoutTests/js/script-tests/regexp-named-capture-groups.js 2020-03-26 01:24:05 UTC (rev 259026)
@@ -111,6 +111,7 @@
shouldBe('"1122332211".match(/\\\k<ones>\\\k<twos>\\\k<threes>(?<ones>1*)(?<twos>2*)(?<threes>3*)\\\k<threes>\\\k<twos>\\\k<ones>/u)', '["1122332211", "11", "22", "3"]');
// Check that a named forward reference for a non-existent named capture
-// matches for non-unicode patterns and throws for unicode patterns.
+// matches for non-Unicode patterns w/o a named group and throws for patterns with a named group or Unicode flag.
shouldBe('"\\\k<z>XzzX".match(/\\\k<z>X(z*)X/)', '["k<z>XzzX", "zz"]');
-shouldThrow('"\\\k<z>XzzX".match(/\\\k<z>X(z*)X/u)', '"SyntaxError: Invalid regular _expression_: invalid backreference for Unicode pattern"');
+shouldThrow('"\\\k<z>XzzX".match(/\\\k<z>X(z*)X/u)', '"SyntaxError: Invalid regular _expression_: invalid \\\\k<> named backreference"');
+shouldThrow('/\\\k<xxx(?<a>y)(/', '"SyntaxError: Invalid regular _expression_: invalid \\\\k<> named backreference"');
Modified: trunk/Source/_javascript_Core/ChangeLog (259025 => 259026)
--- trunk/Source/_javascript_Core/ChangeLog 2020-03-26 00:32:00 UTC (rev 259025)
+++ trunk/Source/_javascript_Core/ChangeLog 2020-03-26 01:24:05 UTC (rev 259026)
@@ -1,3 +1,56 @@
+2020-03-25 Alexey Shvayka <shvaikal...@gmail.com>
+
+ Invalid numeric and named references should be early syntax errors
+ https://bugs.webkit.org/show_bug.cgi?id=178175
+
+ Reviewed by Ross Kirsling.
+
+ This patch:
+
+ 1. Fixes named reference parsing in parseEscape(), making /\k/u throw SyntaxError per spec [1].
+
+ 2. Reworks containsIllegalNamedForwardReferences(), making dangling (e.g. /\k<a>(?<b>.)/) and
+ incomplete (e.g. /\k<(?<a>.)/) named references throw SyntaxError if the non-Unicode pattern
+ contains a named group [2].
+
+ 3. Moves reparsing logic from YarrPattern to YarrParser, ensuring syntax errors due to illegal
+ references (named & numeric) are thrown at parse time; drops isValidNamedForwardReference()
+ from Delegate, refactors saveUnmatchedNamedForwardReferences(), and overall improves cohesion
+ of illegal references logic.
+
+ [1]: https://tc39.es/ecma262/#prod-IdentityEscape
+ [2]: https://tc39.es/ecma262/#sec-regexpinitialize (step 7.b)
+
+ * yarr/YarrErrorCode.cpp:
+ (JSC::Yarr::errorMessage):
+ (JSC::Yarr::errorToThrow):
+ * yarr/YarrErrorCode.h:
+ * yarr/YarrParser.h:
+ (JSC::Yarr::Parser::CharacterClassParserDelegate::atomNamedBackReference):
+ (JSC::Yarr::Parser::Parser):
+ (JSC::Yarr::Parser::parseEscape):
+ (JSC::Yarr::Parser::parseParenthesesBegin):
+ (JSC::Yarr::Parser::parse):
+ (JSC::Yarr::Parser::handleIllegalReferences):
+ (JSC::Yarr::Parser::containsIllegalNamedForwardReference):
+ (JSC::Yarr::Parser::resetForReparsing):
+ (JSC::Yarr::parse):
+ (JSC::Yarr::Parser::CharacterClassParserDelegate::isValidNamedForwardReference): Deleted.
+ * yarr/YarrPattern.cpp:
+ (JSC::Yarr::YarrPatternConstructor::atomBackReference):
+ (JSC::Yarr::YarrPatternConstructor::atomNamedForwardReference):
+ (JSC::Yarr::YarrPattern::compile):
+ (JSC::Yarr::YarrPatternConstructor::saveUnmatchedNamedForwardReferences): Deleted.
+ (JSC::Yarr::YarrPatternConstructor::isValidNamedForwardReference): Deleted.
+ * yarr/YarrPattern.h:
+ (JSC::Yarr::YarrPattern::resetForReparsing):
+ (JSC::Yarr::YarrPattern::containsIllegalBackReference): Deleted.
+ (JSC::Yarr::YarrPattern::containsIllegalNamedForwardReferences): Deleted.
+ * yarr/YarrSyntaxChecker.cpp:
+ (JSC::Yarr::SyntaxChecker::atomNamedBackReference):
+ (JSC::Yarr::SyntaxChecker::resetForReparsing):
+ (JSC::Yarr::SyntaxChecker::isValidNamedForwardReference): Deleted.
+
2020-03-25 Chris Dumez <cdu...@apple.com>
Use JSC::EnsureStillAliveScope RAII object in the generated bindings code
Modified: trunk/Source/_javascript_Core/yarr/YarrErrorCode.cpp (259025 => 259026)
--- trunk/Source/_javascript_Core/yarr/YarrErrorCode.cpp 2020-03-26 00:32:00 UTC (rev 259025)
+++ trunk/Source/_javascript_Core/yarr/YarrErrorCode.cpp 2020-03-26 01:24:05 UTC (rev 259026)
@@ -53,6 +53,7 @@
REGEXP_ERROR_PREFIX "\\ at end of pattern", // EscapeUnterminated
REGEXP_ERROR_PREFIX "invalid Unicode {} escape", // InvalidUnicodeEscape
REGEXP_ERROR_PREFIX "invalid backreference for Unicode pattern", // InvalidBackreference
+ REGEXP_ERROR_PREFIX "invalid \\k<> named backreference", // InvalidNamedBackReference
REGEXP_ERROR_PREFIX "invalid escaped character for Unicode pattern", // InvalidIdentityEscape
REGEXP_ERROR_PREFIX "invalid \\c escape for Unicode pattern", // InvalidControlLetterEscape
REGEXP_ERROR_PREFIX "invalid property _expression_", // InvalidUnicodePropertyExpression
@@ -87,6 +88,7 @@
case ErrorCode::EscapeUnterminated:
case ErrorCode::InvalidUnicodeEscape:
case ErrorCode::InvalidBackreference:
+ case ErrorCode::InvalidNamedBackReference:
case ErrorCode::InvalidIdentityEscape:
case ErrorCode::InvalidControlLetterEscape:
case ErrorCode::InvalidUnicodePropertyExpression:
Modified: trunk/Source/_javascript_Core/yarr/YarrErrorCode.h (259025 => 259026)
--- trunk/Source/_javascript_Core/yarr/YarrErrorCode.h 2020-03-26 00:32:00 UTC (rev 259025)
+++ trunk/Source/_javascript_Core/yarr/YarrErrorCode.h 2020-03-26 01:24:05 UTC (rev 259026)
@@ -52,6 +52,7 @@
EscapeUnterminated,
InvalidUnicodeEscape,
InvalidBackreference,
+ InvalidNamedBackReference,
InvalidIdentityEscape,
InvalidControlLetterEscape,
InvalidUnicodePropertyExpression,
Modified: trunk/Source/_javascript_Core/yarr/YarrParser.h (259025 => 259026)
--- trunk/Source/_javascript_Core/yarr/YarrParser.h 2020-03-26 00:32:00 UTC (rev 259025)
+++ trunk/Source/_javascript_Core/yarr/YarrParser.h 2020-03-26 01:24:05 UTC (rev 259026)
@@ -1,5 +1,6 @@
/*
- * Copyright (C) 2009-2019 Apple Inc. All rights reserved.
+ * Copyright (C) 2009-2020 Apple Inc. All rights reserved.
+ * Copyright (C) 2020 Alexey Shvayka <shvaikal...@gmail.com>.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -10,17 +11,17 @@
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
- * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
- * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
- * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
- * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
- * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
@@ -41,7 +42,7 @@
class Parser {
private:
template<class FriendDelegate>
- friend ErrorCode parse(FriendDelegate&, const String& pattern, bool isUnicode, unsigned backReferenceLimit);
+ friend ErrorCode parse(FriendDelegate&, const String& pattern, bool isUnicode, unsigned backReferenceLimit, bool isNamedForwardReferenceAllowed);
/*
* CharacterClassParserDelegate:
@@ -200,7 +201,6 @@
NO_RETURN_DUE_TO_ASSERT void assertionWordBoundary(bool) { RELEASE_ASSERT_NOT_REACHED(); }
NO_RETURN_DUE_TO_ASSERT void atomBackReference(unsigned) { RELEASE_ASSERT_NOT_REACHED(); }
NO_RETURN_DUE_TO_ASSERT void atomNamedBackReference(const String&) { RELEASE_ASSERT_NOT_REACHED(); }
- NO_RETURN_DUE_TO_ASSERT bool isValidNamedForwardReference(const String&) { RELEASE_ASSERT_NOT_REACHED(); }
NO_RETURN_DUE_TO_ASSERT void atomNamedForwardReference(const String&) { RELEASE_ASSERT_NOT_REACHED(); }
private:
@@ -217,12 +217,13 @@
UChar32 m_character;
};
- Parser(Delegate& delegate, const String& pattern, bool isUnicode, unsigned backReferenceLimit)
+ Parser(Delegate& delegate, const String& pattern, bool isUnicode, unsigned backReferenceLimit, bool isNamedForwardReferenceAllowed)
: m_delegate(delegate)
- , m_backReferenceLimit(backReferenceLimit)
, m_data(pattern.characters<CharType>())
, m_size(pattern.length())
, m_isUnicode(isUnicode)
+ , m_backReferenceLimit(backReferenceLimit)
+ , m_isNamedForwardReferenceAllowed(isNamedForwardReferenceAllowed)
{
}
@@ -338,16 +339,12 @@
unsigned backReference = consumeNumber();
if (backReference <= m_backReferenceLimit) {
+ m_maxSeenBackReference = std::max(m_maxSeenBackReference, backReference);
delegate.atomBackReference(backReference);
break;
}
restoreState(state);
-
- if (m_isUnicode) {
- m_errorCode = ErrorCode::InvalidBackreference;
- return false;
- }
}
// Not a backreference, and not octal. Just a number.
@@ -439,28 +436,27 @@
case 'k': {
consume();
ParseState state = saveState();
- if (!atEndOfPattern() && !inCharacterClass) {
- if (consume() == '<') {
- auto groupName = tryConsumeGroupName();
- if (groupName) {
- if (m_captureGroupNames.contains(groupName.value())) {
- delegate.atomNamedBackReference(groupName.value());
- break;
- }
-
- if (delegate.isValidNamedForwardReference(groupName.value())) {
- delegate.atomNamedForwardReference(groupName.value());
- break;
- }
+ if (!inCharacterClass && tryConsume('<')) {
+ auto groupName = tryConsumeGroupName();
+ if (groupName) {
+ if (m_captureGroupNames.contains(groupName.value())) {
+ delegate.atomNamedBackReference(groupName.value());
+ break;
}
- if (m_isUnicode) {
- m_errorCode = ErrorCode::InvalidBackreference;
+
+ if (m_isNamedForwardReferenceAllowed) {
+ m_forwardReferenceNames.add(groupName.value());
+ delegate.atomNamedForwardReference(groupName.value());
break;
}
}
}
+
restoreState(state);
- delegate.atomPatternCharacter('k');
+ if (!isIdentityEscapeAnError('k')) {
+ delegate.atomPatternCharacter('k');
+ m_kIdentityEscapeSeen = true;
+ }
break;
}
@@ -677,6 +673,11 @@
case '<': {
auto groupName = tryConsumeGroupName();
if (groupName) {
+ if (m_kIdentityEscapeSeen) {
+ m_errorCode = ErrorCode::InvalidNamedBackReference;
+ break;
+ }
+
auto setAddResult = m_captureGroupNames.add(groupName.value());
if (setAddResult.isNewEntry)
m_delegate.atomParenthesesSubpatternBegin(true, groupName);
@@ -694,6 +695,9 @@
} else
m_delegate.atomParenthesesSubpatternBegin();
+ if (type == ParenthesesType::Subpattern)
+ ++m_numSubpatterns;
+
m_parenthesesStack.append(type);
}
@@ -881,14 +885,87 @@
ErrorCode parse()
{
if (m_size > MAX_PATTERN_SIZE)
- m_errorCode = ErrorCode::PatternTooLarge;
- else
- parseTokens();
- ASSERT(atEndOfPattern() || hasError(m_errorCode));
-
+ return ErrorCode::PatternTooLarge;
+
+ parseTokens();
+
+ if (!hasError(m_errorCode)) {
+ ASSERT(atEndOfPattern());
+ handleIllegalReferences();
+ ASSERT(atEndOfPattern());
+ }
+
return m_errorCode;
}
+ void handleIllegalReferences()
+ {
+ bool shouldReparse = false;
+
+ if (m_maxSeenBackReference > m_numSubpatterns) {
+ // Contains illegal numeric backreference. See https://tc39.es/ecma262/#prod-annexB-AtomEscape
+ if (m_isUnicode) {
+ m_errorCode = ErrorCode::InvalidBackreference;
+ return;
+ }
+
+ m_backReferenceLimit = m_numSubpatterns;
+ shouldReparse = true;
+ }
+
+ if (m_kIdentityEscapeSeen && !m_captureGroupNames.isEmpty()) {
+ m_errorCode = ErrorCode::InvalidNamedBackReference;
+ return;
+ }
+
+ if (containsIllegalNamedForwardReference()) {
+ // \k<a> is parsed as named reference in Unicode patterns because of strict IdentityEscape grammar.
+ // See https://tc39.es/ecma262/#sec-patterns-static-semantics-early-errors
+ if (m_isUnicode || !m_captureGroupNames.isEmpty()) {
+ m_errorCode = ErrorCode::InvalidNamedBackReference;
+ return;
+ }
+
+ m_isNamedForwardReferenceAllowed = false;
+ shouldReparse = true;
+ }
+
+ if (shouldReparse) {
+ resetForReparsing();
+ parseTokens();
+ }
+ }
+
+ bool containsIllegalNamedForwardReference()
+ {
+ if (m_forwardReferenceNames.isEmpty())
+ return false;
+
+ if (m_captureGroupNames.isEmpty())
+ return true;
+
+ for (auto& entry : m_forwardReferenceNames) {
+ if (!m_captureGroupNames.contains(entry))
+ return true;
+ }
+
+ return false;
+ }
+
+ void resetForReparsing()
+ {
+ ASSERT(!hasError(m_errorCode));
+
+ m_delegate.resetForReparsing();
+ m_index = 0;
+ m_numSubpatterns = 0;
+ m_maxSeenBackReference = 0;
+ m_kIdentityEscapeSeen = false;
+ m_parenthesesStack.clear();
+ m_captureGroupNames.clear();
+ m_forwardReferenceNames.clear();
+ }
+
// Misc helper functions:
typedef unsigned ParseState;
@@ -1153,14 +1230,19 @@
enum class ParenthesesType : uint8_t { Subpattern, Assertion };
Delegate& m_delegate;
- unsigned m_backReferenceLimit;
ErrorCode m_errorCode { ErrorCode::NoError };
const CharType* m_data;
unsigned m_size;
unsigned m_index { 0 };
bool m_isUnicode;
+ unsigned m_backReferenceLimit;
+ unsigned m_numSubpatterns { 0 };
+ unsigned m_maxSeenBackReference { 0 };
+ bool m_isNamedForwardReferenceAllowed;
+ bool m_kIdentityEscapeSeen { false };
Vector<ParenthesesType, 16> m_parenthesesStack;
HashSet<String> m_captureGroupNames;
+ HashSet<String> m_forwardReferenceNames;
// Derived by empirical testing of compile time in PCRE and WREC.
static constexpr unsigned MAX_PATTERN_SIZE = 1024 * 1024;
@@ -1192,7 +1274,6 @@
* void atomParenthesesEnd();
* void atomBackReference(unsigned subpatternId);
* void atomNamedBackReference(const String& subpatternName);
- * bool isValidNamedForwardReference(const String& subpatternName);
* void atomNamedForwardReference(const String& subpatternName);
*
* void quantifyAtom(unsigned min, unsigned max, bool greedy);
@@ -1199,6 +1280,8 @@
*
* void disjunction();
*
+ * void resetForReparsing();
+ *
* The regular _expression_ is described by a sequence of assertion*() and atom*()
* callbacks to the delegate, describing the terms in the regular _expression_.
* Following an atom a quantifyAtom() call may occur to indicate that the previous
@@ -1229,11 +1312,11 @@
*/
template<class Delegate>
-ErrorCode parse(Delegate& delegate, const String& pattern, bool isUnicode, unsigned backReferenceLimit = quantifyInfinite)
+ErrorCode parse(Delegate& delegate, const String& pattern, bool isUnicode, unsigned backReferenceLimit = quantifyInfinite, bool isNamedForwardReferenceAllowed = true)
{
if (pattern.is8Bit())
- return Parser<Delegate, LChar>(delegate, pattern, isUnicode, backReferenceLimit).parse();
- return Parser<Delegate, UChar>(delegate, pattern, isUnicode, backReferenceLimit).parse();
+ return Parser<Delegate, LChar>(delegate, pattern, isUnicode, backReferenceLimit, isNamedForwardReferenceAllowed).parse();
+ return Parser<Delegate, UChar>(delegate, pattern, isUnicode, backReferenceLimit, isNamedForwardReferenceAllowed).parse();
}
} } // namespace JSC::Yarr
Modified: trunk/Source/_javascript_Core/yarr/YarrPattern.cpp (259025 => 259026)
--- trunk/Source/_javascript_Core/yarr/YarrPattern.cpp 2020-03-26 00:32:00 UTC (rev 259025)
+++ trunk/Source/_javascript_Core/yarr/YarrPattern.cpp 2020-03-26 01:24:05 UTC (rev 259026)
@@ -462,16 +462,6 @@
m_pattern.m_disjunctions.append(WTFMove(body));
}
- void saveUnmatchedNamedForwardReferences()
- {
- m_unmatchedNamedForwardReferences.shrink(0);
-
- for (auto& entry : m_pattern.m_namedForwardReferences) {
- if (!m_pattern.m_captureGroupNames.contains(entry))
- m_unmatchedNamedForwardReferences.append(entry);
- }
- }
-
void assertionBOL()
{
if (!m_alternative->m_terms.size() && !m_invertParentheticalAssertion) {
@@ -657,7 +647,6 @@
{
ASSERT(subpatternId);
m_pattern.m_containsBackreferences = true;
- m_pattern.m_maxBackReference = std::max(m_pattern.m_maxBackReference, subpatternId);
if (subpatternId > m_pattern.m_numSubpatterns) {
m_alternative->m_terms.append(PatternTerm::ForwardReference());
@@ -687,14 +676,8 @@
atomBackReference(m_pattern.m_namedGroupToParenIndex.get(subpatternName));
}
- bool isValidNamedForwardReference(const String& subpatternName)
+ void atomNamedForwardReference(const String&)
{
- return !m_unmatchedNamedForwardReferences.contains(subpatternName);
- }
-
- void atomNamedForwardReference(const String& subpatternName)
- {
- m_pattern.m_namedForwardReferences.appendIfNotContains(subpatternName);
m_alternative->m_terms.append(PatternTerm::ForwardReference());
}
@@ -1130,7 +1113,6 @@
YarrPattern& m_pattern;
PatternAlternative* m_alternative;
CharacterClassConstructor m_characterClassConstructor;
- Vector<String> m_unmatchedNamedForwardReferences;
void* m_stackLimit;
ErrorCode m_error { ErrorCode::NoError };
bool m_invertCharacterClass;
@@ -1146,24 +1128,7 @@
if (hasError(error))
return error;
}
-
- // If the pattern contains illegal backreferences reset & reparse.
- // Quoting Netscape's "What's new in _javascript_ 1.2",
- // "Note: if the number of left parentheses is less than the number specified
- // in \#, the \# is taken as an octal escape as described in the next row."
- if (containsIllegalBackReference() || containsIllegalNamedForwardReferences()) {
- if (unicode())
- return ErrorCode::InvalidBackreference;
- unsigned numSubpatterns = m_numSubpatterns;
-
- constructor.saveUnmatchedNamedForwardReferences();
- constructor.resetForReparsing();
- ErrorCode error = parse(constructor, patternString, unicode(), numSubpatterns);
- ASSERT_UNUSED(error, !hasError(error));
- ASSERT(numSubpatterns == m_numSubpatterns);
- }
-
constructor.checkForTerminalParentheses();
constructor.optimizeDotStarWrappedExpressions();
constructor.optimizeBOL();
Modified: trunk/Source/_javascript_Core/yarr/YarrPattern.h (259025 => 259026)
--- trunk/Source/_javascript_Core/yarr/YarrPattern.h 2020-03-26 00:32:00 UTC (rev 259025)
+++ trunk/Source/_javascript_Core/yarr/YarrPattern.h 2020-03-26 01:24:05 UTC (rev 259026)
@@ -391,7 +391,6 @@
void resetForReparsing()
{
m_numSubpatterns = 0;
- m_maxBackReference = 0;
m_initialStartValueFrameLocation = 0;
m_containsBackreferences = false;
@@ -415,27 +414,8 @@
m_disjunctions.clear();
m_userCharacterClasses.clear();
m_captureGroupNames.shrink(0);
- m_namedForwardReferences.shrink(0);
}
- bool containsIllegalBackReference()
- {
- return m_maxBackReference > m_numSubpatterns;
- }
-
- bool containsIllegalNamedForwardReferences()
- {
- if (m_namedForwardReferences.isEmpty())
- return false;
-
- for (auto& entry : m_namedForwardReferences) {
- if (!m_captureGroupNames.contains(entry))
- return true;
- }
-
- return false;
- }
-
bool containsUnsignedLengthPattern()
{
return m_containsUnsignedLengthPattern;
@@ -555,13 +535,11 @@
bool m_saveInitialStartValue : 1;
OptionSet<Flags> m_flags;
unsigned m_numSubpatterns { 0 };
- unsigned m_maxBackReference { 0 };
unsigned m_initialStartValueFrameLocation { 0 };
PatternDisjunction* m_body;
Vector<std::unique_ptr<PatternDisjunction>, 4> m_disjunctions;
Vector<std::unique_ptr<CharacterClass>> m_userCharacterClasses;
Vector<String> m_captureGroupNames;
- Vector<String> m_namedForwardReferences;
HashMap<String, unsigned> m_namedGroupToParenIndex;
private:
Modified: trunk/Source/_javascript_Core/yarr/YarrSyntaxChecker.cpp (259025 => 259026)
--- trunk/Source/_javascript_Core/yarr/YarrSyntaxChecker.cpp 2020-03-26 00:32:00 UTC (rev 259025)
+++ trunk/Source/_javascript_Core/yarr/YarrSyntaxChecker.cpp 2020-03-26 01:24:05 UTC (rev 259026)
@@ -49,10 +49,10 @@
void atomParenthesesEnd() {}
void atomBackReference(unsigned) {}
void atomNamedBackReference(const String&) {}
- bool isValidNamedForwardReference(const String&) { return true; }
void atomNamedForwardReference(const String&) {}
void quantifyAtom(unsigned, unsigned, bool) {}
void disjunction() {}
+ void resetForReparsing() {}
};
ErrorCode checkSyntax(const String& pattern, const String& flags)
Modified: trunk/Source/WebCore/ChangeLog (259025 => 259026)
--- trunk/Source/WebCore/ChangeLog 2020-03-26 00:32:00 UTC (rev 259025)
+++ trunk/Source/WebCore/ChangeLog 2020-03-26 01:24:05 UTC (rev 259026)
@@ -1,3 +1,21 @@
+2020-03-25 Alexey Shvayka <shvaikal...@gmail.com>
+
+ Invalid numeric and named references should be early syntax errors
+ https://bugs.webkit.org/show_bug.cgi?id=178175
+
+ Reviewed by Ross Kirsling.
+
+ Accounts for changes of YarrParser's Delegate interface, no behavioral changes.
+ resetForReparsing() is never called because we disable numeric backrefences
+ and named forward references (see arguments of Yarr::parse() call).
+
+ Test: TestWebKitAPI.ContentExtensionTest.ParsingFailures
+
+ * contentextensions/URLFilterParser.cpp:
+ (WebCore::ContentExtensions::PatternParser::resetForReparsing):
+ (WebCore::ContentExtensions::URLFilterParser::addPattern):
+ (WebCore::ContentExtensions::PatternParser::isValidNamedForwardReference): Deleted.
+
2020-03-25 Pinki Gyanchandani <pgyanchand...@apple.com>
CanvasRenderingContext2D.putImageData() should not process neutered ImageData
Modified: trunk/Source/WebCore/contentextensions/URLFilterParser.cpp (259025 => 259026)
--- trunk/Source/WebCore/contentextensions/URLFilterParser.cpp 2020-03-26 00:32:00 UTC (rev 259025)
+++ trunk/Source/WebCore/contentextensions/URLFilterParser.cpp 2020-03-26 01:24:05 UTC (rev 259026)
@@ -134,11 +134,6 @@
fail(URLFilterParser::BackReference);
}
- bool isValidNamedForwardReference(const String&)
- {
- return false;
- }
-
void atomNamedForwardReference(const String&)
{
fail(URLFilterParser::ForwardReference);
@@ -249,6 +244,11 @@
fail(URLFilterParser::Disjunction);
}
+ NO_RETURN_DUE_TO_CRASH void resetForReparsing()
+ {
+ RELEASE_ASSERT_NOT_REACHED();
+ }
+
private:
bool hasError() const
{
@@ -358,7 +358,7 @@
ParseStatus status = Ok;
PatternParser patternParser(patternIsCaseSensitive);
- if (!JSC::Yarr::hasError(JSC::Yarr::parse(patternParser, pattern, false, 0)))
+ if (!JSC::Yarr::hasError(JSC::Yarr::parse(patternParser, pattern, false, 0, false)))
patternParser.finalize(patternId, m_combinedURLFilters);
else
status = YarrError;
Modified: trunk/Tools/ChangeLog (259025 => 259026)
--- trunk/Tools/ChangeLog 2020-03-26 00:32:00 UTC (rev 259025)
+++ trunk/Tools/ChangeLog 2020-03-26 01:24:05 UTC (rev 259026)
@@ -1,3 +1,15 @@
+2020-03-25 Alexey Shvayka <shvaikal...@gmail.com>
+
+ Invalid numeric and named references should be early syntax errors
+ https://bugs.webkit.org/show_bug.cgi?id=178175
+
+ Reviewed by Ross Kirsling.
+
+ Removes FIXME as YarrParser is correct not to throw errors as it is
+ parsing in non-Unicode mode. Also adds a few named groups tests.
+
+ * TestWebKitAPI/Tests/WebCore/ContentExtensions.cpp:
+
2020-03-25 Zhifei Fang <zhifei_f...@apple.com>
[Timeline] A better default get label function, which fit the assumpation the label is always a string
Modified: trunk/Tools/TestWebKitAPI/Tests/WebCore/ContentExtensions.cpp (259025 => 259026)
--- trunk/Tools/TestWebKitAPI/Tests/WebCore/ContentExtensions.cpp 2020-03-26 00:32:00 UTC (rev 259025)
+++ trunk/Tools/TestWebKitAPI/Tests/WebCore/ContentExtensions.cpp 2020-03-26 01:24:05 UTC (rev 259026)
@@ -1760,7 +1760,6 @@
testPatternStatus("[", ContentExtensions::URLFilterParser::ParseStatus::YarrError);
testPatternStatus("[a}", ContentExtensions::URLFilterParser::ParseStatus::YarrError);
- // FIXME: Look into why these do not cause YARR parsing errors. They probably should.
testPatternStatus("a]", ContentExtensions::URLFilterParser::ParseStatus::Ok);
testPatternStatus("{", ContentExtensions::URLFilterParser::ParseStatus::Ok);
testPatternStatus("{[a]", ContentExtensions::URLFilterParser::ParseStatus::Ok);
@@ -1789,10 +1788,12 @@
testPatternStatus("(a)\\1", ContentExtensions::URLFilterParser::ParseStatus::Ok); // Back references are disabled, it parse as octal 1
testPatternStatus("(<A>a)\\k<A>", ContentExtensions::URLFilterParser::ParseStatus::Ok); // Named back references aren't handled, it parse as "k<A>"
+ testPatternStatus("(?<A>a)\\k<A>", ContentExtensions::URLFilterParser::ParseStatus::BackReference);
testPatternStatus("\\1(a)", ContentExtensions::URLFilterParser::ParseStatus::Ok); // Forward references are disabled, it parse as octal 1
testPatternStatus("\\8(a)", ContentExtensions::URLFilterParser::ParseStatus::Ok); // Forward references are disabled, it parse as '8'
testPatternStatus("\\9(a)", ContentExtensions::URLFilterParser::ParseStatus::Ok); // Forward references are disabled, it parse as '9'
testPatternStatus("\\k<A>(<A>a)", ContentExtensions::URLFilterParser::ParseStatus::Ok); // Named forward references aren't handled, it parse as "k<A>"
+ testPatternStatus("\\k<A>(?<A>a)", ContentExtensions::URLFilterParser::ParseStatus::YarrError);
testPatternStatus("\\k<A>(a)", ContentExtensions::URLFilterParser::ParseStatus::Ok); // Unmatched named forward references aren't handled, it parse as "k<A>"
}