Added: releases/WebKitGTK/webkit-2.26/JSTests/stress/regexp-unicode-surrogate-pair-increment-should-involve-length-check.js (0 => 250223)
--- releases/WebKitGTK/webkit-2.26/JSTests/stress/regexp-unicode-surrogate-pair-increment-should-involve-length-check.js (rev 0)
+++ releases/WebKitGTK/webkit-2.26/JSTests/stress/regexp-unicode-surrogate-pair-increment-should-involve-length-check.js 2019-09-23 10:14:36 UTC (rev 250223)
@@ -0,0 +1,27 @@
+// This test checks for proper incrementing around / over individual surrogates and surrogate pairs.
+// This test should run without crashing.
+
+function testRegExpMatch(re, str)
+{
+ for (let i = 0; i < 100; ++i) {
+ let match = re.exec(str);
+ if (!match || match[0] != str) {
+ print(match);
+ throw "Expected " + re + " to match \"" + str + "\" but it didn't";
+ }
+ }
+}
+
+let testString = "\ud800\ud800\udc00";
+let greedyRegExp = /([^x]+)[^]*\1([^])/u;
+
+testRegExpMatch(greedyRegExp, testString);
+
+let nonGreedyRegExp = /(.*[^x]+?)[^]*([^])/u;
+
+testRegExpMatch(nonGreedyRegExp, testString);
+
+let testString2 = "\ud800\ud800\udc00Test\udc00123";
+let backtrackGreedyRegExp = /.*[\x20-\udffff].\w*.\d{3}/u;
+
+testRegExpMatch(backtrackGreedyRegExp, testString2);
Modified: releases/WebKitGTK/webkit-2.26/Source/_javascript_Core/ChangeLog (250222 => 250223)
--- releases/WebKitGTK/webkit-2.26/Source/_javascript_Core/ChangeLog 2019-09-23 10:14:32 UTC (rev 250222)
+++ releases/WebKitGTK/webkit-2.26/Source/_javascript_Core/ChangeLog 2019-09-23 10:14:36 UTC (rev 250223)
@@ -1,3 +1,30 @@
+2019-09-16 Michael Saboff <msab...@apple.com>
+
+ [JSC] Perform check again when we found non-BMP characters
+ https://bugs.webkit.org/show_bug.cgi?id=201647
+
+ Reviewed by Yusuke Suzuki.
+
+ We need to check for end of input for non-BMP characters when matching a character class that contains
+ both BMP and non-BMP characters. In advanceIndexAfterCharacterClassTermMatch() we were checking for
+ end of input for both BMP and non-BMP characters. For BMP characters, this check is redundant.
+ After moving the check to after the "is BMP check", we need to decrement index after reaching the failure
+ label to back out the index++ for the first surrogate of the non-BMP character.
+
+ Added the same kind of check in generateCharacterClassOnce(). In that case, we have pre-checked the
+ first character (surrogate) for a non-BMP codepoint, so we just need to check for end of input before
+ we increment for the second surrogate.
+
+ While writing tests, I found an off by one error in backtrackCharacterClassGreedy() and changed the
+ loop to check the count at loop top instead of loop bottom.
+
+ * yarr/YarrJIT.cpp:
+ (JSC::Yarr::YarrGenerator::advanceIndexAfterCharacterClassTermMatch):
+ (JSC::Yarr::YarrGenerator::generateCharacterClassOnce):
+ (JSC::Yarr::YarrGenerator::generateCharacterClassGreedy):
+ (JSC::Yarr::YarrGenerator::backtrackCharacterClassGreedy):
+ (JSC::Yarr::YarrGenerator::backtrackCharacterClassNonGreedy):
+
2019-09-11 Michael Saboff <msab...@apple.com>
JSC crashes due to stack overflow while building RegExp
Modified: releases/WebKitGTK/webkit-2.26/Source/_javascript_Core/yarr/YarrJIT.cpp (250222 => 250223)
--- releases/WebKitGTK/webkit-2.26/Source/_javascript_Core/yarr/YarrJIT.cpp 2019-09-23 10:14:32 UTC (rev 250222)
+++ releases/WebKitGTK/webkit-2.26/Source/_javascript_Core/yarr/YarrJIT.cpp 2019-09-23 10:14:36 UTC (rev 250223)
@@ -453,16 +453,16 @@
}
#ifdef JIT_UNICODE_EXPRESSIONS
- void advanceIndexAfterCharacterClassTermMatch(const PatternTerm* term, JumpList& failures, const RegisterID character)
+ void advanceIndexAfterCharacterClassTermMatch(const PatternTerm* term, JumpList& failuresAfterIncrementingIndex, const RegisterID character)
{
ASSERT(term->type == PatternTerm::TypeCharacterClass);
- if (term->characterClass->hasOneCharacterSize() && !term->invert())
+ if (term->isFixedWidthCharacterClass())
add32(TrustedImm32(term->characterClass->hasNonBMPCharacters() ? 2 : 1), index);
else {
add32(TrustedImm32(1), index);
- failures.append(atEndOfInput());
Jump isBMPChar = branch32(LessThan, character, supplementaryPlanesBase);
+ failuresAfterIncrementingIndex.append(atEndOfInput());
add32(TrustedImm32(1), index);
isBMPChar.link(this);
}
@@ -1757,6 +1757,7 @@
#ifdef JIT_UNICODE_EXPRESSIONS
if (m_decodeSurrogatePairs && (!term->characterClass->hasOneCharacterSize() || term->invert())) {
Jump isBMPChar = branch32(LessThan, character, supplementaryPlanesBase);
+ op.m_jumps.append(atEndOfInput());
add32(TrustedImm32(1), index);
isBMPChar.link(this);
}
@@ -1816,7 +1817,7 @@
#ifdef JIT_UNICODE_EXPRESSIONS
if (m_decodeSurrogatePairs) {
- if (term->characterClass->hasOneCharacterSize() && !term->invert())
+ if (term->isFixedWidthCharacterClass())
add32(TrustedImm32(term->characterClass->hasNonBMPCharacters() ? 2 : 1), countRegister);
else {
add32(TrustedImm32(1), countRegister);
@@ -1849,9 +1850,10 @@
move(TrustedImm32(0), countRegister);
JumpList failures;
+ JumpList failuresDecrementIndex;
Label loop(this);
#ifdef JIT_UNICODE_EXPRESSIONS
- if (term->characterClass->hasOneCharacterSize() && !term->invert() && term->characterClass->hasNonBMPCharacters()) {
+ if (term->isFixedWidthCharacterClass() && term->characterClass->hasNonBMPCharacters()) {
move(TrustedImm32(1), character);
failures.append(checkNotEnoughInput(character));
} else
@@ -1875,7 +1877,7 @@
#ifdef JIT_UNICODE_EXPRESSIONS
if (m_decodeSurrogatePairs)
- advanceIndexAfterCharacterClassTermMatch(term, failures, character);
+ advanceIndexAfterCharacterClassTermMatch(term, failuresDecrementIndex, character);
else
#endif
add32(TrustedImm32(1), index);
@@ -1887,6 +1889,11 @@
} else
jump(loop);
+ if (!failuresDecrementIndex.empty()) {
+ failuresDecrementIndex.link(this);
+ sub32(TrustedImm32(1), index);
+ }
+
failures.link(this);
op.m_reentry = label();
@@ -1908,7 +1915,7 @@
if (!m_decodeSurrogatePairs)
sub32(TrustedImm32(1), index);
- else if (term->characterClass->hasOneCharacterSize() && !term->invert())
+ else if (term->isFixedWidthCharacterClass())
sub32(TrustedImm32(term->characterClass->hasNonBMPCharacters() ? 2 : 1), index);
else {
// Rematch one less
@@ -1917,6 +1924,8 @@
loadFromFrame(term->frameLocation + BackTrackInfoCharacterClass::beginIndex(), index);
Label rematchLoop(this);
+ Jump doneRematching = branchTest32(Zero, countRegister);
+
readCharacter(m_checkedOffset - term->inputPosition, character);
sub32(TrustedImm32(1), countRegister);
@@ -1928,7 +1937,8 @@
isBMPChar.link(this);
#endif
- branchTest32(Zero, countRegister).linkTo(rematchLoop, this);
+ jump(rematchLoop);
+ doneRematching.link(this);
loadFromFrame(term->frameLocation + BackTrackInfoCharacterClass::matchAmountIndex(), countRegister);
}
@@ -1964,6 +1974,7 @@
const RegisterID countRegister = regT1;
JumpList nonGreedyFailures;
+ JumpList nonGreedyFailuresDecrementIndex;
m_backtrackingState.link(this);
@@ -1996,7 +2007,7 @@
#ifdef JIT_UNICODE_EXPRESSIONS
if (m_decodeSurrogatePairs)
- advanceIndexAfterCharacterClassTermMatch(term, nonGreedyFailures, character);
+ advanceIndexAfterCharacterClassTermMatch(term, nonGreedyFailuresDecrementIndex, character);
else
#endif
add32(TrustedImm32(1), index);
@@ -2004,6 +2015,10 @@
jump(op.m_reentry);
+ if (!nonGreedyFailuresDecrementIndex.empty()) {
+ nonGreedyFailuresDecrementIndex.link(this);
+ breakpoint();
+ }
nonGreedyFailures.link(this);
sub32(countRegister, index);
m_backtrackingState.fallthrough();