Diff
Modified: trunk/JSTests/ChangeLog (225929 => 225930)
--- trunk/JSTests/ChangeLog 2017-12-14 22:15:25 UTC (rev 225929)
+++ trunk/JSTests/ChangeLog 2017-12-14 22:16:38 UTC (rev 225930)
@@ -1,3 +1,14 @@
+2017-12-14 Michael Saboff <msab...@apple.com>
+
+ REGRESSION (r225695): Repro crash on yahoo login page
+ https://bugs.webkit.org/show_bug.cgi?id=180761
+
+ Reviewed by JF Bastien.
+
+ New regression test.
+
+ * stress/regress-180761.js: Added.
+
2017-12-13 Keith Miller <keith_mil...@apple.com>
JSObjects should have a mask for loading indexed properties
Added: trunk/JSTests/stress/regress-180761.js (0 => 225930)
--- trunk/JSTests/stress/regress-180761.js (rev 0)
+++ trunk/JSTests/stress/regress-180761.js 2017-12-14 22:16:38 UTC (rev 225930)
@@ -0,0 +1,6 @@
+//@ runDefault
+
+// Regression test for bug 180761. This test should not crash.
+
+if (/(?:(?: |a)|\.a)* a*/.exec("/a.aaa") !== null)
+ throw "Expected /(?:(?: |a)|\.a)* a*/.exec(\"/a.aaa\") to not match";
Modified: trunk/Source/JavaScriptCore/ChangeLog (225929 => 225930)
--- trunk/Source/JavaScriptCore/ChangeLog 2017-12-14 22:15:25 UTC (rev 225929)
+++ trunk/Source/JavaScriptCore/ChangeLog 2017-12-14 22:16:38 UTC (rev 225930)
@@ -1,3 +1,78 @@
+2017-12-14 Michael Saboff <msab...@apple.com>
+
+ REGRESSION (r225695): Repro crash on yahoo login page
+ https://bugs.webkit.org/show_bug.cgi?id=180761
+
+ Reviewed by JF Bastien.
+
+ Relanding r225695 with a fix.
+
+ The fix is that we need to save the return address for a parentheses in
+ the ParenContext because it is actually used by any immediately contained
+ alternatives.
+
+ Also did a little refactoring, changing occurrences of PatternContext to
+ ParenContext since that is the name of the structure.
+
+ * runtime/RegExp.cpp:
+ (JSC::byteCodeCompilePattern):
+ (JSC::RegExp::byteCodeCompileIfNecessary):
+ (JSC::RegExp::compile):
+ (JSC::RegExp::compileMatchOnly):
+ * runtime/RegExp.h:
+ * runtime/RegExpInlines.h:
+ (JSC::RegExp::matchInline):
+ * testRegExp.cpp:
+ (parseRegExpLine):
+ (runFromFiles):
+ * yarr/Yarr.h:
+ * yarr/YarrInterpreter.cpp:
+ (JSC::Yarr::ByteCompiler::compile):
+ (JSC::Yarr::ByteCompiler::dumpDisjunction):
+ * yarr/YarrJIT.cpp:
+ (JSC::Yarr::YarrGenerator::ParenContextSizes::ParenContextSizes):
+ (JSC::Yarr::YarrGenerator::ParenContextSizes::numSubpatterns):
+ (JSC::Yarr::YarrGenerator::ParenContextSizes::frameSlots):
+ (JSC::Yarr::YarrGenerator::ParenContext::sizeFor):
+ (JSC::Yarr::YarrGenerator::ParenContext::nextOffset):
+ (JSC::Yarr::YarrGenerator::ParenContext::beginOffset):
+ (JSC::Yarr::YarrGenerator::ParenContext::matchAmountOffset):
+ (JSC::Yarr::YarrGenerator::ParenContext::returnAddressOffset):
+ (JSC::Yarr::YarrGenerator::ParenContext::subpatternOffset):
+ (JSC::Yarr::YarrGenerator::ParenContext::savedFrameOffset):
+ (JSC::Yarr::YarrGenerator::initParenContextFreeList):
+ (JSC::Yarr::YarrGenerator::allocateParenContext):
+ (JSC::Yarr::YarrGenerator::freeParenContext):
+ (JSC::Yarr::YarrGenerator::saveParenContext):
+ (JSC::Yarr::YarrGenerator::restoreParenContext):
+ (JSC::Yarr::YarrGenerator::tryReadUnicodeCharImpl):
+ (JSC::Yarr::YarrGenerator::storeToFrame):
+ (JSC::Yarr::YarrGenerator::generateJITFailReturn):
+ (JSC::Yarr::YarrGenerator::clearMatches):
+ (JSC::Yarr::YarrGenerator::generate):
+ (JSC::Yarr::YarrGenerator::backtrack):
+ (JSC::Yarr::YarrGenerator::opCompileParenthesesSubpattern):
+ (JSC::Yarr::YarrGenerator::generateEnter):
+ (JSC::Yarr::YarrGenerator::generateReturn):
+ (JSC::Yarr::YarrGenerator::YarrGenerator):
+ (JSC::Yarr::YarrGenerator::compile):
+ * yarr/YarrJIT.h:
+ (JSC::Yarr::YarrCodeBlock::execute):
+ * yarr/YarrPattern.cpp:
+ (JSC::Yarr::indentForNestingLevel):
+ (JSC::Yarr::dumpUChar32):
+ (JSC::Yarr::dumpCharacterClass):
+ (JSC::Yarr::PatternTerm::dump):
+ (JSC::Yarr::YarrPattern::dumpPattern):
+ * yarr/YarrPattern.h:
+ (JSC::Yarr::PatternTerm::containsAnyCaptures):
+ (JSC::Yarr::BackTrackInfoParenthesesOnce::returnAddressIndex):
+ (JSC::Yarr::BackTrackInfoParentheses::beginIndex):
+ (JSC::Yarr::BackTrackInfoParentheses::returnAddressIndex):
+ (JSC::Yarr::BackTrackInfoParentheses::matchAmountIndex):
+ (JSC::Yarr::BackTrackInfoParentheses::parenContextHeadIndex):
+ (JSC::Yarr::BackTrackInfoAlternative::offsetIndex): Deleted.
+
2017-12-13 Keith Miller <keith_mil...@apple.com>
JSObjects should have a mask for loading indexed properties
Modified: trunk/Source/JavaScriptCore/runtime/RegExp.cpp (225929 => 225930)
--- trunk/Source/JavaScriptCore/runtime/RegExp.cpp 2017-12-14 22:15:25 UTC (rev 225929)
+++ trunk/Source/JavaScriptCore/runtime/RegExp.cpp 2017-12-14 22:16:38 UTC (rev 225930)
@@ -271,6 +271,30 @@
return vm.regExpCache()->lookupOrCreate(patternString, flags);
}
+
+static std::unique_ptr<Yarr::BytecodePattern> byteCodeCompilePattern(VM* vm, Yarr::YarrPattern& pattern)
+{
+ return Yarr::byteCompile(pattern, &vm->m_regExpAllocator, &vm->m_regExpAllocatorLock);
+}
+
+void RegExp::byteCodeCompileIfNecessary(VM* vm)
+{
+ if (m_regExpBytecode)
+ return;
+
+ Yarr::YarrPattern pattern(m_patternString, m_flags, &m_constructionError, vm->stackLimit());
+ if (m_constructionError) {
+ RELEASE_ASSERT_NOT_REACHED();
+#if COMPILER_QUIRK(CONSIDERS_UNREACHABLE_CODE)
+ m_state = ParseError;
+ return;
+#endif
+ }
+ ASSERT(m_numSubpatterns == pattern.m_numSubpatterns);
+
+ m_regExpBytecode = byteCodeCompilePattern(vm, pattern);
+}
+
void RegExp::compile(VM* vm, Yarr::YarrCharSize charSize)
{
ConcurrentJSLocker locker(m_lock);
@@ -303,8 +327,11 @@
UNUSED_PARAM(charSize);
#endif
+ if (Options::dumpCompiledRegExpPatterns())
+ dataLog("Can't JIT this regular expression: \"", m_patternString, "\"\n");
+
m_state = ByteCode;
- m_regExpBytecode = Yarr::byteCompile(pattern, &vm->m_regExpAllocator, &vm->m_regExpAllocatorLock);
+ m_regExpBytecode = byteCodeCompilePattern(vm, pattern);
}
int RegExp::match(VM& vm, const String& s, unsigned startOffset, Vector<int>& ovector)
@@ -356,8 +383,11 @@
UNUSED_PARAM(charSize);
#endif
+ if (Options::dumpCompiledRegExpPatterns())
+ dataLog("Can't JIT this regular expression: \"", m_patternString, "\"\n");
+
m_state = ByteCode;
- m_regExpBytecode = Yarr::byteCompile(pattern, &vm->m_regExpAllocator, &vm->m_regExpAllocatorLock);
+ m_regExpBytecode = byteCodeCompilePattern(vm, pattern);
}
MatchResult RegExp::match(VM& vm, const String& s, unsigned startOffset)
Modified: trunk/Source/JavaScriptCore/runtime/RegExp.h (225929 => 225930)
--- trunk/Source/JavaScriptCore/runtime/RegExp.h 2017-12-14 22:15:25 UTC (rev 225929)
+++ trunk/Source/JavaScriptCore/runtime/RegExp.h 2017-12-14 22:16:38 UTC (rev 225930)
@@ -140,6 +140,8 @@
RegExpState m_state;
+ void byteCodeCompileIfNecessary(VM*);
+
void compile(VM*, Yarr::YarrCharSize);
void compileIfNecessary(VM&, Yarr::YarrCharSize);
Modified: trunk/Source/JavaScriptCore/runtime/RegExpInlines.h (225929 => 225930)
--- trunk/Source/JavaScriptCore/runtime/RegExpInlines.h 2017-12-14 22:15:25 UTC (rev 225929)
+++ trunk/Source/JavaScriptCore/runtime/RegExpInlines.h 2017-12-14 22:16:38 UTC (rev 225930)
@@ -110,11 +110,25 @@
int result;
#if ENABLE(YARR_JIT)
+#ifdef JIT_ALL_PARENS_EXPRESSIONS
+ char patternContextBuffer[patternContextBufferSize];
+#define EXTRA_JIT_PARAMS , patternContextBuffer, patternContextBufferSize
+#else
+#define EXTRA_JIT_PARAMS
+#endif
+
if (m_state == JITCode) {
if (s.is8Bit())
- result = m_regExpJITCode.execute(s.characters8(), startOffset, s.length(), offsetVector).start;
+ result = m_regExpJITCode.execute(s.characters8(), startOffset, s.length(), offsetVector EXTRA_JIT_PARAMS).start;
else
- result = m_regExpJITCode.execute(s.characters16(), startOffset, s.length(), offsetVector).start;
+ result = m_regExpJITCode.execute(s.characters16(), startOffset, s.length(), offsetVector EXTRA_JIT_PARAMS).start;
+
+ if (result == Yarr::JSRegExpJITCodeFailure) {
+ // JIT'ed code couldn't handle expression, so punt back to the interpreter.
+ byteCodeCompileIfNecessary(&vm);
+ result = Yarr::interpret(m_regExpBytecode.get(), s, startOffset, reinterpret_cast<unsigned*>(offsetVector));
+ }
+
#if ENABLE(YARR_JIT_DEBUG)
matchCompareWithInterpreter(s, startOffset, offsetVector, result);
#endif
@@ -199,15 +213,30 @@
compileIfNecessaryMatchOnly(vm, s.is8Bit() ? Yarr::Char8 : Yarr::Char16);
#if ENABLE(YARR_JIT)
+#ifdef JIT_ALL_PARENS_EXPRESSIONS
+ char patternContextBuffer[patternContextBufferSize];
+#define EXTRA_JIT_PARAMS , patternContextBuffer, patternContextBufferSize
+#else
+#define EXTRA_JIT_PARAMS
+#endif
+
+ MatchResult result;
+
if (m_state == JITCode) {
- MatchResult result = s.is8Bit() ?
- m_regExpJITCode.execute(s.characters8(), startOffset, s.length()) :
- m_regExpJITCode.execute(s.characters16(), startOffset, s.length());
+ if (s.is8Bit())
+ result = m_regExpJITCode.execute(s.characters8(), startOffset, s.length() EXTRA_JIT_PARAMS);
+ else
+ result = m_regExpJITCode.execute(s.characters16(), startOffset, s.length() EXTRA_JIT_PARAMS);
+
#if ENABLE(REGEXP_TRACING)
if (!result)
m_rtMatchOnlyFoundCount++;
#endif
- return result;
+ if (result.start != static_cast<size_t>(Yarr::JSRegExpJITCodeFailure))
+ return result;
+
+ // JIT'ed code couldn't handle expression, so punt back to the interpreter.
+ byteCodeCompileIfNecessary(&vm);
}
#endif
Modified: trunk/Source/JavaScriptCore/testRegExp.cpp (225929 => 225930)
--- trunk/Source/JavaScriptCore/testRegExp.cpp 2017-12-14 22:15:25 UTC (rev 225929)
+++ trunk/Source/JavaScriptCore/testRegExp.cpp 2017-12-14 22:16:38 UTC (rev 225930)
@@ -315,10 +315,10 @@
return -1;
}
-static RegExp* parseRegExpLine(VM& vm, char* line, int lineLength)
+static RegExp* parseRegExpLine(VM& vm, char* line, int lineLength, const char** regexpError)
{
StringBuilder pattern;
-
+
if (line[0] != '/')
return 0;
@@ -330,9 +330,11 @@
++i;
RegExp* r = RegExp::create(vm, pattern.toString(), regExpFlags(line + i));
- if (r->isValid())
- return r;
- return nullptr;
+ if (!r->isValid()) {
+ *regexpError = r->errorMessage();
+ return nullptr;
+ }
+ return r;
}
static RegExpTest* parseTestLine(char* line, int lineLength)
@@ -431,6 +433,7 @@
size_t lineLength = 0;
char* linePtr = 0;
unsigned int lineNumber = 0;
+ const char* regexpError = nullptr;
while ((linePtr = fgets(&lineBuffer[0], MaxLineLength, testCasesFile))) {
lineLength = strlen(linePtr);
@@ -444,7 +447,11 @@
continue;
if (linePtr[0] == '/') {
- regexp = parseRegExpLine(vm, linePtr, lineLength);
+ regexp = parseRegExpLine(vm, linePtr, lineLength, &regexpError);
+ if (!regexp) {
+ failures++;
+ fprintf(stderr, "Failure on line %u. '%s' %s\n", lineNumber, linePtr, regexpError);
+ }
} else if (linePtr[0] == ' ') {
RegExpTest* regExpTest = parseTestLine(linePtr, lineLength);
@@ -461,10 +468,10 @@
} else if (linePtr[0] == '-') {
tests++;
regexp = 0; // Reset the live regexp to avoid confusing other subsequent tests
- bool successfullyParsed = parseRegExpLine(vm, linePtr + 1, lineLength - 1);
+ bool successfullyParsed = parseRegExpLine(vm, linePtr + 1, lineLength - 1, &regexpError);
if (successfullyParsed) {
failures++;
- fprintf(stderr, "Failure on line %u. '%s' is not a valid regexp\n", lineNumber, linePtr + 1);
+ fprintf(stderr, "Failure on line %u. '%s' %s\n", lineNumber, linePtr + 1, regexpError);
}
}
}
Modified: trunk/Source/JavaScriptCore/yarr/Yarr.h (225929 => 225930)
--- trunk/Source/JavaScriptCore/yarr/Yarr.h 2017-12-14 22:15:25 UTC (rev 225929)
+++ trunk/Source/JavaScriptCore/yarr/Yarr.h 2017-12-14 22:16:38 UTC (rev 225930)
@@ -36,9 +36,9 @@
#define YarrStackSpaceForBackTrackInfoBackReference 2
#define YarrStackSpaceForBackTrackInfoAlternative 1 // One per alternative.
#define YarrStackSpaceForBackTrackInfoParentheticalAssertion 1
-#define YarrStackSpaceForBackTrackInfoParenthesesOnce 1 // Only for !fixed quantifiers.
+#define YarrStackSpaceForBackTrackInfoParenthesesOnce 2
#define YarrStackSpaceForBackTrackInfoParenthesesTerminal 1
-#define YarrStackSpaceForBackTrackInfoParentheses 2
+#define YarrStackSpaceForBackTrackInfoParentheses 4
#define YarrStackSpaceForDotStarEnclosure 1
static const unsigned quantifyInfinite = UINT_MAX;
@@ -52,9 +52,10 @@
JSRegExpMatch = 1,
JSRegExpNoMatch = 0,
JSRegExpErrorNoMatch = -1,
- JSRegExpErrorHitLimit = -2,
- JSRegExpErrorNoMemory = -3,
- JSRegExpErrorInternal = -4
+ JSRegExpJITCodeFailure = -2,
+ JSRegExpErrorHitLimit = -3,
+ JSRegExpErrorNoMemory = -4,
+ JSRegExpErrorInternal = -5,
};
enum YarrCharSize {
Modified: trunk/Source/JavaScriptCore/yarr/YarrInterpreter.cpp (225929 => 225930)
--- trunk/Source/JavaScriptCore/yarr/YarrInterpreter.cpp 2017-12-14 22:15:25 UTC (rev 225929)
+++ trunk/Source/JavaScriptCore/yarr/YarrInterpreter.cpp 2017-12-14 22:16:38 UTC (rev 225930)
@@ -27,6 +27,7 @@
#include "config.h"
#include "YarrInterpreter.h"
+#include "Options.h"
#include "SuperSampler.h"
#include "Yarr.h"
#include "YarrCanonicalize.h"
@@ -1669,6 +1670,11 @@
emitDisjunction(m_pattern.m_body);
regexEnd();
+#ifndef NDEBUG
+ if (Options::dumpCompiledRegExpPatterns())
+ dumpDisjunction(m_bodyDisjunction.get());
+#endif
+
return std::make_unique<BytecodePattern>(WTFMove(m_bodyDisjunction), m_allParenthesesInfo, m_pattern, allocator, lock);
}
@@ -1829,16 +1835,6 @@
return beginTerm;
}
-#ifndef NDEBUG
- void dumpDisjunction(ByteDisjunction* disjunction)
- {
- dataLogF("ByteDisjunction(%p):\n\t", disjunction);
- for (unsigned i = 0; i < disjunction->terms.size(); ++i)
- dataLogF("{ %d } ", disjunction->terms[i].type);
- dataLogF("\n");
- }
-#endif
-
void closeAlternative(int beginTerm)
{
int origBeginTerm = beginTerm;
@@ -2111,7 +2107,245 @@
}
}
}
+#ifndef NDEBUG
+ void dumpDisjunction(ByteDisjunction* disjunction, unsigned nesting = 0)
+ {
+ PrintStream& out = WTF::dataFile();
+ unsigned termIndexNest = 0;
+
+ if (!nesting) {
+ out.printf("ByteDisjunction(%p):\n", disjunction);
+ nesting = 1;
+ } else {
+ termIndexNest = nesting - 1;
+ nesting = 2;
+ }
+
+ auto outputTermIndexAndNest = [&](size_t index, unsigned termNesting) {
+ for (unsigned nestingDepth = 0; nestingDepth < termIndexNest; nestingDepth++)
+ out.print(" ");
+ out.printf("%4lu", index);
+ for (unsigned nestingDepth = 0; nestingDepth < termNesting; nestingDepth++)
+ out.print(" ");
+ };
+
+ auto dumpQuantity = [&](ByteTerm& term) {
+ if (term.atom.quantityType == QuantifierFixedCount && term.atom.quantityMinCount == 1 && term.atom.quantityMaxCount == 1)
+ return;
+
+ out.print(" {", term.atom.quantityMinCount);
+ if (term.atom.quantityMinCount != term.atom.quantityMaxCount) {
+ if (term.atom.quantityMaxCount == UINT_MAX)
+ out.print(",inf");
+ else
+ out.print(",", term.atom.quantityMaxCount);
+ }
+ out.print("}");
+ if (term.atom.quantityType == QuantifierGreedy)
+ out.print(" greedy");
+ else if (term.atom.quantityType == QuantifierNonGreedy)
+ out.print(" non-greedy");
+ };
+
+ auto dumpCaptured = [&](ByteTerm& term) {
+ if (term.capture())
+ out.print(" captured (#", term.atom.subpatternId, ")");
+ };
+
+ auto dumpInverted = [&](ByteTerm& term) {
+ if (term.invert())
+ out.print(" inverted");
+ };
+
+ auto dumpInputPosition = [&](ByteTerm& term) {
+ out.printf(" inputPosition %u", term.inputPosition);
+ };
+
+ auto dumpCharacter = [&](ByteTerm& term) {
+ out.print(" ");
+ dumpUChar32(out, term.atom.patternCharacter);
+ };
+
+ auto dumpCharClass = [&](ByteTerm& term) {
+ out.print(" ");
+ dumpCharacterClass(out, &m_pattern, term.atom.characterClass);
+ };
+
+ for (size_t idx = 0; idx < disjunction->terms.size(); ++idx) {
+ ByteTerm term = disjunction->terms[idx];
+
+ bool outputNewline = true;
+
+ switch (term.type) {
+ case ByteTerm::TypeBodyAlternativeBegin:
+ outputTermIndexAndNest(idx, nesting++);
+ out.print("BodyAlternativeBegin");
+ if (term.alternative.onceThrough)
+ out.print(" onceThrough");
+ break;
+ case ByteTerm::TypeBodyAlternativeDisjunction:
+ outputTermIndexAndNest(idx, nesting - 1);
+ out.print("BodyAlternativeDisjunction");
+ break;
+ case ByteTerm::TypeBodyAlternativeEnd:
+ outputTermIndexAndNest(idx, --nesting);
+ out.print("BodyAlternativeEnd");
+ break;
+ case ByteTerm::TypeAlternativeBegin:
+ outputTermIndexAndNest(idx, nesting++);
+ out.print("AlternativeBegin");
+ break;
+ case ByteTerm::TypeAlternativeDisjunction:
+ outputTermIndexAndNest(idx, nesting - 1);
+ out.print("AlternativeDisjunction");
+ break;
+ case ByteTerm::TypeAlternativeEnd:
+ outputTermIndexAndNest(idx, --nesting);
+ out.print("AlternativeEnd");
+ break;
+ case ByteTerm::TypeSubpatternBegin:
+ outputTermIndexAndNest(idx, nesting++);
+ out.print("SubpatternBegin");
+ break;
+ case ByteTerm::TypeSubpatternEnd:
+ outputTermIndexAndNest(idx, --nesting);
+ out.print("SubpatternEnd");
+ break;
+ case ByteTerm::TypeAssertionBOL:
+ outputTermIndexAndNest(idx, nesting);
+ out.print("AssertionBOL");
+ break;
+ case ByteTerm::TypeAssertionEOL:
+ outputTermIndexAndNest(idx, nesting);
+ out.print("AssertionEOL");
+ break;
+ case ByteTerm::TypeAssertionWordBoundary:
+ outputTermIndexAndNest(idx, nesting);
+ out.print("AssertionWordBoundary");
+ break;
+ case ByteTerm::TypePatternCharacterOnce:
+ outputTermIndexAndNest(idx, nesting);
+ out.print("PatternCharacterOnce");
+ dumpInverted(term);
+ dumpInputPosition(term);
+ dumpCharacter(term);
+ dumpQuantity(term);
+ break;
+ case ByteTerm::TypePatternCharacterFixed:
+ outputTermIndexAndNest(idx, nesting);
+ out.print("PatternCharacterFixed");
+ dumpInverted(term);
+ dumpInputPosition(term);
+ dumpCharacter(term);
+ out.print(" {", term.atom.quantityMinCount, "}");
+ break;
+ case ByteTerm::TypePatternCharacterGreedy:
+ outputTermIndexAndNest(idx, nesting);
+ out.print("PatternCharacterGreedy");
+ dumpInverted(term);
+ dumpInputPosition(term);
+ dumpCharacter(term);
+ dumpQuantity(term);
+ break;
+ case ByteTerm::TypePatternCharacterNonGreedy:
+ outputTermIndexAndNest(idx, nesting);
+ out.print("PatternCharacterNonGreedy");
+ dumpInverted(term);
+ dumpInputPosition(term);
+ dumpCharacter(term);
+ dumpQuantity(term);
+ break;
+ case ByteTerm::TypePatternCasedCharacterOnce:
+ outputTermIndexAndNest(idx, nesting);
+ out.print("PatternCasedCharacterOnce");
+ break;
+ case ByteTerm::TypePatternCasedCharacterFixed:
+ outputTermIndexAndNest(idx, nesting);
+ out.print("PatternCasedCharacterFixed");
+ break;
+ case ByteTerm::TypePatternCasedCharacterGreedy:
+ outputTermIndexAndNest(idx, nesting);
+ out.print("PatternCasedCharacterGreedy");
+ break;
+ case ByteTerm::TypePatternCasedCharacterNonGreedy:
+ outputTermIndexAndNest(idx, nesting);
+ out.print("PatternCasedCharacterNonGreedy");
+ break;
+ case ByteTerm::TypeCharacterClass:
+ outputTermIndexAndNest(idx, nesting);
+ out.print("CharacterClass");
+ dumpInverted(term);
+ dumpInputPosition(term);
+ dumpCharClass(term);
+ dumpQuantity(term);
+ break;
+ case ByteTerm::TypeBackReference:
+ outputTermIndexAndNest(idx, nesting);
+ out.print("BackReference #", term.atom.subpatternId);
+ dumpQuantity(term);
+ break;
+ case ByteTerm::TypeParenthesesSubpattern:
+ outputTermIndexAndNest(idx, nesting);
+ out.print("ParenthesesSubpattern");
+ dumpCaptured(term);
+ dumpInverted(term);
+ dumpInputPosition(term);
+ dumpQuantity(term);
+ out.print("\n");
+ outputNewline = false;
+ dumpDisjunction(term.atom.parenthesesDisjunction, nesting);
+ break;
+ case ByteTerm::TypeParenthesesSubpatternOnceBegin:
+ outputTermIndexAndNest(idx, nesting++);
+ out.print("ParenthesesSubpatternOnceBegin");
+ dumpCaptured(term);
+ dumpInverted(term);
+ dumpInputPosition(term);
+ break;
+ case ByteTerm::TypeParenthesesSubpatternOnceEnd:
+ outputTermIndexAndNest(idx, --nesting);
+ out.print("ParenthesesSubpatternOnceEnd");
+ break;
+ case ByteTerm::TypeParenthesesSubpatternTerminalBegin:
+ outputTermIndexAndNest(idx, nesting++);
+ out.print("ParenthesesSubpatternTerminalBegin");
+ dumpInverted(term);
+ dumpInputPosition(term);
+ break;
+ case ByteTerm::TypeParenthesesSubpatternTerminalEnd:
+ outputTermIndexAndNest(idx, --nesting);
+ out.print("ParenthesesSubpatternTerminalEnd");
+ break;
+ case ByteTerm::TypeParentheticalAssertionBegin:
+ outputTermIndexAndNest(idx, nesting++);
+ out.print("ParentheticalAssertionBegin");
+ dumpInverted(term);
+ dumpInputPosition(term);
+ break;
+ case ByteTerm::TypeParentheticalAssertionEnd:
+ outputTermIndexAndNest(idx, --nesting);
+ out.print("ParentheticalAssertionEnd");
+ break;
+ case ByteTerm::TypeCheckInput:
+ outputTermIndexAndNest(idx, nesting);
+ out.print("CheckInput ", term.checkInputCount);
+ break;
+ case ByteTerm::TypeUncheckInput:
+ outputTermIndexAndNest(idx, nesting);
+ out.print("UncheckInput ", term.checkInputCount);
+ break;
+ case ByteTerm::TypeDotStarEnclosure:
+ outputTermIndexAndNest(idx, nesting);
+ out.print("DotStarEnclosure");
+ break;
+ }
+ if (outputNewline)
+ out.print("\n");
+ }
+ }
+#endif
+
private:
YarrPattern& m_pattern;
std::unique_ptr<ByteDisjunction> m_bodyDisjunction;
@@ -2152,7 +2386,7 @@
COMPILE_ASSERT(sizeof(BackTrackInfoAlternative) == (YarrStackSpaceForBackTrackInfoAlternative * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoAlternative);
COMPILE_ASSERT(sizeof(BackTrackInfoParentheticalAssertion) == (YarrStackSpaceForBackTrackInfoParentheticalAssertion * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoParentheticalAssertion);
COMPILE_ASSERT(sizeof(BackTrackInfoParenthesesOnce) == (YarrStackSpaceForBackTrackInfoParenthesesOnce * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoParenthesesOnce);
-COMPILE_ASSERT(sizeof(Interpreter<UChar>::BackTrackInfoParentheses) == (YarrStackSpaceForBackTrackInfoParentheses * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoParentheses);
+COMPILE_ASSERT(sizeof(Interpreter<UChar>::BackTrackInfoParentheses) <= (YarrStackSpaceForBackTrackInfoParentheses * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoParentheses);
} }
Modified: trunk/Source/JavaScriptCore/yarr/YarrJIT.cpp (225929 => 225930)
--- trunk/Source/JavaScriptCore/yarr/YarrJIT.cpp 2017-12-14 22:15:25 UTC (rev 225929)
+++ trunk/Source/JavaScriptCore/yarr/YarrJIT.cpp 2017-12-14 22:16:38 UTC (rev 225930)
@@ -58,20 +58,25 @@
#define HAVE_INITIAL_START_REG
#elif CPU(ARM64)
+ // Argument registers
static const RegisterID input = ARM64Registers::x0;
static const RegisterID index = ARM64Registers::x1;
static const RegisterID length = ARM64Registers::x2;
static const RegisterID output = ARM64Registers::x3;
+ static const RegisterID freelistRegister = ARM64Registers::x4;
+ static const RegisterID freelistSizeRegister = ARM64Registers::x5;
- static const RegisterID regT0 = ARM64Registers::x4;
- static const RegisterID regT1 = ARM64Registers::x5;
- static const RegisterID regUnicodeInputAndTrail = ARM64Registers::x6;
- static const RegisterID regUnicodeTemp = ARM64Registers::x7;
- static const RegisterID initialStart = ARM64Registers::x8;
- static const RegisterID supplementaryPlanesBase = ARM64Registers::x9;
- static const RegisterID surrogateTagMask = ARM64Registers::x10;
- static const RegisterID leadingSurrogateTag = ARM64Registers::x11;
- static const RegisterID trailingSurrogateTag = ARM64Registers::x12;
+ // Scratch registers
+ static const RegisterID regT0 = ARM64Registers::x6;
+ static const RegisterID regT1 = ARM64Registers::x7;
+ static const RegisterID regT2 = ARM64Registers::x8;
+ static const RegisterID remainingMatchCount = ARM64Registers::x9;
+ static const RegisterID regUnicodeInputAndTrail = ARM64Registers::x10;
+ static const RegisterID initialStart = ARM64Registers::x11;
+ static const RegisterID supplementaryPlanesBase = ARM64Registers::x12;
+ static const RegisterID surrogateTagMask = ARM64Registers::x13;
+ static const RegisterID leadingSurrogateTag = ARM64Registers::x14;
+ static const RegisterID trailingSurrogateTag = ARM64Registers::x15;
static const RegisterID returnRegister = ARM64Registers::x0;
static const RegisterID returnRegister2 = ARM64Registers::x1;
@@ -105,10 +110,13 @@
static const RegisterID returnRegister2 = X86Registers::edx;
#elif CPU(X86_64)
#if !OS(WINDOWS)
+ // Argument registers
static const RegisterID input = X86Registers::edi;
static const RegisterID index = X86Registers::esi;
static const RegisterID length = X86Registers::edx;
static const RegisterID output = X86Registers::ecx;
+ static const RegisterID freelistRegister = X86Registers::r8;
+ static const RegisterID freelistSizeRegister = X86Registers::r9; // Only used during initialization.
#else
// If the return value doesn't fit in 64bits, its destination is pointed by rcx and the parameters are shifted.
// http://msdn.microsoft.com/en-us/library/7572ztz4.aspx
@@ -119,23 +127,23 @@
static const RegisterID output = X86Registers::r10;
#endif
+ // Scratch registers
static const RegisterID regT0 = X86Registers::eax;
#if !OS(WINDOWS)
- static const RegisterID regT1 = X86Registers::r8;
+ static const RegisterID regT1 = X86Registers::r9;
+ static const RegisterID regT2 = X86Registers::r10;
#else
static const RegisterID regT1 = X86Registers::ecx;
+ static const RegisterID regT2 = X86Registers::edi;
#endif
static const RegisterID initialStart = X86Registers::ebx;
#if !OS(WINDOWS)
- static const RegisterID regUnicodeInputAndTrail = X86Registers::r9;
- static const RegisterID regUnicodeTemp = X86Registers::r10;
+ static const RegisterID remainingMatchCount = X86Registers::r12;
#else
- static const RegisterID regUnicodeInputAndTrail = X86Registers::esi;
- static const RegisterID regUnicodeTemp = X86Registers::edi;
+ static const RegisterID remainingMatchCount = X86Registers::esi;
#endif
- static const RegisterID supplementaryPlanesBase = X86Registers::r12;
- static const RegisterID surrogateTagMask = X86Registers::r13;
+ static const RegisterID regUnicodeInputAndTrail = X86Registers::r13;
static const RegisterID leadingSurrogateTag = X86Registers::r14;
static const RegisterID trailingSurrogateTag = X86Registers::r15;
@@ -142,10 +150,165 @@
static const RegisterID returnRegister = X86Registers::eax;
static const RegisterID returnRegister2 = X86Registers::edx;
+ const TrustedImm32 supplementaryPlanesBase = TrustedImm32(0x10000);
+ const TrustedImm32 surrogateTagMask = TrustedImm32(0xfffffc00);
#define HAVE_INITIAL_START_REG
#define JIT_UNICODE_EXPRESSIONS
#endif
+#ifdef JIT_ALL_PARENS_EXPRESSIONS
+ struct ParenContextSizes {
+ size_t m_numSubpatterns;
+ size_t m_frameSlots;
+
+ ParenContextSizes(size_t numSubpatterns, size_t frameSlots)
+ : m_numSubpatterns(numSubpatterns)
+ , m_frameSlots(frameSlots)
+ {
+ }
+
+ size_t numSubpatterns() { return m_numSubpatterns; }
+
+ size_t frameSlots() { return m_frameSlots; }
+ };
+
+ struct ParenContext {
+ struct ParenContext* next;
+ uint32_t begin;
+ uint32_t matchAmount;
+ uintptr_t returnAddress;
+ struct Subpatterns {
+ unsigned start;
+ unsigned end;
+ } subpatterns[0];
+ uintptr_t frameSlots[0];
+
+ static size_t sizeFor(ParenContextSizes& parenContextSizes)
+ {
+ return sizeof(ParenContext) + sizeof(Subpatterns) * parenContextSizes.numSubpatterns() + sizeof(uintptr_t) * parenContextSizes.frameSlots();
+ }
+
+ static ptrdiff_t nextOffset()
+ {
+ return offsetof(ParenContext, next);
+ }
+
+ static ptrdiff_t beginOffset()
+ {
+ return offsetof(ParenContext, begin);
+ }
+
+ static ptrdiff_t matchAmountOffset()
+ {
+ return offsetof(ParenContext, matchAmount);
+ }
+
+ static ptrdiff_t returnAddressOffset()
+ {
+ return offsetof(ParenContext, returnAddress);
+ }
+
+ static ptrdiff_t subpatternOffset(size_t subpattern)
+ {
+ return offsetof(ParenContext, subpatterns) + (subpattern - 1) * sizeof(Subpatterns);
+ }
+
+ static ptrdiff_t savedFrameOffset(ParenContextSizes& parenContextSizes)
+ {
+ return offsetof(ParenContext, subpatterns) + (parenContextSizes.numSubpatterns()) * sizeof(Subpatterns);
+ }
+ };
+
+ void initParenContextFreeList()
+ {
+ RegisterID parenContextPointer = regT0;
+ RegisterID nextParenContextPointer = regT2;
+
+ size_t parenContextSize = ParenContext::sizeFor(m_parenContextSizes);
+
+ parenContextSize = WTF::roundUpToMultipleOf<sizeof(uintptr_t)>(parenContextSize);
+
+ // Check that the paren context is a reasonable size.
+ if (parenContextSize > INT16_MAX)
+ m_abortExecution.append(jump());
+
+ Jump emptyFreeList = branchTestPtr(Zero, freelistRegister);
+ move(freelistRegister, parenContextPointer);
+ addPtr(TrustedImm32(parenContextSize), freelistRegister, nextParenContextPointer);
+ addPtr(freelistRegister, freelistSizeRegister);
+ subPtr(TrustedImm32(parenContextSize), freelistSizeRegister);
+
+ Label loopTop(this);
+ Jump initDone = branchPtr(Above, nextParenContextPointer, freelistSizeRegister);
+ storePtr(nextParenContextPointer, Address(parenContextPointer, ParenContext::nextOffset()));
+ move(nextParenContextPointer, parenContextPointer);
+ addPtr(TrustedImm32(parenContextSize), parenContextPointer, nextParenContextPointer);
+ jump(loopTop);
+
+ initDone.link(this);
+ storePtr(TrustedImmPtr(0), Address(parenContextPointer, ParenContext::nextOffset()));
+ emptyFreeList.link(this);
+ }
+
+ void allocateParenContext(RegisterID result)
+ {
+ m_abortExecution.append(branchTestPtr(Zero, freelistRegister));
+ sub32(TrustedImm32(1), remainingMatchCount);
+ m_hitMatchLimit.append(branchTestPtr(Zero, remainingMatchCount));
+ move(freelistRegister, result);
+ loadPtr(Address(freelistRegister, ParenContext::nextOffset()), freelistRegister);
+ }
+
+ void freeParenContext(RegisterID headPtrRegister, RegisterID newHeadPtrRegister)
+ {
+ loadPtr(Address(headPtrRegister, ParenContext::nextOffset()), newHeadPtrRegister);
+ storePtr(freelistRegister, Address(headPtrRegister, ParenContext::nextOffset()));
+ move(headPtrRegister, freelistRegister);
+ }
+
+ void saveParenContext(RegisterID parenContextReg, RegisterID tempReg, unsigned firstSubpattern, unsigned lastSubpattern, unsigned subpatternBaseFrameLocation)
+ {
+ store32(index, Address(parenContextReg, ParenContext::beginOffset()));
+ loadFromFrame(subpatternBaseFrameLocation + BackTrackInfoParentheses::matchAmountIndex(), tempReg);
+ store32(tempReg, Address(parenContextReg, ParenContext::matchAmountOffset()));
+ loadFromFrame(subpatternBaseFrameLocation + BackTrackInfoParentheses::returnAddressIndex(), tempReg);
+ storePtr(tempReg, Address(parenContextReg, ParenContext::returnAddressOffset()));
+ if (compileMode == IncludeSubpatterns) {
+ for (unsigned subpattern = firstSubpattern; subpattern <= lastSubpattern; subpattern++) {
+ loadPtr(Address(output, (subpattern << 1) * sizeof(unsigned)), tempReg);
+ storePtr(tempReg, Address(parenContextReg, ParenContext::subpatternOffset(subpattern)));
+ clearSubpatternStart(subpattern);
+ }
+ }
+ subpatternBaseFrameLocation += YarrStackSpaceForBackTrackInfoParentheses;
+ for (unsigned frameLocation = subpatternBaseFrameLocation; frameLocation < m_parenContextSizes.frameSlots(); frameLocation++) {
+ loadFromFrame(frameLocation, tempReg);
+ storePtr(tempReg, Address(parenContextReg, ParenContext::savedFrameOffset(m_parenContextSizes) + frameLocation * sizeof(uintptr_t)));
+ }
+ }
+
+ void restoreParenContext(RegisterID parenContextReg, RegisterID tempReg, unsigned firstSubpattern, unsigned lastSubpattern, unsigned subpatternBaseFrameLocation)
+ {
+ load32(Address(parenContextReg, ParenContext::beginOffset()), index);
+ storeToFrame(index, subpatternBaseFrameLocation + BackTrackInfoParentheses::beginIndex());
+ load32(Address(parenContextReg, ParenContext::matchAmountOffset()), tempReg);
+ storeToFrame(tempReg, subpatternBaseFrameLocation + BackTrackInfoParentheses::matchAmountIndex());
+ loadPtr(Address(parenContextReg, ParenContext::returnAddressOffset()), tempReg);
+ storeToFrame(tempReg, subpatternBaseFrameLocation + BackTrackInfoParentheses::returnAddressIndex());
+ if (compileMode == IncludeSubpatterns) {
+ for (unsigned subpattern = firstSubpattern; subpattern <= lastSubpattern; subpattern++) {
+ loadPtr(Address(parenContextReg, ParenContext::subpatternOffset(subpattern)), tempReg);
+ storePtr(tempReg, Address(output, (subpattern << 1) * sizeof(unsigned)));
+ }
+ }
+ subpatternBaseFrameLocation += YarrStackSpaceForBackTrackInfoParentheses;
+ for (unsigned frameLocation = subpatternBaseFrameLocation; frameLocation < m_parenContextSizes.frameSlots(); frameLocation++) {
+ loadPtr(Address(parenContextReg, ParenContext::savedFrameOffset(m_parenContextSizes) + frameLocation * sizeof(uintptr_t)), tempReg);
+ storeToFrame(tempReg, frameLocation);
+ }
+ }
+#endif
+
void optimizeAlternative(PatternAlternative* alternative)
{
if (!alternative->m_terms.size())
@@ -354,14 +517,14 @@
JumpList notUnicode;
load16Unaligned(regUnicodeInputAndTrail, resultReg);
- and32(surrogateTagMask, resultReg, regUnicodeTemp);
- notUnicode.append(branch32(NotEqual, regUnicodeTemp, leadingSurrogateTag));
+ and32(surrogateTagMask, resultReg, regT2);
+ notUnicode.append(branch32(NotEqual, regT2, leadingSurrogateTag));
addPtr(TrustedImm32(2), regUnicodeInputAndTrail);
- getEffectiveAddress(BaseIndex(input, length, TimesTwo), regUnicodeTemp);
- notUnicode.append(branchPtr(AboveOrEqual, regUnicodeInputAndTrail, regUnicodeTemp));
+ getEffectiveAddress(BaseIndex(input, length, TimesTwo), regT2);
+ notUnicode.append(branch32(AboveOrEqual, regUnicodeInputAndTrail, regT2));
load16Unaligned(Address(regUnicodeInputAndTrail), regUnicodeInputAndTrail);
- and32(surrogateTagMask, regUnicodeInputAndTrail, regUnicodeTemp);
- notUnicode.append(branch32(NotEqual, regUnicodeTemp, trailingSurrogateTag));
+ and32(surrogateTagMask, regUnicodeInputAndTrail, regT2);
+ notUnicode.append(branch32(NotEqual, regT2, trailingSurrogateTag));
sub32(leadingSurrogateTag, resultReg);
sub32(trailingSurrogateTag, regUnicodeInputAndTrail);
lshift32(TrustedImm32(10), resultReg);
@@ -422,6 +585,13 @@
poke(imm, frameLocation);
}
+#if CPU(ARM64) || CPU(X86_64)
+ void storeToFrame(TrustedImmPtr imm, unsigned frameLocation) // Pointer-immediate overload of storeToFrame.
+ {
+ poke(imm, frameLocation); // Forwards the immediate and the frame slot index straight to poke().
+ }
+#endif
+
DataLabelPtr storeToFrameWithPatch(unsigned frameLocation)
{
return storePtrWithPatch(TrustedImmPtr(0), Address(stackPointerRegister, frameLocation * sizeof(void*)));
@@ -467,7 +637,30 @@
generateReturn();
}
- // Used to record subpatters, should only be called if compileMode is IncludeSubpatterns.
+ void generateJITFailReturn()
+ {
+     // Emits the shared exit used by jumps collected in m_abortExecution and
+     // m_hitMatchLimit. Nothing is emitted when neither list has any jumps.
+     // NOTE(review): -2 and -1 are presumably the failure codes the caller
+     // checks for in returnRegister - confirm against the result enum.
+     const bool haveAbortJumps = !m_abortExecution.empty();
+     const bool haveLimitJumps = !m_hitMatchLimit.empty();
+     if (!haveAbortJumps && !haveLimitJumps)
+         return;
+
+     JumpList toCommonExit;
+
+     if (haveAbortJumps) {
+         m_abortExecution.link(this);
+         move(TrustedImmPtr(reinterpret_cast<void*>(static_cast<size_t>(-2))), returnRegister);
+         // Skip over the match-limit result below.
+         toCommonExit.append(jump());
+     }
+
+     if (haveLimitJumps) {
+         m_hitMatchLimit.link(this);
+         move(TrustedImmPtr(reinterpret_cast<void*>(static_cast<size_t>(-1))), returnRegister);
+     }
+
+     // Both paths converge here: tear down the frame, zero the second
+     // return register and return.
+     toCommonExit.link(this);
+     removeCallFrame();
+     move(TrustedImm32(0), returnRegister2);
+     generateReturn();
+ }
+
+ // Used to record subpatterns, should only be called if compileMode is IncludeSubpatterns.
void setSubpatternStart(RegisterID reg, unsigned subpattern)
{
ASSERT(subpattern);
@@ -487,6 +680,12 @@
store32(TrustedImm32(-1), Address(output, (subpattern << 1) * sizeof(int)));
}
+ void clearMatches(unsigned subpattern, unsigned lastSubpattern)
+ {
+     // Calls clearSubpatternStart() for every subpattern id in the inclusive
+     // range [subpattern, lastSubpattern]; does nothing if the range is empty.
+     for (unsigned id = subpattern; id <= lastSubpattern; ++id)
+         clearSubpatternStart(id);
+ }
+
// We use one of three different strategies to track the start of the current match,
// while matching.
// 1) If the pattern has a fixed size, do nothing! - we calculate the value lazily
@@ -529,7 +728,7 @@
OpNestedAlternativeNext,
OpNestedAlternativeEnd,
// Used for alternatives in subpatterns where there is only a single
- // alternative (backtrackingis easier in these cases), or for alternatives
+ // alternative (backtracking is easier in these cases), or for alternatives
// which never need to be backtracked (those in parenthetical assertions,
// terminal subpatterns).
OpSimpleNestedAlternativeBegin,
@@ -541,6 +740,9 @@
// Used to wrap 'Terminal' subpattern matches (at the end of the regexp).
OpParenthesesSubpatternTerminalBegin,
OpParenthesesSubpatternTerminalEnd,
+ // Used to wrap generic captured matches
+ OpParenthesesSubpatternBegin,
+ OpParenthesesSubpatternEnd,
// Used to wrap parenthetical assertions.
OpParentheticalAssertionBegin,
OpParentheticalAssertionEnd,
@@ -1768,10 +1970,7 @@
// In the non-simple case, store a 'return address' so we can backtrack correctly.
if (op.m_op == OpNestedAlternativeNext) {
unsigned parenthesesFrameLocation = term->frameLocation;
- unsigned alternativeFrameLocation = parenthesesFrameLocation;
- if (term->quantityType != QuantifierFixedCount)
- alternativeFrameLocation += YarrStackSpaceForBackTrackInfoParenthesesOnce;
- op.m_returnAddress = storeToFrameWithPatch(alternativeFrameLocation);
+ op.m_returnAddress = storeToFrameWithPatch(parenthesesFrameLocation + BackTrackInfoParentheses::returnAddressIndex());
}
if (term->quantityType != QuantifierFixedCount && !m_ops[op.m_previousOp].m_alternative->m_minimumSize) {
@@ -1818,10 +2017,7 @@
// In the non-simple case, store a 'return address' so we can backtrack correctly.
if (op.m_op == OpNestedAlternativeEnd) {
unsigned parenthesesFrameLocation = term->frameLocation;
- unsigned alternativeFrameLocation = parenthesesFrameLocation;
- if (term->quantityType != QuantifierFixedCount)
- alternativeFrameLocation += YarrStackSpaceForBackTrackInfoParenthesesOnce;
- op.m_returnAddress = storeToFrameWithPatch(alternativeFrameLocation);
+ op.m_returnAddress = storeToFrameWithPatch(parenthesesFrameLocation + BackTrackInfoParentheses::returnAddressIndex());
}
if (term->quantityType != QuantifierFixedCount && !m_ops[op.m_previousOp].m_alternative->m_minimumSize) {
@@ -1963,7 +2159,7 @@
pastBreakpoint.link(this);
}
- // We know that the match is non-zero, we can accept it and
+ // We know that the match is non-zero, we can accept it and
// loop back up to the head of the subpattern.
jump(beginOp.m_reentry);
@@ -1973,6 +2169,131 @@
break;
}
+ // OpParenthesesSubpatternBegin/End
+ //
+ // These nodes support generic subpatterns.
+ case OpParenthesesSubpatternBegin: {
+#ifdef JIT_ALL_PARENS_EXPRESSIONS
+ PatternTerm* term = op.m_term;
+ unsigned parenthesesFrameLocation = term->frameLocation;
+
+ // Upon entry to a Greedy quantified set of parentheses, store the index.
+ // We'll use this for two purposes:
+ // - To indicate which iteration we are on of matching the remainder of
+ // the expression after the parentheses - the first, including the
+ // match within the parentheses, or the second having skipped over them.
+ // - To check for empty matches, which must be rejected.
+ //
+ // At the head of a NonGreedy set of parentheses we'll immediately set the
+ // value on the stack to -1 (indicating a match skipping the subpattern),
+ // and plant a jump to the end. We'll also plant a label to backtrack to
+ // to reenter the subpattern later, with a store to set up index on the
+ // second iteration.
+ //
+ // FIXME: for capturing parens, could use the index in the capture array?
+ if (term->quantityType == QuantifierGreedy || term->quantityType == QuantifierNonGreedy) {
+ storeToFrame(TrustedImm32(0), parenthesesFrameLocation + BackTrackInfoParentheses::matchAmountIndex());
+ storeToFrame(TrustedImmPtr(0), parenthesesFrameLocation + BackTrackInfoParentheses::parenContextHeadIndex());
+
+ if (term->quantityType == QuantifierNonGreedy) {
+ storeToFrame(TrustedImm32(-1), parenthesesFrameLocation + BackTrackInfoParentheses::beginIndex());
+ op.m_jumps.append(jump());
+ }
+
+ op.m_reentry = label();
+ RegisterID currParenContextReg = regT0;
+ RegisterID newParenContextReg = regT1;
+
+ loadFromFrame(parenthesesFrameLocation + BackTrackInfoParentheses::parenContextHeadIndex(), currParenContextReg);
+ allocateParenContext(newParenContextReg);
+ storePtr(currParenContextReg, newParenContextReg);
+ storeToFrame(newParenContextReg, parenthesesFrameLocation + BackTrackInfoParentheses::parenContextHeadIndex());
+ saveParenContext(newParenContextReg, regT2, term->parentheses.subpatternId, term->parentheses.lastSubpatternId, parenthesesFrameLocation);
+ storeToFrame(index, parenthesesFrameLocation + BackTrackInfoParentheses::beginIndex());
+ }
+
+ // If the parentheses are capturing, store the starting index value to the
+ // captures array, offsetting as necessary.
+ //
+ // FIXME: could avoid offsetting this value in JIT code, apply
+ // offsets only afterwards, at the point the results array is
+ // being accessed.
+ if (term->capture() && compileMode == IncludeSubpatterns) {
+ const RegisterID indexTemporary = regT0;
+ unsigned inputOffset = (m_checkedOffset - term->inputPosition).unsafeGet();
+ if (term->quantityType == QuantifierFixedCount)
+ inputOffset += term->parentheses.disjunction->m_minimumSize;
+ if (inputOffset) {
+ move(index, indexTemporary);
+ sub32(Imm32(inputOffset), indexTemporary);
+ setSubpatternStart(indexTemporary, term->parentheses.subpatternId);
+ } else
+ setSubpatternStart(index, term->parentheses.subpatternId);
+ }
+#else // !JIT_ALL_PARENS_EXPRESSIONS
+ RELEASE_ASSERT_NOT_REACHED();
+#endif
+ break;
+ }
+ case OpParenthesesSubpatternEnd: {
+#ifdef JIT_ALL_PARENS_EXPRESSIONS
+ PatternTerm* term = op.m_term;
+ unsigned parenthesesFrameLocation = term->frameLocation;
+
+ // Runtime ASSERT to make sure that the nested alternative handled the
+ // "no input consumed" check.
+ if (!ASSERT_DISABLED && term->quantityType != QuantifierFixedCount && !term->parentheses.disjunction->m_minimumSize) {
+ Jump pastBreakpoint;
+ pastBreakpoint = branch32(NotEqual, index, Address(stackPointerRegister, parenthesesFrameLocation * sizeof(void*)));
+ abortWithReason(YARRNoInputConsumed);
+ pastBreakpoint.link(this);
+ }
+
+ const RegisterID countTemporary = regT1;
+
+ YarrOp& beginOp = m_ops[op.m_previousOp];
+ loadFromFrame(parenthesesFrameLocation + BackTrackInfoParentheses::matchAmountIndex(), countTemporary);
+ add32(TrustedImm32(1), countTemporary);
+ storeToFrame(countTemporary, parenthesesFrameLocation + BackTrackInfoParentheses::matchAmountIndex());
+
+ // If the parentheses are capturing, store the ending index value to the
+ // captures array, offsetting as necessary.
+ //
+ // FIXME: could avoid offsetting this value in JIT code, apply
+ // offsets only afterwards, at the point the results array is
+ // being accessed.
+ if (term->capture() && compileMode == IncludeSubpatterns) {
+ const RegisterID indexTemporary = regT0;
+
+ unsigned inputOffset = (m_checkedOffset - term->inputPosition).unsafeGet();
+ if (inputOffset) {
+ move(index, indexTemporary);
+ sub32(Imm32(inputOffset), indexTemporary);
+ setSubpatternEnd(indexTemporary, term->parentheses.subpatternId);
+ } else
+ setSubpatternEnd(index, term->parentheses.subpatternId);
+ }
+
+ // If the parentheses are quantified Greedy then add a label to jump back
+ // to if we get a failed match from after the parentheses. For NonGreedy
+ // parentheses, link the jump from before the subpattern to here.
+ if (term->quantityType == QuantifierGreedy) {
+ if (term->quantityMaxCount != quantifyInfinite)
+ branch32(Below, countTemporary, Imm32(term->quantityMaxCount.unsafeGet())).linkTo(beginOp.m_reentry, this);
+ else
+ jump(beginOp.m_reentry);
+
+ op.m_reentry = label();
+ } else if (term->quantityType == QuantifierNonGreedy) {
+ YarrOp& beginOp = m_ops[op.m_previousOp];
+ beginOp.m_jumps.link(this);
+ }
+#else // !JIT_ALL_PARENS_EXPRESSIONS
+ RELEASE_ASSERT_NOT_REACHED();
+#endif
+ break;
+ }
+
// OpParentheticalAssertionBegin/End
case OpParentheticalAssertionBegin: {
PatternTerm* term = op.m_term;
@@ -2391,10 +2712,7 @@
// Plant a jump to the return address.
unsigned parenthesesFrameLocation = term->frameLocation;
- unsigned alternativeFrameLocation = parenthesesFrameLocation;
- if (term->quantityType != QuantifierFixedCount)
- alternativeFrameLocation += YarrStackSpaceForBackTrackInfoParenthesesOnce;
- loadFromFrameAndJump(alternativeFrameLocation);
+ loadFromFrameAndJump(parenthesesFrameLocation + BackTrackInfoParentheses::returnAddressIndex());
// Link the DataLabelPtr associated with the end of the last
// alternative to this point.
@@ -2425,7 +2743,7 @@
PatternTerm* term = op.m_term;
ASSERT(term->quantityMaxCount == 1);
- // We only need to backtrack to thispoint if capturing or greedy.
+ // We only need to backtrack to this point if capturing or greedy.
if ((term->capture() && compileMode == IncludeSubpatterns) || term->quantityType == QuantifierGreedy) {
m_backtrackingState.link(this);
@@ -2459,7 +2777,7 @@
// are currently in a state where we had skipped over the subpattern
// (in which case the flag value on the stack will be -1).
unsigned parenthesesFrameLocation = term->frameLocation;
- Jump hadSkipped = branch32(Equal, Address(stackPointerRegister, parenthesesFrameLocation * sizeof(void*)), TrustedImm32(-1));
+ Jump hadSkipped = branch32(Equal, Address(stackPointerRegister, (parenthesesFrameLocation + BackTrackInfoParenthesesOnce::beginIndex()) * sizeof(void*)), TrustedImm32(-1));
if (term->quantityType == QuantifierGreedy) {
// For Greedy parentheses, we skip after having already tried going
@@ -2503,6 +2821,108 @@
m_backtrackingState.append(op.m_jumps);
break;
+ // OpParenthesesSubpatternBegin/End
+ //
+ // When we are backtracking back out of a capturing subpattern we need
+ // to clear the start index in the matches output array, to record that
+ // this subpattern has not been captured.
+ //
+ // When backtracking back out of a Greedy quantified subpattern we need
+ // to catch this, and try running the remainder of the alternative after
+ // the subpattern again, skipping the parentheses.
+ //
+ // Upon backtracking back into a quantified set of parentheses we need to
+ // check whether we were currently skipping the subpattern. If not, we
+ // can backtrack into them, if we were we need to either backtrack back
+ // out of the start of the parentheses, or jump back to the forwards
+ // matching start, depending on whether the match is Greedy or NonGreedy.
+ case OpParenthesesSubpatternBegin: {
+#ifdef JIT_ALL_PARENS_EXPRESSIONS
+ PatternTerm* term = op.m_term;
+ unsigned parenthesesFrameLocation = term->frameLocation;
+
+ if (term->quantityType != QuantifierFixedCount) {
+ m_backtrackingState.link(this);
+
+ if (term->quantityType == QuantifierGreedy) {
+ RegisterID currParenContextReg = regT0;
+ RegisterID newParenContextReg = regT1;
+
+ loadFromFrame(parenthesesFrameLocation + BackTrackInfoParentheses::parenContextHeadIndex(), currParenContextReg);
+
+ restoreParenContext(currParenContextReg, regT2, term->parentheses.subpatternId, term->parentheses.lastSubpatternId, parenthesesFrameLocation);
+
+ freeParenContext(currParenContextReg, newParenContextReg);
+ storeToFrame(newParenContextReg, parenthesesFrameLocation + BackTrackInfoParentheses::parenContextHeadIndex());
+ const RegisterID countTemporary = regT0;
+ loadFromFrame(parenthesesFrameLocation + BackTrackInfoParentheses::matchAmountIndex(), countTemporary);
+ Jump zeroLengthMatch = branchTest32(Zero, countTemporary);
+
+ sub32(TrustedImm32(1), countTemporary);
+ storeToFrame(countTemporary, parenthesesFrameLocation + BackTrackInfoParentheses::matchAmountIndex());
+
+ jump(m_ops[op.m_nextOp].m_reentry);
+
+ zeroLengthMatch.link(this);
+
+ // Clear the flag in the stackframe indicating we didn't run through the subpattern.
+ storeToFrame(TrustedImm32(-1), parenthesesFrameLocation + BackTrackInfoParentheses::beginIndex());
+
+ jump(m_ops[op.m_nextOp].m_reentry);
+ }
+
+ // If Greedy, jump to the end.
+ if (term->quantityType == QuantifierGreedy) {
+ // A backtrack from after the parentheses, when skipping the subpattern,
+ // will jump back to here.
+ op.m_jumps.link(this);
+ }
+
+ m_backtrackingState.fallthrough();
+ }
+#else // !JIT_ALL_PARENS_EXPRESSIONS
+ RELEASE_ASSERT_NOT_REACHED();
+#endif
+ break;
+ }
+ case OpParenthesesSubpatternEnd: {
+#ifdef JIT_ALL_PARENS_EXPRESSIONS
+ PatternTerm* term = op.m_term;
+
+ if (term->quantityType != QuantifierFixedCount) {
+ m_backtrackingState.link(this);
+
+ // Check whether we should backtrack back into the parentheses, or if we
+ // are currently in a state where we had skipped over the subpattern
+ // (in which case the flag value on the stack will be -1).
+ unsigned parenthesesFrameLocation = term->frameLocation;
+ Jump hadSkipped = branch32(Equal, Address(stackPointerRegister, (parenthesesFrameLocation + BackTrackInfoParentheses::beginIndex()) * sizeof(void*)), TrustedImm32(-1));
+
+ if (term->quantityType == QuantifierGreedy) {
+ // For Greedy parentheses, we skip after having already tried going
+ // through the subpattern, so if we get here we're done.
+ YarrOp& beginOp = m_ops[op.m_previousOp];
+ beginOp.m_jumps.append(hadSkipped);
+ } else {
+ // For NonGreedy parentheses, we try skipping the subpattern first,
+ // so if we get here we need to try running through the subpattern
+ // next. Jump back to the start of the parentheses in the forwards
+ // matching path.
+ ASSERT(term->quantityType == QuantifierNonGreedy);
+ YarrOp& beginOp = m_ops[op.m_previousOp];
+ hadSkipped.linkTo(beginOp.m_reentry, this);
+ }
+
+ m_backtrackingState.fallthrough();
+ }
+
+ m_backtrackingState.append(op.m_jumps);
+#else // !JIT_ALL_PARENS_EXPRESSIONS
+ RELEASE_ASSERT_NOT_REACHED();
+#endif
+ break;
+ }
+
// OpParentheticalAssertionBegin/End
case OpParentheticalAssertionBegin: {
PatternTerm* term = op.m_term;
@@ -2562,9 +2982,9 @@
// Emits ops for a subpattern (set of parentheses). These consist
// of a set of alternatives wrapped in an outer set of nodes for
// the parentheses.
- // Supported types of parentheses are 'Once' (quantityMaxCount == 1)
- // and 'Terminal' (non-capturing parentheses quantified as greedy
- // and infinite).
+ // Supported types of parentheses are 'Once' (quantityMaxCount == 1),
+ // 'Terminal' (non-capturing parentheses quantified as greedy
+ // and infinite), and 0 based greedy quantified parentheses.
// Alternatives will use the 'Simple' set of ops if either the
// subpattern is terminal (in which case we will never need to
// backtrack), or if the subpattern only contains one alternative.
@@ -2584,6 +3004,8 @@
// need to restore the capture from the first subpattern upon a
// failure in the second.
if (term->quantityMinCount && term->quantityMinCount != term->quantityMaxCount) {
+ if (Options::dumpCompiledRegExpPatterns())
+ dataLogF("Can't JIT a variable counted parenthesis with a non-zero minimum\n");
m_shouldFallBack = true;
return;
} if (term->quantityMaxCount == 1 && !term->parentheses.isCopy) {
@@ -2602,9 +3024,31 @@
parenthesesBeginOpCode = OpParenthesesSubpatternTerminalBegin;
parenthesesEndOpCode = OpParenthesesSubpatternTerminalEnd;
} else {
+#ifdef JIT_ALL_PARENS_EXPRESSIONS
+ // We only handle generic parenthesis with greedy counts.
+ if (term->quantityType != QuantifierGreedy) {
+ // This subpattern is not supported by the JIT.
+ m_shouldFallBack = true;
+ return;
+ }
+
+ m_containsNestedSubpatterns = true;
+
+ // Select the 'Generic' nodes.
+ parenthesesBeginOpCode = OpParenthesesSubpatternBegin;
+ parenthesesEndOpCode = OpParenthesesSubpatternEnd;
+
+ // If there is more than one alternative we cannot use the 'simple' nodes.
+ if (term->parentheses.disjunction->m_alternatives.size() != 1) {
+ alternativeBeginOpCode = OpNestedAlternativeBegin;
+ alternativeNextOpCode = OpNestedAlternativeNext;
+ alternativeEndOpCode = OpNestedAlternativeEnd;
+ }
+#else
// This subpattern is not supported by the JIT.
m_shouldFallBack = true;
return;
+#endif
}
size_t parenBegin = m_ops.size();
@@ -2831,18 +3275,21 @@
if (m_pattern.m_saveInitialStartValue)
push(X86Registers::ebx);
- if (m_decodeSurrogatePairs) {
+#ifdef JIT_ALL_PARENS_EXPRESSIONS
+ if (m_containsNestedSubpatterns) {
#if OS(WINDOWS)
push(X86Registers::edi);
push(X86Registers::esi);
#endif
push(X86Registers::r12);
+ }
+#endif
+
+ if (m_decodeSurrogatePairs) {
push(X86Registers::r13);
push(X86Registers::r14);
push(X86Registers::r15);
- move(TrustedImm32(0x10000), supplementaryPlanesBase);
- move(TrustedImm32(0xfffffc00), surrogateTagMask);
move(TrustedImm32(0xd800), leadingSurrogateTag);
move(TrustedImm32(0xdc00), trailingSurrogateTag);
}
@@ -2912,6 +3359,10 @@
pop(X86Registers::r15);
pop(X86Registers::r14);
pop(X86Registers::r13);
+ }
+
+#ifdef JIT_ALL_PARENS_EXPRESSIONS
+ if (m_containsNestedSubpatterns) {
pop(X86Registers::r12);
#if OS(WINDOWS)
pop(X86Registers::esi);
@@ -2918,6 +3369,7 @@
pop(X86Registers::edi);
#endif
}
+#endif
if (m_pattern.m_saveInitialStartValue)
pop(X86Registers::ebx);
@@ -2949,6 +3401,10 @@
, m_decodeSurrogatePairs(m_charSize == Char16 && m_pattern.unicode())
, m_unicodeIgnoreCase(m_pattern.unicode() && m_pattern.ignoreCase())
, m_canonicalMode(m_pattern.unicode() ? CanonicalMode::Unicode : CanonicalMode::UCS2)
+#ifdef JIT_ALL_PARENS_EXPRESSIONS
+ , m_containsNestedSubpatterns(false)
+ , m_parenContextSizes(compileMode == IncludeSubpatterns ? m_pattern.m_numSubpatterns : 0, m_pattern.m_body->m_callFrameSize)
+#endif
{
}
@@ -2961,6 +3417,15 @@
}
#endif
+ // We need to compile before generating code since we set flags based on compilation that
+ // are used during generation.
+ opCompileBody(m_pattern.m_body);
+
+ if (m_shouldFallBack) {
+ jitObject.setFallBack(true);
+ return;
+ }
+
generateEnter();
Jump hasInput = checkInput();
@@ -2967,6 +3432,11 @@
generateFailReturn();
hasInput.link(this);
+#ifdef JIT_ALL_PARENS_EXPRESSIONS
+ if (m_containsNestedSubpatterns)
+ move(TrustedImm32(matchLimit), remainingMatchCount);
+#endif
+
if (compileMode == IncludeSubpatterns) {
for (unsigned i = 0; i < m_pattern.m_numSubpatterns + 1; ++i)
store32(TrustedImm32(-1), Address(output, (i << 1) * sizeof(int)));
@@ -2977,6 +3447,11 @@
initCallFrame();
+#ifdef JIT_ALL_PARENS_EXPRESSIONS
+ if (m_containsNestedSubpatterns)
+ initParenContextFreeList();
+#endif
+
if (m_pattern.m_saveInitialStartValue) {
#ifdef HAVE_INITIAL_START_REG
move(index, initialStart);
@@ -2985,18 +3460,13 @@
#endif
}
- opCompileBody(m_pattern.m_body);
-
- if (m_shouldFallBack) {
- jitObject.setFallBack(true);
- return;
- }
-
generate();
backtrack();
generateTryReadUnicodeCharacterHelper();
+ generateJITFailReturn();
+
LinkBuffer linkBuffer(*this, REGEXP_CODE_ID, JITCompilationCanFail);
if (linkBuffer.didFailToAllocate()) {
jitObject.setFallBack(true);
@@ -3040,6 +3510,12 @@
bool m_decodeSurrogatePairs;
bool m_unicodeIgnoreCase;
CanonicalMode m_canonicalMode;
+#ifdef JIT_ALL_PARENS_EXPRESSIONS
+ bool m_containsNestedSubpatterns;
+ ParenContextSizes m_parenContextSizes;
+#endif
+ JumpList m_abortExecution;
+ JumpList m_hitMatchLimit;
Vector<Call> m_tryReadUnicodeCharacterCalls;
Label m_tryReadUnicodeCharacterEntry;
Modified: trunk/Source/_javascript_Core/yarr/YarrJIT.h (225929 => 225930)
--- trunk/Source/_javascript_Core/yarr/YarrJIT.h 2017-12-14 22:15:25 UTC (rev 225929)
+++ trunk/Source/_javascript_Core/yarr/YarrJIT.h 2017-12-14 22:16:38 UTC (rev 225930)
@@ -38,6 +38,11 @@
#define YARR_CALL
#endif
+#if CPU(ARM64) || (CPU(X86_64) && !OS(WINDOWS))
+#define JIT_ALL_PARENS_EXPRESSIONS
+constexpr size_t patternContextBufferSize = 8192; // Space caller allocates to save nested parenthesis context
+#endif
+
namespace JSC {
class VM;
@@ -47,10 +52,17 @@
class YarrCodeBlock {
#if CPU(X86_64) || CPU(ARM64)
+#ifdef JIT_ALL_PARENS_EXPRESSIONS
+ typedef MatchResult (*YarrJITCode8)(const LChar* input, unsigned start, unsigned length, int* output, void* freeParenContext, unsigned parenContextSize) YARR_CALL;
+ typedef MatchResult (*YarrJITCode16)(const UChar* input, unsigned start, unsigned length, int* output, void* freeParenContext, unsigned parenContextSize) YARR_CALL;
+ typedef MatchResult (*YarrJITCodeMatchOnly8)(const LChar* input, unsigned start, unsigned length, void*, void* freeParenContext, unsigned parenContextSize) YARR_CALL;
+ typedef MatchResult (*YarrJITCodeMatchOnly16)(const UChar* input, unsigned start, unsigned length, void*, void* freeParenContext, unsigned parenContextSize) YARR_CALL;
+#else
typedef MatchResult (*YarrJITCode8)(const LChar* input, unsigned start, unsigned length, int* output) YARR_CALL;
typedef MatchResult (*YarrJITCode16)(const UChar* input, unsigned start, unsigned length, int* output) YARR_CALL;
typedef MatchResult (*YarrJITCodeMatchOnly8)(const LChar* input, unsigned start, unsigned length) YARR_CALL;
typedef MatchResult (*YarrJITCodeMatchOnly16)(const UChar* input, unsigned start, unsigned length) YARR_CALL;
+#endif
#else
typedef EncodedMatchResult (*YarrJITCode8)(const LChar* input, unsigned start, unsigned length, int* output) YARR_CALL;
typedef EncodedMatchResult (*YarrJITCode16)(const UChar* input, unsigned start, unsigned length, int* output) YARR_CALL;
@@ -81,6 +93,31 @@
void set8BitCodeMatchOnly(MacroAssemblerCodeRef matchOnly) { m_matchOnly8 = matchOnly; }
void set16BitCodeMatchOnly(MacroAssemblerCodeRef matchOnly) { m_matchOnly16 = matchOnly; }
+#ifdef JIT_ALL_PARENS_EXPRESSIONS
+ // Runs the 8-bit subpattern-capturing JIT code. freeParenContext and
+ // parenContextSize are passed through to the generated code unchanged.
+ MatchResult execute(const LChar* input, unsigned start, unsigned length, int* output, void* freeParenContext, unsigned parenContextSize)
+ {
+     ASSERT(has8BitCode());
+     auto entryPoint = reinterpret_cast<YarrJITCode8>(m_ref8.code().executableAddress());
+     return MatchResult(entryPoint(input, start, length, output, freeParenContext, parenContextSize));
+ }
+
+ // Runs the 16-bit subpattern-capturing JIT code.
+ MatchResult execute(const UChar* input, unsigned start, unsigned length, int* output, void* freeParenContext, unsigned parenContextSize)
+ {
+     ASSERT(has16BitCode());
+     auto entryPoint = reinterpret_cast<YarrJITCode16>(m_ref16.code().executableAddress());
+     return MatchResult(entryPoint(input, start, length, output, freeParenContext, parenContextSize));
+ }
+
+ // Runs the 8-bit match-only JIT code; a null pointer fills the output argument slot.
+ MatchResult execute(const LChar* input, unsigned start, unsigned length, void* freeParenContext, unsigned parenContextSize)
+ {
+     ASSERT(has8BitCodeMatchOnly());
+     auto entryPoint = reinterpret_cast<YarrJITCodeMatchOnly8>(m_matchOnly8.code().executableAddress());
+     return MatchResult(entryPoint(input, start, length, nullptr, freeParenContext, parenContextSize));
+ }
+
+ // Runs the 16-bit match-only JIT code; a null pointer fills the output argument slot.
+ MatchResult execute(const UChar* input, unsigned start, unsigned length, void* freeParenContext, unsigned parenContextSize)
+ {
+     ASSERT(has16BitCodeMatchOnly());
+     auto entryPoint = reinterpret_cast<YarrJITCodeMatchOnly16>(m_matchOnly16.code().executableAddress());
+     return MatchResult(entryPoint(input, start, length, nullptr, freeParenContext, parenContextSize));
+ }
+#else
MatchResult execute(const LChar* input, unsigned start, unsigned length, int* output)
{
ASSERT(has8BitCode());
@@ -104,6 +141,7 @@
ASSERT(has16BitCodeMatchOnly());
return MatchResult(reinterpret_cast<YarrJITCodeMatchOnly16>(m_matchOnly16.code().executableAddress())(input, start, length));
}
+#endif
#if ENABLE(REGEXP_TRACING)
void *get8BitMatchOnlyAddr()
Modified: trunk/Source/_javascript_Core/yarr/YarrPattern.cpp (225929 => 225930)
--- trunk/Source/_javascript_Core/yarr/YarrPattern.cpp 2017-12-14 22:15:25 UTC (rev 225929)
+++ trunk/Source/_javascript_Core/yarr/YarrPattern.cpp 2017-12-14 22:16:38 UTC (rev 225930)
@@ -828,8 +828,7 @@
// Note: for fixed once parentheses we will ensure at least the minimum is available; others are on their own.
term.frameLocation = currentCallFrameSize;
if (term.quantityMaxCount == 1 && !term.parentheses.isCopy) {
- if (term.quantityType != QuantifierFixedCount)
- currentCallFrameSize += YarrStackSpaceForBackTrackInfoParenthesesOnce;
+ currentCallFrameSize += YarrStackSpaceForBackTrackInfoParenthesesOnce;
error = setupDisjunctionOffsets(term.parentheses.disjunction, currentCallFrameSize, currentInputPosition.unsafeGet(), currentCallFrameSize);
if (error)
return error;
@@ -845,11 +844,10 @@
term.inputPosition = currentInputPosition.unsafeGet();
} else {
term.inputPosition = currentInputPosition.unsafeGet();
- unsigned ignoredCallFrameSize;
- error = setupDisjunctionOffsets(term.parentheses.disjunction, 0, currentInputPosition.unsafeGet(), ignoredCallFrameSize);
+ currentCallFrameSize += YarrStackSpaceForBackTrackInfoParentheses;
+ error = setupDisjunctionOffsets(term.parentheses.disjunction, currentCallFrameSize, currentInputPosition.unsafeGet(), currentCallFrameSize);
if (error)
return error;
- currentCallFrameSize += YarrStackSpaceForBackTrackInfoParentheses;
}
// Fixed count of 1 could be accepted, if they have a fixed size *AND* if all alternatives are of the same length.
alternative->m_hasFixedSize = false;
@@ -1185,7 +1183,7 @@
*error = compile(pattern, stackLimit);
}
-static void indentForNestingLevel(PrintStream& out, unsigned nestingDepth)
+void indentForNestingLevel(PrintStream& out, unsigned nestingDepth)
{
out.print(" ");
for (; nestingDepth; --nestingDepth)
@@ -1192,7 +1190,7 @@
out.print(" ");
}
-static void dumpUChar32(PrintStream& out, UChar32 c)
+void dumpUChar32(PrintStream& out, UChar32 c)
{
if (c >= ' '&& c <= 0xff)
out.printf("'%c'", static_cast<char>(c));
@@ -1200,6 +1198,79 @@
out.printf("0x%04x", c);
}
+void dumpCharacterClass(PrintStream& out, YarrPattern* pattern, CharacterClass* characterClass)
+{
+    // Prints a description of characterClass to out.
+    // A class that is pointer-identical to one of pattern's built-in classes is
+    // printed as a fixed tag; any other class is printed as a bracketed list of
+    // its m_matches / m_ranges / m_matchesUnicode / m_rangesUnicode contents.
+    // The comparisons stay in a short-circuiting chain so each pattern getter is
+    // only called when the earlier ones did not match.
+    if (characterClass == pattern->anyCharacterClass())
+        out.print("<any character>");
+    else if (characterClass == pattern->newlineCharacterClass())
+        out.print("<newline>");
+    else if (characterClass == pattern->digitsCharacterClass())
+        out.print("<digits>");
+    else if (characterClass == pattern->spacesCharacterClass())
+        out.print("<whitespace>");
+    else if (characterClass == pattern->wordcharCharacterClass())
+        out.print("<word>");
+    else if (characterClass == pattern->wordUnicodeIgnoreCaseCharCharacterClass())
+        out.print("<unicode ignore case>");
+    else if (characterClass == pattern->nondigitsCharacterClass())
+        out.print("<non-digits>");
+    else if (characterClass == pattern->nonspacesCharacterClass())
+        out.print("<non-whitespace>");
+    else if (characterClass == pattern->nonwordcharCharacterClass())
+        out.print("<non-word>");
+    else if (characterClass == pattern->nonwordUnicodeIgnoreCaseCharCharacterClass())
+        out.print("<unicode non-ignore case>");
+    else {
+        // Set once any group has been printed, so later groups are comma separated.
+        bool needSeparator = false;
+
+        // Both helpers take the vector by const reference: dumping must not copy it.
+        auto dumpMatches = [&] (const char* prefix, const Vector<UChar32>& matches) {
+            size_t matchesSize = matches.size();
+            if (!matchesSize)
+                return;
+
+            if (needSeparator)
+                out.print(",");
+            needSeparator = true;
+
+            out.print(prefix, ":(");
+            for (size_t i = 0; i < matchesSize; ++i) {
+                if (i)
+                    out.print(",");
+                dumpUChar32(out, matches[i]);
+            }
+            out.print(")");
+        };
+
+        auto dumpRanges = [&] (const char* prefix, const Vector<CharacterRange>& ranges) {
+            size_t rangeSize = ranges.size();
+            if (!rangeSize)
+                return;
+
+            if (needSeparator)
+                out.print(",");
+            needSeparator = true;
+
+            out.print(prefix, " ranges:(");
+            for (size_t i = 0; i < rangeSize; ++i) {
+                if (i)
+                    out.print(",");
+                const CharacterRange& range = ranges[i];
+                out.print("(");
+                dumpUChar32(out, range.begin);
+                out.print("..");
+                dumpUChar32(out, range.end);
+                out.print(")");
+            }
+            out.print(")");
+        };
+
+        out.print("[");
+        dumpMatches("ASCII", characterClass->m_matches);
+        dumpRanges("ASCII", characterClass->m_ranges);
+        dumpMatches("Unicode", characterClass->m_matchesUnicode);
+        dumpRanges("Unicode", characterClass->m_rangesUnicode);
+        out.print("]");
+    }
+}
+
void PatternAlternative::dump(PrintStream& out, YarrPattern* thisPattern, unsigned nestingDepth)
{
out.print("minimum size: ", m_minimumSize);
@@ -1239,8 +1310,10 @@
{
indentForNestingLevel(out, nestingDepth);
- if (invert() && (type != TypeParenthesesSubpattern && type != TypeParentheticalAssertion))
- out.print("not ");
+ if (type != TypeParenthesesSubpattern && type != TypeParentheticalAssertion) {
+ if (invert())
+ out.print("not ");
+ }
switch (type) {
case TypeAssertionBOL:
@@ -1254,6 +1327,7 @@
break;
case TypePatternCharacter:
out.printf("character ");
+ out.printf("inputPosition %u ", inputPosition);
if (thisPattern->ignoreCase() && isASCIIAlpha(patternCharacter)) {
dumpUChar32(out, toASCIIUpper(patternCharacter));
out.print("/");
@@ -1375,16 +1449,17 @@
if (parentheses.isTerminal)
out.print(",terminal");
- if (quantityMaxCount != 1 || parentheses.isCopy || quantityType != QuantifierFixedCount)
- out.println(",frame location ", frameLocation);
- else
- out.println();
+ out.println(",frame location ", frameLocation);
if (parentheses.disjunction->m_alternatives.size() > 1) {
indentForNestingLevel(out, nestingDepth + 1);
unsigned alternativeFrameLocation = frameLocation;
- if (quantityType != QuantifierFixedCount)
+ if (quantityMaxCount == 1 && !parentheses.isCopy)
alternativeFrameLocation += YarrStackSpaceForBackTrackInfoParenthesesOnce;
+ else if (parentheses.isTerminal)
+ alternativeFrameLocation += YarrStackSpaceForBackTrackInfoParenthesesTerminal;
+ else
+ alternativeFrameLocation += YarrStackSpaceForBackTrackInfoParentheses;
out.println("alternative list,frame location ", alternativeFrameLocation);
}
@@ -1461,6 +1536,8 @@
out.print(")");
}
out.print(":\n");
+ if (m_body->m_callFrameSize)
+ out.print(" callframe size: ", m_body->m_callFrameSize, "\n");
m_body->dump(out, this);
}
Modified: trunk/Source/JavaScriptCore/yarr/YarrPattern.h (225929 => 225930)
--- trunk/Source/JavaScriptCore/yarr/YarrPattern.h 2017-12-14 22:15:25 UTC (rev 225929)
+++ trunk/Source/JavaScriptCore/yarr/YarrPattern.h 2017-12-14 22:16:38 UTC (rev 225930)
@@ -227,7 +227,13 @@
{
return m_capture;
}
-
+
+ bool containsAnyCaptures() // Reports whether this parentheses subpattern term contains any captures, judged from its subpattern id range.
+ {
+ ASSERT(this->type == TypeParenthesesSubpattern); // Only meaningful for parentheses subpattern terms; the parentheses.* fields are read below.
+ return parentheses.lastSubpatternId >= parentheses.subpatternId; // NOTE(review): presumably a capture-free group ends up with lastSubpatternId below its own subpatternId -- confirm where these ids are assigned.
+ }
+
void quantify(unsigned count, QuantifierType type)
{
quantityMinCount = 0;
@@ -549,6 +555,10 @@
HashMap<unsigned, CharacterClass*> unicodePropertiesCached;
};
+ void indentForNestingLevel(PrintStream&, unsigned); // Dump helper; the pattern dump code calls it as (out, nestingDepth) before printing a term.
+ void dumpUChar32(PrintStream&, UChar32); // Dump helper; the dump code calls it as (out, codePoint), e.g. for a range end or a pattern character.
+ void dumpCharacterClass(PrintStream&, YarrPattern*, CharacterClass*); // NOTE(review): presumably the routine that prints a class's ASCII/Unicode matches and ranges between "[" and "]" -- confirm against YarrPattern.cpp.
+
struct BackTrackInfoPatternCharacter {
uintptr_t begin; // Only needed for unicode patterns
uintptr_t matchAmount;
@@ -574,9 +584,9 @@
};
struct BackTrackInfoAlternative { // Per-alternative backtrack record; its only data is one uintptr_t offset.
- uintptr_t offset;
-
- static unsigned offsetIndex() { return offsetof(BackTrackInfoAlternative, offset) / sizeof(uintptr_t); }
+ union { // Anonymous union with a single member, so offset is still named directly on the struct and the size stays one uintptr_t.
+ uintptr_t offset;
+ };
};
struct BackTrackInfoParentheticalAssertion {
@@ -587,8 +597,10 @@
struct BackTrackInfoParenthesesOnce { // Backtrack record made of uintptr_t fields; the dump code uses its stack space when quantityMaxCount == 1 and the term is not a copy.
uintptr_t begin;
+ uintptr_t returnAddress; // NOTE(review): presumably the saved return address that directly contained alternatives rely on (per this change's ChangeLog) -- confirm in the JIT.
static unsigned beginIndex() { return offsetof(BackTrackInfoParenthesesOnce, begin) / sizeof(uintptr_t); }
+ static unsigned returnAddressIndex() { return offsetof(BackTrackInfoParenthesesOnce, returnAddress) / sizeof(uintptr_t); } // Position of returnAddress within the record, counted in uintptr_t units.
};
struct BackTrackInfoParenthesesTerminal {
@@ -597,4 +609,16 @@
static unsigned beginIndex() { return offsetof(BackTrackInfoParenthesesTerminal, begin) / sizeof(uintptr_t); }
};
+ struct BackTrackInfoParentheses { // Backtrack record of four uintptr_t fields; the dump code uses its stack space for parentheses that are neither the once case nor terminal.
+ uintptr_t begin;
+ uintptr_t returnAddress; // NOTE(review): presumably the saved return address that directly contained alternatives rely on (per this change's ChangeLog) -- confirm in the JIT.
+ uintptr_t matchAmount; // NOTE(review): presumably the number of iterations matched so far -- confirm against the code that updates it.
+ uintptr_t parenContextHead; // NOTE(review): presumably the head of the saved ParenContext records for this group -- confirm against the code that updates it.
+
+ static unsigned beginIndex() { return offsetof(BackTrackInfoParentheses, begin) / sizeof(uintptr_t); } // Each *Index() accessor gives its field's position in uintptr_t units.
+ static unsigned returnAddressIndex() { return offsetof(BackTrackInfoParentheses, returnAddress) / sizeof(uintptr_t); }
+ static unsigned matchAmountIndex() { return offsetof(BackTrackInfoParentheses, matchAmount) / sizeof(uintptr_t); }
+ static unsigned parenContextHeadIndex() { return offsetof(BackTrackInfoParentheses, parenContextHead) / sizeof(uintptr_t); }
+ };
+
} } // namespace JSC::Yarr