Title: [225930] trunk
Revision
225930
Author
msab...@apple.com
Date
2017-12-14 14:16:38 -0800 (Thu, 14 Dec 2017)

Log Message

REGRESSION (r225695): Repro crash on yahoo login page
https://bugs.webkit.org/show_bug.cgi?id=180761

Reviewed by JF Bastien.

JSTests:

New regression test.

* stress/regress-180761.js: Added.

Source/JavaScriptCore:

Relanding r225695 with a fix.

The fix is that we need to save the return address for a parentheses in
the ParenContext because it is actually used by any immediately contained
alternatives.

Also did a little refactoring, changing occurrences of PatternContext to
ParenContext since that is the name of the structure.

* runtime/RegExp.cpp:
(JSC::byteCodeCompilePattern):
(JSC::RegExp::byteCodeCompileIfNecessary):
(JSC::RegExp::compile):
(JSC::RegExp::compileMatchOnly):
* runtime/RegExp.h:
* runtime/RegExpInlines.h:
(JSC::RegExp::matchInline):
* testRegExp.cpp:
(parseRegExpLine):
(runFromFiles):
* yarr/Yarr.h:
* yarr/YarrInterpreter.cpp:
(JSC::Yarr::ByteCompiler::compile):
(JSC::Yarr::ByteCompiler::dumpDisjunction):
* yarr/YarrJIT.cpp:
(JSC::Yarr::YarrGenerator::ParenContextSizes::ParenContextSizes):
(JSC::Yarr::YarrGenerator::ParenContextSizes::numSubpatterns):
(JSC::Yarr::YarrGenerator::ParenContextSizes::frameSlots):
(JSC::Yarr::YarrGenerator::ParenContext::sizeFor):
(JSC::Yarr::YarrGenerator::ParenContext::nextOffset):
(JSC::Yarr::YarrGenerator::ParenContext::beginOffset):
(JSC::Yarr::YarrGenerator::ParenContext::matchAmountOffset):
(JSC::Yarr::YarrGenerator::ParenContext::returnAddressOffset):
(JSC::Yarr::YarrGenerator::ParenContext::subpatternOffset):
(JSC::Yarr::YarrGenerator::ParenContext::savedFrameOffset):
(JSC::Yarr::YarrGenerator::initParenContextFreeList):
(JSC::Yarr::YarrGenerator::allocateParenContext):
(JSC::Yarr::YarrGenerator::freeParenContext):
(JSC::Yarr::YarrGenerator::saveParenContext):
(JSC::Yarr::YarrGenerator::restoreParenContext):
(JSC::Yarr::YarrGenerator::tryReadUnicodeCharImpl):
(JSC::Yarr::YarrGenerator::storeToFrame):
(JSC::Yarr::YarrGenerator::generateJITFailReturn):
(JSC::Yarr::YarrGenerator::clearMatches):
(JSC::Yarr::YarrGenerator::generate):
(JSC::Yarr::YarrGenerator::backtrack):
(JSC::Yarr::YarrGenerator::opCompileParenthesesSubpattern):
(JSC::Yarr::YarrGenerator::generateEnter):
(JSC::Yarr::YarrGenerator::generateReturn):
(JSC::Yarr::YarrGenerator::YarrGenerator):
(JSC::Yarr::YarrGenerator::compile):
* yarr/YarrJIT.h:
(JSC::Yarr::YarrCodeBlock::execute):
* yarr/YarrPattern.cpp:
(JSC::Yarr::indentForNestingLevel):
(JSC::Yarr::dumpUChar32):
(JSC::Yarr::dumpCharacterClass):
(JSC::Yarr::PatternTerm::dump):
(JSC::Yarr::YarrPattern::dumpPattern):
* yarr/YarrPattern.h:
(JSC::Yarr::PatternTerm::containsAnyCaptures):
(JSC::Yarr::BackTrackInfoParenthesesOnce::returnAddressIndex):
(JSC::Yarr::BackTrackInfoParentheses::beginIndex):
(JSC::Yarr::BackTrackInfoParentheses::returnAddressIndex):
(JSC::Yarr::BackTrackInfoParentheses::matchAmountIndex):
(JSC::Yarr::BackTrackInfoParentheses::parenContextHeadIndex):
(JSC::Yarr::BackTrackInfoAlternative::offsetIndex): Deleted.

Modified Paths

Added Paths

Diff

Modified: trunk/JSTests/ChangeLog (225929 => 225930)


--- trunk/JSTests/ChangeLog	2017-12-14 22:15:25 UTC (rev 225929)
+++ trunk/JSTests/ChangeLog	2017-12-14 22:16:38 UTC (rev 225930)
@@ -1,3 +1,14 @@
+2017-12-14  Michael Saboff  <msab...@apple.com>
+
+        REGRESSION (r225695): Repro crash on yahoo login page
+        https://bugs.webkit.org/show_bug.cgi?id=180761
+
+        Reviewed by JF Bastien.
+
+        New regression test.
+
+        * stress/regress-180761.js: Added.
+
 2017-12-13  Keith Miller  <keith_mil...@apple.com>
 
         JSObjects should have a mask for loading indexed properties

Added: trunk/JSTests/stress/regress-180761.js (0 => 225930)


--- trunk/JSTests/stress/regress-180761.js	                        (rev 0)
+++ trunk/JSTests/stress/regress-180761.js	2017-12-14 22:16:38 UTC (rev 225930)
@@ -0,0 +1,6 @@
+//@ runDefault
+
+// Regression test for bug 180761.  This test should not crash.
+
+if (/(?:(?: |a)|\.a)* a*/.exec("/a.aaa") !== null)
+    throw "Expected /(?:(?: |a)|\.a)* a*/.exec(\"/a.aaa\") to not match";

Modified: trunk/Source/_javascript_Core/ChangeLog (225929 => 225930)


--- trunk/Source/_javascript_Core/ChangeLog	2017-12-14 22:15:25 UTC (rev 225929)
+++ trunk/Source/_javascript_Core/ChangeLog	2017-12-14 22:16:38 UTC (rev 225930)
@@ -1,3 +1,78 @@
+2017-12-14  Michael Saboff  <msab...@apple.com>
+
+        REGRESSION (r225695): Repro crash on yahoo login page
+        https://bugs.webkit.org/show_bug.cgi?id=180761
+
+        Reviewed by JF Bastien.
+
+        Relanding r225695 with a fix.
+
+        The fix is that we need to save the return address for a parentheses in
+        the ParenContext because it is actually used by any immediately contained
+        alternatives.
+
+        Also did a little refactoring, changing occurances of PatternContext to
+        ParenContext since that is the name of the structure.
+
+        * runtime/RegExp.cpp:
+        (JSC::byteCodeCompilePattern):
+        (JSC::RegExp::byteCodeCompileIfNecessary):
+        (JSC::RegExp::compile):
+        (JSC::RegExp::compileMatchOnly):
+        * runtime/RegExp.h:
+        * runtime/RegExpInlines.h:
+        (JSC::RegExp::matchInline):
+        * testRegExp.cpp:
+        (parseRegExpLine):
+        (runFromFiles):
+        * yarr/Yarr.h:
+        * yarr/YarrInterpreter.cpp:
+        (JSC::Yarr::ByteCompiler::compile):
+        (JSC::Yarr::ByteCompiler::dumpDisjunction):
+        * yarr/YarrJIT.cpp:
+        (JSC::Yarr::YarrGenerator::ParenContextSizes::ParenContextSizes):
+        (JSC::Yarr::YarrGenerator::ParenContextSizes::numSubpatterns):
+        (JSC::Yarr::YarrGenerator::ParenContextSizes::frameSlots):
+        (JSC::Yarr::YarrGenerator::ParenContext::sizeFor):
+        (JSC::Yarr::YarrGenerator::ParenContext::nextOffset):
+        (JSC::Yarr::YarrGenerator::ParenContext::beginOffset):
+        (JSC::Yarr::YarrGenerator::ParenContext::matchAmountOffset):
+        (JSC::Yarr::YarrGenerator::ParenContext::returnAddressOffset):
+        (JSC::Yarr::YarrGenerator::ParenContext::subpatternOffset):
+        (JSC::Yarr::YarrGenerator::ParenContext::savedFrameOffset):
+        (JSC::Yarr::YarrGenerator::initParenContextFreeList):
+        (JSC::Yarr::YarrGenerator::allocateParenContext):
+        (JSC::Yarr::YarrGenerator::freeParenContext):
+        (JSC::Yarr::YarrGenerator::saveParenContext):
+        (JSC::Yarr::YarrGenerator::restoreParenContext):
+        (JSC::Yarr::YarrGenerator::tryReadUnicodeCharImpl):
+        (JSC::Yarr::YarrGenerator::storeToFrame):
+        (JSC::Yarr::YarrGenerator::generateJITFailReturn):
+        (JSC::Yarr::YarrGenerator::clearMatches):
+        (JSC::Yarr::YarrGenerator::generate):
+        (JSC::Yarr::YarrGenerator::backtrack):
+        (JSC::Yarr::YarrGenerator::opCompileParenthesesSubpattern):
+        (JSC::Yarr::YarrGenerator::generateEnter):
+        (JSC::Yarr::YarrGenerator::generateReturn):
+        (JSC::Yarr::YarrGenerator::YarrGenerator):
+        (JSC::Yarr::YarrGenerator::compile):
+        * yarr/YarrJIT.h:
+        (JSC::Yarr::YarrCodeBlock::execute):
+        * yarr/YarrPattern.cpp:
+        (JSC::Yarr::indentForNestingLevel):
+        (JSC::Yarr::dumpUChar32):
+        (JSC::Yarr::dumpCharacterClass):
+        (JSC::Yarr::PatternTerm::dump):
+        (JSC::Yarr::YarrPattern::dumpPattern):
+        * yarr/YarrPattern.h:
+        (JSC::Yarr::PatternTerm::containsAnyCaptures):
+        (JSC::Yarr::BackTrackInfoParenthesesOnce::returnAddressIndex):
+        (JSC::Yarr::BackTrackInfoParentheses::beginIndex):
+        (JSC::Yarr::BackTrackInfoParentheses::returnAddressIndex):
+        (JSC::Yarr::BackTrackInfoParentheses::matchAmountIndex):
+        (JSC::Yarr::BackTrackInfoParentheses::parenContextHeadIndex):
+        (JSC::Yarr::BackTrackInfoAlternative::offsetIndex): Deleted.
+
 2017-12-13  Keith Miller  <keith_mil...@apple.com>
 
         JSObjects should have a mask for loading indexed properties

Modified: trunk/Source/_javascript_Core/runtime/RegExp.cpp (225929 => 225930)


--- trunk/Source/_javascript_Core/runtime/RegExp.cpp	2017-12-14 22:15:25 UTC (rev 225929)
+++ trunk/Source/_javascript_Core/runtime/RegExp.cpp	2017-12-14 22:16:38 UTC (rev 225930)
@@ -271,6 +271,30 @@
     return vm.regExpCache()->lookupOrCreate(patternString, flags);
 }
 
+
+static std::unique_ptr<Yarr::BytecodePattern> byteCodeCompilePattern(VM* vm, Yarr::YarrPattern& pattern)
+{
+    return Yarr::byteCompile(pattern, &vm->m_regExpAllocator, &vm->m_regExpAllocatorLock);
+}
+
+void RegExp::byteCodeCompileIfNecessary(VM* vm)
+{
+    if (m_regExpBytecode)
+        return;
+
+    Yarr::YarrPattern pattern(m_patternString, m_flags, &m_constructionError, vm->stackLimit());
+    if (m_constructionError) {
+        RELEASE_ASSERT_NOT_REACHED();
+#if COMPILER_QUIRK(CONSIDERS_UNREACHABLE_CODE)
+        m_state = ParseError;
+        return;
+#endif
+    }
+    ASSERT(m_numSubpatterns == pattern.m_numSubpatterns);
+
+    m_regExpBytecode = byteCodeCompilePattern(vm, pattern);
+}
+
 void RegExp::compile(VM* vm, Yarr::YarrCharSize charSize)
 {
     ConcurrentJSLocker locker(m_lock);
@@ -303,8 +327,11 @@
     UNUSED_PARAM(charSize);
 #endif
 
+    if (Options::dumpCompiledRegExpPatterns())
+        dataLog("Can't JIT this regular _expression_: \"", m_patternString, "\"\n");
+
     m_state = ByteCode;
-    m_regExpBytecode = Yarr::byteCompile(pattern, &vm->m_regExpAllocator, &vm->m_regExpAllocatorLock);
+    m_regExpBytecode = byteCodeCompilePattern(vm, pattern);
 }
 
 int RegExp::match(VM& vm, const String& s, unsigned startOffset, Vector<int>& ovector)
@@ -356,8 +383,11 @@
     UNUSED_PARAM(charSize);
 #endif
 
+    if (Options::dumpCompiledRegExpPatterns())
+        dataLog("Can't JIT this regular _expression_: \"", m_patternString, "\"\n");
+
     m_state = ByteCode;
-    m_regExpBytecode = Yarr::byteCompile(pattern, &vm->m_regExpAllocator, &vm->m_regExpAllocatorLock);
+    m_regExpBytecode = byteCodeCompilePattern(vm, pattern);
 }
 
 MatchResult RegExp::match(VM& vm, const String& s, unsigned startOffset)

Modified: trunk/Source/_javascript_Core/runtime/RegExp.h (225929 => 225930)


--- trunk/Source/_javascript_Core/runtime/RegExp.h	2017-12-14 22:15:25 UTC (rev 225929)
+++ trunk/Source/_javascript_Core/runtime/RegExp.h	2017-12-14 22:16:38 UTC (rev 225930)
@@ -140,6 +140,8 @@
 
     RegExpState m_state;
 
+    void byteCodeCompileIfNecessary(VM*);
+
     void compile(VM*, Yarr::YarrCharSize);
     void compileIfNecessary(VM&, Yarr::YarrCharSize);
 

Modified: trunk/Source/_javascript_Core/runtime/RegExpInlines.h (225929 => 225930)


--- trunk/Source/_javascript_Core/runtime/RegExpInlines.h	2017-12-14 22:15:25 UTC (rev 225929)
+++ trunk/Source/_javascript_Core/runtime/RegExpInlines.h	2017-12-14 22:16:38 UTC (rev 225930)
@@ -110,11 +110,25 @@
 
     int result;
 #if ENABLE(YARR_JIT)
+#ifdef JIT_ALL_PARENS_EXPRESSIONS
+    char patternContextBuffer[patternContextBufferSize];
+#define EXTRA_JIT_PARAMS  , patternContextBuffer, patternContextBufferSize
+#else
+#define EXTRA_JIT_PARAMS
+#endif
+
     if (m_state == JITCode) {
         if (s.is8Bit())
-            result = m_regExpJITCode.execute(s.characters8(), startOffset, s.length(), offsetVector).start;
+            result = m_regExpJITCode.execute(s.characters8(), startOffset, s.length(), offsetVector EXTRA_JIT_PARAMS).start;
         else
-            result = m_regExpJITCode.execute(s.characters16(), startOffset, s.length(), offsetVector).start;
+            result = m_regExpJITCode.execute(s.characters16(), startOffset, s.length(), offsetVector EXTRA_JIT_PARAMS).start;
+
+        if (result == Yarr::JSRegExpJITCodeFailure) {
+            // JIT'ed code couldn't handle _expression_, so punt back to the interpreter.
+            byteCodeCompileIfNecessary(&vm);
+            result = Yarr::interpret(m_regExpBytecode.get(), s, startOffset, reinterpret_cast<unsigned*>(offsetVector));
+        }
+
 #if ENABLE(YARR_JIT_DEBUG)
         matchCompareWithInterpreter(s, startOffset, offsetVector, result);
 #endif
@@ -199,15 +213,30 @@
     compileIfNecessaryMatchOnly(vm, s.is8Bit() ? Yarr::Char8 : Yarr::Char16);
 
 #if ENABLE(YARR_JIT)
+#ifdef JIT_ALL_PARENS_EXPRESSIONS
+    char patternContextBuffer[patternContextBufferSize];
+#define EXTRA_JIT_PARAMS  , patternContextBuffer, patternContextBufferSize
+#else
+#define EXTRA_JIT_PARAMS
+#endif
+
+    MatchResult result;
+
     if (m_state == JITCode) {
-        MatchResult result = s.is8Bit() ?
-            m_regExpJITCode.execute(s.characters8(), startOffset, s.length()) :
-            m_regExpJITCode.execute(s.characters16(), startOffset, s.length());
+        if (s.is8Bit())
+            result = m_regExpJITCode.execute(s.characters8(), startOffset, s.length() EXTRA_JIT_PARAMS);
+        else
+            result = m_regExpJITCode.execute(s.characters16(), startOffset, s.length() EXTRA_JIT_PARAMS);
+
 #if ENABLE(REGEXP_TRACING)
         if (!result)
             m_rtMatchOnlyFoundCount++;
 #endif
-        return result;
+        if (result.start != static_cast<size_t>(Yarr::JSRegExpJITCodeFailure))
+            return result;
+
+        // JIT'ed code couldn't handle _expression_, so punt back to the interpreter.
+        byteCodeCompileIfNecessary(&vm);
     }
 #endif
 

Modified: trunk/Source/_javascript_Core/testRegExp.cpp (225929 => 225930)


--- trunk/Source/_javascript_Core/testRegExp.cpp	2017-12-14 22:15:25 UTC (rev 225929)
+++ trunk/Source/_javascript_Core/testRegExp.cpp	2017-12-14 22:16:38 UTC (rev 225930)
@@ -315,10 +315,10 @@
     return -1;
 }
 
-static RegExp* parseRegExpLine(VM& vm, char* line, int lineLength)
+static RegExp* parseRegExpLine(VM& vm, char* line, int lineLength, const char** regexpError)
 {
     StringBuilder pattern;
-    
+
     if (line[0] != '/')
         return 0;
 
@@ -330,9 +330,11 @@
     ++i;
 
     RegExp* r = RegExp::create(vm, pattern.toString(), regExpFlags(line + i));
-    if (r->isValid())
-        return r;
-    return nullptr;
+    if (!r->isValid()) {
+        *regexpError = r->errorMessage();
+        return nullptr;
+    }
+    return r;
 }
 
 static RegExpTest* parseTestLine(char* line, int lineLength)
@@ -431,6 +433,7 @@
         size_t lineLength = 0;
         char* linePtr = 0;
         unsigned int lineNumber = 0;
+        const char* regexpError = nullptr;
 
         while ((linePtr = fgets(&lineBuffer[0], MaxLineLength, testCasesFile))) {
             lineLength = strlen(linePtr);
@@ -444,7 +447,11 @@
                 continue;
 
             if (linePtr[0] == '/') {
-                regexp = parseRegExpLine(vm, linePtr, lineLength);
+                regexp = parseRegExpLine(vm, linePtr, lineLength, &regexpError);
+                if (!regexp) {
+                    failures++;
+                    fprintf(stderr, "Failure on line %u. '%s' %s\n", lineNumber, linePtr, regexpError);
+                }
             } else if (linePtr[0] == ' ') {
                 RegExpTest* regExpTest = parseTestLine(linePtr, lineLength);
                 
@@ -461,10 +468,10 @@
             } else if (linePtr[0] == '-') {
                 tests++;
                 regexp = 0; // Reset the live regexp to avoid confusing other subsequent tests
-                bool successfullyParsed = parseRegExpLine(vm, linePtr + 1, lineLength - 1);
+                bool successfullyParsed = parseRegExpLine(vm, linePtr + 1, lineLength - 1, &regexpError);
                 if (successfullyParsed) {
                     failures++;
-                    fprintf(stderr, "Failure on line %u. '%s' is not a valid regexp\n", lineNumber, linePtr + 1);
+                    fprintf(stderr, "Failure on line %u. '%s' %s\n", lineNumber, linePtr + 1, regexpError);
                 }
             }
         }

Modified: trunk/Source/_javascript_Core/yarr/Yarr.h (225929 => 225930)


--- trunk/Source/_javascript_Core/yarr/Yarr.h	2017-12-14 22:15:25 UTC (rev 225929)
+++ trunk/Source/_javascript_Core/yarr/Yarr.h	2017-12-14 22:16:38 UTC (rev 225930)
@@ -36,9 +36,9 @@
 #define YarrStackSpaceForBackTrackInfoBackReference 2
 #define YarrStackSpaceForBackTrackInfoAlternative 1 // One per alternative.
 #define YarrStackSpaceForBackTrackInfoParentheticalAssertion 1
-#define YarrStackSpaceForBackTrackInfoParenthesesOnce 1 // Only for !fixed quantifiers.
+#define YarrStackSpaceForBackTrackInfoParenthesesOnce 2
 #define YarrStackSpaceForBackTrackInfoParenthesesTerminal 1
-#define YarrStackSpaceForBackTrackInfoParentheses 2
+#define YarrStackSpaceForBackTrackInfoParentheses 4
 #define YarrStackSpaceForDotStarEnclosure 1
 
 static const unsigned quantifyInfinite = UINT_MAX;
@@ -52,9 +52,10 @@
     JSRegExpMatch = 1,
     JSRegExpNoMatch = 0,
     JSRegExpErrorNoMatch = -1,
-    JSRegExpErrorHitLimit = -2,
-    JSRegExpErrorNoMemory = -3,
-    JSRegExpErrorInternal = -4
+    JSRegExpJITCodeFailure = -2,
+    JSRegExpErrorHitLimit = -3,
+    JSRegExpErrorNoMemory = -4,
+    JSRegExpErrorInternal = -5,
 };
 
 enum YarrCharSize {

Modified: trunk/Source/_javascript_Core/yarr/YarrInterpreter.cpp (225929 => 225930)


--- trunk/Source/_javascript_Core/yarr/YarrInterpreter.cpp	2017-12-14 22:15:25 UTC (rev 225929)
+++ trunk/Source/_javascript_Core/yarr/YarrInterpreter.cpp	2017-12-14 22:16:38 UTC (rev 225930)
@@ -27,6 +27,7 @@
 #include "config.h"
 #include "YarrInterpreter.h"
 
+#include "Options.h"
 #include "SuperSampler.h"
 #include "Yarr.h"
 #include "YarrCanonicalize.h"
@@ -1669,6 +1670,11 @@
         emitDisjunction(m_pattern.m_body);
         regexEnd();
 
+#ifndef NDEBUG
+        if (Options::dumpCompiledRegExpPatterns())
+            dumpDisjunction(m_bodyDisjunction.get());
+#endif
+
         return std::make_unique<BytecodePattern>(WTFMove(m_bodyDisjunction), m_allParenthesesInfo, m_pattern, allocator, lock);
     }
 
@@ -1829,16 +1835,6 @@
         return beginTerm;
     }
 
-#ifndef NDEBUG
-    void dumpDisjunction(ByteDisjunction* disjunction)
-    {
-        dataLogF("ByteDisjunction(%p):\n\t", disjunction);
-        for (unsigned i = 0; i < disjunction->terms.size(); ++i)
-            dataLogF("{ %d } ", disjunction->terms[i].type);
-        dataLogF("\n");
-    }
-#endif
-
     void closeAlternative(int beginTerm)
     {
         int origBeginTerm = beginTerm;
@@ -2111,7 +2107,245 @@
             }
         }
     }
+#ifndef NDEBUG
+    void dumpDisjunction(ByteDisjunction* disjunction, unsigned nesting = 0)
+    {
+        PrintStream& out = WTF::dataFile();
 
+        unsigned termIndexNest = 0;
+
+        if (!nesting) {
+            out.printf("ByteDisjunction(%p):\n", disjunction);
+            nesting = 1;
+        } else {
+            termIndexNest = nesting - 1;
+            nesting = 2;
+        }
+
+        auto outputTermIndexAndNest = [&](size_t index, unsigned termNesting) {
+            for (unsigned nestingDepth = 0; nestingDepth < termIndexNest; nestingDepth++)
+                out.print("  ");
+            out.printf("%4lu", index);
+            for (unsigned nestingDepth = 0; nestingDepth < termNesting; nestingDepth++)
+                out.print("  ");
+        };
+
+        auto dumpQuantity = [&](ByteTerm& term) {
+            if (term.atom.quantityType == QuantifierFixedCount && term.atom.quantityMinCount == 1 && term.atom.quantityMaxCount == 1)
+                return;
+
+            out.print(" {", term.atom.quantityMinCount);
+            if (term.atom.quantityMinCount != term.atom.quantityMaxCount) {
+                if (term.atom.quantityMaxCount == UINT_MAX)
+                    out.print(",inf");
+                else
+                    out.print(",", term.atom.quantityMaxCount);
+            }
+            out.print("}");
+            if (term.atom.quantityType == QuantifierGreedy)
+                out.print(" greedy");
+            else if (term.atom.quantityType == QuantifierNonGreedy)
+                out.print(" non-greedy");
+        };
+
+        auto dumpCaptured = [&](ByteTerm& term) {
+            if (term.capture())
+                out.print(" captured (#", term.atom.subpatternId, ")");
+        };
+
+        auto dumpInverted = [&](ByteTerm& term) {
+            if (term.invert())
+                out.print(" inverted");
+        };
+
+        auto dumpInputPosition = [&](ByteTerm& term) {
+            out.printf(" inputPosition %u", term.inputPosition);
+        };
+
+        auto dumpCharacter = [&](ByteTerm& term) {
+            out.print(" ");
+            dumpUChar32(out, term.atom.patternCharacter);
+        };
+
+        auto dumpCharClass = [&](ByteTerm& term) {
+            out.print(" ");
+            dumpCharacterClass(out, &m_pattern, term.atom.characterClass);
+        };
+
+        for (size_t idx = 0; idx < disjunction->terms.size(); ++idx) {
+            ByteTerm term = disjunction->terms[idx];
+
+            bool outputNewline = true;
+
+            switch (term.type) {
+            case ByteTerm::TypeBodyAlternativeBegin:
+                outputTermIndexAndNest(idx, nesting++);
+                out.print("BodyAlternativeBegin");
+                if (term.alternative.onceThrough)
+                    out.print(" onceThrough");
+                break;
+            case ByteTerm::TypeBodyAlternativeDisjunction:
+                outputTermIndexAndNest(idx, nesting - 1);
+                out.print("BodyAlternativeDisjunction");
+                break;
+            case ByteTerm::TypeBodyAlternativeEnd:
+                outputTermIndexAndNest(idx, --nesting);
+                out.print("BodyAlternativeEnd");
+                break;
+            case ByteTerm::TypeAlternativeBegin:
+                outputTermIndexAndNest(idx, nesting++);
+                out.print("AlternativeBegin");
+                break;
+            case ByteTerm::TypeAlternativeDisjunction:
+                outputTermIndexAndNest(idx, nesting - 1);
+                out.print("AlternativeDisjunction");
+                break;
+            case ByteTerm::TypeAlternativeEnd:
+                outputTermIndexAndNest(idx, --nesting);
+                out.print("AlternativeEnd");
+                break;
+            case ByteTerm::TypeSubpatternBegin:
+                outputTermIndexAndNest(idx, nesting++);
+                out.print("SubpatternBegin");
+                break;
+            case ByteTerm::TypeSubpatternEnd:
+                outputTermIndexAndNest(idx, --nesting);
+                out.print("SubpatternEnd");
+                break;
+            case ByteTerm::TypeAssertionBOL:
+                outputTermIndexAndNest(idx, nesting);
+                out.print("AssertionBOL");
+                break;
+            case ByteTerm::TypeAssertionEOL:
+                outputTermIndexAndNest(idx, nesting);
+                out.print("AssertionEOL");
+                break;
+            case ByteTerm::TypeAssertionWordBoundary:
+                outputTermIndexAndNest(idx, nesting);
+                out.print("AssertionWordBoundary");
+                break;
+            case ByteTerm::TypePatternCharacterOnce:
+                outputTermIndexAndNest(idx, nesting);
+                out.print("PatternCharacterOnce");
+                dumpInverted(term);
+                dumpInputPosition(term);
+                dumpCharacter(term);
+                dumpQuantity(term);
+                break;
+            case ByteTerm::TypePatternCharacterFixed:
+                outputTermIndexAndNest(idx, nesting);
+                out.print("PatternCharacterFixed");
+                dumpInverted(term);
+                dumpInputPosition(term);
+                dumpCharacter(term);
+                out.print(" {", term.atom.quantityMinCount, "}");
+                break;
+            case ByteTerm::TypePatternCharacterGreedy:
+                outputTermIndexAndNest(idx, nesting);
+                out.print("PatternCharacterGreedy");
+                dumpInverted(term);
+                dumpInputPosition(term);
+                dumpCharacter(term);
+                dumpQuantity(term);
+                break;
+            case ByteTerm::TypePatternCharacterNonGreedy:
+                outputTermIndexAndNest(idx, nesting);
+                out.print("PatternCharacterNonGreedy");
+                dumpInverted(term);
+                dumpInputPosition(term);
+                dumpCharacter(term);
+                dumpQuantity(term);
+                break;
+            case ByteTerm::TypePatternCasedCharacterOnce:
+                outputTermIndexAndNest(idx, nesting);
+                out.print("PatternCasedCharacterOnce");
+                break;
+            case ByteTerm::TypePatternCasedCharacterFixed:
+                outputTermIndexAndNest(idx, nesting);
+                out.print("PatternCasedCharacterFixed");
+                break;
+            case ByteTerm::TypePatternCasedCharacterGreedy:
+                outputTermIndexAndNest(idx, nesting);
+                out.print("PatternCasedCharacterGreedy");
+                break;
+            case ByteTerm::TypePatternCasedCharacterNonGreedy:
+                outputTermIndexAndNest(idx, nesting);
+                out.print("PatternCasedCharacterNonGreedy");
+                break;
+            case ByteTerm::TypeCharacterClass:
+                outputTermIndexAndNest(idx, nesting);
+                out.print("CharacterClass");
+                dumpInverted(term);
+                dumpInputPosition(term);
+                dumpCharClass(term);
+                dumpQuantity(term);
+                break;
+            case ByteTerm::TypeBackReference:
+                outputTermIndexAndNest(idx, nesting);
+                out.print("BackReference #", term.atom.subpatternId);
+                dumpQuantity(term);
+                break;
+            case ByteTerm::TypeParenthesesSubpattern:
+                outputTermIndexAndNest(idx, nesting);
+                out.print("ParenthesesSubpattern");
+                dumpCaptured(term);
+                dumpInverted(term);
+                dumpInputPosition(term);
+                dumpQuantity(term);
+                out.print("\n");
+                outputNewline = false;
+                dumpDisjunction(term.atom.parenthesesDisjunction, nesting);
+                break;
+            case ByteTerm::TypeParenthesesSubpatternOnceBegin:
+                outputTermIndexAndNest(idx, nesting++);
+                out.print("ParenthesesSubpatternOnceBegin");
+                dumpCaptured(term);
+                dumpInverted(term);
+                dumpInputPosition(term);
+                break;
+            case ByteTerm::TypeParenthesesSubpatternOnceEnd:
+                outputTermIndexAndNest(idx, --nesting);
+                out.print("ParenthesesSubpatternOnceEnd");
+                break;
+            case ByteTerm::TypeParenthesesSubpatternTerminalBegin:
+                outputTermIndexAndNest(idx, nesting++);
+                out.print("ParenthesesSubpatternTerminalBegin");
+                dumpInverted(term);
+                dumpInputPosition(term);
+                break;
+            case ByteTerm::TypeParenthesesSubpatternTerminalEnd:
+                outputTermIndexAndNest(idx, --nesting);
+                out.print("ParenthesesSubpatternTerminalEnd");
+                break;
+            case ByteTerm::TypeParentheticalAssertionBegin:
+                outputTermIndexAndNest(idx, nesting++);
+                out.print("ParentheticalAssertionBegin");
+                dumpInverted(term);
+                dumpInputPosition(term);
+                break;
+            case ByteTerm::TypeParentheticalAssertionEnd:
+                outputTermIndexAndNest(idx, --nesting);
+                out.print("ParentheticalAssertionEnd");
+                break;
+            case ByteTerm::TypeCheckInput:
+                outputTermIndexAndNest(idx, nesting);
+                out.print("CheckInput ", term.checkInputCount);
+                break;
+            case ByteTerm::TypeUncheckInput:
+                outputTermIndexAndNest(idx, nesting);
+                out.print("UncheckInput ", term.checkInputCount);
+                break;
+            case ByteTerm::TypeDotStarEnclosure:
+                outputTermIndexAndNest(idx, nesting);
+                out.print("DotStarEnclosure");
+                break;
+            }
+            if (outputNewline)
+                out.print("\n");
+        }
+    }
+#endif
+
 private:
     YarrPattern& m_pattern;
     std::unique_ptr<ByteDisjunction> m_bodyDisjunction;
@@ -2152,7 +2386,7 @@
 COMPILE_ASSERT(sizeof(BackTrackInfoAlternative) == (YarrStackSpaceForBackTrackInfoAlternative * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoAlternative);
 COMPILE_ASSERT(sizeof(BackTrackInfoParentheticalAssertion) == (YarrStackSpaceForBackTrackInfoParentheticalAssertion * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoParentheticalAssertion);
 COMPILE_ASSERT(sizeof(BackTrackInfoParenthesesOnce) == (YarrStackSpaceForBackTrackInfoParenthesesOnce * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoParenthesesOnce);
-COMPILE_ASSERT(sizeof(Interpreter<UChar>::BackTrackInfoParentheses) == (YarrStackSpaceForBackTrackInfoParentheses * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoParentheses);
+COMPILE_ASSERT(sizeof(Interpreter<UChar>::BackTrackInfoParentheses) <= (YarrStackSpaceForBackTrackInfoParentheses * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoParentheses);
 
 
 } }

Modified: trunk/Source/_javascript_Core/yarr/YarrJIT.cpp (225929 => 225930)


--- trunk/Source/_javascript_Core/yarr/YarrJIT.cpp	2017-12-14 22:15:25 UTC (rev 225929)
+++ trunk/Source/_javascript_Core/yarr/YarrJIT.cpp	2017-12-14 22:16:38 UTC (rev 225930)
@@ -58,20 +58,25 @@
 
 #define HAVE_INITIAL_START_REG
 #elif CPU(ARM64)
+    // Argument registers
     static const RegisterID input = ARM64Registers::x0;
     static const RegisterID index = ARM64Registers::x1;
     static const RegisterID length = ARM64Registers::x2;
     static const RegisterID output = ARM64Registers::x3;
+    static const RegisterID freelistRegister = ARM64Registers::x4;
+    static const RegisterID freelistSizeRegister = ARM64Registers::x5;
 
-    static const RegisterID regT0 = ARM64Registers::x4;
-    static const RegisterID regT1 = ARM64Registers::x5;
-    static const RegisterID regUnicodeInputAndTrail = ARM64Registers::x6;
-    static const RegisterID regUnicodeTemp = ARM64Registers::x7;
-    static const RegisterID initialStart = ARM64Registers::x8;
-    static const RegisterID supplementaryPlanesBase = ARM64Registers::x9;
-    static const RegisterID surrogateTagMask = ARM64Registers::x10;
-    static const RegisterID leadingSurrogateTag = ARM64Registers::x11;
-    static const RegisterID trailingSurrogateTag = ARM64Registers::x12;
+    // Scratch registers
+    static const RegisterID regT0 = ARM64Registers::x6;
+    static const RegisterID regT1 = ARM64Registers::x7;
+    static const RegisterID regT2 = ARM64Registers::x8;
+    static const RegisterID remainingMatchCount = ARM64Registers::x9;
+    static const RegisterID regUnicodeInputAndTrail = ARM64Registers::x10;
+    static const RegisterID initialStart = ARM64Registers::x11;
+    static const RegisterID supplementaryPlanesBase = ARM64Registers::x12;
+    static const RegisterID surrogateTagMask = ARM64Registers::x13;
+    static const RegisterID leadingSurrogateTag = ARM64Registers::x14;
+    static const RegisterID trailingSurrogateTag = ARM64Registers::x15;
 
     static const RegisterID returnRegister = ARM64Registers::x0;
     static const RegisterID returnRegister2 = ARM64Registers::x1;
@@ -105,10 +110,13 @@
     static const RegisterID returnRegister2 = X86Registers::edx;
 #elif CPU(X86_64)
 #if !OS(WINDOWS)
+    // Argument registers
     static const RegisterID input = X86Registers::edi;
     static const RegisterID index = X86Registers::esi;
     static const RegisterID length = X86Registers::edx;
     static const RegisterID output = X86Registers::ecx;
+    static const RegisterID freelistRegister = X86Registers::r8;
+    static const RegisterID freelistSizeRegister = X86Registers::r9; // Only used during initialization.
 #else
     // If the return value doesn't fit in 64bits, its destination is pointed by rcx and the parameters are shifted.
     // http://msdn.microsoft.com/en-us/library/7572ztz4.aspx
@@ -119,23 +127,23 @@
     static const RegisterID output = X86Registers::r10;
 #endif
 
+    // Scratch registers
     static const RegisterID regT0 = X86Registers::eax;
 #if !OS(WINDOWS)
-    static const RegisterID regT1 = X86Registers::r8;
+    static const RegisterID regT1 = X86Registers::r9;
+    static const RegisterID regT2 = X86Registers::r10;
 #else
     static const RegisterID regT1 = X86Registers::ecx;
+    static const RegisterID regT2 = X86Registers::edi;
 #endif
 
     static const RegisterID initialStart = X86Registers::ebx;
 #if !OS(WINDOWS)
-    static const RegisterID regUnicodeInputAndTrail = X86Registers::r9;
-    static const RegisterID regUnicodeTemp = X86Registers::r10;
+    static const RegisterID remainingMatchCount = X86Registers::r12;
 #else
-    static const RegisterID regUnicodeInputAndTrail = X86Registers::esi;
-    static const RegisterID regUnicodeTemp = X86Registers::edi;
+    static const RegisterID remainingMatchCount = X86Registers::esi;
 #endif
-    static const RegisterID supplementaryPlanesBase = X86Registers::r12;
-    static const RegisterID surrogateTagMask = X86Registers::r13;
+    static const RegisterID regUnicodeInputAndTrail = X86Registers::r13;
     static const RegisterID leadingSurrogateTag = X86Registers::r14;
     static const RegisterID trailingSurrogateTag = X86Registers::r15;
 
@@ -142,10 +150,165 @@
     static const RegisterID returnRegister = X86Registers::eax;
     static const RegisterID returnRegister2 = X86Registers::edx;
 
+    const TrustedImm32 supplementaryPlanesBase = TrustedImm32(0x10000);
+    const TrustedImm32 surrogateTagMask = TrustedImm32(0xfffffc00);
 #define HAVE_INITIAL_START_REG
 #define JIT_UNICODE_EXPRESSIONS
 #endif
 
+#ifdef JIT_ALL_PARENS_EXPRESSIONS
+    // Holds the two counts that determine how large a ParenContext record is:
+    // the number of Subpatterns entries and the number of saved frame slots
+    // (see ParenContext::sizeFor()).
+    struct ParenContextSizes {
+        size_t m_numSubpatterns;
+        size_t m_frameSlots;
+
+        ParenContextSizes(size_t numSubpatterns, size_t frameSlots)
+            : m_numSubpatterns(numSubpatterns)
+            , m_frameSlots(frameSlots)
+        {
+        }
+
+        // The accessors do not modify the object, so they are const-qualified;
+        // this lets them be called through a const object or const reference.
+        size_t numSubpatterns() const { return m_numSubpatterns; }
+
+        size_t frameSlots() const { return m_frameSlots; }
+    };
+
+    // Record used to save and later restore the state of one set of
+    // parentheses. A fixed header (next, begin, matchAmount, returnAddress) is
+    // followed by a variable number of Subpatterns entries and then by saved
+    // frame slots. The static helpers return byte offsets into such a record
+    // for use as JIT address displacements.
+    struct ParenContext {
+        struct ParenContext* next;
+        uint32_t begin;
+        uint32_t matchAmount;
+        uintptr_t returnAddress;
+        struct Subpatterns {
+            unsigned start;
+            unsigned end;
+        } subpatterns[0];
+        uintptr_t frameSlots[0];
+
+        // Total number of bytes occupied by a record with the given counts.
+        static size_t sizeFor(ParenContextSizes& parenContextSizes)
+        {
+            size_t subpatternBytes = sizeof(Subpatterns) * parenContextSizes.numSubpatterns();
+            size_t frameSlotBytes = sizeof(uintptr_t) * parenContextSizes.frameSlots();
+            return sizeof(ParenContext) + subpatternBytes + frameSlotBytes;
+        }
+
+        static ptrdiff_t nextOffset() { return offsetof(ParenContext, next); }
+
+        static ptrdiff_t beginOffset() { return offsetof(ParenContext, begin); }
+
+        static ptrdiff_t matchAmountOffset() { return offsetof(ParenContext, matchAmount); }
+
+        static ptrdiff_t returnAddressOffset() { return offsetof(ParenContext, returnAddress); }
+
+        // Subpattern ids are 1-based here: id 1 maps to the first Subpatterns entry.
+        static ptrdiff_t subpatternOffset(size_t subpattern)
+        {
+            size_t entryIndex = subpattern - 1;
+            return offsetof(ParenContext, subpatterns) + entryIndex * sizeof(Subpatterns);
+        }
+
+        // Offset of the first byte past the last Subpatterns entry, which is
+        // where the saved frame slots begin.
+        static ptrdiff_t savedFrameOffset(ParenContextSizes& parenContextSizes)
+        {
+            size_t subpatternBytes = (parenContextSizes.numSubpatterns()) * sizeof(Subpatterns);
+            return offsetof(ParenContext, subpatterns) + subpatternBytes;
+        }
+    };
+
+    // Emits code that threads the buffer passed in freelistRegister into a
+    // singly linked free list of ParenContext records. freelistSizeRegister
+    // holds the buffer's size in bytes on entry and is consumed here. A null
+    // freelistRegister skips the initialization entirely.
+    void initParenContextFreeList()
+    {
+        RegisterID parenContextPointer = regT0;
+        RegisterID nextParenContextPointer = regT2;
+
+        size_t parenContextSize = ParenContext::sizeFor(m_parenContextSizes);
+
+        parenContextSize = WTF::roundUpToMultipleOf<sizeof(uintptr_t)>(parenContextSize);
+
+        // Check that the paren context is a reasonable size.
+        if (parenContextSize > INT16_MAX)
+            m_abortExecution.append(jump());
+
+        Jump emptyFreeList = branchTestPtr(Zero, freelistRegister);
+
+        // A non-null buffer that cannot hold even one record must not become
+        // the free list: the loop below would exit immediately but leave
+        // freelistRegister pointing at the buffer, and the first allocation
+        // would then write past its end. Take the abort exit instead.
+        m_abortExecution.append(branch32(Below, freelistSizeRegister, TrustedImm32(parenContextSize)));
+
+        move(freelistRegister, parenContextPointer);
+        addPtr(TrustedImm32(parenContextSize), freelistRegister, nextParenContextPointer);
+        // freelistSizeRegister becomes the highest address at which a whole
+        // record still fits (buffer start + buffer size - record size).
+        addPtr(freelistRegister, freelistSizeRegister);
+        subPtr(TrustedImm32(parenContextSize), freelistSizeRegister);
+
+        // Link each record to the one that follows it for as long as the
+        // following record still fits in the buffer.
+        Label loopTop(this);
+        Jump initDone = branchPtr(Above, nextParenContextPointer, freelistSizeRegister);
+        storePtr(nextParenContextPointer, Address(parenContextPointer, ParenContext::nextOffset()));
+        move(nextParenContextPointer, parenContextPointer);
+        addPtr(TrustedImm32(parenContextSize), parenContextPointer, nextParenContextPointer);
+        jump(loopTop);
+
+        // Terminate the list: the last record that fits has a null next pointer.
+        initDone.link(this);
+        storePtr(TrustedImmPtr(0), Address(parenContextPointer, ParenContext::nextOffset()));
+        emptyFreeList.link(this);
+    }
+
+    void allocateParenContext(RegisterID result) // Emits code that takes the head record off the free list and leaves its address in |result|.
+    {
+        m_abortExecution.append(branchTestPtr(Zero, freelistRegister)); // Null free-list head: nothing left to allocate, take the abort exit.
+        sub32(TrustedImm32(1), remainingMatchCount); // Each allocation decrements remainingMatchCount by one.
+        m_hitMatchLimit.append(branchTestPtr(Zero, remainingMatchCount)); // Counter reached zero: take the match-limit exit.
+        move(freelistRegister, result);
+        loadPtr(Address(freelistRegister, ParenContext::nextOffset()), freelistRegister); // The free-list head advances to the allocated record's next pointer.
+    }
+
+    void freeParenContext(RegisterID headPtrRegister, RegisterID newHeadPtrRegister) // Emits code that unlinks the record at |headPtrRegister| from its list and pushes it onto the free list.
+    {
+        loadPtr(Address(headPtrRegister, ParenContext::nextOffset()), newHeadPtrRegister); // |newHeadPtrRegister| receives the record that followed the freed one.
+        storePtr(freelistRegister, Address(headPtrRegister, ParenContext::nextOffset())); // The freed record now points at the old free-list head...
+        move(headPtrRegister, freelistRegister); // ...and becomes the new free-list head.
+    }
+
+    void saveParenContext(RegisterID parenContextReg, RegisterID tempReg, unsigned firstSubpattern, unsigned lastSubpattern, unsigned subpatternBaseFrameLocation) // Emits code that copies the current parentheses state into the record at |parenContextReg|; |tempReg| is used as scratch.
+    {
+        store32(index, Address(parenContextReg, ParenContext::beginOffset())); // begin <- current value of the index register.
+        loadFromFrame(subpatternBaseFrameLocation + BackTrackInfoParentheses::matchAmountIndex(), tempReg);
+        store32(tempReg, Address(parenContextReg, ParenContext::matchAmountOffset())); // matchAmount <- the frame's matchAmount slot.
+        loadFromFrame(subpatternBaseFrameLocation + BackTrackInfoParentheses::returnAddressIndex(), tempReg);
+        storePtr(tempReg, Address(parenContextReg, ParenContext::returnAddressOffset())); // returnAddress <- the frame's returnAddress slot.
+        if (compileMode == IncludeSubpatterns) {
+            for (unsigned subpattern = firstSubpattern; subpattern <= lastSubpattern; subpattern++) {
+                loadPtr(Address(output, (subpattern << 1) * sizeof(unsigned)), tempReg); // Pointer-wide load at the subpattern's slot in |output|; presumably covers both start and end on 64-bit targets - TODO confirm.
+                storePtr(tempReg, Address(parenContextReg, ParenContext::subpatternOffset(subpattern)));
+                clearSubpatternStart(subpattern); // After saving, the subpattern's start entry in |output| is cleared.
+            }
+        }
+        subpatternBaseFrameLocation += YarrStackSpaceForBackTrackInfoParentheses; // Skip this parentheses' own BackTrackInfoParentheses slots.
+        for (unsigned frameLocation = subpatternBaseFrameLocation; frameLocation < m_parenContextSizes.frameSlots(); frameLocation++) { // Copy every remaining frame slot up to frameSlots().
+            loadFromFrame(frameLocation, tempReg);
+            storePtr(tempReg, Address(parenContextReg, ParenContext::savedFrameOffset(m_parenContextSizes) + frameLocation * sizeof(uintptr_t))); // Saved slots are indexed by absolute frame location.
+        }
+    }
+
+    void restoreParenContext(RegisterID parenContextReg, RegisterID tempReg, unsigned firstSubpattern, unsigned lastSubpattern, unsigned subpatternBaseFrameLocation) // Emits code that copies the state held in the record at |parenContextReg| back into the index register, the frame and |output|; |tempReg| is used as scratch.
+    {
+        load32(Address(parenContextReg, ParenContext::beginOffset()), index); // The index register is reloaded from the saved begin value...
+        storeToFrame(index, subpatternBaseFrameLocation + BackTrackInfoParentheses::beginIndex()); // ...which is also written to the frame's begin slot.
+        load32(Address(parenContextReg, ParenContext::matchAmountOffset()), tempReg);
+        storeToFrame(tempReg, subpatternBaseFrameLocation + BackTrackInfoParentheses::matchAmountIndex()); // Frame's matchAmount slot <- saved matchAmount.
+        loadPtr(Address(parenContextReg, ParenContext::returnAddressOffset()), tempReg);
+        storeToFrame(tempReg, subpatternBaseFrameLocation + BackTrackInfoParentheses::returnAddressIndex()); // Frame's returnAddress slot <- saved returnAddress.
+        if (compileMode == IncludeSubpatterns) {
+            for (unsigned subpattern = firstSubpattern; subpattern <= lastSubpattern; subpattern++) {
+                loadPtr(Address(parenContextReg, ParenContext::subpatternOffset(subpattern)), tempReg);
+                storePtr(tempReg, Address(output, (subpattern << 1) * sizeof(unsigned))); // Pointer-wide store back to the subpattern's slot in |output|, mirroring the load in saveParenContext().
+            }
+        }
+        subpatternBaseFrameLocation += YarrStackSpaceForBackTrackInfoParentheses; // Skip this parentheses' own BackTrackInfoParentheses slots.
+        for (unsigned frameLocation = subpatternBaseFrameLocation; frameLocation < m_parenContextSizes.frameSlots(); frameLocation++) { // Restore every remaining frame slot up to frameSlots().
+            loadPtr(Address(parenContextReg, ParenContext::savedFrameOffset(m_parenContextSizes) + frameLocation * sizeof(uintptr_t)), tempReg);
+            storeToFrame(tempReg, frameLocation);
+        }
+    }
+#endif
+
     void optimizeAlternative(PatternAlternative* alternative)
     {
         if (!alternative->m_terms.size())
@@ -354,14 +517,14 @@
 
         JumpList notUnicode;
         load16Unaligned(regUnicodeInputAndTrail, resultReg);
-        and32(surrogateTagMask, resultReg, regUnicodeTemp);
-        notUnicode.append(branch32(NotEqual, regUnicodeTemp, leadingSurrogateTag));
+        and32(surrogateTagMask, resultReg, regT2);
+        notUnicode.append(branch32(NotEqual, regT2, leadingSurrogateTag));
         addPtr(TrustedImm32(2), regUnicodeInputAndTrail);
-        getEffectiveAddress(BaseIndex(input, length, TimesTwo), regUnicodeTemp);
-        notUnicode.append(branchPtr(AboveOrEqual, regUnicodeInputAndTrail, regUnicodeTemp));
+        getEffectiveAddress(BaseIndex(input, length, TimesTwo), regT2);
+        notUnicode.append(branch32(AboveOrEqual, regUnicodeInputAndTrail, regT2));
         load16Unaligned(Address(regUnicodeInputAndTrail), regUnicodeInputAndTrail);
-        and32(surrogateTagMask, regUnicodeInputAndTrail, regUnicodeTemp);
-        notUnicode.append(branch32(NotEqual, regUnicodeTemp, trailingSurrogateTag));
+        and32(surrogateTagMask, regUnicodeInputAndTrail, regT2);
+        notUnicode.append(branch32(NotEqual, regT2, trailingSurrogateTag));
         sub32(leadingSurrogateTag, resultReg);
         sub32(trailingSurrogateTag, regUnicodeInputAndTrail);
         lshift32(TrustedImm32(10), resultReg);
@@ -422,6 +585,13 @@
         poke(imm, frameLocation);
     }
 
+#if CPU(ARM64) || CPU(X86_64)
+    void storeToFrame(TrustedImmPtr imm, unsigned frameLocation) // Pointer-immediate overload: forwards |imm| and |frameLocation| to poke().
+    {
+        poke(imm, frameLocation);
+    }
+#endif
+
     DataLabelPtr storeToFrameWithPatch(unsigned frameLocation)
     {
         return storePtrWithPatch(TrustedImmPtr(0), Address(stackPointerRegister, frameLocation * sizeof(void*)));
@@ -467,7 +637,30 @@
         generateReturn();
     }
 
-    // Used to record subpatters, should only be called if compileMode is IncludeSubpatterns.
+    // Emits the exit sequences for the two JIT failure conditions. Jumps
+    // collected in m_abortExecution land on code that puts -2 in
+    // returnRegister; jumps collected in m_hitMatchLimit land on code that puts
+    // -1 there. Both paths then share a tail that removes the call frame,
+    // zeroes returnRegister2 and returns. Nothing is emitted when neither list
+    // holds any jumps.
+    void generateJITFailReturn()
+    {
+        const bool hasAbortJumps = !m_abortExecution.empty();
+        const bool hasMatchLimitJumps = !m_hitMatchLimit.empty();
+        if (!hasAbortJumps && !hasMatchLimitJumps)
+            return;
+
+        JumpList finishExiting;
+        if (hasAbortJumps) {
+            m_abortExecution.link(this);
+            move(TrustedImmPtr(reinterpret_cast<void*>(static_cast<size_t>(-2))), returnRegister);
+            // Hop over the match-limit result so that -2 is not overwritten.
+            finishExiting.append(jump());
+        }
+
+        if (hasMatchLimitJumps) {
+            m_hitMatchLimit.link(this);
+            move(TrustedImmPtr(reinterpret_cast<void*>(static_cast<size_t>(-1))), returnRegister);
+        }
+
+        // Shared tail for both failure paths.
+        finishExiting.link(this);
+        removeCallFrame();
+        move(TrustedImm32(0), returnRegister2);
+        generateReturn();
+    }
+
+    // Used to record subpatterns, should only be called if compileMode is IncludeSubpatterns.
     void setSubpatternStart(RegisterID reg, unsigned subpattern)
     {
         ASSERT(subpattern);
@@ -487,6 +680,12 @@
         store32(TrustedImm32(-1), Address(output, (subpattern << 1) * sizeof(int)));
     }
 
+    // Calls clearSubpatternStart() for every subpattern id in the inclusive
+    // range [subpattern, lastSubpattern]; does nothing when the range is empty.
+    void clearMatches(unsigned subpattern, unsigned lastSubpattern)
+    {
+        unsigned current = subpattern;
+        while (current <= lastSubpattern) {
+            clearSubpatternStart(current);
+            current++;
+        }
+    }
+
     // We use one of three different strategies to track the start of the current match,
     // while matching.
     // 1) If the pattern has a fixed size, do nothing! - we calculate the value lazily
@@ -529,7 +728,7 @@
         OpNestedAlternativeNext,
         OpNestedAlternativeEnd,
         // Used for alternatives in subpatterns where there is only a single
-        // alternative (backtrackingis easier in these cases), or for alternatives
+        // alternative (backtracking is easier in these cases), or for alternatives
         // which never need to be backtracked (those in parenthetical assertions,
         // terminal subpatterns).
         OpSimpleNestedAlternativeBegin,
@@ -541,6 +740,9 @@
         // Used to wrap 'Terminal' subpattern matches (at the end of the regexp).
         OpParenthesesSubpatternTerminalBegin,
         OpParenthesesSubpatternTerminalEnd,
+        // Used to wrap generic captured matches
+        OpParenthesesSubpatternBegin,
+        OpParenthesesSubpatternEnd,
         // Used to wrap parenthetical assertions.
         OpParentheticalAssertionBegin,
         OpParentheticalAssertionEnd,
@@ -1768,10 +1970,7 @@
                 // In the non-simple case, store a 'return address' so we can backtrack correctly.
                 if (op.m_op == OpNestedAlternativeNext) {
                     unsigned parenthesesFrameLocation = term->frameLocation;
-                    unsigned alternativeFrameLocation = parenthesesFrameLocation;
-                    if (term->quantityType != QuantifierFixedCount)
-                        alternativeFrameLocation += YarrStackSpaceForBackTrackInfoParenthesesOnce;
-                    op.m_returnAddress = storeToFrameWithPatch(alternativeFrameLocation);
+                    op.m_returnAddress = storeToFrameWithPatch(parenthesesFrameLocation + BackTrackInfoParentheses::returnAddressIndex());
                 }
 
                 if (term->quantityType != QuantifierFixedCount && !m_ops[op.m_previousOp].m_alternative->m_minimumSize) {
@@ -1818,10 +2017,7 @@
                 // In the non-simple case, store a 'return address' so we can backtrack correctly.
                 if (op.m_op == OpNestedAlternativeEnd) {
                     unsigned parenthesesFrameLocation = term->frameLocation;
-                    unsigned alternativeFrameLocation = parenthesesFrameLocation;
-                    if (term->quantityType != QuantifierFixedCount)
-                        alternativeFrameLocation += YarrStackSpaceForBackTrackInfoParenthesesOnce;
-                    op.m_returnAddress = storeToFrameWithPatch(alternativeFrameLocation);
+                    op.m_returnAddress = storeToFrameWithPatch(parenthesesFrameLocation + BackTrackInfoParentheses::returnAddressIndex());
                 }
 
                 if (term->quantityType != QuantifierFixedCount && !m_ops[op.m_previousOp].m_alternative->m_minimumSize) {
@@ -1963,7 +2159,7 @@
                     pastBreakpoint.link(this);
                 }
 
-                // We know that the match is non-zero, we can accept it  and
+                // We know that the match is non-zero, we can accept it and
                 // loop back up to the head of the subpattern.
                 jump(beginOp.m_reentry);
 
@@ -1973,6 +2169,131 @@
                 break;
             }
 
+            // OpParenthesesSubpatternBegin/End
+            //
+            // These nodes support generic subpatterns.
+            case OpParenthesesSubpatternBegin: {
+#ifdef JIT_ALL_PARENS_EXPRESSIONS
+                PatternTerm* term = op.m_term;
+                unsigned parenthesesFrameLocation = term->frameLocation;
+
+                // Upon entry to a Greedy quantified set of parentheses, store the index.
+                // We'll use this for two purposes:
+                //  - To indicate which iteration we are on of matching the remainder of
+                //    the expression after the parentheses - the first, including the
+                //    match within the parentheses, or the second having skipped over them.
+                //  - To check for empty matches, which must be rejected.
+                //
+                // At the head of a NonGreedy set of parentheses we'll immediately set the
+                // value on the stack to -1 (indicating a match skipping the subpattern),
+                // and plant a jump to the end. We'll also plant a label to backtrack to
+                // to reenter the subpattern later, with a store to set up index on the
+                // second iteration.
+                //
+                // FIXME: for capturing parens, could use the index in the capture array?
+                if (term->quantityType == QuantifierGreedy || term->quantityType == QuantifierNonGreedy) {
+                    storeToFrame(TrustedImm32(0), parenthesesFrameLocation + BackTrackInfoParentheses::matchAmountIndex());
+                    storeToFrame(TrustedImmPtr(0), parenthesesFrameLocation + BackTrackInfoParentheses::parenContextHeadIndex());
+
+                    if (term->quantityType == QuantifierNonGreedy) {
+                        storeToFrame(TrustedImm32(-1), parenthesesFrameLocation + BackTrackInfoParentheses::beginIndex());
+                        op.m_jumps.append(jump());
+                    }
+                    
+                    op.m_reentry = label();
+                    RegisterID currParenContextReg = regT0;
+                    RegisterID newParenContextReg = regT1;
+
+                    loadFromFrame(parenthesesFrameLocation + BackTrackInfoParentheses::parenContextHeadIndex(), currParenContextReg);
+                    allocateParenContext(newParenContextReg);
+                    storePtr(currParenContextReg, newParenContextReg);
+                    storeToFrame(newParenContextReg, parenthesesFrameLocation + BackTrackInfoParentheses::parenContextHeadIndex());
+                    saveParenContext(newParenContextReg, regT2, term->parentheses.subpatternId, term->parentheses.lastSubpatternId, parenthesesFrameLocation);
+                    storeToFrame(index, parenthesesFrameLocation + BackTrackInfoParentheses::beginIndex());
+                }
+
+                // If the parentheses are capturing, store the starting index value to the
+                // captures array, offsetting as necessary.
+                //
+                // FIXME: could avoid offsetting this value in JIT code, apply
+                // offsets only afterwards, at the point the results array is
+                // being accessed.
+                if (term->capture() && compileMode == IncludeSubpatterns) {
+                    const RegisterID indexTemporary = regT0;
+                    unsigned inputOffset = (m_checkedOffset - term->inputPosition).unsafeGet();
+                    if (term->quantityType == QuantifierFixedCount)
+                        inputOffset += term->parentheses.disjunction->m_minimumSize;
+                    if (inputOffset) {
+                        move(index, indexTemporary);
+                        sub32(Imm32(inputOffset), indexTemporary);
+                        setSubpatternStart(indexTemporary, term->parentheses.subpatternId);
+                    } else
+                        setSubpatternStart(index, term->parentheses.subpatternId);
+                }
+#else // !JIT_ALL_PARENS_EXPRESSIONS
+                RELEASE_ASSERT_NOT_REACHED();
+#endif
+                break;
+            }
+            case OpParenthesesSubpatternEnd: {
+#ifdef JIT_ALL_PARENS_EXPRESSIONS
+                PatternTerm* term = op.m_term;
+                unsigned parenthesesFrameLocation = term->frameLocation;
+
+                // Runtime ASSERT to make sure that the nested alternative handled the
+                // "no input consumed" check.
+                if (!ASSERT_DISABLED && term->quantityType != QuantifierFixedCount && !term->parentheses.disjunction->m_minimumSize) {
+                    Jump pastBreakpoint;
+                    pastBreakpoint = branch32(NotEqual, index, Address(stackPointerRegister, parenthesesFrameLocation * sizeof(void*)));
+                    abortWithReason(YARRNoInputConsumed);
+                    pastBreakpoint.link(this);
+                }
+
+                const RegisterID countTemporary = regT1;
+
+                YarrOp& beginOp = m_ops[op.m_previousOp];
+                loadFromFrame(parenthesesFrameLocation + BackTrackInfoParentheses::matchAmountIndex(), countTemporary);
+                add32(TrustedImm32(1), countTemporary);
+                storeToFrame(countTemporary, parenthesesFrameLocation + BackTrackInfoParentheses::matchAmountIndex());
+
+                // If the parentheses are capturing, store the ending index value to the
+                // captures array, offsetting as necessary.
+                //
+                // FIXME: could avoid offsetting this value in JIT code, apply
+                // offsets only afterwards, at the point the results array is
+                // being accessed.
+                if (term->capture() && compileMode == IncludeSubpatterns) {
+                    const RegisterID indexTemporary = regT0;
+                    
+                    unsigned inputOffset = (m_checkedOffset - term->inputPosition).unsafeGet();
+                    if (inputOffset) {
+                        move(index, indexTemporary);
+                        sub32(Imm32(inputOffset), indexTemporary);
+                        setSubpatternEnd(indexTemporary, term->parentheses.subpatternId);
+                    } else
+                        setSubpatternEnd(index, term->parentheses.subpatternId);
+                }
+
+                // If the parentheses are quantified Greedy then add a label to jump back
+                // to if we get a failed match from after the parentheses. For NonGreedy
+                // parentheses, link the jump from before the subpattern to here.
+                if (term->quantityType == QuantifierGreedy) {
+                    if (term->quantityMaxCount != quantifyInfinite)
+                        branch32(Below, countTemporary, Imm32(term->quantityMaxCount.unsafeGet())).linkTo(beginOp.m_reentry, this);
+                    else
+                        jump(beginOp.m_reentry);
+                    
+                    op.m_reentry = label();
+                } else if (term->quantityType == QuantifierNonGreedy) {
+                    YarrOp& beginOp = m_ops[op.m_previousOp];
+                    beginOp.m_jumps.link(this);
+                }
+#else // !JIT_ALL_PARENS_EXPRESSIONS
+                RELEASE_ASSERT_NOT_REACHED();
+#endif
+                break;
+            }
+
             // OpParentheticalAssertionBegin/End
             case OpParentheticalAssertionBegin: {
                 PatternTerm* term = op.m_term;
@@ -2391,10 +2712,7 @@
 
                     // Plant a jump to the return address.
                     unsigned parenthesesFrameLocation = term->frameLocation;
-                    unsigned alternativeFrameLocation = parenthesesFrameLocation;
-                    if (term->quantityType != QuantifierFixedCount)
-                        alternativeFrameLocation += YarrStackSpaceForBackTrackInfoParenthesesOnce;
-                    loadFromFrameAndJump(alternativeFrameLocation);
+                    loadFromFrameAndJump(parenthesesFrameLocation + BackTrackInfoParentheses::returnAddressIndex());
 
                     // Link the DataLabelPtr associated with the end of the last
                     // alternative to this point.
@@ -2425,7 +2743,7 @@
                 PatternTerm* term = op.m_term;
                 ASSERT(term->quantityMaxCount == 1);
 
-                // We only need to backtrack to thispoint if capturing or greedy.
+                // We only need to backtrack to this point if capturing or greedy.
                 if ((term->capture() && compileMode == IncludeSubpatterns) || term->quantityType == QuantifierGreedy) {
                     m_backtrackingState.link(this);
 
@@ -2459,7 +2777,7 @@
                     // are currently in a state where we had skipped over the subpattern
                     // (in which case the flag value on the stack will be -1).
                     unsigned parenthesesFrameLocation = term->frameLocation;
-                    Jump hadSkipped = branch32(Equal, Address(stackPointerRegister, parenthesesFrameLocation * sizeof(void*)), TrustedImm32(-1));
+                    Jump hadSkipped = branch32(Equal, Address(stackPointerRegister, (parenthesesFrameLocation + BackTrackInfoParenthesesOnce::beginIndex()) * sizeof(void*)), TrustedImm32(-1));
 
                     if (term->quantityType == QuantifierGreedy) {
                         // For Greedy parentheses, we skip after having already tried going
@@ -2503,6 +2821,108 @@
                 m_backtrackingState.append(op.m_jumps);
                 break;
 
+            // OpParenthesesSubpatternBegin/End
+            //
+            // When we are backtracking back out of a capturing subpattern we need
+            // to clear the start index in the matches output array, to record that
+            // this subpattern has not been captured.
+            //
+            // When backtracking back out of a Greedy quantified subpattern we need
+            // to catch this, and try running the remainder of the alternative after
+            // the subpattern again, skipping the parentheses.
+            //
+            // Upon backtracking back into a quantified set of parentheses we need to
+            // check whether we were currently skipping the subpattern. If not, we
+            // can backtrack into them, if we were we need to either backtrack back
+            // out of the start of the parentheses, or jump back to the forwards
+            // matching start, depending on whether the match is Greedy or NonGreedy.
+            case OpParenthesesSubpatternBegin: {
+#ifdef JIT_ALL_PARENS_EXPRESSIONS
+                PatternTerm* term = op.m_term;
+                unsigned parenthesesFrameLocation = term->frameLocation;
+
+                if (term->quantityType != QuantifierFixedCount) {
+                    m_backtrackingState.link(this);
+
+                    if (term->quantityType == QuantifierGreedy) {
+                        RegisterID currParenContextReg = regT0;
+                        RegisterID newParenContextReg = regT1;
+
+                        loadFromFrame(parenthesesFrameLocation + BackTrackInfoParentheses::parenContextHeadIndex(), currParenContextReg);
+
+                        restoreParenContext(currParenContextReg, regT2, term->parentheses.subpatternId, term->parentheses.lastSubpatternId, parenthesesFrameLocation);
+
+                        freeParenContext(currParenContextReg, newParenContextReg);
+                        storeToFrame(newParenContextReg, parenthesesFrameLocation + BackTrackInfoParentheses::parenContextHeadIndex());
+                        const RegisterID countTemporary = regT0;
+                        loadFromFrame(parenthesesFrameLocation + BackTrackInfoParentheses::matchAmountIndex(), countTemporary);
+                        Jump zeroLengthMatch = branchTest32(Zero, countTemporary);
+
+                        sub32(TrustedImm32(1), countTemporary);
+                        storeToFrame(countTemporary, parenthesesFrameLocation + BackTrackInfoParentheses::matchAmountIndex());
+
+                        jump(m_ops[op.m_nextOp].m_reentry);
+
+                        zeroLengthMatch.link(this);
+
+                        // Clear the flag in the stackframe indicating we didn't run through the subpattern.
+                        storeToFrame(TrustedImm32(-1), parenthesesFrameLocation + BackTrackInfoParentheses::beginIndex());
+
+                        jump(m_ops[op.m_nextOp].m_reentry);
+                    }
+
+                    // If Greedy, jump to the end.
+                    if (term->quantityType == QuantifierGreedy) {
+                        // A backtrack from after the parentheses, when skipping the subpattern,
+                        // will jump back to here.
+                        op.m_jumps.link(this);
+                    }
+
+                    m_backtrackingState.fallthrough();
+                }
+#else // !JIT_ALL_PARENS_EXPRESSIONS
+                RELEASE_ASSERT_NOT_REACHED();
+#endif
+                break;
+            }
+            case OpParenthesesSubpatternEnd: {
+#ifdef JIT_ALL_PARENS_EXPRESSIONS
+                PatternTerm* term = op.m_term;
+
+                if (term->quantityType != QuantifierFixedCount) {
+                    m_backtrackingState.link(this);
+
+                    // Check whether we should backtrack back into the parentheses, or if we
+                    // are currently in a state where we had skipped over the subpattern
+                    // (in which case the flag value on the stack will be -1).
+                    unsigned parenthesesFrameLocation = term->frameLocation;
+                    Jump hadSkipped = branch32(Equal, Address(stackPointerRegister, (parenthesesFrameLocation  + BackTrackInfoParentheses::beginIndex()) * sizeof(void*)), TrustedImm32(-1));
+
+                    if (term->quantityType == QuantifierGreedy) {
+                        // For Greedy parentheses, we skip after having already tried going
+                        // through the subpattern, so if we get here we're done.
+                        YarrOp& beginOp = m_ops[op.m_previousOp];
+                        beginOp.m_jumps.append(hadSkipped);
+                    } else {
+                        // For NonGreedy parentheses, we try skipping the subpattern first,
+                        // so if we get here we need to try running through the subpattern
+                        // next. Jump back to the start of the parentheses in the forwards
+                        // matching path.
+                        ASSERT(term->quantityType == QuantifierNonGreedy);
+                        YarrOp& beginOp = m_ops[op.m_previousOp];
+                        hadSkipped.linkTo(beginOp.m_reentry, this);
+                    }
+
+                    m_backtrackingState.fallthrough();
+                }
+
+                m_backtrackingState.append(op.m_jumps);
+#else // !JIT_ALL_PARENS_EXPRESSIONS
+                RELEASE_ASSERT_NOT_REACHED();
+#endif
+                break;
+            }
+
             // OpParentheticalAssertionBegin/End
             case OpParentheticalAssertionBegin: {
                 PatternTerm* term = op.m_term;
@@ -2562,9 +2982,9 @@
     // Emits ops for a subpattern (set of parentheses). These consist
     // of a set of alternatives wrapped in an outer set of nodes for
     // the parentheses.
-    // Supported types of parentheses are 'Once' (quantityMaxCount == 1)
-    // and 'Terminal' (non-capturing parentheses quantified as greedy
-    // and infinite).
+    // Supported types of parentheses are 'Once' (quantityMaxCount == 1),
+    // 'Terminal' (non-capturing parentheses quantified as greedy
+    // and infinite), and 0 based greedy quantified parentheses.
     // Alternatives will use the 'Simple' set of ops if either the
     // subpattern is terminal (in which case we will never need to
     // backtrack), or if the subpattern only contains one alternative.
@@ -2584,6 +3004,8 @@
         // need to restore the capture from the first subpattern upon a
         // failure in the second.
         if (term->quantityMinCount && term->quantityMinCount != term->quantityMaxCount) {
+            if (Options::dumpCompiledRegExpPatterns())
+                dataLogF("Can't JIT a variable counted parenthesis with a non-zero minimum\n");
             m_shouldFallBack = true;
             return;
         } if (term->quantityMaxCount == 1 && !term->parentheses.isCopy) {
@@ -2602,9 +3024,31 @@
             parenthesesBeginOpCode = OpParenthesesSubpatternTerminalBegin;
             parenthesesEndOpCode = OpParenthesesSubpatternTerminalEnd;
         } else {
+#ifdef JIT_ALL_PARENS_EXPRESSIONS
+            // We only handle generic parentheses with greedy quantifiers.
+            if (term->quantityType != QuantifierGreedy) {
+                // This subpattern is not supported by the JIT.
+                m_shouldFallBack = true;
+                return;
+            }
+
+            m_containsNestedSubpatterns = true;
+
+            // Select the 'Generic' nodes.
+            parenthesesBeginOpCode = OpParenthesesSubpatternBegin;
+            parenthesesEndOpCode = OpParenthesesSubpatternEnd;
+
+            // If there is more than one alternative we cannot use the 'simple' nodes.
+            if (term->parentheses.disjunction->m_alternatives.size() != 1) {
+                alternativeBeginOpCode = OpNestedAlternativeBegin;
+                alternativeNextOpCode = OpNestedAlternativeNext;
+                alternativeEndOpCode = OpNestedAlternativeEnd;
+            }
+#else
             // This subpattern is not supported by the JIT.
             m_shouldFallBack = true;
             return;
+#endif
         }
 
         size_t parenBegin = m_ops.size();
@@ -2831,18 +3275,21 @@
         if (m_pattern.m_saveInitialStartValue)
             push(X86Registers::ebx);
 
-        if (m_decodeSurrogatePairs) {
+#ifdef JIT_ALL_PARENS_EXPRESSIONS
+        if (m_containsNestedSubpatterns) {
 #if OS(WINDOWS)
             push(X86Registers::edi);
             push(X86Registers::esi);
 #endif
             push(X86Registers::r12);
+        }
+#endif
+
+        if (m_decodeSurrogatePairs) {
             push(X86Registers::r13);
             push(X86Registers::r14);
             push(X86Registers::r15);
 
-            move(TrustedImm32(0x10000), supplementaryPlanesBase);
-            move(TrustedImm32(0xfffffc00), surrogateTagMask);
             move(TrustedImm32(0xd800), leadingSurrogateTag);
             move(TrustedImm32(0xdc00), trailingSurrogateTag);
         }
@@ -2912,6 +3359,10 @@
             pop(X86Registers::r15);
             pop(X86Registers::r14);
             pop(X86Registers::r13);
+        }
+
+#ifdef JIT_ALL_PARENS_EXPRESSIONS
+        if (m_containsNestedSubpatterns) {
             pop(X86Registers::r12);
 #if OS(WINDOWS)
             pop(X86Registers::esi);
@@ -2918,6 +3369,7 @@
             pop(X86Registers::edi);
 #endif
         }
+#endif
 
         if (m_pattern.m_saveInitialStartValue)
             pop(X86Registers::ebx);
@@ -2949,6 +3401,10 @@
         , m_decodeSurrogatePairs(m_charSize == Char16 && m_pattern.unicode())
         , m_unicodeIgnoreCase(m_pattern.unicode() && m_pattern.ignoreCase())
         , m_canonicalMode(m_pattern.unicode() ? CanonicalMode::Unicode : CanonicalMode::UCS2)
+#ifdef JIT_ALL_PARENS_EXPRESSIONS
+        , m_containsNestedSubpatterns(false)
+        , m_parenContextSizes(compileMode == IncludeSubpatterns ? m_pattern.m_numSubpatterns : 0, m_pattern.m_body->m_callFrameSize)
+#endif
     {
     }
 
@@ -2961,6 +3417,15 @@
         }
 #endif
 
+        // We need to compile before generating code since we set flags based on compilation that
+        // are used during generation.
+        opCompileBody(m_pattern.m_body);
+        
+        if (m_shouldFallBack) {
+            jitObject.setFallBack(true);
+            return;
+        }
+        
         generateEnter();
 
         Jump hasInput = checkInput();
@@ -2967,6 +3432,11 @@
         generateFailReturn();
         hasInput.link(this);
 
+#ifdef JIT_ALL_PARENS_EXPRESSIONS
+        if (m_containsNestedSubpatterns)
+            move(TrustedImm32(matchLimit), remainingMatchCount);
+#endif
+
         if (compileMode == IncludeSubpatterns) {
             for (unsigned i = 0; i < m_pattern.m_numSubpatterns + 1; ++i)
                 store32(TrustedImm32(-1), Address(output, (i << 1) * sizeof(int)));
@@ -2977,6 +3447,11 @@
 
         initCallFrame();
 
+#ifdef JIT_ALL_PARENS_EXPRESSIONS
+        if (m_containsNestedSubpatterns)
+            initParenContextFreeList();
+#endif
+        
         if (m_pattern.m_saveInitialStartValue) {
 #ifdef HAVE_INITIAL_START_REG
             move(index, initialStart);
@@ -2985,18 +3460,13 @@
 #endif
         }
 
-        opCompileBody(m_pattern.m_body);
-
-        if (m_shouldFallBack) {
-            jitObject.setFallBack(true);
-            return;
-        }
-
         generate();
         backtrack();
 
         generateTryReadUnicodeCharacterHelper();
 
+        generateJITFailReturn();
+
         LinkBuffer linkBuffer(*this, REGEXP_CODE_ID, JITCompilationCanFail);
         if (linkBuffer.didFailToAllocate()) {
             jitObject.setFallBack(true);
@@ -3040,6 +3510,12 @@
     bool m_decodeSurrogatePairs;
     bool m_unicodeIgnoreCase;
     CanonicalMode m_canonicalMode;
+#ifdef JIT_ALL_PARENS_EXPRESSIONS
+    bool m_containsNestedSubpatterns;
+    ParenContextSizes m_parenContextSizes;
+#endif
+    JumpList m_abortExecution;
+    JumpList m_hitMatchLimit;
     Vector<Call> m_tryReadUnicodeCharacterCalls;
     Label m_tryReadUnicodeCharacterEntry;
 

Modified: trunk/Source/_javascript_Core/yarr/YarrJIT.h (225929 => 225930)


--- trunk/Source/_javascript_Core/yarr/YarrJIT.h	2017-12-14 22:15:25 UTC (rev 225929)
+++ trunk/Source/_javascript_Core/yarr/YarrJIT.h	2017-12-14 22:16:38 UTC (rev 225930)
@@ -38,6 +38,11 @@
 #define YARR_CALL
 #endif
 
+#if CPU(ARM64) || (CPU(X86_64) && !OS(WINDOWS))
+#define JIT_ALL_PARENS_EXPRESSIONS
+constexpr size_t patternContextBufferSize = 8192; // Space caller allocates to save nested parenthesis context
+#endif
+
 namespace JSC {
 
 class VM;
@@ -47,10 +52,17 @@
 
 class YarrCodeBlock {
 #if CPU(X86_64) || CPU(ARM64)
+#ifdef JIT_ALL_PARENS_EXPRESSIONS
+    typedef MatchResult (*YarrJITCode8)(const LChar* input, unsigned start, unsigned length, int* output, void* freeParenContext, unsigned parenContextSize) YARR_CALL;
+    typedef MatchResult (*YarrJITCode16)(const UChar* input, unsigned start, unsigned length, int* output, void* freeParenContext, unsigned parenContextSize) YARR_CALL;
+    typedef MatchResult (*YarrJITCodeMatchOnly8)(const LChar* input, unsigned start, unsigned length, void*, void* freeParenContext, unsigned parenContextSize) YARR_CALL;
+    typedef MatchResult (*YarrJITCodeMatchOnly16)(const UChar* input, unsigned start, unsigned length, void*, void* freeParenContext, unsigned parenContextSize) YARR_CALL;
+#else
     typedef MatchResult (*YarrJITCode8)(const LChar* input, unsigned start, unsigned length, int* output) YARR_CALL;
     typedef MatchResult (*YarrJITCode16)(const UChar* input, unsigned start, unsigned length, int* output) YARR_CALL;
     typedef MatchResult (*YarrJITCodeMatchOnly8)(const LChar* input, unsigned start, unsigned length) YARR_CALL;
     typedef MatchResult (*YarrJITCodeMatchOnly16)(const UChar* input, unsigned start, unsigned length) YARR_CALL;
+#endif
 #else
     typedef EncodedMatchResult (*YarrJITCode8)(const LChar* input, unsigned start, unsigned length, int* output) YARR_CALL;
     typedef EncodedMatchResult (*YarrJITCode16)(const UChar* input, unsigned start, unsigned length, int* output) YARR_CALL;
@@ -81,6 +93,31 @@
     void set8BitCodeMatchOnly(MacroAssemblerCodeRef matchOnly) { m_matchOnly8 = matchOnly; }
     void set16BitCodeMatchOnly(MacroAssemblerCodeRef matchOnly) { m_matchOnly16 = matchOnly; }
 
+#ifdef JIT_ALL_PARENS_EXPRESSIONS
+    MatchResult execute(const LChar* input, unsigned start, unsigned length, int* output, void* freeParenContext, unsigned parenContextSize)
+    {
+        ASSERT(has8BitCode());
+        return MatchResult(reinterpret_cast<YarrJITCode8>(m_ref8.code().executableAddress())(input, start, length, output, freeParenContext, parenContextSize));
+    }
+
+    MatchResult execute(const UChar* input, unsigned start, unsigned length, int* output, void* freeParenContext, unsigned parenContextSize)
+    {
+        ASSERT(has16BitCode());
+        return MatchResult(reinterpret_cast<YarrJITCode16>(m_ref16.code().executableAddress())(input, start, length, output, freeParenContext, parenContextSize));
+    }
+
+    MatchResult execute(const LChar* input, unsigned start, unsigned length, void* freeParenContext, unsigned parenContextSize)
+    {
+        ASSERT(has8BitCodeMatchOnly());
+        return MatchResult(reinterpret_cast<YarrJITCodeMatchOnly8>(m_matchOnly8.code().executableAddress())(input, start, length, 0, freeParenContext, parenContextSize));
+    }
+
+    MatchResult execute(const UChar* input, unsigned start, unsigned length, void* freeParenContext, unsigned parenContextSize)
+    {
+        ASSERT(has16BitCodeMatchOnly());
+        return MatchResult(reinterpret_cast<YarrJITCodeMatchOnly16>(m_matchOnly16.code().executableAddress())(input, start, length, 0, freeParenContext, parenContextSize));
+    }
+#else
     MatchResult execute(const LChar* input, unsigned start, unsigned length, int* output)
     {
         ASSERT(has8BitCode());
@@ -104,6 +141,7 @@
         ASSERT(has16BitCodeMatchOnly());
         return MatchResult(reinterpret_cast<YarrJITCodeMatchOnly16>(m_matchOnly16.code().executableAddress())(input, start, length));
     }
+#endif
 
 #if ENABLE(REGEXP_TRACING)
     void *get8BitMatchOnlyAddr()

Modified: trunk/Source/_javascript_Core/yarr/YarrPattern.cpp (225929 => 225930)


--- trunk/Source/_javascript_Core/yarr/YarrPattern.cpp	2017-12-14 22:15:25 UTC (rev 225929)
+++ trunk/Source/_javascript_Core/yarr/YarrPattern.cpp	2017-12-14 22:16:38 UTC (rev 225930)
@@ -828,8 +828,7 @@
                 // Note: for fixed once parentheses we will ensure at least the minimum is available; others are on their own.
                 term.frameLocation = currentCallFrameSize;
                 if (term.quantityMaxCount == 1 && !term.parentheses.isCopy) {
-                    if (term.quantityType != QuantifierFixedCount)
-                        currentCallFrameSize += YarrStackSpaceForBackTrackInfoParenthesesOnce;
+                    currentCallFrameSize += YarrStackSpaceForBackTrackInfoParenthesesOnce;
                     error = setupDisjunctionOffsets(term.parentheses.disjunction, currentCallFrameSize, currentInputPosition.unsafeGet(), currentCallFrameSize);
                     if (error)
                         return error;
@@ -845,11 +844,10 @@
                     term.inputPosition = currentInputPosition.unsafeGet();
                 } else {
                     term.inputPosition = currentInputPosition.unsafeGet();
-                    unsigned ignoredCallFrameSize;
-                    error = setupDisjunctionOffsets(term.parentheses.disjunction, 0, currentInputPosition.unsafeGet(), ignoredCallFrameSize);
+                    currentCallFrameSize += YarrStackSpaceForBackTrackInfoParentheses;
+                    error = setupDisjunctionOffsets(term.parentheses.disjunction, currentCallFrameSize, currentInputPosition.unsafeGet(), currentCallFrameSize);
                     if (error)
                         return error;
-                    currentCallFrameSize += YarrStackSpaceForBackTrackInfoParentheses;
                 }
                 // Fixed count of 1 could be accepted, if they have a fixed size *AND* if all alternatives are of the same length.
                 alternative->m_hasFixedSize = false;
@@ -1185,7 +1183,7 @@
     *error = compile(pattern, stackLimit);
 }
 
-static void indentForNestingLevel(PrintStream& out, unsigned nestingDepth)
+void indentForNestingLevel(PrintStream& out, unsigned nestingDepth)
 {
     out.print("    ");
     for (; nestingDepth; --nestingDepth)
@@ -1192,7 +1190,7 @@
         out.print("  ");
 }
 
-static void dumpUChar32(PrintStream& out, UChar32 c)
+void dumpUChar32(PrintStream& out, UChar32 c)
 {
     if (c >= ' '&& c <= 0xff)
         out.printf("'%c'", static_cast<char>(c));
@@ -1200,6 +1198,79 @@
         out.printf("0x%04x", c);
 }
 
+void dumpCharacterClass(PrintStream& out, YarrPattern* pattern, CharacterClass* characterClass)
+{
+    if (characterClass == pattern->anyCharacterClass())
+        out.print("<any character>");
+    else if (characterClass == pattern->newlineCharacterClass())
+        out.print("<newline>");
+    else if (characterClass == pattern->digitsCharacterClass())
+        out.print("<digits>");
+    else if (characterClass == pattern->spacesCharacterClass())
+        out.print("<whitespace>");
+    else if (characterClass == pattern->wordcharCharacterClass())
+        out.print("<word>");
+    else if (characterClass == pattern->wordUnicodeIgnoreCaseCharCharacterClass())
+        out.print("<unicode ignore case>");
+    else if (characterClass == pattern->nondigitsCharacterClass())
+        out.print("<non-digits>");
+    else if (characterClass == pattern->nonspacesCharacterClass())
+        out.print("<non-whitespace>");
+    else if (characterClass == pattern->nonwordcharCharacterClass())
+        out.print("<non-word>");
+    else if (characterClass == pattern->nonwordUnicodeIgnoreCaseCharCharacterClass())
+        out.print("<unicode non-ignore case>");
+    else {
+        bool needMatchesRangesSeperator = false;
+
+        auto dumpMatches = [&] (const char* prefix, Vector<UChar32> matches) {
+            size_t matchesSize = matches.size();
+            if (matchesSize) {
+                if (needMatchesRangesSeperator)
+                    out.print(",");
+                needMatchesRangesSeperator = true;
+
+                out.print(prefix, ":(");
+                for (size_t i = 0; i < matchesSize; ++i) {
+                    if (i)
+                        out.print(",");
+                    dumpUChar32(out, matches[i]);
+                }
+                out.print(")");
+            }
+        };
+
+        auto dumpRanges = [&] (const char* prefix, Vector<CharacterRange> ranges) {
+            size_t rangeSize = ranges.size();
+            if (rangeSize) {
+                if (needMatchesRangesSeperator)
+                    out.print(",");
+                needMatchesRangesSeperator = true;
+
+                out.print(prefix, " ranges:(");
+                for (size_t i = 0; i < rangeSize; ++i) {
+                    if (i)
+                        out.print(",");
+                    CharacterRange range = ranges[i];
+                    out.print("(");
+                    dumpUChar32(out, range.begin);
+                    out.print("..");
+                    dumpUChar32(out, range.end);
+                    out.print(")");
+                }
+                out.print(")");
+            }
+        };
+
+        out.print("[");
+        dumpMatches("ASCII", characterClass->m_matches);
+        dumpRanges("ASCII", characterClass->m_ranges);
+        dumpMatches("Unicode", characterClass->m_matchesUnicode);
+        dumpRanges("Unicode", characterClass->m_rangesUnicode);
+        out.print("]");
+    }
+}
+
 void PatternAlternative::dump(PrintStream& out, YarrPattern* thisPattern, unsigned nestingDepth)
 {
     out.print("minimum size: ", m_minimumSize);
@@ -1239,8 +1310,10 @@
 {
     indentForNestingLevel(out, nestingDepth);
 
-    if (invert() && (type != TypeParenthesesSubpattern && type != TypeParentheticalAssertion))
-        out.print("not ");
+    if (type != TypeParenthesesSubpattern && type != TypeParentheticalAssertion) {
+        if (invert())
+            out.print("not ");
+    }
 
     switch (type) {
     case TypeAssertionBOL:
@@ -1254,6 +1327,7 @@
         break;
     case TypePatternCharacter:
         out.printf("character ");
+        out.printf("inputPosition %u ", inputPosition);
         if (thisPattern->ignoreCase() && isASCIIAlpha(patternCharacter)) {
             dumpUChar32(out, toASCIIUpper(patternCharacter));
             out.print("/");
@@ -1375,16 +1449,17 @@
         if (parentheses.isTerminal)
             out.print(",terminal");
 
-        if (quantityMaxCount != 1 || parentheses.isCopy || quantityType != QuantifierFixedCount)
-            out.println(",frame location ", frameLocation);
-        else
-            out.println();
+        out.println(",frame location ", frameLocation);
 
         if (parentheses.disjunction->m_alternatives.size() > 1) {
             indentForNestingLevel(out, nestingDepth + 1);
             unsigned alternativeFrameLocation = frameLocation;
-            if (quantityType != QuantifierFixedCount)
+            if (quantityMaxCount == 1 && !parentheses.isCopy)
                 alternativeFrameLocation += YarrStackSpaceForBackTrackInfoParenthesesOnce;
+            else if (parentheses.isTerminal)
+                alternativeFrameLocation += YarrStackSpaceForBackTrackInfoParenthesesTerminal;
+            else
+                alternativeFrameLocation += YarrStackSpaceForBackTrackInfoParentheses;
             out.println("alternative list,frame location ", alternativeFrameLocation);
         }
 
@@ -1461,6 +1536,8 @@
         out.print(")");
     }
     out.print(":\n");
+    if (m_body->m_callFrameSize)
+        out.print("    callframe size: ", m_body->m_callFrameSize, "\n");
     m_body->dump(out, this);
 }
 

Modified: trunk/Source/_javascript_Core/yarr/YarrPattern.h (225929 => 225930)


--- trunk/Source/_javascript_Core/yarr/YarrPattern.h	2017-12-14 22:15:25 UTC (rev 225929)
+++ trunk/Source/_javascript_Core/yarr/YarrPattern.h	2017-12-14 22:16:38 UTC (rev 225930)
@@ -227,7 +227,13 @@
     {
         return m_capture;
     }
-    
+
+    bool containsAnyCaptures()
+    {
+        ASSERT(this->type == TypeParenthesesSubpattern);
+        return parentheses.lastSubpatternId >= parentheses.subpatternId;
+    }
+
     void quantify(unsigned count, QuantifierType type)
     {
         quantityMinCount = 0;
@@ -549,6 +555,10 @@
     HashMap<unsigned, CharacterClass*> unicodePropertiesCached;
 };
 
+    void indentForNestingLevel(PrintStream&, unsigned);
+    void dumpUChar32(PrintStream&, UChar32);
+    void dumpCharacterClass(PrintStream&, YarrPattern*, CharacterClass*);
+
     struct BackTrackInfoPatternCharacter {
         uintptr_t begin; // Only needed for unicode patterns
         uintptr_t matchAmount;
@@ -574,9 +584,9 @@
     };
 
     struct BackTrackInfoAlternative {
-        uintptr_t offset;
-
-        static unsigned offsetIndex() { return offsetof(BackTrackInfoAlternative, offset) / sizeof(uintptr_t); }
+        union {
+            uintptr_t offset;
+        };
     };
 
     struct BackTrackInfoParentheticalAssertion {
@@ -587,8 +597,10 @@
 
     struct BackTrackInfoParenthesesOnce {
         uintptr_t begin;
+        uintptr_t returnAddress;
 
         static unsigned beginIndex() { return offsetof(BackTrackInfoParenthesesOnce, begin) / sizeof(uintptr_t); }
+        static unsigned returnAddressIndex() { return offsetof(BackTrackInfoParenthesesOnce, returnAddress) / sizeof(uintptr_t); }
     };
 
     struct BackTrackInfoParenthesesTerminal {
@@ -597,4 +609,16 @@
         static unsigned beginIndex() { return offsetof(BackTrackInfoParenthesesTerminal, begin) / sizeof(uintptr_t); }
     };
 
+    struct BackTrackInfoParentheses {
+        uintptr_t begin;
+        uintptr_t returnAddress;
+        uintptr_t matchAmount;
+        uintptr_t parenContextHead;
+
+        static unsigned beginIndex() { return offsetof(BackTrackInfoParentheses, begin) / sizeof(uintptr_t); }
+        static unsigned returnAddressIndex() { return offsetof(BackTrackInfoParentheses, returnAddress) / sizeof(uintptr_t); }
+        static unsigned matchAmountIndex() { return offsetof(BackTrackInfoParentheses, matchAmount) / sizeof(uintptr_t); }
+        static unsigned parenContextHeadIndex() { return offsetof(BackTrackInfoParentheses, parenContextHead) / sizeof(uintptr_t); }
+    };
+
 } } // namespace JSC::Yarr
_______________________________________________
webkit-changes mailing list
webkit-changes@lists.webkit.org
https://lists.webkit.org/mailman/listinfo/webkit-changes

Reply via email to