Diff
Modified: trunk/Source/_javascript_Core/ChangeLog (235635 => 235636)
--- trunk/Source/_javascript_Core/ChangeLog 2018-09-04 21:18:22 UTC (rev 235635)
+++ trunk/Source/_javascript_Core/ChangeLog 2018-09-04 21:18:58 UTC (rev 235636)
@@ -1,3 +1,52 @@
+2018-09-04 Michael Saboff <msab...@apple.com>
+
+ YARR: JIT RegExps with back references
+ https://bugs.webkit.org/show_bug.cgi?id=180874
+
+ Reviewed by Filip Pizlo.
+
+ Implemented JIT'ed back references for all counted types. The only type of back references
+ not handled in the JIT are 16bit matches that ignore case. Such support would require the
+ canonicalization that is currently handled in the Yarr interpreter via a C funtion call.
+ The back reference processing for surrogate pairs is implemented by individually comparing
+ each surrogate ala memcmp.
+
+ Added a generated canonicalization table for the LChar (8bit) domain to process case
+ ignored back references.
+
+ Added macro assembler load16(ExtendedAddress) for indexed access to the canonicalization table.
+
+ Added a new JIT failure reason for forward references as the check to JIT expressions with
+ forward references we're handled synonimously those containing back references.
+
+ This change is only enabled for 64 bit platforms.
+
+ * assembler/MacroAssemblerARM64.h:
+ (JSC::MacroAssemblerARM64::load16):
+ * assembler/MacroAssemblerX86_64.h:
+ (JSC::MacroAssemblerX86_64::load16):
+ * runtime/RegExp.cpp:
+ (JSC::RegExp::compile):
+ (JSC::RegExp::compileMatchOnly):
+ * yarr/YarrCanonicalize.h:
+ * yarr/YarrCanonicalizeUCS2.cpp:
+ * yarr/YarrCanonicalizeUCS2.js:
+ (set characters.hex.set string_appeared_here):
+ * yarr/YarrJIT.cpp:
+ (JSC::Yarr::YarrGenerator::checkNotEnoughInput):
+ (JSC::Yarr::YarrGenerator::readCharacterDontDecodeSurrogates):
+ (JSC::Yarr::YarrGenerator::matchBackreference):
+ (JSC::Yarr::YarrGenerator::generateBackReference):
+ (JSC::Yarr::YarrGenerator::backtrackBackReference):
+ (JSC::Yarr::YarrGenerator::generateTerm):
+ (JSC::Yarr::YarrGenerator::backtrackTerm):
+ (JSC::Yarr::YarrGenerator::compile):
+ (JSC::Yarr::dumpCompileFailure):
+ * yarr/YarrJIT.h:
+ * yarr/YarrPattern.h:
+ (JSC::Yarr::BackTrackInfoBackReference::beginIndex):
+ (JSC::Yarr::BackTrackInfoBackReference::matchAmountIndex):
+
2018-09-04 Mark Lam <mark....@apple.com>
Make the jsc shell print, printErr, and debug functions more robust.
Modified: trunk/Source/_javascript_Core/_javascript_Core.xcodeproj/project.pbxproj (235635 => 235636)
--- trunk/Source/_javascript_Core/_javascript_Core.xcodeproj/project.pbxproj 2018-09-04 21:18:22 UTC (rev 235635)
+++ trunk/Source/_javascript_Core/_javascript_Core.xcodeproj/project.pbxproj 2018-09-04 21:18:58 UTC (rev 235636)
@@ -9966,7 +9966,7 @@
);
runOnlyForDeploymentPostprocessing = 0;
shellPath = /bin/sh;
- shellScript = "set -e\n\nmkdir -p \"${BUILT_PRODUCTS_DIR}/LLIntOffsets/\"\n\n/usr/bin/env ruby \"${SRCROOT}/offlineasm/generate_offset_extractor.rb\" \"-I${BUILT_PRODUCTS_DIR}/DerivedSources/_javascript_Core\" \"${SRCROOT}/llint/LowLevelInterpreter.asm\" \"${BUILT_PRODUCTS_DIR}/LLIntOffsets/LLIntDesiredOffsets.h\" \"${ARCHS} C_LOOP\"\n";
+ shellScript = "set -e\n\nmkdir -p \"${BUILT_PRODUCTS_DIR}/LLIntOffsets/${CURRENT_ARCH}\"\n\n/usr/bin/env ruby \"${SRCROOT}/offlineasm/generate_offset_extractor.rb\" \"-I${BUILT_PRODUCTS_DIR}/DerivedSources/_javascript_Core\" \"${SRCROOT}/llint/LowLevelInterpreter.asm\" \"${BUILT_PRODUCTS_DIR}/LLIntOffsets/${CURRENT_ARCH}/LLIntDesiredOffsets.h\" \"${ARCHS} C_LOOP\"\n";
};
1A02D9A81B34A882000D1522 /* Add Symlink in /System/Library/PrivateFrameworks */ = {
isa = PBXShellScriptBuildPhase;
@@ -10660,6 +10660,7 @@
buildSettings = {
HEADER_SEARCH_PATHS = (
"\"${BUILT_PRODUCTS_DIR}/DerivedSources/_javascript_Core\"",
+ "\"${BUILT_PRODUCTS_DIR}/LLIntOffsets/${CURRENT_ARCH}\"",
"\"$(_javascript_CORE_FRAMEWORKS_DIR)/_javascript_Core.framework/PrivateHeaders\"",
"$(inherited)",
);
@@ -10672,6 +10673,7 @@
buildSettings = {
HEADER_SEARCH_PATHS = (
"\"${BUILT_PRODUCTS_DIR}/DerivedSources/_javascript_Core\"",
+ "\"${BUILT_PRODUCTS_DIR}/LLIntOffsets/${CURRENT_ARCH}\"",
"\"$(_javascript_CORE_FRAMEWORKS_DIR)/_javascript_Core.framework/PrivateHeaders\"",
"$(inherited)",
);
@@ -10684,6 +10686,7 @@
buildSettings = {
HEADER_SEARCH_PATHS = (
"\"${BUILT_PRODUCTS_DIR}/DerivedSources/_javascript_Core\"",
+ "\"${BUILT_PRODUCTS_DIR}/LLIntOffsets/${CURRENT_ARCH}\"",
"\"$(_javascript_CORE_FRAMEWORKS_DIR)/_javascript_Core.framework/PrivateHeaders\"",
"$(inherited)",
);
@@ -10696,6 +10699,7 @@
buildSettings = {
HEADER_SEARCH_PATHS = (
"\"${BUILT_PRODUCTS_DIR}/DerivedSources/_javascript_Core\"",
+ "\"${BUILT_PRODUCTS_DIR}/LLIntOffsets/${CURRENT_ARCH}\"",
"\"$(_javascript_CORE_FRAMEWORKS_DIR)/_javascript_Core.framework/PrivateHeaders\"",
"$(inherited)",
);
Modified: trunk/Source/_javascript_Core/assembler/MacroAssemblerARM64.h (235635 => 235636)
--- trunk/Source/_javascript_Core/assembler/MacroAssemblerARM64.h 2018-09-04 21:18:22 UTC (rev 235635)
+++ trunk/Source/_javascript_Core/assembler/MacroAssemblerARM64.h 2018-09-04 21:18:58 UTC (rev 235636)
@@ -1218,7 +1218,15 @@
m_assembler.add<64>(memoryTempRegister, memoryTempRegister, address.index, Assembler::UXTX, address.scale);
m_assembler.ldrh(dest, address.base, memoryTempRegister);
}
-
+
+ void load16(ExtendedAddress address, RegisterID dest)
+ {
+ moveToCachedReg(TrustedImmPtr(reinterpret_cast<void*>(address.offset)), cachedMemoryTempRegister());
+ m_assembler.ldrh(dest, memoryTempRegister, address.base, Assembler::UXTX, 1);
+ if (dest == memoryTempRegister)
+ cachedMemoryTempRegister().invalidate();
+ }
+
void load16Unaligned(ImplicitAddress address, RegisterID dest)
{
load16(address, dest);
Modified: trunk/Source/_javascript_Core/assembler/MacroAssemblerX86_64.h (235635 => 235636)
--- trunk/Source/_javascript_Core/assembler/MacroAssemblerX86_64.h 2018-09-04 21:18:22 UTC (rev 235635)
+++ trunk/Source/_javascript_Core/assembler/MacroAssemblerX86_64.h 2018-09-04 21:18:58 UTC (rev 235636)
@@ -100,6 +100,23 @@
load8(dest, dest);
}
+ void load16(ExtendedAddress address, RegisterID dest)
+ {
+ TrustedImmPtr addr(reinterpret_cast<void*>(address.offset));
+ MacroAssemblerX86Common::move(addr, scratchRegister());
+ MacroAssemblerX86Common::load16(BaseIndex(scratchRegister(), address.base, TimesTwo), dest);
+ }
+
+ void load16(BaseIndex address, RegisterID dest)
+ {
+ MacroAssemblerX86Common::load16(address, dest);
+ }
+
+ void load16(Address address, RegisterID dest)
+ {
+ MacroAssemblerX86Common::load16(address, dest);
+ }
+
void load32(const void* address, RegisterID dest)
{
if (dest == X86Registers::eax)
Modified: trunk/Source/_javascript_Core/runtime/RegExp.cpp (235635 => 235636)
--- trunk/Source/_javascript_Core/runtime/RegExp.cpp 2018-09-04 21:18:22 UTC (rev 235635)
+++ trunk/Source/_javascript_Core/runtime/RegExp.cpp 2018-09-04 21:18:58 UTC (rev 235636)
@@ -305,7 +305,11 @@
}
#if ENABLE(YARR_JIT)
- if (!pattern.m_containsBackreferences && !pattern.containsUnsignedLengthPattern() && VM::canUseRegExpJIT()) {
+ if (!pattern.containsUnsignedLengthPattern() && VM::canUseRegExpJIT()
+#if !ENABLE(YARR_JIT_BACKREFERENCES)
+ && !pattern.m_containsBackreferences
+#endif
+ ) {
Yarr::jitCompile(pattern, m_patternString, charSize, vm, m_regExpJITCode);
if (!m_regExpJITCode.failureReason()) {
m_state = JITCode;
@@ -361,7 +365,11 @@
}
#if ENABLE(YARR_JIT)
- if (!pattern.m_containsBackreferences && !pattern.containsUnsignedLengthPattern() && VM::canUseRegExpJIT()) {
+ if (!pattern.containsUnsignedLengthPattern() && VM::canUseRegExpJIT()
+#if !ENABLE(YARR_JIT_BACKREFERENCES)
+ && !pattern.m_containsBackreferences
+#endif
+ ) {
Yarr::jitCompile(pattern, m_patternString, charSize, vm, m_regExpJITCode, Yarr::MatchOnly);
if (!m_regExpJITCode.failureReason()) {
m_state = JITCode;
Modified: trunk/Source/_javascript_Core/yarr/YarrCanonicalize.h (235635 => 235636)
--- trunk/Source/_javascript_Core/yarr/YarrCanonicalize.h 2018-09-04 21:18:22 UTC (rev 235635)
+++ trunk/Source/_javascript_Core/yarr/YarrCanonicalize.h 2018-09-04 21:18:58 UTC (rev 235636)
@@ -53,6 +53,7 @@
extern const size_t UCS2_CANONICALIZATION_RANGES;
extern const UChar32* const ucs2CharacterSetInfo[];
extern const CanonicalizationRange ucs2RangeInfo[];
+extern const uint16_t canonicalTableLChar[256];
extern const size_t UNICODE_CANONICALIZATION_RANGES;
extern const UChar32* const unicodeCharacterSetInfo[];
Modified: trunk/Source/_javascript_Core/yarr/YarrCanonicalizeUCS2.cpp (235635 => 235636)
--- trunk/Source/_javascript_Core/yarr/YarrCanonicalizeUCS2.cpp 2018-09-04 21:18:22 UTC (rev 235635)
+++ trunk/Source/_javascript_Core/yarr/YarrCanonicalizeUCS2.cpp 2018-09-04 21:18:58 UTC (rev 235636)
@@ -533,5 +533,24 @@
{ 0xff5b, 0xffff, 0x0000, CanonicalizeUnique },
};
+const uint16_t canonicalTableLChar[256] = {
+ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
+ 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
+ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
+ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,
+ 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f,
+ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f,
+ 0x60, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f,
+ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f,
+ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
+ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f,
+ 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
+ 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0x39c, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf,
+ 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf,
+ 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf,
+ 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf,
+ 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xf7, 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0x178
+};
+
} } // JSC::Yarr
Modified: trunk/Source/_javascript_Core/yarr/YarrCanonicalizeUCS2.js (235635 => 235636)
--- trunk/Source/_javascript_Core/yarr/YarrCanonicalizeUCS2.js 2018-09-04 21:18:22 UTC (rev 235635)
+++ trunk/Source/_javascript_Core/yarr/YarrCanonicalizeUCS2.js 2018-09-04 21:18:58 UTC (rev 235636)
@@ -183,6 +183,24 @@
}
print("};");
print();
+ // Create canonical table for LChar domain
+ let line = "const uint16_t canonicalTableLChar[256] = {";
+ for (let i = 0; i < 256; i++) {
+ if (!(i % 16)) {
+ print(line);
+ line = " ";
+ }
+ let canonicalChar = canonicalize(i);
+ line = line + (canonicalChar < 16 ? "0x0" : "0x") + canonicalChar.toString(16);
+ if ((i % 16) != 15)
+ line += ", ";
+ else if (i != 255)
+ line += ",";
+ }
+ print(line);
+ print("};");
+ print();
+
}
printHeader();
Modified: trunk/Source/_javascript_Core/yarr/YarrJIT.cpp (235635 => 235636)
--- trunk/Source/_javascript_Core/yarr/YarrJIT.cpp 2018-09-04 21:18:22 UTC (rev 235635)
+++ trunk/Source/_javascript_Core/yarr/YarrJIT.cpp 2018-09-04 21:18:58 UTC (rev 235636)
@@ -464,6 +464,12 @@
return branch32(BelowOrEqual, index, length);
}
+ Jump checkNotEnoughInput(RegisterID additionalAmount)
+ {
+ add32(index, additionalAmount);
+ return branch32(Above, additionalAmount, length);
+ }
+
Jump checkInput()
{
return branch32(BelowOrEqual, index, length);
@@ -546,6 +552,16 @@
}
#endif
+ void readCharacterDontDecodeSurrogates(Checked<unsigned> negativeCharacterOffset, RegisterID resultReg, RegisterID indexReg = index)
+ {
+ BaseIndex address = negativeOffsetIndexedAddress(negativeCharacterOffset, resultReg, indexReg);
+
+ if (m_charSize == Char8)
+ load8(address, resultReg);
+ else
+ load16Unaligned(address, resultReg);
+ }
+
void readCharacter(Checked<unsigned> negativeCharacterOffset, RegisterID resultReg, RegisterID indexReg = index)
{
BaseIndex address = negativeOffsetIndexedAddress(negativeCharacterOffset, resultReg, indexReg);
@@ -1106,6 +1122,228 @@
backtrackTermDefault(opIndex);
}
+#if ENABLE(YARR_JIT_BACKREFERENCES)
+ void matchBackreference(size_t opIndex, JumpList& characterMatchFails, RegisterID character, RegisterID patternIndex, RegisterID patternCharacter)
+ {
+ YarrOp& op = m_ops[opIndex];
+ PatternTerm* term = op.m_term;
+ unsigned subpatternId = term->backReferenceSubpatternId;
+
+ Label loop(this);
+
+ readCharacterDontDecodeSurrogates(0, patternCharacter, patternIndex);
+ readCharacterDontDecodeSurrogates(m_checkedOffset - term->inputPosition, character);
+
+ if (!m_pattern.ignoreCase())
+ characterMatchFails.append(branch32(NotEqual, character, patternCharacter));
+ else {
+ Jump charactersMatch = branch32(Equal, character, patternCharacter);
+ ExtendedAddress characterTableEntry(character, reinterpret_cast<intptr_t>(&canonicalTableLChar));
+ load16(characterTableEntry, character);
+ ExtendedAddress patternTableEntry(patternCharacter, reinterpret_cast<intptr_t>(&canonicalTableLChar));
+ load16(patternTableEntry, patternCharacter);
+ characterMatchFails.append(branch32(NotEqual, character, patternCharacter));
+ charactersMatch.link(this);
+ }
+
+
+ add32(TrustedImm32(1), index);
+ add32(TrustedImm32(1), patternIndex);
+
+ branch32(NotEqual, patternIndex, Address(output, ((subpatternId << 1) + 1) * sizeof(int))).linkTo(loop, this);
+ }
+
+ void generateBackReference(size_t opIndex)
+ {
+ YarrOp& op = m_ops[opIndex];
+ PatternTerm* term = op.m_term;
+
+ if (m_pattern.ignoreCase() && m_charSize != Char8) {
+ m_failureReason = JITFailureReason::BackReference;
+ return;
+ }
+
+ unsigned subpatternId = term->backReferenceSubpatternId;
+ unsigned parenthesesFrameLocation = term->frameLocation;
+
+ const RegisterID characterOrTemp = regT0;
+ const RegisterID patternIndex = regT1;
+ const RegisterID patternTemp = regT2;
+
+ storeToFrame(index, parenthesesFrameLocation + BackTrackInfoBackReference::beginIndex());
+ if (term->quantityType != QuantifierFixedCount || term->quantityMaxCount != 1)
+ storeToFrame(TrustedImm32(0), parenthesesFrameLocation + BackTrackInfoBackReference::matchAmountIndex());
+
+ JumpList matches;
+
+ if (term->quantityType != QuantifierNonGreedy) {
+ load32(Address(output, (subpatternId << 1) * sizeof(int)), patternIndex);
+ load32(Address(output, ((subpatternId << 1) + 1) * sizeof(int)), patternTemp);
+
+ // An empty match is successful without consuming characters
+ if (term->quantityType != QuantifierFixedCount || term->quantityMaxCount != 1) {
+ matches.append(branch32(Equal, TrustedImm32(-1), patternIndex));
+ matches.append(branch32(Equal, patternIndex, patternTemp));
+ } else {
+ Jump zeroLengthMatch = branch32(Equal, TrustedImm32(-1), patternIndex);
+ Jump tryNonZeroMatch = branch32(NotEqual, patternIndex, patternTemp);
+ zeroLengthMatch.link(this);
+ storeToFrame(TrustedImm32(1), parenthesesFrameLocation + BackTrackInfoBackReference::matchAmountIndex());
+ matches.append(jump());
+ tryNonZeroMatch.link(this);
+ }
+ }
+
+ switch (term->quantityType) {
+ case QuantifierFixedCount: {
+ Label outerLoop(this);
+
+ // PatternTemp should contain pattern end index at this point
+ sub32(patternIndex, patternTemp);
+ if (m_checkedOffset - term->inputPosition)
+ sub32(Imm32((m_checkedOffset - term->inputPosition).unsafeGet()), patternTemp);
+ op.m_jumps.append(checkNotEnoughInput(patternTemp));
+
+ matchBackreference(opIndex, op.m_jumps, characterOrTemp, patternIndex, patternTemp);
+
+ if (term->quantityMaxCount != 1) {
+ loadFromFrame(parenthesesFrameLocation + BackTrackInfoBackReference::matchAmountIndex(), characterOrTemp);
+ add32(TrustedImm32(1), characterOrTemp);
+ storeToFrame(characterOrTemp, parenthesesFrameLocation + BackTrackInfoBackReference::matchAmountIndex());
+ matches.append(branch32(Equal, Imm32(term->quantityMaxCount.unsafeGet()), characterOrTemp));
+ load32(Address(output, (subpatternId << 1) * sizeof(int)), patternIndex);
+ load32(Address(output, ((subpatternId << 1) + 1) * sizeof(int)), patternTemp);
+ jump(outerLoop);
+ }
+ matches.link(this);
+ break;
+ }
+
+ case QuantifierGreedy: {
+ JumpList incompleteMatches;
+
+ Label outerLoop(this);
+
+ // PatternTemp should contain pattern end index at this point
+ sub32(patternIndex, patternTemp);
+ if (m_checkedOffset - term->inputPosition)
+ sub32(Imm32((m_checkedOffset - term->inputPosition).unsafeGet()), patternTemp);
+ matches.append(checkNotEnoughInput(patternTemp));
+
+ matchBackreference(opIndex, incompleteMatches, characterOrTemp, patternIndex, patternTemp);
+
+ loadFromFrame(parenthesesFrameLocation + BackTrackInfoBackReference::matchAmountIndex(), characterOrTemp);
+ add32(TrustedImm32(1), characterOrTemp);
+ storeToFrame(characterOrTemp, parenthesesFrameLocation + BackTrackInfoBackReference::matchAmountIndex());
+ if (term->quantityMaxCount != quantifyInfinite)
+ matches.append(branch32(Equal, Imm32(term->quantityMaxCount.unsafeGet()), characterOrTemp));
+ load32(Address(output, (subpatternId << 1) * sizeof(int)), patternIndex);
+ load32(Address(output, ((subpatternId << 1) + 1) * sizeof(int)), patternTemp);
+
+ // Store current index in frame for restoring after a partial match
+ storeToFrame(index, parenthesesFrameLocation + BackTrackInfoBackReference::beginIndex());
+ jump(outerLoop);
+
+ incompleteMatches.link(this);
+ loadFromFrame(parenthesesFrameLocation + BackTrackInfoBackReference::beginIndex(), index);
+
+ matches.link(this);
+ op.m_reentry = label();
+ break;
+ }
+
+ case QuantifierNonGreedy: {
+ JumpList incompleteMatches;
+
+ matches.append(jump());
+
+ op.m_reentry = label();
+
+ load32(Address(output, (subpatternId << 1) * sizeof(int)), patternIndex);
+ load32(Address(output, ((subpatternId << 1) + 1) * sizeof(int)), patternTemp);
+
+ // An empty match is successful without consuming characters
+ Jump zeroLengthMatch = branch32(Equal, TrustedImm32(-1), patternIndex);
+ Jump tryNonZeroMatch = branch32(NotEqual, patternIndex, patternTemp);
+ zeroLengthMatch.link(this);
+ storeToFrame(TrustedImm32(1), parenthesesFrameLocation + BackTrackInfoBackReference::matchAmountIndex());
+ matches.append(jump());
+ tryNonZeroMatch.link(this);
+
+ // Check if we have input remaining to match
+ sub32(patternIndex, patternTemp);
+ if (m_checkedOffset - term->inputPosition)
+ sub32(Imm32((m_checkedOffset - term->inputPosition).unsafeGet()), patternTemp);
+ matches.append(checkNotEnoughInput(patternTemp));
+
+ storeToFrame(index, parenthesesFrameLocation + BackTrackInfoBackReference::beginIndex());
+
+ matchBackreference(opIndex, incompleteMatches, characterOrTemp, patternIndex, patternTemp);
+
+ matches.append(jump());
+
+ incompleteMatches.link(this);
+ loadFromFrame(parenthesesFrameLocation + BackTrackInfoBackReference::beginIndex(), index);
+
+ matches.link(this);
+ break;
+ }
+ }
+ }
+ void backtrackBackReference(size_t opIndex)
+ {
+ YarrOp& op = m_ops[opIndex];
+ PatternTerm* term = op.m_term;
+
+ unsigned subpatternId = term->backReferenceSubpatternId;
+
+ m_backtrackingState.link(this);
+ op.m_jumps.link(this);
+
+ JumpList failures;
+
+ unsigned parenthesesFrameLocation = term->frameLocation;
+ switch (term->quantityType) {
+ case QuantifierFixedCount:
+ loadFromFrame(parenthesesFrameLocation + BackTrackInfoBackReference::beginIndex(), index);
+ break;
+
+ case QuantifierGreedy: {
+ const RegisterID matchAmount = regT0;
+ const RegisterID patternStartIndex = regT1;
+ const RegisterID patternEndIndexOrLen = regT2;
+
+ loadFromFrame(parenthesesFrameLocation + BackTrackInfoBackReference::matchAmountIndex(), matchAmount);
+ failures.append(branchTest32(Zero, matchAmount));
+
+ load32(Address(output, (subpatternId << 1) * sizeof(int)), patternStartIndex);
+ load32(Address(output, ((subpatternId << 1) + 1) * sizeof(int)), patternEndIndexOrLen);
+ sub32(patternStartIndex, patternEndIndexOrLen);
+ sub32(patternEndIndexOrLen, index);
+
+ sub32(TrustedImm32(1), matchAmount);
+ storeToFrame(matchAmount, parenthesesFrameLocation + BackTrackInfoBackReference::matchAmountIndex());
+ jump(op.m_reentry);
+ break;
+ }
+
+ case QuantifierNonGreedy: {
+ const RegisterID matchAmount = regT0;
+
+ loadFromFrame(parenthesesFrameLocation + BackTrackInfoBackReference::matchAmountIndex(), matchAmount);
+ if (term->quantityMaxCount != quantifyInfinite)
+ failures.append(branch32(AboveOrEqual, Imm32(term->quantityMaxCount.unsafeGet()), matchAmount));
+ add32(TrustedImm32(1), matchAmount);
+ storeToFrame(matchAmount, parenthesesFrameLocation + BackTrackInfoBackReference::matchAmountIndex());
+ jump(op.m_reentry);
+ break;
+ }
+ }
+ failures.link(this);
+ m_backtrackingState.fallthrough();
+ }
+#endif
+
void generatePatternCharacterOnce(size_t opIndex)
{
YarrOp& op = m_ops[opIndex];
@@ -1854,13 +2092,19 @@
break;
case PatternTerm::TypeForwardReference:
+ m_failureReason = JITFailureReason::ForwardReference;
break;
case PatternTerm::TypeParenthesesSubpattern:
case PatternTerm::TypeParentheticalAssertion:
RELEASE_ASSERT_NOT_REACHED();
+
case PatternTerm::TypeBackReference:
+#if ENABLE(YARR_JIT_BACKREFERENCES)
+ generateBackReference(opIndex);
+#else
m_failureReason = JITFailureReason::BackReference;
+#endif
break;
case PatternTerm::TypeDotStarEnclosure:
generateDotStarEnclosure(opIndex);
@@ -1920,6 +2164,7 @@
break;
case PatternTerm::TypeForwardReference:
+ m_failureReason = JITFailureReason::ForwardReference;
break;
case PatternTerm::TypeParenthesesSubpattern:
@@ -1926,13 +2171,17 @@
case PatternTerm::TypeParentheticalAssertion:
RELEASE_ASSERT_NOT_REACHED();
+ case PatternTerm::TypeBackReference:
+#if ENABLE(YARR_JIT_BACKREFERENCES)
+ backtrackBackReference(opIndex);
+#else
+ m_failureReason = JITFailureReason::BackReference;
+#endif
+ break;
+
case PatternTerm::TypeDotStarEnclosure:
backtrackDotStarEnclosure(opIndex);
break;
-
- case PatternTerm::TypeBackReference:
- m_failureReason = JITFailureReason::BackReference;
- break;
}
}
@@ -3566,6 +3815,15 @@
}
#endif
+ if (m_pattern.m_containsBackreferences
+#if ENABLE(YARR_JIT_BACKREFERENCES)
+ && (compileMode == MatchOnly || (m_pattern.ignoreCase() && m_charSize != Char8))
+#endif
+ ) {
+ codeBlock.setFallBackWithFailureReason(JITFailureReason::BackReference);
+ return;
+ }
+
// We need to compile before generating code since we set flags based on compilation that
// are used during generation.
opCompileBody(m_pattern.m_body);
@@ -3713,6 +3971,11 @@
out.print("Assert EOL");
break;
+ case PatternTerm::TypeBackReference:
+ out.printf("BackReference pattern #%u", term->backReferenceSubpatternId);
+ term->dumpQuantifier(out);
+ break;
+
case PatternTerm::TypePatternCharacter:
out.print("TypePatternCharacter ");
dumpUChar32(out, term->patternCharacter);
@@ -3739,7 +4002,9 @@
break;
case PatternTerm::TypeForwardReference:
- case PatternTerm::TypeBackReference:
+ out.print("TypeForwardReference <not handled>");
+ break;
+
case PatternTerm::TypeParenthesesSubpattern:
case PatternTerm::TypeParentheticalAssertion:
RELEASE_ASSERT_NOT_REACHED();
@@ -3853,7 +4118,7 @@
return(0);
case OpParentheticalAssertionEnd:
- out.print("OpParentheticalAssertionEnd%s\n", term->invert() ? " inverted" : "");
+ out.printf("OpParentheticalAssertionEnd%s\n", term->invert() ? " inverted" : "");
return(0);
case OpMatchFailed:
@@ -3917,8 +4182,11 @@
dataLog("Can't JIT a pattern decoding surrogate pairs\n");
break;
case JITFailureReason::BackReference:
- dataLog("Can't JIT a pattern containing back references\n");
+ dataLog("Can't JIT some patterns containing back references\n");
break;
+ case JITFailureReason::ForwardReference:
+ dataLog("Can't JIT a pattern containing forward references\n");
+ break;
case JITFailureReason::VariableCountedParenthesisWithNonZeroMinimum:
dataLog("Can't JIT a pattern containing a variable counted parenthesis with a non-zero minimum\n");
break;
Modified: trunk/Source/_javascript_Core/yarr/YarrJIT.h (235635 => 235636)
--- trunk/Source/_javascript_Core/yarr/YarrJIT.h 2018-09-04 21:18:22 UTC (rev 235635)
+++ trunk/Source/_javascript_Core/yarr/YarrJIT.h 2018-09-04 21:18:58 UTC (rev 235636)
@@ -52,6 +52,7 @@
enum class JITFailureReason : uint8_t {
DecodeSurrogatePair,
BackReference,
+ ForwardReference,
VariableCountedParenthesisWithNonZeroMinimum,
ParenthesizedSubpattern,
FixedCountParenthesizedSubpattern,
Modified: trunk/Source/_javascript_Core/yarr/YarrPattern.h (235635 => 235636)
--- trunk/Source/_javascript_Core/yarr/YarrPattern.h 2018-09-04 21:18:22 UTC (rev 235635)
+++ trunk/Source/_javascript_Core/yarr/YarrPattern.h 2018-09-04 21:18:58 UTC (rev 235636)
@@ -555,8 +555,8 @@
uintptr_t begin; // Not really needed for greedy quantifiers.
uintptr_t matchAmount; // Not really needed for fixed quantifiers.
- unsigned beginIndex() { return offsetof(BackTrackInfoBackReference, begin) / sizeof(uintptr_t); }
- unsigned matchAmountIndex() { return offsetof(BackTrackInfoBackReference, matchAmount) / sizeof(uintptr_t); }
+ static unsigned beginIndex() { return offsetof(BackTrackInfoBackReference, begin) / sizeof(uintptr_t); }
+ static unsigned matchAmountIndex() { return offsetof(BackTrackInfoBackReference, matchAmount) / sizeof(uintptr_t); }
};
struct BackTrackInfoAlternative {
Modified: trunk/Source/WTF/ChangeLog (235635 => 235636)
--- trunk/Source/WTF/ChangeLog 2018-09-04 21:18:22 UTC (rev 235635)
+++ trunk/Source/WTF/ChangeLog 2018-09-04 21:18:58 UTC (rev 235636)
@@ -1,3 +1,15 @@
+2018-09-04 Michael Saboff <msab...@apple.com>
+
+ YARR: JIT RegExps with back references
+ https://bugs.webkit.org/show_bug.cgi?id=180874
+
+ Reviewed by Filip Pizlo.
+
+ Added ENABLE_YARR_JIT_BACKREFERENCES to enable RegExp JIT'ing of back references
+ for 64 bit platforms only.
+
+ * wtf/Platform.h:
+
2018-08-31 Antti Koivisto <an...@apple.com>
Replace OptionSet |= and -= operators with add() and remove() functions
Modified: trunk/Source/WTF/wtf/Platform.h (235635 => 235636)
--- trunk/Source/WTF/wtf/Platform.h 2018-09-04 21:18:22 UTC (rev 235635)
+++ trunk/Source/WTF/wtf/Platform.h 2018-09-04 21:18:58 UTC (rev 235636)
@@ -973,8 +973,9 @@
#if ENABLE(YARR_JIT)
#if CPU(ARM64) || (CPU(X86_64) && !OS(WINDOWS))
-/* Enable JIT'ing Regular Expressions that have nested parenthesis. */
+/* Enable JIT'ing Regular Expressions that have nested parenthesis and back references. */
#define ENABLE_YARR_JIT_ALL_PARENS_EXPRESSIONS 1
+#define ENABLE_YARR_JIT_BACKREFERENCES 1
#endif
#endif