Diff
Modified: trunk/LayoutTests/ChangeLog (90961 => 90962)
--- trunk/LayoutTests/ChangeLog 2011-07-13 23:00:52 UTC (rev 90961)
+++ trunk/LayoutTests/ChangeLog 2011-07-13 23:05:40 UTC (rev 90962)
@@ -1,3 +1,16 @@
+2011-07-13 Michael Saboff <msab...@apple.com>
+
+ https://bugs.webkit.org/show_bug.cgi?id=64202
+ Enh: Improve handling of RegExp in the form of /.*blah.*/
+
+ Reviewed by Gavin Barraclough.
+
+ New tests to support /.*<_expression_>.*/ enhancement.
+
+ * fast/regex/dotstar-expected.txt: Added.
+ * fast/regex/dotstar.html: Added.
+ * fast/regex/script-tests/dotstar.js: Added.
+
2011-07-13 Vincent Scheib <sch...@chromium.org>
[chromium] Update chromium test expectations.
Added: trunk/LayoutTests/fast/regex/dotstar-expected.txt (0 => 90962)
--- trunk/LayoutTests/fast/regex/dotstar-expected.txt (rev 0)
+++ trunk/LayoutTests/fast/regex/dotstar-expected.txt 2011-07-13 23:05:40 UTC (rev 90962)
@@ -0,0 +1,118 @@
+This page tests handling of parentheses subexpressions.
+
+On success, you will see a series of "PASS" messages, followed by "TEST COMPLETE".
+
+
+PASS regexp1.exec('test') is null
+PASS regexp1.exec('blah') is ['blah']
+PASS regexp1.exec('1blah') is ['1blah']
+PASS regexp1.exec('blah1') is ['blah1']
+PASS regexp1.exec('blah blah blah') is ['blah blah blah']
+PASS regexp1.exec('blah\nsecond') is ['blah']
+PASS regexp1.exec('first\nblah') is ['blah']
+PASS regexp1.exec('first\nblah\nthird') is ['blah']
+PASS regexp1.exec('first\nblah2\nblah3') is ['blah2']
+PASS regexp2.exec('test') is null
+PASS regexp2.exec('blah') is ['blah']
+PASS regexp2.exec('1blah') is ['1blah']
+PASS regexp2.exec('blah1') is ['blah1']
+PASS regexp2.exec('blah blah blah') is ['blah blah blah']
+PASS regexp2.exec('blah\nsecond') is ['blah']
+PASS regexp2.exec('first\nblah') is null
+PASS regexp2.exec('first\nblah\nthird') is null
+PASS regexp2.exec('first\nblah2\nblah3') is null
+PASS regexp3.exec('test') is null
+PASS regexp3.exec('blah') is ['blah']
+PASS regexp3.exec('1blah') is ['1blah']
+PASS regexp3.exec('blah1') is ['blah1']
+PASS regexp3.exec('blah blah blah') is ['blah blah blah']
+PASS regexp3.exec('blah\nsecond') is null
+PASS regexp3.exec('first\nblah') is ['blah']
+PASS regexp3.exec('first\nblah\nthird') is null
+PASS regexp3.exec('first\nblah2\nblah3') is ['blah3']
+PASS regexp4.exec('test') is null
+PASS regexp4.exec('blah') is ['blah']
+PASS regexp4.exec('1blah') is ['1blah']
+PASS regexp4.exec('blah1') is ['blah1']
+PASS regexp4.exec('blah blah blah') is ['blah blah blah']
+PASS regexp4.exec('blah\nsecond') is null
+PASS regexp4.exec('first\nblah') is null
+PASS regexp4.exec('first\nblah\nthird') is null
+PASS regexp4.exec('first\nblah2\nblah3') is null
+PASS regexp5.exec('test') is null
+PASS regexp5.exec('blah') is ['blah']
+PASS regexp5.exec('1blah') is ['1blah']
+PASS regexp5.exec('blah1') is ['blah1']
+PASS regexp5.exec('blah blah blah') is ['blah blah blah']
+PASS regexp5.exec('blah\nsecond') is ['blah']
+PASS regexp5.exec('first\nblah') is ['blah']
+PASS regexp5.exec('first\nblah\nthird') is ['blah']
+PASS regexp5.exec('first\nblah2\nblah3') is ['blah2']
+PASS regexp6.exec('test') is null
+PASS regexp6.exec('blah') is ['blah']
+PASS regexp6.exec('1blah') is ['1blah']
+PASS regexp6.exec('blah1') is ['blah']
+PASS regexp6.exec('blah blah blah') is ['blah blah blah']
+PASS regexp6.exec('blah\nsecond') is ['blah']
+PASS regexp6.exec('first\nblah') is ['blah']
+PASS regexp6.exec('first\nblah\nthird') is ['blah']
+PASS regexp6.exec('first\nblah2\nblah3') is ['blah']
+PASS regexp7.exec('test') is null
+PASS regexp7.exec('blah') is ['blah']
+PASS regexp7.exec('1blah') is ['1blah']
+PASS regexp7.exec('blah1') is ['blah1']
+PASS regexp7.exec('blah blah blah') is ['blah blah blah']
+PASS regexp7.exec('blah\nsecond') is null
+PASS regexp7.exec('first\nblah') is null
+PASS regexp7.exec('first\nblah\nthird') is null
+PASS regexp7.exec('first\nblah2\nblah3') is null
+PASS regexp8.exec('test') is null
+PASS regexp8.exec('blah') is ['blah','']
+PASS regexp8.exec('1blah') is ['1blah','1']
+PASS regexp8.exec('blah1') is ['blah1','']
+PASS regexp8.exec('blah blah blah') is ['blah blah blah','blah blah ']
+PASS regexp8.exec('blah\nsecond') is null
+PASS regexp8.exec('first\nblah') is null
+PASS regexp8.exec('first\nblah\nthird') is null
+PASS regexp8.exec('first\nblah2\nblah3') is null
+PASS regexp9.exec('test') is null
+PASS regexp9.exec('blah') is ['blah']
+PASS regexp9.exec('1blah') is ['1blah']
+PASS regexp9.exec('blah1') is ['blah1']
+PASS regexp9.exec('blah blah blah') is ['blah blah blah']
+PASS regexp9.exec('blah\nsecond') is ['blah']
+PASS regexp9.exec('first\nblah') is ['blah']
+PASS regexp9.exec('first\nblah\nthird') is ['blah']
+PASS regexp9.exec('first\nblah2\nblah3') is ['blah2']
+PASS regexp10.exec('test') is null
+PASS regexp10.exec('blah') is ['blah']
+PASS regexp10.exec('1blah') is ['1blah']
+PASS regexp10.exec('blah1') is ['blah1']
+PASS regexp10.exec('blah blah blah') is ['blah blah blah']
+PASS regexp10.exec('blah\nsecond') is ['blah']
+PASS regexp10.exec('first\nblah') is ['blah']
+PASS regexp10.exec('first\nblah\nthird') is ['blah']
+PASS regexp10.exec('first\nblah2\nblah3') is ['blah2']
+PASS regexp11.exec('test') is null
+PASS regexp11.exec('blah') is ['blah']
+PASS regexp11.exec('1blah') is ['1blah']
+PASS regexp11.exec('blah1') is ['blah1']
+PASS regexp11.exec('blah blah blah') is ['blah blah blah']
+PASS regexp11.exec('blah\nsecond') is null
+PASS regexp11.exec('first\nblah') is ['blah']
+PASS regexp11.exec('first\nblah\nthird') is null
+PASS regexp11.exec('first\nblah2\nblah3') is ['blah3']
+PASS regexp12.exec('test') is null
+PASS regexp12.exec('blah') is ['blah']
+PASS regexp12.exec('1blah') is ['1blah']
+PASS regexp12.exec('blah1') is ['blah1']
+PASS regexp12.exec('blah blah blah') is ['blah blah blah']
+PASS regexp12.exec('blah\nsecond') is null
+PASS regexp12.exec('first\nblah') is ['blah']
+PASS regexp12.exec('first\nblah\nthird') is null
+PASS regexp12.exec('first\nblah2\nblah3') is ['blah3']
+PASS regexp13.exec('abc\n123') is ['abc\n123']
+PASS successfullyParsed is true
+
+TEST COMPLETE
+
Added: trunk/LayoutTests/fast/regex/dotstar.html (0 => 90962)
--- trunk/LayoutTests/fast/regex/dotstar.html (rev 0)
+++ trunk/LayoutTests/fast/regex/dotstar.html 2011-07-13 23:05:40 UTC (rev 90962)
@@ -0,0 +1,13 @@
+<!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML//EN">
+<html>
+<head>
+<link rel="stylesheet" href=""
+<script src=""
+</head>
+<body>
+<p id="description"></p>
+<div id="console"></div>
+<script src=""
+<script src=""
+</body>
+</html>
Added: trunk/LayoutTests/fast/regex/script-tests/dotstar.js (0 => 90962)
--- trunk/LayoutTests/fast/regex/script-tests/dotstar.js (rev 0)
+++ trunk/LayoutTests/fast/regex/script-tests/dotstar.js 2011-07-13 23:05:40 UTC (rev 90962)
@@ -0,0 +1,139 @@
+description("This page tests handling of parentheses subexpressions.");
+
+var regexp1 = /.*blah.*/;
+shouldBeNull("regexp1.exec('test')");
+shouldBe("regexp1.exec('blah')", "['blah']");
+shouldBe("regexp1.exec('1blah')", "['1blah']");
+shouldBe("regexp1.exec('blah1')", "['blah1']");
+shouldBe("regexp1.exec('blah blah blah')", "['blah blah blah']");
+shouldBe("regexp1.exec('blah\\nsecond')", "['blah']");
+shouldBe("regexp1.exec('first\\nblah')", "['blah']");
+shouldBe("regexp1.exec('first\\nblah\\nthird')", "['blah']");
+shouldBe("regexp1.exec('first\\nblah2\\nblah3')", "['blah2']");
+
+var regexp2 = /^.*blah.*/;
+shouldBeNull("regexp2.exec('test')");
+shouldBe("regexp2.exec('blah')", "['blah']");
+shouldBe("regexp2.exec('1blah')", "['1blah']");
+shouldBe("regexp2.exec('blah1')", "['blah1']");
+shouldBe("regexp2.exec('blah blah blah')", "['blah blah blah']");
+shouldBe("regexp2.exec('blah\\nsecond')", "['blah']");
+shouldBeNull("regexp2.exec('first\\nblah')");
+shouldBeNull("regexp2.exec('first\\nblah\\nthird')");
+shouldBeNull("regexp2.exec('first\\nblah2\\nblah3')");
+
+var regexp3 = /.*blah.*$/;
+shouldBeNull("regexp3.exec('test')");
+shouldBe("regexp3.exec('blah')", "['blah']");
+shouldBe("regexp3.exec('1blah')", "['1blah']");
+shouldBe("regexp3.exec('blah1')", "['blah1']");
+shouldBe("regexp3.exec('blah blah blah')", "['blah blah blah']");
+shouldBeNull("regexp3.exec('blah\\nsecond')");
+shouldBe("regexp3.exec('first\\nblah')", "['blah']");
+shouldBeNull("regexp3.exec('first\\nblah\\nthird')");
+shouldBe("regexp3.exec('first\\nblah2\\nblah3')", "['blah3']");
+
+var regexp4 = /^.*blah.*$/;
+shouldBeNull("regexp4.exec('test')");
+shouldBe("regexp4.exec('blah')", "['blah']");
+shouldBe("regexp4.exec('1blah')", "['1blah']");
+shouldBe("regexp4.exec('blah1')", "['blah1']");
+shouldBe("regexp4.exec('blah blah blah')", "['blah blah blah']");
+shouldBeNull("regexp4.exec('blah\\nsecond')");
+shouldBeNull("regexp4.exec('first\\nblah')");
+shouldBeNull("regexp4.exec('first\\nblah\\nthird')");
+shouldBeNull("regexp4.exec('first\\nblah2\\nblah3')");
+
+var regexp5 = /.*?blah.*/;
+shouldBeNull("regexp5.exec('test')");
+shouldBe("regexp5.exec('blah')", "['blah']");
+shouldBe("regexp5.exec('1blah')", "['1blah']");
+shouldBe("regexp5.exec('blah1')", "['blah1']");
+shouldBe("regexp5.exec('blah blah blah')", "['blah blah blah']");
+shouldBe("regexp5.exec('blah\\nsecond')", "['blah']");
+shouldBe("regexp5.exec('first\\nblah')", "['blah']");
+shouldBe("regexp5.exec('first\\nblah\\nthird')", "['blah']");
+shouldBe("regexp5.exec('first\\nblah2\\nblah3')", "['blah2']");
+
+var regexp6 = /.*blah.*?/;
+shouldBeNull("regexp6.exec('test')");
+shouldBe("regexp6.exec('blah')", "['blah']");
+shouldBe("regexp6.exec('1blah')", "['1blah']");
+shouldBe("regexp6.exec('blah1')", "['blah']");
+shouldBe("regexp6.exec('blah blah blah')", "['blah blah blah']");
+shouldBe("regexp6.exec('blah\\nsecond')", "['blah']");
+shouldBe("regexp6.exec('first\\nblah')", "['blah']");
+shouldBe("regexp6.exec('first\\nblah\\nthird')", "['blah']");
+shouldBe("regexp6.exec('first\\nblah2\\nblah3')", "['blah']");
+
+var regexp7 = /^.*?blah.*?$/;
+shouldBeNull("regexp7.exec('test')");
+shouldBe("regexp7.exec('blah')", "['blah']");
+shouldBe("regexp7.exec('1blah')", "['1blah']");
+shouldBe("regexp7.exec('blah1')", "['blah1']");
+shouldBe("regexp7.exec('blah blah blah')", "['blah blah blah']");
+shouldBeNull("regexp7.exec('blah\\nsecond')");
+shouldBeNull("regexp7.exec('first\\nblah')");
+shouldBeNull("regexp7.exec('first\\nblah\\nthird')");
+shouldBeNull("regexp7.exec('first\\nblah2\\nblah3')");
+
+var regexp8 = /^(.*)blah.*$/;
+shouldBeNull("regexp8.exec('test')");
+shouldBe("regexp8.exec('blah')", "['blah','']");
+shouldBe("regexp8.exec('1blah')", "['1blah','1']");
+shouldBe("regexp8.exec('blah1')", "['blah1','']");
+shouldBe("regexp8.exec('blah blah blah')", "['blah blah blah','blah blah ']");
+shouldBeNull("regexp8.exec('blah\\nsecond')");
+shouldBeNull("regexp8.exec('first\\nblah')");
+shouldBeNull("regexp8.exec('first\\nblah\\nthird')");
+shouldBeNull("regexp8.exec('first\\nblah2\\nblah3')");
+
+var regexp9 = /.*blah.*/m;
+shouldBeNull("regexp9.exec('test')");
+shouldBe("regexp9.exec('blah')", "['blah']");
+shouldBe("regexp9.exec('1blah')", "['1blah']");
+shouldBe("regexp9.exec('blah1')", "['blah1']");
+shouldBe("regexp9.exec('blah blah blah')", "['blah blah blah']");
+shouldBe("regexp9.exec('blah\\nsecond')", "['blah']");
+shouldBe("regexp9.exec('first\\nblah')", "['blah']");
+shouldBe("regexp9.exec('first\\nblah\\nthird')", "['blah']");
+shouldBe("regexp9.exec('first\\nblah2\\nblah3')", "['blah2']");
+
+var regexp10 = /^.*blah.*/m;
+shouldBeNull("regexp10.exec('test')");
+shouldBe("regexp10.exec('blah')", "['blah']");
+shouldBe("regexp10.exec('1blah')", "['1blah']");
+shouldBe("regexp10.exec('blah1')", "['blah1']");
+shouldBe("regexp10.exec('blah blah blah')", "['blah blah blah']");
+shouldBe("regexp10.exec('blah\\nsecond')", "['blah']");
+shouldBe("regexp10.exec('first\\nblah')", "['blah']");
+shouldBe("regexp10.exec('first\\nblah\\nthird')", "['blah']");
+shouldBe("regexp10.exec('first\\nblah2\\nblah3')", "['blah2']");
+
+var regexp11 = /.*(?:blah).*$/;
+shouldBeNull("regexp11.exec('test')");
+shouldBe("regexp11.exec('blah')", "['blah']");
+shouldBe("regexp11.exec('1blah')", "['1blah']");
+shouldBe("regexp11.exec('blah1')", "['blah1']");
+shouldBe("regexp11.exec('blah blah blah')", "['blah blah blah']");
+shouldBeNull("regexp11.exec('blah\\nsecond')");
+shouldBe("regexp11.exec('first\\nblah')", "['blah']");
+shouldBeNull("regexp11.exec('first\\nblah\\nthird')");
+shouldBe("regexp11.exec('first\\nblah2\\nblah3')", "['blah3']");
+
+var regexp12 = /.*(?:blah|buzz|bang).*$/;
+shouldBeNull("regexp12.exec('test')");
+shouldBe("regexp12.exec('blah')", "['blah']");
+shouldBe("regexp12.exec('1blah')", "['1blah']");
+shouldBe("regexp12.exec('blah1')", "['blah1']");
+shouldBe("regexp12.exec('blah blah blah')", "['blah blah blah']");
+shouldBeNull("regexp12.exec('blah\\nsecond')");
+shouldBe("regexp12.exec('first\\nblah')", "['blah']");
+shouldBeNull("regexp12.exec('first\\nblah\\nthird')");
+shouldBe("regexp12.exec('first\\nblah2\\nblah3')", "['blah3']");
+
+var regexp13 = /.*\n\d+.*/;
+shouldBe("regexp13.exec('abc\\n123')", "['abc\\n123']");
+
+var successfullyParsed = true;
+
Modified: trunk/Source/_javascript_Core/ChangeLog (90961 => 90962)
--- trunk/Source/_javascript_Core/ChangeLog 2011-07-13 23:00:52 UTC (rev 90961)
+++ trunk/Source/_javascript_Core/ChangeLog 2011-07-13 23:05:40 UTC (rev 90962)
@@ -1,3 +1,48 @@
+2011-07-13 Michael Saboff <msab...@apple.com>
+
+ https://bugs.webkit.org/show_bug.cgi?id=64202
+ Enh: Improve handling of RegExp in the form of /.*blah.*/
+
+ Reviewed by Gavin Barraclough.
+
+ Added code to both the Yarr interpreter and JIT to handle
+ these expressions a little differently. First off, the terms
+ in between the leading and trailing .*'s cannot capture and
+ also this enhancement is limited to single alternative expressions.
+ If an _expression_ is of the right form with the aforementioned
+ restrictions, we process the inner terms and then look for the
+ beginning of the string and end of the string. There is handling
+ for multiline expressions to allow the beginning and end to be
+ right after and right before newlines.
+
+ This enhancement speeds up expressions of this type 12x on
+ a MacBookPro.
+
+ Cleaned up 'case' statement indentation.
+
+ A new set of tests was added as LayoutTests/fast/regex/dotstar.html
+
+ * yarr/YarrInterpreter.cpp:
+ (JSC::Yarr::Interpreter::InputStream::end):
+ (JSC::Yarr::Interpreter::matchDotStarEnclosure):
+ (JSC::Yarr::Interpreter::matchDisjunction):
+ (JSC::Yarr::ByteCompiler::assertionDotStarEnclosure):
+ (JSC::Yarr::ByteCompiler::emitDisjunction):
+ * yarr/YarrInterpreter.h:
+ (JSC::Yarr::ByteTerm::DotStarEnclosure):
+ * yarr/YarrJIT.cpp:
+ (JSC::Yarr::YarrGenerator::generateDotStarEnclosure):
+ (JSC::Yarr::YarrGenerator::backtrackDotStarEnclosure):
+ (JSC::Yarr::YarrGenerator::generateTerm):
+ (JSC::Yarr::YarrGenerator::backtrackTerm):
+ * yarr/YarrPattern.cpp:
+ (JSC::Yarr::YarrPatternConstructor::setupAlternativeOffsets):
+ (JSC::Yarr::YarrPatternConstructor::containsCapturingTerms):
+ (JSC::Yarr::YarrPatternConstructor::optimizeDotStarWrappedExpressions):
+ (JSC::Yarr::YarrPattern::compile):
+ * yarr/YarrPattern.h:
+ (JSC::Yarr::PatternTerm::PatternTerm):
+
2011-07-13 Xan Lopez <xlo...@igalia.com>
[GTK] Fix distcheck
Modified: trunk/Source/_javascript_Core/yarr/YarrInterpreter.cpp (90961 => 90962)
--- trunk/Source/_javascript_Core/yarr/YarrInterpreter.cpp 2011-07-13 23:00:52 UTC (rev 90961)
+++ trunk/Source/_javascript_Core/yarr/YarrInterpreter.cpp 2011-07-13 23:05:40 UTC (rev 90962)
@@ -243,6 +243,11 @@
return pos == length;
}
+ unsigned end()
+ {
+ return length;
+ }
+
bool checkInput(int count)
{
if ((pos + count) <= length) {
@@ -1044,6 +1049,38 @@
return JSRegExpErrorNoMatch;
}
+ bool matchDotStarEnclosure(ByteTerm& term, DisjunctionContext* context)
+ {
+ UNUSED_PARAM(term);
+ unsigned matchBegin = context->matchBegin;
+
+ if (matchBegin) {
+ for (matchBegin--; true; matchBegin--) {
+ if (testCharacterClass(pattern->newlineCharacterClass, input.reread(matchBegin))) {
+ ++matchBegin;
+ break;
+ }
+
+ if (!matchBegin)
+ break;
+ }
+ }
+
+ unsigned matchEnd = input.getPos();
+
+ for (; (matchEnd != input.end())
+ && (!testCharacterClass(pattern->newlineCharacterClass, input.reread(matchEnd))); matchEnd++) { }
+
+ if (((matchBegin && term.anchors.m_bol)
+ || ((matchEnd != input.end()) && term.anchors.m_eol))
+ && !pattern->m_multiline)
+ return false;
+
+ context->matchBegin = matchBegin;
+ context->matchEnd = matchEnd;
+ return true;
+ }
+
#define MATCH_NEXT() { ++context->term; goto matchAgain; }
#define BACKTRACK() { --context->term; goto backtrack; }
#define currentTerm() (disjunction->terms[context->term])
@@ -1203,9 +1240,14 @@
MATCH_NEXT();
BACKTRACK();
- case ByteTerm::TypeUncheckInput:
- input.uncheckInput(currentTerm().checkInputCount);
- MATCH_NEXT();
+ case ByteTerm::TypeUncheckInput:
+ input.uncheckInput(currentTerm().checkInputCount);
+ MATCH_NEXT();
+
+ case ByteTerm::TypeDotStarEnclosure:
+ if (matchDotStarEnclosure(currentTerm(), context))
+ return JSRegExpMatch;
+ BACKTRACK();
}
// We should never fall-through to here.
@@ -1242,91 +1284,94 @@
case ByteTerm::TypeBodyAlternativeEnd:
ASSERT_NOT_REACHED();
- case ByteTerm::TypeAlternativeBegin:
- case ByteTerm::TypeAlternativeDisjunction: {
- int offset = currentTerm().alternative.next;
- context->term += offset;
- if (offset > 0)
- MATCH_NEXT();
- BACKTRACK();
- }
- case ByteTerm::TypeAlternativeEnd: {
- // We should never backtrack back into an alternative of the main body of the regex.
- BackTrackInfoAlternative* backTrack = reinterpret_cast<BackTrackInfoAlternative*>(context->frame + currentTerm().frameLocation);
- unsigned offset = backTrack->offset;
- context->term -= offset;
- BACKTRACK();
- }
+ case ByteTerm::TypeAlternativeBegin:
+ case ByteTerm::TypeAlternativeDisjunction: {
+ int offset = currentTerm().alternative.next;
+ context->term += offset;
+ if (offset > 0)
+ MATCH_NEXT();
+ BACKTRACK();
+ }
+ case ByteTerm::TypeAlternativeEnd: {
+ // We should never backtrack back into an alternative of the main body of the regex.
+ BackTrackInfoAlternative* backTrack = reinterpret_cast<BackTrackInfoAlternative*>(context->frame + currentTerm().frameLocation);
+ unsigned offset = backTrack->offset;
+ context->term -= offset;
+ BACKTRACK();
+ }
- case ByteTerm::TypeAssertionBOL:
- case ByteTerm::TypeAssertionEOL:
- case ByteTerm::TypeAssertionWordBoundary:
- BACKTRACK();
+ case ByteTerm::TypeAssertionBOL:
+ case ByteTerm::TypeAssertionEOL:
+ case ByteTerm::TypeAssertionWordBoundary:
+ BACKTRACK();
- case ByteTerm::TypePatternCharacterOnce:
- case ByteTerm::TypePatternCharacterFixed:
- case ByteTerm::TypePatternCharacterGreedy:
- case ByteTerm::TypePatternCharacterNonGreedy:
- if (backtrackPatternCharacter(currentTerm(), context))
- MATCH_NEXT();
- BACKTRACK();
- case ByteTerm::TypePatternCasedCharacterOnce:
- case ByteTerm::TypePatternCasedCharacterFixed:
- case ByteTerm::TypePatternCasedCharacterGreedy:
- case ByteTerm::TypePatternCasedCharacterNonGreedy:
- if (backtrackPatternCasedCharacter(currentTerm(), context))
- MATCH_NEXT();
- BACKTRACK();
- case ByteTerm::TypeCharacterClass:
- if (backtrackCharacterClass(currentTerm(), context))
- MATCH_NEXT();
- BACKTRACK();
- case ByteTerm::TypeBackReference:
- if (backtrackBackReference(currentTerm(), context))
- MATCH_NEXT();
- BACKTRACK();
- case ByteTerm::TypeParenthesesSubpattern: {
- JSRegExpResult result = backtrackParentheses(currentTerm(), context);
+ case ByteTerm::TypePatternCharacterOnce:
+ case ByteTerm::TypePatternCharacterFixed:
+ case ByteTerm::TypePatternCharacterGreedy:
+ case ByteTerm::TypePatternCharacterNonGreedy:
+ if (backtrackPatternCharacter(currentTerm(), context))
+ MATCH_NEXT();
+ BACKTRACK();
+ case ByteTerm::TypePatternCasedCharacterOnce:
+ case ByteTerm::TypePatternCasedCharacterFixed:
+ case ByteTerm::TypePatternCasedCharacterGreedy:
+ case ByteTerm::TypePatternCasedCharacterNonGreedy:
+ if (backtrackPatternCasedCharacter(currentTerm(), context))
+ MATCH_NEXT();
+ BACKTRACK();
+ case ByteTerm::TypeCharacterClass:
+ if (backtrackCharacterClass(currentTerm(), context))
+ MATCH_NEXT();
+ BACKTRACK();
+ case ByteTerm::TypeBackReference:
+ if (backtrackBackReference(currentTerm(), context))
+ MATCH_NEXT();
+ BACKTRACK();
+ case ByteTerm::TypeParenthesesSubpattern: {
+ JSRegExpResult result = backtrackParentheses(currentTerm(), context);
- if (result == JSRegExpMatch) {
- MATCH_NEXT();
- } else if (result != JSRegExpNoMatch)
- return result;
+ if (result == JSRegExpMatch) {
+ MATCH_NEXT();
+ } else if (result != JSRegExpNoMatch)
+ return result;
- BACKTRACK();
- }
- case ByteTerm::TypeParenthesesSubpatternOnceBegin:
- if (backtrackParenthesesOnceBegin(currentTerm(), context))
- MATCH_NEXT();
- BACKTRACK();
- case ByteTerm::TypeParenthesesSubpatternOnceEnd:
- if (backtrackParenthesesOnceEnd(currentTerm(), context))
- MATCH_NEXT();
- BACKTRACK();
- case ByteTerm::TypeParenthesesSubpatternTerminalBegin:
- if (backtrackParenthesesTerminalBegin(currentTerm(), context))
- MATCH_NEXT();
- BACKTRACK();
- case ByteTerm::TypeParenthesesSubpatternTerminalEnd:
- if (backtrackParenthesesTerminalEnd(currentTerm(), context))
- MATCH_NEXT();
- BACKTRACK();
- case ByteTerm::TypeParentheticalAssertionBegin:
- if (backtrackParentheticalAssertionBegin(currentTerm(), context))
- MATCH_NEXT();
- BACKTRACK();
- case ByteTerm::TypeParentheticalAssertionEnd:
- if (backtrackParentheticalAssertionEnd(currentTerm(), context))
- MATCH_NEXT();
- BACKTRACK();
+ BACKTRACK();
+ }
+ case ByteTerm::TypeParenthesesSubpatternOnceBegin:
+ if (backtrackParenthesesOnceBegin(currentTerm(), context))
+ MATCH_NEXT();
+ BACKTRACK();
+ case ByteTerm::TypeParenthesesSubpatternOnceEnd:
+ if (backtrackParenthesesOnceEnd(currentTerm(), context))
+ MATCH_NEXT();
+ BACKTRACK();
+ case ByteTerm::TypeParenthesesSubpatternTerminalBegin:
+ if (backtrackParenthesesTerminalBegin(currentTerm(), context))
+ MATCH_NEXT();
+ BACKTRACK();
+ case ByteTerm::TypeParenthesesSubpatternTerminalEnd:
+ if (backtrackParenthesesTerminalEnd(currentTerm(), context))
+ MATCH_NEXT();
+ BACKTRACK();
+ case ByteTerm::TypeParentheticalAssertionBegin:
+ if (backtrackParentheticalAssertionBegin(currentTerm(), context))
+ MATCH_NEXT();
+ BACKTRACK();
+ case ByteTerm::TypeParentheticalAssertionEnd:
+ if (backtrackParentheticalAssertionEnd(currentTerm(), context))
+ MATCH_NEXT();
+ BACKTRACK();
- case ByteTerm::TypeCheckInput:
- input.uncheckInput(currentTerm().checkInputCount);
- BACKTRACK();
+ case ByteTerm::TypeCheckInput:
+ input.uncheckInput(currentTerm().checkInputCount);
+ BACKTRACK();
- case ByteTerm::TypeUncheckInput:
- input.checkInput(currentTerm().checkInputCount);
- BACKTRACK();
+ case ByteTerm::TypeUncheckInput:
+ input.checkInput(currentTerm().checkInputCount);
+ BACKTRACK();
+
+ case ByteTerm::TypeDotStarEnclosure:
+ ASSERT_NOT_REACHED();
}
ASSERT_NOT_REACHED();
@@ -1559,6 +1604,11 @@
m_bodyDisjunction->terms[endTerm].atom.quantityType = quantityType;
}
+ void assertionDotStarEnclosure(bool bolAnchored, bool eolAnchored)
+ {
+ m_bodyDisjunction->terms.append(ByteTerm::DotStarEnclosure(bolAnchored, eolAnchored));
+ }
+
unsigned popParenthesesStack()
{
ASSERT(m_parenthesesStack.size());
@@ -1838,6 +1888,10 @@
}
break;
}
+
+ case PatternTerm::TypeDotStarEnclosure:
+ assertionDotStarEnclosure(term.anchors.bolAnchor, term.anchors.eolAnchor);
+ break;
}
}
}
Modified: trunk/Source/_javascript_Core/yarr/YarrInterpreter.h (90961 => 90962)
--- trunk/Source/_javascript_Core/yarr/YarrInterpreter.h 2011-07-13 23:00:52 UTC (rev 90961)
+++ trunk/Source/_javascript_Core/yarr/YarrInterpreter.h 2011-07-13 23:05:40 UTC (rev 90962)
@@ -71,6 +71,7 @@
TypeParentheticalAssertionEnd,
TypeCheckInput,
TypeUncheckInput,
+ TypeDotStarEnclosure,
} type;
union {
struct {
@@ -95,6 +96,10 @@
int end;
bool onceThrough;
} alternative;
+ struct {
+ bool m_bol : 1;
+ bool m_eol : 1;
+ } anchors;
unsigned checkInputCount;
};
unsigned frameLocation;
@@ -295,6 +300,14 @@
{
return ByteTerm(TypeSubpatternEnd);
}
+
+ static ByteTerm DotStarEnclosure(bool bolAnchor, bool eolAnchor)
+ {
+ ByteTerm term(TypeDotStarEnclosure);
+ term.anchors.m_bol = bolAnchor;
+ term.anchors.m_eol = eolAnchor;
+ return term;
+ }
bool invert()
{
Modified: trunk/Source/_javascript_Core/yarr/YarrJIT.cpp (90961 => 90962)
--- trunk/Source/_javascript_Core/yarr/YarrJIT.cpp 2011-07-13 23:00:52 UTC (rev 90961)
+++ trunk/Source/_javascript_Core/yarr/YarrJIT.cpp 2011-07-13 23:05:40 UTC (rev 90962)
@@ -995,6 +995,63 @@
m_backtrackingState.fallthrough();
}
+ void generateDotStarEnclosure(size_t opIndex)
+ {
+ YarrOp& op = m_ops[opIndex];
+ PatternTerm* term = op.m_term;
+
+ const RegisterID character = regT0;
+ const RegisterID matchPos = regT1;
+
+ JumpList foundBeginningNewLine;
+ JumpList saveStartIndex;
+ JumpList foundEndingNewLine;
+
+ if (m_pattern.m_body->m_hasFixedSize) {
+ move(index, matchPos);
+ sub32(Imm32(m_checked), matchPos);
+ } else
+ load32(Address(output), matchPos);
+
+ saveStartIndex.append(branchTest32(Zero, matchPos));
+ Label findBOLLoop(this);
+ sub32(TrustedImm32(1), matchPos);
+ load16(BaseIndex(input, matchPos, TimesTwo, 0), character);
+ matchCharacterClass(character, foundBeginningNewLine, m_pattern.newlineCharacterClass());
+ branchTest32(NonZero, matchPos).linkTo(findBOLLoop, this);
+ saveStartIndex.append(jump());
+
+ foundBeginningNewLine.link(this);
+ add32(TrustedImm32(1), matchPos); // Advance past newline
+ saveStartIndex.link(this);
+
+ if (!m_pattern.m_multiline && term->anchors.bolAnchor)
+ op.m_jumps.append(branchTest32(NonZero, matchPos));
+
+ store32(matchPos, Address(output));
+
+ move(index, matchPos);
+
+ Label findEOLLoop(this);
+ foundEndingNewLine.append(branch32(Equal, matchPos, length));
+ load16(BaseIndex(input, matchPos, TimesTwo, 0), character);
+ matchCharacterClass(character, foundEndingNewLine, m_pattern.newlineCharacterClass());
+ add32(TrustedImm32(1), matchPos);
+ jump(findEOLLoop);
+
+ foundEndingNewLine.link(this);
+
+ if (!m_pattern.m_multiline && term->anchors.eolAnchor)
+ op.m_jumps.append(branch32(NotEqual, matchPos, length));
+
+ move(matchPos, index);
+ }
+
+ void backtrackDotStarEnclosure(size_t opIndex)
+ {
+ backtrackTermDefault(opIndex);
+ }
+
// Code generation/backtracking for simple terms
// (pattern characters, character classes, and assertions).
// These methods farm out work to the set of functions above.
@@ -1059,6 +1116,9 @@
case PatternTerm::TypeBackReference:
m_shouldFallBack = true;
break;
+ case PatternTerm::TypeDotStarEnclosure:
+ generateDotStarEnclosure(opIndex);
+ break;
}
}
void backtrackTerm(size_t opIndex)
@@ -1119,6 +1179,11 @@
case PatternTerm::TypeParenthesesSubpattern:
case PatternTerm::TypeParentheticalAssertion:
ASSERT_NOT_REACHED();
+
+ case PatternTerm::TypeDotStarEnclosure:
+ backtrackDotStarEnclosure(opIndex);
+ break;
+
case PatternTerm::TypeBackReference:
m_shouldFallBack = true;
break;
Modified: trunk/Source/_javascript_Core/yarr/YarrPattern.cpp (90961 => 90962)
--- trunk/Source/_javascript_Core/yarr/YarrPattern.cpp 2011-07-13 23:00:52 UTC (rev 90961)
+++ trunk/Source/_javascript_Core/yarr/YarrPattern.cpp 2011-07-13 23:05:40 UTC (rev 90962)
@@ -582,6 +582,11 @@
term.frameLocation = currentCallFrameSize;
currentCallFrameSize = setupDisjunctionOffsets(term.parentheses.disjunction, currentCallFrameSize + YarrStackSpaceForBackTrackInfoParentheticalAssertion, currentInputPosition);
break;
+
+ case PatternTerm::TypeDotStarEnclosure:
+ alternative->m_hasFixedSize = false;
+ term.inputPosition = initialInputPosition;
+ break;
}
}
@@ -675,6 +680,87 @@
}
}
+ bool containsCapturingTerms(PatternAlternative* alternative, size_t firstTermIndex, size_t lastTermIndex)
+ {
+ Vector<PatternTerm>& terms = alternative->m_terms;
+
+ for (size_t termIndex = firstTermIndex; termIndex <= lastTermIndex; ++termIndex) {
+ PatternTerm& term = terms[termIndex];
+
+ if (term.m_capture)
+ return true;
+
+ if (term.type == PatternTerm::TypeParenthesesSubpattern) {
+ PatternDisjunction* nestedDisjunction = term.parentheses.disjunction;
+ for (unsigned alt = 0; alt < nestedDisjunction->m_alternatives.size(); ++alt) {
+ if (containsCapturingTerms(nestedDisjunction->m_alternatives[alt], 0, nestedDisjunction->m_alternatives[alt]->m_terms.size() - 1))
+ return true;
+ }
+ }
+ }
+
+ return false;
+ }
+
+ // This optimization identifies alternatives in the form of
+ // [^].*[?]<_expression_>.*[$] for expressions that don't have any
+ // capturing terms. The alternative is changed to <_expression_>
+ // followed by processing of the dot stars to find and adjust the
+ // beginning and the end of the match.
+ void optimizeDotStarWrappedExpressions()
+ {
+ Vector<PatternAlternative*>& alternatives = m_pattern.m_body->m_alternatives;
+ if (alternatives.size() != 1)
+ return;
+
+ PatternAlternative* alternative = alternatives[0];
+ Vector<PatternTerm>& terms = alternative->m_terms;
+ if (terms.size() >= 3) {
+ bool startsWithBOL = false;
+ bool endsWithEOL = false;
+ size_t termIndex, firstExpressionTerm, lastExpressionTerm;
+
+ termIndex = 0;
+ if (terms[termIndex].type == PatternTerm::TypeAssertionBOL) {
+ startsWithBOL = true;
+ ++termIndex;
+ }
+
+ PatternTerm& firstNonAnchorTerm = terms[termIndex];
+ if ((firstNonAnchorTerm.type != PatternTerm::TypeCharacterClass) || (firstNonAnchorTerm.characterClass != m_pattern.newlineCharacterClass()) || !((firstNonAnchorTerm.quantityType == QuantifierGreedy) || (firstNonAnchorTerm.quantityType == QuantifierNonGreedy)))
+ return;
+
+ firstExpressionTerm = termIndex + 1;
+
+ termIndex = terms.size() - 1;
+ if (terms[termIndex].type == PatternTerm::TypeAssertionEOL) {
+ endsWithEOL = true;
+ --termIndex;
+ }
+
+ PatternTerm& lastNonAnchorTerm = terms[termIndex];
+ if ((lastNonAnchorTerm.type != PatternTerm::TypeCharacterClass) || (lastNonAnchorTerm.characterClass != m_pattern.newlineCharacterClass()) || (lastNonAnchorTerm.quantityType != QuantifierGreedy))
+ return;
+
+ lastExpressionTerm = termIndex - 1;
+
+ if (firstExpressionTerm > lastExpressionTerm)
+ return;
+
+ if (!containsCapturingTerms(alternative, firstExpressionTerm, lastExpressionTerm)) {
+ for (termIndex = terms.size() - 1; termIndex > lastExpressionTerm; --termIndex)
+ terms.remove(termIndex);
+
+ for (termIndex = firstExpressionTerm; termIndex > 0; --termIndex)
+ terms.remove(termIndex - 1);
+
+ terms.append(PatternTerm(startsWithBOL, endsWithEOL));
+
+ m_pattern.m_containsBOL = false;
+ }
+ }
+ }
+
private:
YarrPattern& m_pattern;
PatternAlternative* m_alternative;
@@ -708,6 +794,7 @@
}
constructor.checkForTerminalParentheses();
+ constructor.optimizeDotStarWrappedExpressions();
constructor.optimizeBOL();
constructor.setupOffsets();
Modified: trunk/Source/_javascript_Core/yarr/YarrPattern.h (90961 => 90962)
--- trunk/Source/_javascript_Core/yarr/YarrPattern.h 2011-07-13 23:00:52 UTC (rev 90961)
+++ trunk/Source/_javascript_Core/yarr/YarrPattern.h 2011-07-13 23:05:40 UTC (rev 90962)
@@ -96,6 +96,7 @@
TypeForwardReference,
TypeParenthesesSubpattern,
TypeParentheticalAssertion,
+ TypeDotStarEnclosure,
} type;
bool m_capture :1;
bool m_invert :1;
@@ -110,6 +111,10 @@
bool isCopy;
bool isTerminal;
} parentheses;
+ struct {
+ bool bolAnchor : 1;
+ bool eolAnchor : 1;
+ } anchors;
};
QuantifierType quantityType;
unsigned quantityCount;
@@ -168,6 +173,17 @@
quantityCount = 1;
}
+ PatternTerm(bool bolAnchor, bool eolAnchor)
+ : type(TypeDotStarEnclosure)
+ , m_capture(false)
+ , m_invert(false)
+ {
+ anchors.bolAnchor = bolAnchor;
+ anchors.eolAnchor = eolAnchor;
+ quantityType = QuantifierFixedCount;
+ quantityCount = 1;
+ }
+
static PatternTerm ForwardReference()
{
return PatternTerm(TypeForwardReference);