Diff
Modified: branches/safari-534-branch/Source/_javascript_Core/ChangeLog (87233 => 87234)
--- branches/safari-534-branch/Source/_javascript_Core/ChangeLog 2011-05-25 00:12:27 UTC (rev 87233)
+++ branches/safari-534-branch/Source/_javascript_Core/ChangeLog 2011-05-25 00:16:33 UTC (rev 87234)
@@ -1,3 +1,30 @@
+2011-05-24 Lucas Forschler <lforsch...@apple.com>
+
+ Merged r87109.
+
+ 2011-05-23 Gavin Barraclough <barraclo...@apple.com>
+
+ Reviewed by Geoff Garen.
+
+ https://bugs.webkit.org/show_bug.cgi?id=61306
+
+ The begin characters optimization currently has issues (#61129),
+ and does not appear to still be a performance win. The prudent
+ next step seems to be to disable while we ascertain whether this
+ is still a useful performance optimization.
+
+ * yarr/YarrInterpreter.cpp:
+ (JSC::Yarr::Interpreter::matchDisjunction):
+ (JSC::Yarr::Interpreter::interpret):
+ * yarr/YarrInterpreter.h:
+ (JSC::Yarr::BytecodePattern::BytecodePattern):
+ * yarr/YarrPattern.cpp:
+ (JSC::Yarr::YarrPatternConstructor::YarrPatternConstructor):
+ (JSC::Yarr::YarrPattern::compile):
+ (JSC::Yarr::YarrPattern::YarrPattern):
+ * yarr/YarrPattern.h:
+ (JSC::Yarr::YarrPattern::reset):
+
2011-05-24 Steve Falkenburg <sfal...@apple.com>
Reviewed by Adam Roben.
Modified: branches/safari-534-branch/Source/_javascript_Core/yarr/YarrInterpreter.cpp (87233 => 87234)
--- branches/safari-534-branch/Source/_javascript_Core/yarr/YarrInterpreter.cpp 2011-05-25 00:12:27 UTC (rev 87233)
+++ branches/safari-534-branch/Source/_javascript_Core/yarr/YarrInterpreter.cpp 2011-05-25 00:16:33 UTC (rev 87234)
@@ -1044,39 +1044,10 @@
return JSRegExpErrorNoMatch;
}
- void lookupForBeginChars()
- {
- int character;
- bool firstSingleCharFound;
-
- while (true) {
- if (input.isNotAvailableInput(2))
- return;
-
- firstSingleCharFound = false;
-
- character = input.readPair();
-
- for (unsigned i = 0; i < pattern->m_beginChars.size(); ++i) {
- BeginChar bc = pattern->m_beginChars[i];
-
- if (!firstSingleCharFound && bc.value <= 0xFFFF) {
- firstSingleCharFound = true;
- character &= 0xFFFF;
- }
-
- if ((character | bc.mask) == bc.value)
- return;
- }
-
- input.next();
- }
- }
-
#define MATCH_NEXT() { ++context->term; goto matchAgain; }
#define BACKTRACK() { --context->term; goto backtrack; }
#define currentTerm() (disjunction->terms[context->term])
- JSRegExpResult matchDisjunction(ByteDisjunction* disjunction, DisjunctionContext* context, bool btrack = false, bool isBody = false)
+ JSRegExpResult matchDisjunction(ByteDisjunction* disjunction, DisjunctionContext* context, bool btrack = false)
{
if (!--remainingMatchCount)
return JSRegExpErrorHitLimit;
@@ -1084,9 +1055,6 @@
if (btrack)
BACKTRACK();
- if (pattern->m_containsBeginChars && isBody)
- lookupForBeginChars();
-
context->matchBegin = input.getPos();
context->term = 0;
@@ -1264,9 +1232,6 @@
input.next();
- if (pattern->m_containsBeginChars && isBody)
- lookupForBeginChars();
-
context->matchBegin = input.getPos();
if (currentTerm().alternative.onceThrough)
@@ -1395,7 +1360,7 @@
DisjunctionContext* context = allocDisjunctionContext(pattern->m_body.get());
- JSRegExpResult result = matchDisjunction(pattern->m_body.get(), context, false, true);
+ JSRegExpResult result = matchDisjunction(pattern->m_body.get(), context, false);
if (result == JSRegExpMatch) {
output[0] = context->matchBegin;
output[1] = context->matchEnd;
Modified: branches/safari-534-branch/Source/_javascript_Core/yarr/YarrInterpreter.h (87233 => 87234)
--- branches/safari-534-branch/Source/_javascript_Core/yarr/YarrInterpreter.h 2011-05-25 00:12:27 UTC (rev 87233)
+++ branches/safari-534-branch/Source/_javascript_Core/yarr/YarrInterpreter.h 2011-05-25 00:16:33 UTC (rev 87234)
@@ -328,7 +328,6 @@
: m_body(body)
, m_ignoreCase(pattern.m_ignoreCase)
, m_multiline(pattern.m_multiline)
- , m_containsBeginChars(pattern.m_containsBeginChars)
, m_allocator(allocator)
{
newlineCharacterClass = pattern.newlineCharacterClass();
@@ -340,8 +339,6 @@
// array, so that it won't delete them on destruction. We'll
// take responsibility for that.
pattern.m_userCharacterClasses.clear();
-
- m_beginChars.append(pattern.m_beginChars);
}
~BytecodePattern()
@@ -353,7 +350,6 @@
OwnPtr<ByteDisjunction> m_body;
bool m_ignoreCase;
bool m_multiline;
- bool m_containsBeginChars;
// Each BytecodePattern is associated with a RegExp, each RegExp is associated
// with a JSGlobalData. Cache a pointer to out JSGlobalData's m_regExpAllocator.
BumpPointerAllocator* m_allocator;
@@ -361,8 +357,6 @@
CharacterClass* newlineCharacterClass;
CharacterClass* wordcharCharacterClass;
- Vector<BeginChar> m_beginChars;
-
private:
Vector<ByteDisjunction*> m_allParenthesesInfo;
Vector<CharacterClass*> m_userCharacterClasses;
Modified: branches/safari-534-branch/Source/_javascript_Core/yarr/YarrPattern.cpp (87233 => 87234)
--- branches/safari-534-branch/Source/_javascript_Core/yarr/YarrPattern.cpp 2011-05-25 00:12:27 UTC (rev 87233)
+++ branches/safari-534-branch/Source/_javascript_Core/yarr/YarrPattern.cpp 2011-05-25 00:16:33 UTC (rev 87234)
@@ -234,117 +234,11 @@
Vector<CharacterRange> m_rangesUnicode;
};
-struct BeginCharHelper {
- BeginCharHelper(Vector<BeginChar>* beginChars, bool isCaseInsensitive = false)
- : m_beginChars(beginChars)
- , m_isCaseInsensitive(isCaseInsensitive)
- {}
-
- void addBeginChar(BeginChar beginChar, Vector<TermChain>* hotTerms, QuantifierType quantityType, unsigned quantityCount)
- {
- if (quantityType == QuantifierFixedCount && quantityCount > 1) {
- // We duplicate the first found character if the quantity of the term is more than one. eg.: /a{3}/
- beginChar.value |= beginChar.value << 16;
- beginChar.mask |= beginChar.mask << 16;
- addCharacter(beginChar);
- } else if (quantityType == QuantifierFixedCount && quantityCount == 1 && hotTerms->size())
- // In case of characters with fixed quantifier we should check the next character as well.
- linkHotTerms(beginChar, hotTerms);
- else
- // In case of greedy matching the next character checking is unnecessary therefore we just store
- // the first character.
- addCharacter(beginChar);
- }
-
- // Merge two following BeginChars in the vector to reduce the number of character checks.
- void merge(unsigned size)
- {
- for (unsigned i = 0; i < size; i++) {
- BeginChar* curr = &m_beginChars->at(i);
- BeginChar* next = &m_beginChars->at(i + 1);
-
- // If the current and the next size of value is different we should skip the merge process
- // because the 16bit and 32bit values are unmergable.
- if (curr->value <= 0xFFFF && next->value > 0xFFFF)
- continue;
-
- unsigned diff = curr->value ^ next->value;
-
- curr->mask |= diff;
- curr->value |= curr->mask;
-
- m_beginChars->remove(i + 1);
- size--;
- }
- }
-
-private:
- void addCharacter(BeginChar beginChar)
- {
- unsigned pos = 0;
- unsigned range = m_beginChars->size();
-
- // binary chop, find position to insert char.
- while (range) {
- unsigned index = range >> 1;
-
- int val = m_beginChars->at(pos+index).value - beginChar.value;
- if (!val)
- return;
- if (val < 0)
- range = index;
- else {
- pos += (index+1);
- range -= (index+1);
- }
- }
-
- if (pos == m_beginChars->size())
- m_beginChars->append(beginChar);
- else
- m_beginChars->insert(pos, beginChar);
- }
-
- // Create BeginChar objects by appending each terms from a hotTerms vector to an existing BeginChar object.
- void linkHotTerms(BeginChar beginChar, Vector<TermChain>* hotTerms)
- {
- for (unsigned i = 0; i < hotTerms->size(); i++) {
- PatternTerm hotTerm = hotTerms->at(i).term;
- ASSERT(hotTerm.type == PatternTerm::TypePatternCharacter);
-
- UChar characterNext = hotTerm.patternCharacter;
-
- // Append a character to an existing BeginChar object.
- if (characterNext <= 0x7f) {
- unsigned mask = 0;
-
- if (m_isCaseInsensitive && isASCIIAlpha(characterNext)) {
- mask = 32;
- characterNext = toASCIILower(characterNext);
- }
-
- addCharacter(BeginChar(beginChar.value | (characterNext << 16), beginChar.mask | (mask << 16)));
- } else {
- UChar upper, lower;
- if (m_isCaseInsensitive && ((upper = Unicode::toUpper(characterNext)) != (lower = Unicode::toLower(characterNext)))) {
- addCharacter(BeginChar(beginChar.value | (upper << 16), beginChar.mask));
- addCharacter(BeginChar(beginChar.value | (lower << 16), beginChar.mask));
- } else
- addCharacter(BeginChar(beginChar.value | (characterNext << 16), beginChar.mask));
- }
- }
- }
-
- Vector<BeginChar>* m_beginChars;
- bool m_isCaseInsensitive;
-};
-
class YarrPatternConstructor {
public:
YarrPatternConstructor(YarrPattern& pattern)
: m_pattern(pattern)
, m_characterClassConstructor(pattern.m_ignoreCase)
- , m_beginCharHelper(&pattern.m_beginChars, pattern.m_ignoreCase)
, m_invertParentheticalAssertion(false)
{
m_pattern.m_body = new PatternDisjunction();
@@ -781,144 +675,10 @@
}
}
- // This function collects the terms which are potentially matching the first number of depth characters in the result.
- // If this function returns false then it found at least one term which makes the beginning character
- // look-up optimization inefficient.
- bool setupDisjunctionBeginTerms(PatternDisjunction* disjunction, Vector<TermChain>* beginTerms, unsigned depth)
- {
- for (unsigned alt = 0; alt < disjunction->m_alternatives.size(); ++alt) {
- PatternAlternative* alternative = disjunction->m_alternatives[alt];
-
- if (!setupAlternativeBeginTerms(alternative, beginTerms, 0, depth))
- return false;
- }
-
- return true;
- }
-
- bool setupAlternativeBeginTerms(PatternAlternative* alternative, Vector<TermChain>* beginTerms, unsigned termIndex, unsigned depth)
- {
- bool checkNext = true;
- unsigned numTerms = alternative->m_terms.size();
-
- while (checkNext && termIndex < numTerms) {
- PatternTerm term = alternative->m_terms[termIndex];
- checkNext = false;
-
- switch (term.type) {
- case PatternTerm::TypeAssertionBOL:
- case PatternTerm::TypeAssertionEOL:
- case PatternTerm::TypeAssertionWordBoundary:
- return false;
-
- case PatternTerm::TypeBackReference:
- case PatternTerm::TypeForwardReference:
- return false;
-
- case PatternTerm::TypePatternCharacter:
- if (termIndex != numTerms - 1) {
- beginTerms->append(TermChain(term));
- termIndex++;
- checkNext = true;
- } else if (term.quantityType == QuantifierFixedCount) {
- beginTerms->append(TermChain(term));
- if (depth < 2 && termIndex < numTerms - 1 && term.quantityCount == 1)
- if (!setupAlternativeBeginTerms(alternative, &beginTerms->last().hotTerms, termIndex + 1, depth + 1))
- return false;
- }
-
- break;
-
- case PatternTerm::TypeCharacterClass:
- return false;
-
- case PatternTerm::TypeParentheticalAssertion:
- if (term.invert())
- return false;
-
- case PatternTerm::TypeParenthesesSubpattern:
- if (term.quantityType != QuantifierFixedCount) {
- if (termIndex == numTerms - 1)
- break;
-
- termIndex++;
- checkNext = true;
- }
-
- if (!setupDisjunctionBeginTerms(term.parentheses.disjunction, beginTerms, depth))
- return false;
-
- break;
- }
- }
-
- return true;
- }
-
- void setupBeginChars()
- {
- Vector<TermChain> beginTerms;
- bool containsFixedCharacter = false;
-
- if ((!m_pattern.m_body->m_hasFixedSize || m_pattern.m_body->m_alternatives.size() > 1)
- && setupDisjunctionBeginTerms(m_pattern.m_body, &beginTerms, 0)) {
- unsigned size = beginTerms.size();
-
- // If we haven't collected any terms we should abort the preparation of beginning character look-up optimization.
- if (!size)
- return;
-
- m_pattern.m_containsBeginChars = true;
-
- for (unsigned i = 0; i < size; i++) {
- PatternTerm term = beginTerms[i].term;
-
- // We have just collected PatternCharacter terms, other terms are not allowed.
- ASSERT(term.type == PatternTerm::TypePatternCharacter);
-
- if (term.quantityType == QuantifierFixedCount)
- containsFixedCharacter = true;
-
- UChar character = term.patternCharacter;
- unsigned mask = 0;
-
- if (character <= 0x7f) {
- if (m_pattern.m_ignoreCase && isASCIIAlpha(character)) {
- mask = 32;
- character = toASCIILower(character);
- }
-
- m_beginCharHelper.addBeginChar(BeginChar(character, mask), &beginTerms[i].hotTerms, term.quantityType, term.quantityCount);
- } else {
- UChar upper, lower;
- if (m_pattern.m_ignoreCase && ((upper = Unicode::toUpper(character)) != (lower = Unicode::toLower(character)))) {
- m_beginCharHelper.addBeginChar(BeginChar(upper, mask), &beginTerms[i].hotTerms, term.quantityType, term.quantityCount);
- m_beginCharHelper.addBeginChar(BeginChar(lower, mask), &beginTerms[i].hotTerms, term.quantityType, term.quantityCount);
- } else
- m_beginCharHelper.addBeginChar(BeginChar(character, mask), &beginTerms[i].hotTerms, term.quantityType, term.quantityCount);
- }
- }
-
- // If the pattern doesn't contain terms with fixed quantifiers then the beginning character look-up optimization is inefficient.
- if (!containsFixedCharacter) {
- m_pattern.m_containsBeginChars = false;
- return;
- }
-
- size = m_pattern.m_beginChars.size();
-
- if (size > 2)
- m_beginCharHelper.merge(size - 1);
- else if (size <= 1)
- m_pattern.m_containsBeginChars = false;
- }
- }
-
private:
YarrPattern& m_pattern;
PatternAlternative* m_alternative;
CharacterClassConstructor m_characterClassConstructor;
- BeginCharHelper m_beginCharHelper;
bool m_invertCharacterClass;
bool m_invertParentheticalAssertion;
};
@@ -951,7 +711,6 @@
constructor.optimizeBOL();
constructor.setupOffsets();
- constructor.setupBeginChars();
return 0;
}
@@ -960,7 +719,6 @@
: m_ignoreCase(ignoreCase)
, m_multiline(multiline)
, m_containsBackreferences(false)
- , m_containsBeginChars(false)
, m_containsBOL(false)
, m_numSubpatterns(0)
, m_maxBackReference(0)
Modified: branches/safari-534-branch/Source/_javascript_Core/yarr/YarrPattern.h (87233 => 87234)
--- branches/safari-534-branch/Source/_javascript_Core/yarr/YarrPattern.h 2011-05-25 00:12:27 UTC (rev 87233)
+++ branches/safari-534-branch/Source/_javascript_Core/yarr/YarrPattern.h 2011-05-25 00:16:33 UTC (rev 87234)
@@ -297,21 +297,6 @@
Vector<TermChain> hotTerms;
};
-struct BeginChar {
- BeginChar()
- : value(0)
- , mask(0)
- {}
-
- BeginChar(unsigned value, unsigned mask)
- : value(value)
- , mask(mask)
- {}
-
- unsigned value;
- unsigned mask;
-};
-
struct YarrPattern {
YarrPattern(const UString& pattern, bool ignoreCase, bool multiline, const char** error);
@@ -327,7 +312,6 @@
m_maxBackReference = 0;
m_containsBackreferences = false;
- m_containsBeginChars = false;
m_containsBOL = false;
newlineCached = 0;
@@ -342,7 +326,6 @@
m_disjunctions.clear();
deleteAllValues(m_userCharacterClasses);
m_userCharacterClasses.clear();
- m_beginChars.clear();
}
bool containsIllegalBackReference()
@@ -396,14 +379,12 @@
bool m_ignoreCase : 1;
bool m_multiline : 1;
bool m_containsBackreferences : 1;
- bool m_containsBeginChars : 1;
bool m_containsBOL : 1;
unsigned m_numSubpatterns;
unsigned m_maxBackReference;
PatternDisjunction* m_body;
Vector<PatternDisjunction*, 4> m_disjunctions;
Vector<CharacterClass*> m_userCharacterClasses;
- Vector<BeginChar> m_beginChars;
private:
const char* compile(const UString& patternString);