Skip to site navigation (Press enter)

[webkit-changes] [87234] branches/safari-534-branch/Source/JavaScriptCore

lforschler Tue, 24 May 2011 17:16:40 -0700

Title: [87234] branches/safari-534-branch/Source/_javascript_Core

Revision: 87234
Author: lforsch...@apple.com
Date: 2011-05-24 17:16:33 -0700 (Tue, 24 May 2011)

Log Message

Merge r87109.


Modified Paths

branches/safari-534-branch/Source/_javascript_Core/ChangeLog
branches/safari-534-branch/Source/_javascript_Core/yarr/YarrInterpreter.cpp
branches/safari-534-branch/Source/_javascript_Core/yarr/YarrInterpreter.h
branches/safari-534-branch/Source/_javascript_Core/yarr/YarrPattern.cpp
branches/safari-534-branch/Source/_javascript_Core/yarr/YarrPattern.h

Diff

Modified: branches/safari-534-branch/Source/_javascript_Core/ChangeLog (87233 => 87234)


--- branches/safari-534-branch/Source/_javascript_Core/ChangeLog	2011-05-25 00:12:27 UTC (rev 87233)
+++ branches/safari-534-branch/Source/_javascript_Core/ChangeLog	2011-05-25 00:16:33 UTC (rev 87234)
@@ -1,3 +1,30 @@
+2011-05-24  Lucas Forschler  <lforsch...@apple.com>
+
+    Merged r87109.
+
+    2011-05-23  Gavin Barraclough  <barraclo...@apple.com>
+
+        Reviewed by Geoff Garen.
+
+        https://bugs.webkit.org/show_bug.cgi?id=61306
+
+        The begin characters optimization currently has issues (#61129),
+        and does not appear to still be a performance win. The prudent
+        next step seems to be to disable while we ascertain whether this
+        is still a useful performance optimization.
+
+        * yarr/YarrInterpreter.cpp:
+        (JSC::Yarr::Interpreter::matchDisjunction):
+        (JSC::Yarr::Interpreter::interpret):
+        * yarr/YarrInterpreter.h:
+        (JSC::Yarr::BytecodePattern::BytecodePattern):
+        * yarr/YarrPattern.cpp:
+        (JSC::Yarr::YarrPatternConstructor::YarrPatternConstructor):
+        (JSC::Yarr::YarrPattern::compile):
+        (JSC::Yarr::YarrPattern::YarrPattern):
+        * yarr/YarrPattern.h:
+        (JSC::Yarr::YarrPattern::reset):
+
 2011-05-24  Steve Falkenburg  <sfal...@apple.com>
 
         Reviewed by Adam Roben.

Modified: branches/safari-534-branch/Source/_javascript_Core/yarr/YarrInterpreter.cpp (87233 => 87234)


--- branches/safari-534-branch/Source/_javascript_Core/yarr/YarrInterpreter.cpp	2011-05-25 00:12:27 UTC (rev 87233)
+++ branches/safari-534-branch/Source/_javascript_Core/yarr/YarrInterpreter.cpp	2011-05-25 00:16:33 UTC (rev 87234)
@@ -1044,39 +1044,10 @@
         return JSRegExpErrorNoMatch;
     }
 
-    void lookupForBeginChars()
-    {
-        int character;
-        bool firstSingleCharFound;
-
-        while (true) {
-            if (input.isNotAvailableInput(2))
-                return;
-
-            firstSingleCharFound = false;
-
-            character = input.readPair();
-
-            for (unsigned i = 0; i < pattern->m_beginChars.size(); ++i) {
-                BeginChar bc = pattern->m_beginChars[i];
-
-                if (!firstSingleCharFound && bc.value <= 0xFFFF) {
-                    firstSingleCharFound = true;
-                    character &= 0xFFFF;
-                }
-
-                if ((character | bc.mask) == bc.value)
-                    return;
-            }
-
-            input.next();
-        }
-    }
-
 #define MATCH_NEXT() { ++context->term; goto matchAgain; }
 #define BACKTRACK() { --context->term; goto backtrack; }
 #define currentTerm() (disjunction->terms[context->term])
-    JSRegExpResult matchDisjunction(ByteDisjunction* disjunction, DisjunctionContext* context, bool btrack = false, bool isBody = false)
+    JSRegExpResult matchDisjunction(ByteDisjunction* disjunction, DisjunctionContext* context, bool btrack = false)
     {
         if (!--remainingMatchCount)
             return JSRegExpErrorHitLimit;
@@ -1084,9 +1055,6 @@
         if (btrack)
             BACKTRACK();
 
-        if (pattern->m_containsBeginChars && isBody)
-            lookupForBeginChars();
-
         context->matchBegin = input.getPos();
         context->term = 0;
 
@@ -1264,9 +1232,6 @@
 
             input.next();
 
-            if (pattern->m_containsBeginChars && isBody)
-                lookupForBeginChars();
-
             context->matchBegin = input.getPos();
 
             if (currentTerm().alternative.onceThrough)
@@ -1395,7 +1360,7 @@
 
         DisjunctionContext* context = allocDisjunctionContext(pattern->m_body.get());
 
-        JSRegExpResult result = matchDisjunction(pattern->m_body.get(), context, false, true);
+        JSRegExpResult result = matchDisjunction(pattern->m_body.get(), context, false);
         if (result == JSRegExpMatch) {
             output[0] = context->matchBegin;
             output[1] = context->matchEnd;

Modified: branches/safari-534-branch/Source/_javascript_Core/yarr/YarrInterpreter.h (87233 => 87234)


--- branches/safari-534-branch/Source/_javascript_Core/yarr/YarrInterpreter.h	2011-05-25 00:12:27 UTC (rev 87233)
+++ branches/safari-534-branch/Source/_javascript_Core/yarr/YarrInterpreter.h	2011-05-25 00:16:33 UTC (rev 87234)
@@ -328,7 +328,6 @@
         : m_body(body)
         , m_ignoreCase(pattern.m_ignoreCase)
         , m_multiline(pattern.m_multiline)
-        , m_containsBeginChars(pattern.m_containsBeginChars)
         , m_allocator(allocator)
     {
         newlineCharacterClass = pattern.newlineCharacterClass();
@@ -340,8 +339,6 @@
         // array, so that it won't delete them on destruction.  We'll
         // take responsibility for that.
         pattern.m_userCharacterClasses.clear();
-
-        m_beginChars.append(pattern.m_beginChars);
     }
 
     ~BytecodePattern()
@@ -353,7 +350,6 @@
     OwnPtr<ByteDisjunction> m_body;
     bool m_ignoreCase;
     bool m_multiline;
-    bool m_containsBeginChars;
     // Each BytecodePattern is associated with a RegExp, each RegExp is associated
     // with a JSGlobalData.  Cache a pointer to out JSGlobalData's m_regExpAllocator.
     BumpPointerAllocator* m_allocator;
@@ -361,8 +357,6 @@
     CharacterClass* newlineCharacterClass;
     CharacterClass* wordcharCharacterClass;
 
-    Vector<BeginChar> m_beginChars;
-
 private:
     Vector<ByteDisjunction*> m_allParenthesesInfo;
     Vector<CharacterClass*> m_userCharacterClasses;

Modified: branches/safari-534-branch/Source/_javascript_Core/yarr/YarrPattern.cpp (87233 => 87234)


--- branches/safari-534-branch/Source/_javascript_Core/yarr/YarrPattern.cpp	2011-05-25 00:12:27 UTC (rev 87233)
+++ branches/safari-534-branch/Source/_javascript_Core/yarr/YarrPattern.cpp	2011-05-25 00:16:33 UTC (rev 87234)
@@ -234,117 +234,11 @@
     Vector<CharacterRange> m_rangesUnicode;
 };
 
-struct BeginCharHelper {
-    BeginCharHelper(Vector<BeginChar>* beginChars, bool isCaseInsensitive = false)
-        : m_beginChars(beginChars)
-        , m_isCaseInsensitive(isCaseInsensitive)
-    {}
-
-    void addBeginChar(BeginChar beginChar, Vector<TermChain>* hotTerms, QuantifierType quantityType, unsigned quantityCount)
-    {
-        if (quantityType == QuantifierFixedCount && quantityCount > 1) {
-            // We duplicate the first found character if the quantity of the term is more than one. eg.: /a{3}/
-            beginChar.value |= beginChar.value << 16;
-            beginChar.mask |= beginChar.mask << 16;
-            addCharacter(beginChar);
-        } else if (quantityType == QuantifierFixedCount && quantityCount == 1 && hotTerms->size())
-            // In case of characters with fixed quantifier we should check the next character as well.
-            linkHotTerms(beginChar, hotTerms);
-        else
-            // In case of greedy matching the next character checking is unnecessary therefore we just store
-            // the first character.
-            addCharacter(beginChar);
-    }
-
-    // Merge two following BeginChars in the vector to reduce the number of character checks.
-    void merge(unsigned size)
-    {
-        for (unsigned i = 0; i < size; i++) {
-            BeginChar* curr = &m_beginChars->at(i);
-            BeginChar* next = &m_beginChars->at(i + 1);
-
-            // If the current and the next size of value is different we should skip the merge process
-            // because the 16bit and 32bit values are unmergable.
-            if (curr->value <= 0xFFFF && next->value > 0xFFFF)
-                continue;
-
-            unsigned diff = curr->value ^ next->value;
-
-            curr->mask |= diff;
-            curr->value |= curr->mask;
-
-            m_beginChars->remove(i + 1);
-            size--;
-        }
-    }
-
-private:
-    void addCharacter(BeginChar beginChar)
-    {
-        unsigned pos = 0;
-        unsigned range = m_beginChars->size();
-
-        // binary chop, find position to insert char.
-        while (range) {
-            unsigned index = range >> 1;
-
-            int val = m_beginChars->at(pos+index).value - beginChar.value;
-            if (!val)
-                return;
-            if (val < 0)
-                range = index;
-            else {
-                pos += (index+1);
-                range -= (index+1);
-            }
-        }
-
-        if (pos == m_beginChars->size())
-            m_beginChars->append(beginChar);
-        else
-            m_beginChars->insert(pos, beginChar);
-    }
-
-    // Create BeginChar objects by appending each terms from a hotTerms vector to an existing BeginChar object.
-    void linkHotTerms(BeginChar beginChar, Vector<TermChain>* hotTerms)
-    {
-        for (unsigned i = 0; i < hotTerms->size(); i++) {
-            PatternTerm hotTerm = hotTerms->at(i).term;
-            ASSERT(hotTerm.type == PatternTerm::TypePatternCharacter);
-
-            UChar characterNext = hotTerm.patternCharacter;
-
-            // Append a character to an existing BeginChar object.
-            if (characterNext <= 0x7f) {
-                unsigned mask = 0;
-
-                if (m_isCaseInsensitive && isASCIIAlpha(characterNext)) {
-                    mask = 32;
-                    characterNext = toASCIILower(characterNext);
-                }
-
-                addCharacter(BeginChar(beginChar.value | (characterNext << 16), beginChar.mask | (mask << 16)));
-            } else {
-                UChar upper, lower;
-                if (m_isCaseInsensitive && ((upper = Unicode::toUpper(characterNext)) != (lower = Unicode::toLower(characterNext)))) {
-                    addCharacter(BeginChar(beginChar.value | (upper << 16), beginChar.mask));
-                    addCharacter(BeginChar(beginChar.value | (lower << 16), beginChar.mask));
-                } else
-                    addCharacter(BeginChar(beginChar.value | (characterNext << 16), beginChar.mask));
-            }
-        }
-    }
-
-    Vector<BeginChar>* m_beginChars;
-    bool m_isCaseInsensitive;
-};
-
 class YarrPatternConstructor {
 public:
     YarrPatternConstructor(YarrPattern& pattern)
         : m_pattern(pattern)
         , m_characterClassConstructor(pattern.m_ignoreCase)
-        , m_beginCharHelper(&pattern.m_beginChars, pattern.m_ignoreCase)
         , m_invertParentheticalAssertion(false)
     {
         m_pattern.m_body = new PatternDisjunction();
@@ -781,144 +675,10 @@
         }
     }
 
-    // This function collects the terms which are potentially matching the first number of depth characters in the result.
-    // If this function returns false then it found at least one term which makes the beginning character
-    // look-up optimization inefficient.
-    bool setupDisjunctionBeginTerms(PatternDisjunction* disjunction, Vector<TermChain>* beginTerms, unsigned depth)
-    {
-        for (unsigned alt = 0; alt < disjunction->m_alternatives.size(); ++alt) {
-            PatternAlternative* alternative = disjunction->m_alternatives[alt];
-
-            if (!setupAlternativeBeginTerms(alternative, beginTerms, 0, depth))
-                return false;
-        }
-
-        return true;
-    }
-
-    bool setupAlternativeBeginTerms(PatternAlternative* alternative, Vector<TermChain>* beginTerms, unsigned termIndex, unsigned depth)
-    {
-        bool checkNext = true;
-        unsigned numTerms = alternative->m_terms.size();
-
-        while (checkNext && termIndex < numTerms) {
-            PatternTerm term = alternative->m_terms[termIndex];
-            checkNext = false;
-
-            switch (term.type) {
-            case PatternTerm::TypeAssertionBOL:
-            case PatternTerm::TypeAssertionEOL:
-            case PatternTerm::TypeAssertionWordBoundary:
-                return false;
-
-            case PatternTerm::TypeBackReference:
-            case PatternTerm::TypeForwardReference:
-                return false;
-
-            case PatternTerm::TypePatternCharacter:
-                if (termIndex != numTerms - 1) {
-                    beginTerms->append(TermChain(term));
-                    termIndex++;
-                    checkNext = true;
-                } else if (term.quantityType == QuantifierFixedCount) {
-                    beginTerms->append(TermChain(term));
-                    if (depth < 2 && termIndex < numTerms - 1 && term.quantityCount == 1)
-                        if (!setupAlternativeBeginTerms(alternative, &beginTerms->last().hotTerms, termIndex + 1, depth + 1))
-                            return false;
-                }
-
-                break;
-
-            case PatternTerm::TypeCharacterClass:
-                return false;
-
-            case PatternTerm::TypeParentheticalAssertion:
-                if (term.invert())
-                    return false;
-
-            case PatternTerm::TypeParenthesesSubpattern:
-                if (term.quantityType != QuantifierFixedCount) {
-                    if (termIndex == numTerms - 1)
-                        break;
-
-                    termIndex++;
-                    checkNext = true;
-                }
-
-                if (!setupDisjunctionBeginTerms(term.parentheses.disjunction, beginTerms, depth))
-                    return false;
-
-                break;
-            }
-        }
-
-        return true;
-    }
-
-    void setupBeginChars()
-    {
-        Vector<TermChain> beginTerms;
-        bool containsFixedCharacter = false;
-
-        if ((!m_pattern.m_body->m_hasFixedSize || m_pattern.m_body->m_alternatives.size() > 1)
-                && setupDisjunctionBeginTerms(m_pattern.m_body, &beginTerms, 0)) {
-            unsigned size = beginTerms.size();
-
-            // If we haven't collected any terms we should abort the preparation of beginning character look-up optimization.
-            if (!size)
-                return;
-
-            m_pattern.m_containsBeginChars = true;
-
-            for (unsigned i = 0; i < size; i++) {
-                PatternTerm term = beginTerms[i].term;
-
-                // We have just collected PatternCharacter terms, other terms are not allowed.
-                ASSERT(term.type == PatternTerm::TypePatternCharacter);
-
-                if (term.quantityType == QuantifierFixedCount)
-                    containsFixedCharacter = true;
-
-                UChar character = term.patternCharacter;
-                unsigned mask = 0;
-
-                if (character <= 0x7f) {
-                    if (m_pattern.m_ignoreCase && isASCIIAlpha(character)) {
-                        mask = 32;
-                        character = toASCIILower(character);
-                    }
-
-                    m_beginCharHelper.addBeginChar(BeginChar(character, mask), &beginTerms[i].hotTerms, term.quantityType, term.quantityCount);
-                } else {
-                    UChar upper, lower;
-                    if (m_pattern.m_ignoreCase && ((upper = Unicode::toUpper(character)) != (lower = Unicode::toLower(character)))) {
-                        m_beginCharHelper.addBeginChar(BeginChar(upper, mask), &beginTerms[i].hotTerms, term.quantityType, term.quantityCount);
-                        m_beginCharHelper.addBeginChar(BeginChar(lower, mask), &beginTerms[i].hotTerms, term.quantityType, term.quantityCount);
-                    } else
-                        m_beginCharHelper.addBeginChar(BeginChar(character, mask), &beginTerms[i].hotTerms, term.quantityType, term.quantityCount);
-                }
-            }
-
-            // If the pattern doesn't contain terms with fixed quantifiers then the beginning character look-up optimization is inefficient.
-            if (!containsFixedCharacter) {
-                m_pattern.m_containsBeginChars = false;
-                return;
-            }
-
-            size = m_pattern.m_beginChars.size();
-
-            if (size > 2)
-                m_beginCharHelper.merge(size - 1);
-            else if (size <= 1)
-                m_pattern.m_containsBeginChars = false;
-        }
-    }
-
 private:
     YarrPattern& m_pattern;
     PatternAlternative* m_alternative;
     CharacterClassConstructor m_characterClassConstructor;
-    BeginCharHelper m_beginCharHelper;
     bool m_invertCharacterClass;
     bool m_invertParentheticalAssertion;
 };
@@ -951,7 +711,6 @@
     constructor.optimizeBOL();
         
     constructor.setupOffsets();
-    constructor.setupBeginChars();
 
     return 0;
 }
@@ -960,7 +719,6 @@
     : m_ignoreCase(ignoreCase)
     , m_multiline(multiline)
     , m_containsBackreferences(false)
-    , m_containsBeginChars(false)
     , m_containsBOL(false)
     , m_numSubpatterns(0)
     , m_maxBackReference(0)

Modified: branches/safari-534-branch/Source/_javascript_Core/yarr/YarrPattern.h (87233 => 87234)


--- branches/safari-534-branch/Source/_javascript_Core/yarr/YarrPattern.h	2011-05-25 00:12:27 UTC (rev 87233)
+++ branches/safari-534-branch/Source/_javascript_Core/yarr/YarrPattern.h	2011-05-25 00:16:33 UTC (rev 87234)
@@ -297,21 +297,6 @@
     Vector<TermChain> hotTerms;
 };
 
-struct BeginChar {
-    BeginChar()
-        : value(0)
-        , mask(0)
-    {}
-
-    BeginChar(unsigned value, unsigned mask)
-        : value(value)
-        , mask(mask)
-    {}
-
-    unsigned value;
-    unsigned mask;
-};
-
 struct YarrPattern {
     YarrPattern(const UString& pattern, bool ignoreCase, bool multiline, const char** error);
 
@@ -327,7 +312,6 @@
         m_maxBackReference = 0;
 
         m_containsBackreferences = false;
-        m_containsBeginChars = false;
         m_containsBOL = false;
 
         newlineCached = 0;
@@ -342,7 +326,6 @@
         m_disjunctions.clear();
         deleteAllValues(m_userCharacterClasses);
         m_userCharacterClasses.clear();
-        m_beginChars.clear();
     }
 
     bool containsIllegalBackReference()
@@ -396,14 +379,12 @@
     bool m_ignoreCase : 1;
     bool m_multiline : 1;
     bool m_containsBackreferences : 1;
-    bool m_containsBeginChars : 1;
     bool m_containsBOL : 1;
     unsigned m_numSubpatterns;
     unsigned m_maxBackReference;
     PatternDisjunction* m_body;
     Vector<PatternDisjunction*, 4> m_disjunctions;
     Vector<CharacterClass*> m_userCharacterClasses;
-    Vector<BeginChar> m_beginChars;
 
 private:
     const char* compile(const UString& patternString);

_______________________________________________
webkit-changes mailing list
webkit-changes@lists.webkit.org
http://lists.webkit.org/mailman/listinfo.cgi/webkit-changes