Skip to site navigation (Press enter)

[webkit-changes] [102475] trunk/Source/JavaScriptCore

msaboff Fri, 09 Dec 2011 14:42:39 -0800

Title: [102475] trunk/Source/_javascript_Core

Revision: 102475
Author: msab...@apple.com
Date: 2011-12-09 14:42:31 -0800 (Fri, 09 Dec 2011)

Log Message

YARR: Multi-character read optimization for 8bit strings
https://bugs.webkit.org/show_bug.cgi?id=74191


Reviewed by Oliver Hunt.

Changed generatePatternCharacterOnce to generate
code for 1 to 4 characters in the 8 bit case.
This is worth 29% improvement on SunSpider regexp-dna test.
It provides no benefit to v8-regexp.

* yarr/YarrJIT.cpp:
(JSC::Yarr::YarrGenerator::generatePatternCharacterOnce):
(JSC::Yarr::YarrGenerator::generate): Spelling fix in comment.

Modified Paths

trunk/Source/_javascript_Core/ChangeLog
trunk/Source/_javascript_Core/yarr/YarrJIT.cpp

Diff

Modified: trunk/Source/_javascript_Core/ChangeLog (102474 => 102475)


--- trunk/Source/_javascript_Core/ChangeLog	2011-12-09 22:07:08 UTC (rev 102474)
+++ trunk/Source/_javascript_Core/ChangeLog	2011-12-09 22:42:31 UTC (rev 102475)
@@ -1,3 +1,19 @@
+2011-12-09  Michael Saboff  <msab...@apple.com>
+
+        YARR: Multi-character read optimization for 8bit strings
+        https://bugs.webkit.org/show_bug.cgi?id=74191
+
+        Reviewed by Oliver Hunt.
+
+        Changed generatePatternCharacterOnce to generate
+        code for 1 to 4 characters in the 8 bit case.
+        This is worth 29% improvement on SunSpider regexp-dna test.
+        It provides no benefit to v8-regexp.
+
+        * yarr/YarrJIT.cpp:
+        (JSC::Yarr::YarrGenerator::generatePatternCharacterOnce):
+        (JSC::Yarr::YarrGenerator::generate): Spelling fix in comment.
+
 2011-12-09  David Levin  <le...@chromium.org>
 
         Regression(r53595): Sync xhr requests in workers aren't terminated on worker close.

Modified: trunk/Source/_javascript_Core/yarr/YarrJIT.cpp (102474 => 102475)


--- trunk/Source/_javascript_Core/yarr/YarrJIT.cpp	2011-12-09 22:07:08 UTC (rev 102474)
+++ trunk/Source/_javascript_Core/yarr/YarrJIT.cpp	2011-12-09 22:42:31 UTC (rev 102475)
@@ -655,14 +655,14 @@
     {
         YarrOp& op = m_ops[opIndex];
 
+        if (op.m_isDeadCode)
+            return;
+        
         // m_ops always ends with a OpBodyAlternativeEnd or OpMatchFailed
         // node, so there must always be at least one more node.
         ASSERT(opIndex + 1 < m_ops.size());
-        YarrOp& nextOp = m_ops[opIndex + 1];
+        YarrOp* nextOp = &m_ops[opIndex + 1];
 
-        if (op.m_isDeadCode)
-            return;
-
         PatternTerm* term = op.m_term;
         UChar ch = term->patternCharacter;
 
@@ -673,47 +673,90 @@
         }
 
         const RegisterID character = regT0;
+        int maxCharactersAtOnce = m_charSize == Char8 ? 4 : 2;
+        unsigned ignoreCaseMask = 0;
+        unsigned currentCharacterMask = m_charSize == Char8 ? 0xff : 0xffff;
+        int allCharacters = ch;
+        int numberCharacters;
+        int startTermPosition = term->inputPosition;
 
-        if (nextOp.m_op == OpTerm) {
-            PatternTerm* nextTerm = nextOp.m_term;
-            if (nextTerm->type == PatternTerm::TypePatternCharacter
-                && nextTerm->quantityType == QuantifierFixedCount
-                && nextTerm->quantityCount == 1
-                && nextTerm->inputPosition == (term->inputPosition + 1)) {
+        // For case-insesitive compares, non-ascii characters that have different
+        // upper & lower case representations are converted to a character class.
+        ASSERT(!m_pattern.m_ignoreCase || isASCIIAlpha(ch) || (Unicode::toLower(ch) == Unicode::toUpper(ch)));
 
-                UChar ch2 = nextTerm->patternCharacter;
+        if ((m_pattern.m_ignoreCase) && (isASCIIAlpha(ch)))
+            ignoreCaseMask |= 32;
 
-                int shiftAmount = m_charSize == Char8 ? 8 : 16;
-                int mask = 0;
-                int chPair = ch | (ch2 << shiftAmount);
+        for (numberCharacters = 1; numberCharacters < maxCharactersAtOnce && nextOp->m_op == OpTerm; ++numberCharacters, nextOp = &m_ops[opIndex + numberCharacters]) {
+            PatternTerm* nextTerm = nextOp->m_term;
+            
+            if (nextTerm->type != PatternTerm::TypePatternCharacter
+                || nextTerm->quantityType != QuantifierFixedCount
+                || nextTerm->quantityCount != 1
+                || nextTerm->inputPosition != (startTermPosition + numberCharacters))
+                break;
 
-                // For case-insesitive compares, non-ascii characters that have different
-                // upper & lower case representations are converted to a character class.
-                ASSERT(!m_pattern.m_ignoreCase || isASCIIAlpha(ch) || (Unicode::toLower(ch) == Unicode::toUpper(ch)));
-                if (m_pattern.m_ignoreCase) {
-                    if (isASCIIAlpha(ch))
-                        mask |= 32;
-                    if (isASCIIAlpha(ch2))
-                        mask |= 32 << shiftAmount;
-                }
+            nextOp->m_isDeadCode = true;
 
-                if (m_charSize == Char8) {
-                    BaseIndex address(input, index, TimesOne, (term->inputPosition - m_checked) * sizeof(char));
-                    load16Unaligned(address, character);
-                } else {
-                    BaseIndex address(input, index, TimesTwo, (term->inputPosition - m_checked) * sizeof(UChar));
-                    load32WithUnalignedHalfWords(address, character);
-                }
-                if (mask)
-                    or32(Imm32(mask), character);
-                op.m_jumps.append(branch32(NotEqual, character, Imm32(chPair | mask)));
+            int shiftAmount = (m_charSize == Char8 ? 8 : 16) * numberCharacters;
+            currentCharacterMask = (m_charSize == Char8 ? 0xff : 0xffff) << shiftAmount;  
 
-                nextOp.m_isDeadCode = true;
+            UChar currentCharacter = nextTerm->patternCharacter;
+
+            if ((currentCharacter > 0xff) && (m_charSize == Char8)) {
+                // Have a 16 bit pattern character and an 8 bit string - short circuit
+                op.m_jumps.append(jump());
                 return;
             }
+
+            // For case-insesitive compares, non-ascii characters that have different
+            // upper & lower case representations are converted to a character class.
+            ASSERT(!m_pattern.m_ignoreCase || isASCIIAlpha(currentCharacter) || (Unicode::toLower(currentCharacter) == Unicode::toUpper(currentCharacter)));
+
+            allCharacters |= (currentCharacter << shiftAmount);
+
+            if ((m_pattern.m_ignoreCase) && (isASCIIAlpha(currentCharacter)))
+                ignoreCaseMask |= 32 << shiftAmount;                    
         }
 
-        op.m_jumps.append(jumpIfCharNotEquals(ch, term->inputPosition - m_checked, character));
+        if (m_charSize == Char8) {
+            switch (numberCharacters) {
+            case 1:
+                op.m_jumps.append(jumpIfCharNotEquals(ch, startTermPosition - m_checked, character));
+                return;
+            case 2: {
+                BaseIndex address(input, index, TimesOne, (startTermPosition - m_checked) * sizeof(LChar));
+                load16(address, character);
+                break;
+            }
+            case 3: {
+                BaseIndex address(input, index, TimesOne, (startTermPosition - m_checked) * sizeof(LChar));
+                load32WithUnalignedHalfWords(address, character);
+                and32(Imm32(0xffffff), character);
+                break;
+            }
+            case 4: {
+                BaseIndex address(input, index, TimesOne, (startTermPosition - m_checked) * sizeof(LChar));
+                load32WithUnalignedHalfWords(address, character);
+                break;
+            }
+            }
+        } else {
+            switch (numberCharacters) {
+            case 1:
+                op.m_jumps.append(jumpIfCharNotEquals(ch, term->inputPosition - m_checked, character));
+                return;
+            case 2:
+                BaseIndex address(input, index, TimesTwo, (term->inputPosition - m_checked) * sizeof(UChar));
+                load32WithUnalignedHalfWords(address, character);
+                break;
+            }
+        }
+
+        if (ignoreCaseMask)
+            or32(Imm32(ignoreCaseMask), character);
+        op.m_jumps.append(branch32(NotEqual, character, Imm32(allCharacters | ignoreCaseMask)));
+        return;
     }
     void backtrackPatternCharacterOnce(size_t opIndex)
     {
@@ -1305,7 +1348,7 @@
                         sub32(Imm32(priorAlternative->m_minimumSize - alternative->m_minimumSize), index);
                 } else if (op.m_nextOp == notFound) {
                     // This is the reentry point for the End of 'once through' alternatives,
-                    // jumped to when the las alternative fails to match.
+                    // jumped to when the last alternative fails to match.
                     op.m_reentry = label();
                     sub32(Imm32(priorAlternative->m_minimumSize), index);
                 }

_______________________________________________
webkit-changes mailing list
webkit-changes@lists.webkit.org
http://lists.webkit.org/mailman/listinfo.cgi/webkit-changes