Title: [102017] trunk/Source/_javascript_Core
Revision
102017
Author
msab...@apple.com
Date
2011-12-05 11:08:44 -0800 (Mon, 05 Dec 2011)

Log Message

8 bit string work slows down Kraken json-stringify-tinderbox
https://bugs.webkit.org/show_bug.cgi?id=73457

Added 8 bit path to StringBuilder.  StringBuilder starts
assuming 8 bit contents and gets converted to 16 bit upon
seeing the first 16 bit character or string.  Split
appendUninitialiezed into an inlined fast and function call
slow case.

Factored out the processing of the UString argument from
Stringifier::appendQuotedString() to a static templated function
based on character size.

This change eliminates 5% of the 7% slowdown to json-stringify-tinderbox.
This change introduces a 4.8% slowdown to json-parse-financial.
This slowdown will be addressed in a subsequent patch to StringImpl::equal.

Reviewed by Oliver Hunt.

* runtime/JSONObject.cpp:
(JSC::appendStringToUStringBuilder):
(JSC::Stringifier::appendQuotedString):
* wtf/text/StringBuilder.cpp:
(WTF::StringBuilder::resize):
(WTF::StringBuilder::allocateBuffer):
(WTF::StringBuilder::allocateBufferUpConvert):
(WTF::LChar):
(WTF::UChar):
(WTF::StringBuilder::reserveCapacity):
(WTF::StringBuilder::appendUninitialized):
(WTF::StringBuilder::appendUninitializedSlow):
(WTF::StringBuilder::append):
(WTF::StringBuilder::shrinkToFit):
* wtf/text/StringBuilder.h:
(WTF::StringBuilder::StringBuilder):
(WTF::StringBuilder::append):
(WTF::StringBuilder::operator[]):
(WTF::StringBuilder::characters8):
(WTF::StringBuilder::characters16):
(WTF::StringBuilder::charactersBlah):
(WTF::LChar):
(WTF::UChar):

Modified Paths

Diff

Modified: trunk/Source/_javascript_Core/ChangeLog (102016 => 102017)


--- trunk/Source/_javascript_Core/ChangeLog	2011-12-05 18:46:55 UTC (rev 102016)
+++ trunk/Source/_javascript_Core/ChangeLog	2011-12-05 19:08:44 UTC (rev 102017)
@@ -1,3 +1,48 @@
+2011-12-05  Michael Saboff  <msab...@apple.com>
+
+        8 bit string work slows down Kraken json-stringify-tinderbox
+        https://bugs.webkit.org/show_bug.cgi?id=73457
+
+        Added 8 bit path to StringBuilder.  StringBuilder starts
+        assuming 8 bit contents and gets converted to 16 bit upon
+        seeing the first 16 bit character or string.  Split
+        appendUninitialiezed into an inlined fast and function call
+        slow case.
+
+        Factored out the processing of the UString argument from
+        Stringifier::appendQuotedString() to a static templated function
+        based on character size.
+
+        This change eliminates 5% of the 7% slowdown to json-stringify-tinderbox.
+        This change introduces a 4.8% slowdown to json-parse-financial.
+        This slowdown will be addressed in a subsequent patch to StringImpl::equal.
+
+        Reviewed by Oliver Hunt.
+
+        * runtime/JSONObject.cpp:
+        (JSC::appendStringToUStringBuilder):
+        (JSC::Stringifier::appendQuotedString):
+        * wtf/text/StringBuilder.cpp:
+        (WTF::StringBuilder::resize):
+        (WTF::StringBuilder::allocateBuffer):
+        (WTF::StringBuilder::allocateBufferUpConvert):
+        (WTF::LChar):
+        (WTF::UChar):
+        (WTF::StringBuilder::reserveCapacity):
+        (WTF::StringBuilder::appendUninitialized):
+        (WTF::StringBuilder::appendUninitializedSlow):
+        (WTF::StringBuilder::append):
+        (WTF::StringBuilder::shrinkToFit):
+        * wtf/text/StringBuilder.h:
+        (WTF::StringBuilder::StringBuilder):
+        (WTF::StringBuilder::append):
+        (WTF::StringBuilder::operator[]):
+        (WTF::StringBuilder::characters8):
+        (WTF::StringBuilder::characters16):
+        (WTF::StringBuilder::charactersBlah):
+        (WTF::LChar):
+        (WTF::UChar):
+
 2011-12-01  Gavin Barraclough  <barraclo...@apple.com>
 
         https://bugs.webkit.org/show_bug.cgi?id=73624

Modified: trunk/Source/_javascript_Core/runtime/JSONObject.cpp (102016 => 102017)


--- trunk/Source/_javascript_Core/runtime/JSONObject.cpp	2011-12-05 18:46:55 UTC (rev 102016)
+++ trunk/Source/_javascript_Core/runtime/JSONObject.cpp	2011-12-05 19:08:44 UTC (rev 102017)
@@ -266,13 +266,9 @@
     return Local<Unknown>(m_exec->globalData(), jsString(m_exec, result.toUString()));
 }
 
-void Stringifier::appendQuotedString(UStringBuilder& builder, const UString& value)
+template <typename CharType>
+static void appendStringToUStringBuilder(UStringBuilder& builder, const CharType* data, int length)
 {
-    int length = value.length();
-
-    builder.append('"');
-
-    const UChar* data = ""
     for (int i = 0; i < length; ++i) {
         int start = i;
         while (i < length && (data[i] > 0x1F && data[i] != '"' && data[i] != '\\'))
@@ -281,44 +277,56 @@
         if (i >= length)
             break;
         switch (data[i]) {
-            case '\t':
-                builder.append('\\');
-                builder.append('t');
-                break;
-            case '\r':
-                builder.append('\\');
-                builder.append('r');
-                break;
-            case '\n':
-                builder.append('\\');
-                builder.append('n');
-                break;
-            case '\f':
-                builder.append('\\');
-                builder.append('f');
-                break;
-            case '\b':
-                builder.append('\\');
-                builder.append('b');
-                break;
-            case '"':
-                builder.append('\\');
-                builder.append('"');
-                break;
-            case '\\':
-                builder.append('\\');
-                builder.append('\\');
-                break;
-            default:
-                static const char hexDigits[] = "0123456789abcdef";
-                UChar ch = data[i];
-                UChar hex[] = { '\\', 'u', hexDigits[(ch >> 12) & 0xF], hexDigits[(ch >> 8) & 0xF], hexDigits[(ch >> 4) & 0xF], hexDigits[ch & 0xF] };
-                builder.append(hex, WTF_ARRAY_LENGTH(hex));
-                break;
+        case '\t':
+            builder.append('\\');
+            builder.append('t');
+            break;
+        case '\r':
+            builder.append('\\');
+            builder.append('r');
+            break;
+        case '\n':
+            builder.append('\\');
+            builder.append('n');
+            break;
+        case '\f':
+            builder.append('\\');
+            builder.append('f');
+            break;
+        case '\b':
+            builder.append('\\');
+            builder.append('b');
+            break;
+        case '"':
+            builder.append('\\');
+            builder.append('"');
+            break;
+        case '\\':
+            builder.append('\\');
+            builder.append('\\');
+            break;
+        default:
+            static const char hexDigits[] = "0123456789abcdef";
+            UChar ch = data[i];
+            LChar hex[] = { '\\', 'u', hexDigits[(ch >> 12) & 0xF], hexDigits[(ch >> 8) & 0xF], hexDigits[(ch >> 4) & 0xF], hexDigits[ch & 0xF] };
+            builder.append(hex, WTF_ARRAY_LENGTH(hex));
+            break;
         }
     }
+}
+    
+void Stringifier::appendQuotedString(UStringBuilder& builder, const UString& value)
+{
+    int length = value.length();
 
     builder.append('"');
+
+    if (value.is8Bit())
+        appendStringToUStringBuilder<LChar>(builder, value.characters8(), length);
+    else
+        appendStringToUStringBuilder<UChar>(builder, value.characters16(), length);
+
+    builder.append('"');
 }
 
 inline JSValue Stringifier::toJSON(JSValue value, const PropertyNameForFunctionCall& propertyName)

Modified: trunk/Source/_javascript_Core/wtf/text/StringBuilder.cpp (102016 => 102017)


--- trunk/Source/_javascript_Core/wtf/text/StringBuilder.cpp	2011-12-05 18:46:55 UTC (rev 102016)
+++ trunk/Source/_javascript_Core/wtf/text/StringBuilder.cpp	2011-12-05 19:08:44 UTC (rev 102017)
@@ -63,8 +63,12 @@
 
     // If there is a buffer, we only need to duplicate it if it has more than one ref.
     if (m_buffer) {
-        if (!m_buffer->hasOneRef())
-            allocateBuffer(m_buffer->characters(), m_buffer->length());
+        if (!m_buffer->hasOneRef()) {
+            if (m_buffer->is8Bit())
+                allocateBuffer(m_buffer->characters8(), m_buffer->length());
+            else
+                allocateBuffer(m_buffer->characters16(), m_buffer->length());
+        }
         m_length = newSize;
         m_string = String();
         return;
@@ -78,46 +82,110 @@
     m_string = StringImpl::create(m_string.impl(), 0, newSize);
 }
 
-void StringBuilder::reserveCapacity(unsigned newCapacity)
+// Allocate a new 8 bit buffer, copying in currentCharacters (these may come from either m_string
+// or m_buffer, neither will be reassigned until the copy has completed).
+void StringBuilder::allocateBuffer(const LChar* currentCharacters, unsigned requiredLength)
 {
-    if (m_buffer) {
-        // If there is already a buffer, then grow if necessary.
-        if (newCapacity > m_buffer->length())
-            reallocateBuffer(newCapacity);
-    } else {
-        // Grow the string, if necessary.
-        if (newCapacity > m_length)
-            allocateBuffer(m_string.characters(), newCapacity);
-    }
+    ASSERT(m_is8Bit);
+    // Copy the existing data into a new buffer, set result to point to the end of the existing data.
+    RefPtr<StringImpl> buffer = StringImpl::createUninitialized(requiredLength, m_bufferCharacters8);
+    memcpy(m_bufferCharacters8, currentCharacters, static_cast<size_t>(m_length) * sizeof(LChar)); // This can't overflow.
+    
+    // Update the builder state.
+    m_buffer = buffer.release();
+    m_string = String();
 }
 
-// Allocate a new buffer, copying in currentCharacters (these may come from either m_string
+// Allocate a new 16 bit buffer, copying in currentCharacters (these may come from either m_string
 // or m_buffer,  neither will be reassigned until the copy has completed).
 void StringBuilder::allocateBuffer(const UChar* currentCharacters, unsigned requiredLength)
 {
+    ASSERT(!m_is8Bit);
     // Copy the existing data into a new buffer, set result to point to the end of the existing data.
-    RefPtr<StringImpl> buffer = StringImpl::createUninitialized(requiredLength, m_bufferCharacters);
-    memcpy(m_bufferCharacters, currentCharacters, static_cast<size_t>(m_length) * sizeof(UChar)); // This can't overflow.
+    RefPtr<StringImpl> buffer = StringImpl::createUninitialized(requiredLength, m_bufferCharacters16);
+    memcpy(m_bufferCharacters16, currentCharacters, static_cast<size_t>(m_length) * sizeof(UChar)); // This can't overflow.
+    
+    // Update the builder state.
+    m_buffer = buffer.release();
+    m_string = String();
+}
 
+// Allocate a new 16 bit buffer, copying in currentCharacters (which is 8 bit and may come
+// from either m_string or m_buffer, neither will be reassigned until the copy has completed).
+void StringBuilder::allocateBufferUpConvert(const LChar* currentCharacters, unsigned requiredLength)
+{
+    ASSERT(m_is8Bit);
+    // Copy the existing data into a new buffer, set result to point to the end of the existing data.
+    RefPtr<StringImpl> buffer = StringImpl::createUninitialized(requiredLength, m_bufferCharacters16);
+    for (unsigned i = 0; i < m_length; i++)
+        m_bufferCharacters16[i] = currentCharacters[i];
+    
+    m_is8Bit = false;
+    
     // Update the builder state.
     m_buffer = buffer.release();
     m_string = String();
 }
 
-void StringBuilder::reallocateBuffer(unsigned requiredLength)
+template <>
+void StringBuilder::reallocateBuffer<LChar>(unsigned requiredLength)
 {
     // If the buffer has only one ref (by this StringBuilder), reallocate it,
     // otherwise fall back to "allocate and copy" method.
     m_string = String();
+    
+    ASSERT(m_is8Bit);
+    ASSERT(m_buffer->is8Bit());
+    
     if (m_buffer->hasOneRef())
-        m_buffer = StringImpl::reallocate(m_buffer.release(), requiredLength, m_bufferCharacters);
+        m_buffer = StringImpl::reallocate(m_buffer.release(), requiredLength, m_bufferCharacters8);
     else
-        allocateBuffer(m_buffer->characters(), requiredLength);
+        allocateBuffer(m_buffer->characters8(), requiredLength);
 }
 
+template <>
+void StringBuilder::reallocateBuffer<UChar>(unsigned requiredLength)
+{
+    // If the buffer has only one ref (by this StringBuilder), reallocate it,
+    // otherwise fall back to "allocate and copy" method.
+    m_string = String();
+    
+    if (m_buffer->is8Bit())
+        allocateBufferUpConvert(m_buffer->characters8(), requiredLength);
+    else if (m_buffer->hasOneRef())
+        m_buffer = StringImpl::reallocate(m_buffer.release(), requiredLength, m_bufferCharacters16);
+    else
+        allocateBuffer(m_buffer->characters16(), requiredLength);
+}
+
+void StringBuilder::reserveCapacity(unsigned newCapacity)
+{
+    if (m_buffer) {
+        // If there is already a buffer, then grow if necessary.
+        if (newCapacity > m_buffer->length()) {
+            if (m_buffer->is8Bit())
+                reallocateBuffer<LChar>(newCapacity);
+            else
+                reallocateBuffer<UChar>(newCapacity);
+        }
+    } else {
+        // Grow the string, if necessary.
+        if (newCapacity > m_length) {
+            if (!m_length) {
+                LChar* nullPlaceholder = 0;
+                allocateBuffer(nullPlaceholder, newCapacity);
+            } else if (m_string.is8Bit())
+                allocateBuffer(m_string.characters8(), newCapacity);
+            else
+                allocateBuffer(m_string.characters16(), newCapacity);
+        }
+    }
+}
+
 // Make 'length' additional capacity be available in m_buffer, update m_string & m_length,
 // return a pointer to the newly allocated storage.
-UChar* StringBuilder::appendUninitialized(unsigned length)
+template <typename CharType>
+ALWAYS_INLINE CharType* StringBuilder::appendUninitialized(unsigned length)
 {
     ASSERT(length);
 
@@ -126,25 +194,36 @@
     if (requiredLength < length)
         CRASH();
 
-    if (m_buffer) {
+    if ((m_buffer) && (requiredLength <= m_buffer->length())) {
         // If the buffer is valid it must be at least as long as the current builder contents!
         ASSERT(m_buffer->length() >= m_length);
+        unsigned currentLength = m_length;
+        m_string = String();
+        m_length = requiredLength;
+        return getBufferCharacters<CharType>() + currentLength;
+    }
+    
+    return appendUninitializedSlow<CharType>(requiredLength);
+}
 
-        // Check if the buffer already has sufficient capacity.
-        if (requiredLength <= m_buffer->length()) {
-            unsigned currentLength = m_length;
-            m_string = String();
-            m_length = requiredLength;
-            return m_bufferCharacters + currentLength;
-        }
+// Make 'length' additional capacity be available in m_buffer, update m_string & m_length,
+// return a pointer to the newly allocated storage.
+template <typename CharType>
+CharType* StringBuilder::appendUninitializedSlow(unsigned requiredLength)
+{
+    ASSERT(requiredLength);
 
-        reallocateBuffer(std::max(requiredLength, std::max(minimumCapacity, m_buffer->length() * 2)));
+    if (m_buffer) {
+        // If the buffer is valid it must be at least as long as the current builder contents!
+        ASSERT(m_buffer->length() >= m_length);
+        
+        reallocateBuffer<CharType>(std::max(requiredLength, std::max(minimumCapacity, m_buffer->length() * 2)));
     } else {
         ASSERT(m_string.length() == m_length);
-        allocateBuffer(m_string.characters(), std::max(requiredLength, std::max(minimumCapacity, m_length * 2)));
+        allocateBuffer(m_length ? m_string.getCharacters<CharType>() : 0, std::max(requiredLength, std::max(minimumCapacity, m_length * 2)));
     }
-
-    UChar* result = m_bufferCharacters + m_length;
+    
+    CharType* result = getBufferCharacters<CharType>() + m_length;
     m_length = requiredLength;
     return result;
 }
@@ -153,9 +232,29 @@
 {
     if (!length)
         return;
+
     ASSERT(characters);
 
-    memcpy(appendUninitialized(length), characters, static_cast<size_t>(length) * 2);
+    if (m_is8Bit) {
+        // Calculate the new size of the builder after appending.
+        unsigned requiredLength = length + m_length;
+        if (requiredLength < length)
+            CRASH();
+        
+        if (m_buffer) {
+            // If the buffer is valid it must be at least as long as the current builder contents!
+            ASSERT(m_buffer->length() >= m_length);
+            
+            allocateBufferUpConvert(m_buffer->characters8(), requiredLength);
+        } else {
+            ASSERT(m_string.length() == m_length);
+            allocateBufferUpConvert(m_string.isNull() ? 0 : m_string.characters8(), std::max(requiredLength, std::max(minimumCapacity, m_length * 2)));
+        }
+
+        memcpy(m_bufferCharacters16 + m_length, characters, static_cast<size_t>(length) * sizeof(UChar));        
+        m_length = requiredLength;
+    } else
+        memcpy(appendUninitialized<UChar>(length), characters, static_cast<size_t>(length) * sizeof(UChar));
 }
 
 void StringBuilder::append(const LChar* characters, unsigned length)
@@ -164,17 +263,31 @@
         return;
     ASSERT(characters);
 
-    UChar* dest = appendUninitialized(length);
-    const LChar* end = characters + length;
-    while (characters < end)
-        *(dest++) = *(characters++);
+    if (m_is8Bit) {
+        LChar* dest = appendUninitialized<LChar>(length);
+        if (length > 8)
+            memcpy(dest, characters, static_cast<size_t>(length) * sizeof(LChar));
+        else {
+            const LChar* end = characters + length;
+            while (characters < end)
+                *(dest++) = *(characters++);
+        }
+    } else {
+        UChar* dest = appendUninitialized<UChar>(length);
+        const LChar* end = characters + length;
+        while (characters < end)
+            *(dest++) = *(characters++);
+    }
 }
 
 void StringBuilder::shrinkToFit()
 {
     // If the buffer is at least 80% full, don't bother copying. Need to tune this heuristic!
     if (m_buffer && m_buffer->length() > (m_length + (m_length >> 2))) {
-        reallocateBuffer(m_length);
+        if (m_is8Bit)
+            reallocateBuffer<LChar>(m_length);
+        else
+            reallocateBuffer<UChar>(m_length);
         m_string = m_buffer;
         m_buffer = 0;
     }

Modified: trunk/Source/_javascript_Core/wtf/text/StringBuilder.h (102016 => 102017)


--- trunk/Source/_javascript_Core/wtf/text/StringBuilder.h	2011-12-05 18:46:55 UTC (rev 102016)
+++ trunk/Source/_javascript_Core/wtf/text/StringBuilder.h	2011-12-05 19:08:44 UTC (rev 102017)
@@ -35,6 +35,8 @@
 public:
     StringBuilder()
         : m_length(0)
+        , m_is8Bit(true)
+        , m_bufferCharacters8(0)
     {
     }
 
@@ -45,15 +47,23 @@
 
     void append(const String& string)
     {
+        if (!string.length())
+            return;
+
         // If we're appending to an empty string, and there is not buffer
         // (in case reserveCapacity has been called) then just retain the
         // string.
         if (!m_length && !m_buffer) {
             m_string = string;
             m_length = string.length();
+            m_is8Bit = m_string.is8Bit();
             return;
         }
-        append(string.characters(), string.length());
+
+        if (string.is8Bit())
+            append(string.characters8(), string.length());
+        else
+            append(string.characters16(), string.length());
     }
 
     void append(const char* characters)
@@ -64,16 +74,20 @@
 
     void append(UChar c)
     {
-        if (m_buffer && m_length < m_buffer->length() && m_string.isNull())
-            m_bufferCharacters[m_length++] = c;
+        if (m_buffer && !m_is8Bit && m_length < m_buffer->length() && m_string.isNull())
+            m_bufferCharacters16[m_length++] = c;
         else
             append(&c, 1);
     }
 
     void append(char c)
     {
-        if (m_buffer && m_length < m_buffer->length() && m_string.isNull())
-            m_bufferCharacters[m_length++] = (unsigned char)c;
+        if (m_buffer && m_length < m_buffer->length() && m_string.isNull()) {
+            if (m_is8Bit)
+                m_bufferCharacters8[m_length++] = (LChar)c;
+            else
+                m_bufferCharacters16[m_length++] = (LChar)c;
+        }
         else
             append(&c, 1);
     }
@@ -110,9 +124,33 @@
     UChar operator[](unsigned i) const
     {
         ASSERT(i < m_length);
-        return characters()[i];
+        if (m_is8Bit)
+            return characters8()[i];
+        return characters16()[i];
     }
 
+    const LChar* characters8() const
+    {
+        ASSERT(m_is8Bit);
+        if (!m_length)
+            return 0;
+        if (!m_string.isNull())
+            return m_string.characters8();
+        ASSERT(m_buffer);
+        return m_buffer->characters8();
+    }
+
+    const UChar* characters16() const
+    {
+        ASSERT(!m_is8Bit);
+        if (!m_length)
+            return 0;
+        if (!m_string.isNull())
+            return m_string.characters16();
+        ASSERT(m_buffer);
+        return m_buffer->characters16();
+    }
+    
     const UChar* characters() const
     {
         if (!m_length)
@@ -122,7 +160,7 @@
         ASSERT(m_buffer);
         return m_buffer->characters();
     }
-
+    
     void clear()
     {
         m_length = 0;
@@ -131,17 +169,43 @@
     }
 
 private:
+    void allocateBuffer(const LChar* currentCharacters, unsigned requiredLength);
     void allocateBuffer(const UChar* currentCharacters, unsigned requiredLength);
+    void allocateBufferUpConvert(const LChar* currentCharacters, unsigned requiredLength);
+    template <typename CharType>
     void reallocateBuffer(unsigned requiredLength);
-    UChar* appendUninitialized(unsigned length);
+    template <typename CharType>
+    ALWAYS_INLINE CharType* appendUninitialized(unsigned length);
+    template <typename CharType>
+    CharType* appendUninitializedSlow(unsigned length);
+    template <typename CharType>
+    ALWAYS_INLINE CharType * getBufferCharacters();
     void reifyString();
 
     unsigned m_length;
     String m_string;
     RefPtr<StringImpl> m_buffer;
-    UChar* m_bufferCharacters;
+    bool m_is8Bit;
+    union {
+        LChar* m_bufferCharacters8;
+        UChar* m_bufferCharacters16;
+    };
 };
 
+template <>
+ALWAYS_INLINE LChar* StringBuilder::getBufferCharacters<LChar>()
+{
+    ASSERT(m_is8Bit);
+    return m_bufferCharacters8;
+}
+
+template <>
+ALWAYS_INLINE UChar* StringBuilder::getBufferCharacters<UChar>()
+{
+    ASSERT(!m_is8Bit);
+    return m_bufferCharacters16;
+}    
+
 } // namespace WTF
 
 using WTF::StringBuilder;
_______________________________________________
webkit-changes mailing list
webkit-changes@lists.webkit.org
http://lists.webkit.org/mailman/listinfo.cgi/webkit-changes

Reply via email to