Revision: 4300
Author: [email protected]
Date: Fri Mar 26 16:33:37 2010
Log: Some string optimizations:

 * Faster hashing for sequential strings.

 * When adding short external two-byte strings try to convert them
   back to ascii. Chances are high the embedder uses two-byte
   representation even for ascii strings. This optimization saves
   memory and makes hashing faster.

Review URL: http://codereview.chromium.org/1444001
http://code.google.com/p/v8/source/detail?r=4300

Modified:
 /branches/bleeding_edge/src/heap.cc
 /branches/bleeding_edge/src/objects-inl.h
 /branches/bleeding_edge/src/objects.cc
 /branches/bleeding_edge/src/objects.h
 /branches/bleeding_edge/src/v8-counters.h

=======================================
--- /branches/bleeding_edge/src/heap.cc Thu Mar 25 08:32:58 2010
+++ /branches/bleeding_edge/src/heap.cc Fri Mar 26 16:33:37 2010
@@ -1961,8 +1961,9 @@
     return MakeOrFindTwoCharacterString(c1, c2);
   }

-  bool is_ascii = first->IsAsciiRepresentation()
-      && second->IsAsciiRepresentation();
+  bool first_is_ascii = first->IsAsciiRepresentation();
+  bool second_is_ascii = second->IsAsciiRepresentation();
+  bool is_ascii = first_is_ascii && second_is_ascii;

   // Make sure that an out of memory exception is thrown if the length
   // of the new cons string is too large.
@@ -1997,6 +1998,25 @@
       for (int i = 0; i < second_length; i++) *dest++ = src[i];
       return result;
     } else {
+      // For short external two-byte strings we check whether they can
+      // be represented using ascii.
+      if (!first_is_ascii) {
+        first_is_ascii = first->IsExternalTwoByteStringWithAsciiChars();
+      }
+      if (first_is_ascii && !second_is_ascii) {
+        second_is_ascii = second->IsExternalTwoByteStringWithAsciiChars();
+      }
+      if (first_is_ascii && second_is_ascii) {
+        Object* result = AllocateRawAsciiString(length);
+        if (result->IsFailure()) return result;
+        // Copy the characters into the new object.
+        char* dest = SeqAsciiString::cast(result)->GetChars();
+        String::WriteToFlat(first, dest, 0, first_length);
+        String::WriteToFlat(second, dest + first_length, 0, second_length);
+        Counters::string_add_runtime_ext_to_ascii.Increment();
+        return result;
+      }
+
       Object* result = AllocateRawTwoByteString(length);
       if (result->IsFailure()) return result;
       // Copy the characters into the new object.
=======================================
--- /branches/bleeding_edge/src/objects-inl.h   Tue Mar 23 04:40:38 2010
+++ /branches/bleeding_edge/src/objects-inl.h   Fri Mar 26 16:33:37 2010
@@ -253,6 +253,16 @@
   }
   return (type & kStringEncodingMask) == kTwoByteStringTag;
 }
+
+
+bool String::IsExternalTwoByteStringWithAsciiChars() {
+  if (!IsExternalTwoByteString()) return false;
+  const uc16* data = ExternalTwoByteString::cast(this)->resource()->data();
+  for (int i = 0, len = length(); i < len; i++) {
+    if (data[i] > kMaxAsciiCharCode) return false;
+  }
+  return true;
+}


 bool StringShape::IsCons() {
=======================================
--- /branches/bleeding_edge/src/objects.cc      Tue Mar 23 04:40:38 2010
+++ /branches/bleeding_edge/src/objects.cc      Fri Mar 26 16:33:37 2010
@@ -4658,15 +4658,40 @@
   }
   return i == slen && !decoder->has_more();
 }
+
+
+template <typename schar>
+static inline uint32_t HashSequentialString(const schar* chars, int length) {
+  StringHasher hasher(length);
+  if (!hasher.has_trivial_hash()) {
+    int i;
+    for (i = 0; hasher.is_array_index() && (i < length); i++) {
+      hasher.AddCharacter(chars[i]);
+    }
+    for (; i < length; i++) {
+      hasher.AddCharacterNoIndex(chars[i]);
+    }
+  }
+  return hasher.GetHashField();
+}


 uint32_t String::ComputeAndSetHash() {
   // Should only be called if hash code has not yet been computed.
   ASSERT(!(hash_field() & kHashComputedMask));

+  const int len = length();
+
   // Compute the hash code.
-  StringInputBuffer buffer(this);
-  uint32_t field = ComputeHashField(&buffer, length());
+  uint32_t field = 0;
+  if (StringShape(this).IsSequentialAscii()) {
+ field = HashSequentialString(SeqAsciiString::cast(this)->GetChars(), len);
+  } else if (StringShape(this).IsSequentialTwoByte()) {
+ field = HashSequentialString(SeqTwoByteString::cast(this)->GetChars(), len);
+  } else {
+    StringInputBuffer buffer(this);
+    field = ComputeHashField(&buffer, len);
+  }

   // Store the hash code in the object.
   set_hash_field(field);
=======================================
--- /branches/bleeding_edge/src/objects.h       Fri Mar 26 07:19:47 2010
+++ /branches/bleeding_edge/src/objects.h       Fri Mar 26 16:33:37 2010
@@ -3926,6 +3926,13 @@
   inline bool IsAsciiRepresentation();
   inline bool IsTwoByteRepresentation();

+  // Check whether this string is an external two-byte string that in
+  // fact contains only ascii characters.
+  //
+  // Such strings may appear when the embedder prefers two-byte
+  // representations even for ascii data.
+  inline bool IsExternalTwoByteStringWithAsciiChars();
+
   // Get and set individual two byte chars in the string.
   inline void Set(int index, uint16_t value);
   // Get individual two byte char in the string.  Repeated calls
=======================================
--- /branches/bleeding_edge/src/v8-counters.h   Tue Mar 23 04:40:38 2010
+++ /branches/bleeding_edge/src/v8-counters.h   Fri Mar 26 16:33:37 2010
@@ -166,6 +166,7 @@
   SC(generic_binary_stub_calls_regs, V8.GenericBinaryStubCallsRegs)   \
   SC(string_add_runtime, V8.StringAddRuntime)                         \
   SC(string_add_native, V8.StringAddNative)                           \
+  SC(string_add_runtime_ext_to_ascii, V8.StringAddRuntimeExtToAscii)  \
   SC(sub_string_runtime, V8.SubStringRuntime)                         \
   SC(sub_string_native, V8.SubStringNative)                           \
   SC(string_compare_native, V8.StringCompareNative)                   \

--
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev

To unsubscribe from this group, send email to v8-dev+unsubscribegooglegroups.com or reply 
to this email with the words "REMOVE ME" as the subject.

Reply via email to