Revision: 12762
Author:   [email protected]
Date:     Thu Oct 18 08:08:11 2012
Log: Keep track of the first non-ascii word/char to avoid redoing the work.

Review URL: https://chromiumcodereview.appspot.com/11194053
http://code.google.com/p/v8/source/detail?r=12762

Modified:
 /branches/bleeding_edge/src/heap-inl.h
 /branches/bleeding_edge/src/heap.cc
 /branches/bleeding_edge/src/heap.h
 /branches/bleeding_edge/src/objects.h

=======================================
--- /branches/bleeding_edge/src/heap-inl.h      Wed Oct 17 06:04:49 2012
+++ /branches/bleeding_edge/src/heap-inl.h      Thu Oct 18 08:08:11 2012
@@ -85,13 +85,16 @@
 MaybeObject* Heap::AllocateStringFromUtf8(Vector<const char> str,
                                           PretenureFlag pretenure) {
   // Check for ASCII first since this is the common case.
-  if (String::IsAscii(str.start(), str.length())) {
+  const char* start = str.start();
+  int length = str.length();
+  int non_ascii_start = String::NonAsciiStart(start, length);
+  if (non_ascii_start >= length) {
     // If the string is ASCII, we do not need to convert the characters
     // since UTF8 is backwards compatible with ASCII.
     return AllocateStringFromAscii(str, pretenure);
   }
   // Non-ASCII and we need to decode.
-  return AllocateStringFromUtf8Slow(str, pretenure);
+  return AllocateStringFromUtf8Slow(str, non_ascii_start, pretenure);
 }


=======================================
--- /branches/bleeding_edge/src/heap.cc Wed Oct 17 06:04:49 2012
+++ /branches/bleeding_edge/src/heap.cc Thu Oct 18 08:08:11 2012
@@ -4428,13 +4428,14 @@


 MaybeObject* Heap::AllocateStringFromUtf8Slow(Vector<const char> string,
+                                              int non_ascii_start,
                                               PretenureFlag pretenure) {
-  // Count the number of characters in the UTF-8 string and check if
-  // it is an ASCII string.
+  // Continue counting the number of characters in the UTF-8 string, starting
+  // from the first non-ascii character or word.
+  int chars = non_ascii_start;
   Access<UnicodeCache::Utf8Decoder>
       decoder(isolate_->unicode_cache()->utf8_decoder());
-  decoder->Reset(string.start(), string.length());
-  int chars = 0;
+  decoder->Reset(string.start() + non_ascii_start, string.length() - chars);
   while (decoder->has_more()) {
     uint32_t r = decoder->GetNext();
     if (r <= unibrow::Utf16::kMaxNonSurrogateCharCode) {
=======================================
--- /branches/bleeding_edge/src/heap.h  Fri Oct 12 06:49:12 2012
+++ /branches/bleeding_edge/src/heap.h  Thu Oct 18 08:08:11 2012
@@ -705,6 +705,7 @@
       PretenureFlag pretenure = NOT_TENURED);
   MUST_USE_RESULT MaybeObject* AllocateStringFromUtf8Slow(
       Vector<const char> str,
+      int non_ascii_start,
       PretenureFlag pretenure = NOT_TENURED);
   MUST_USE_RESULT MaybeObject* AllocateStringFromTwoByte(
       Vector<const uc16> str,
=======================================
--- /branches/bleeding_edge/src/objects.h       Wed Oct 17 07:09:42 2012
+++ /branches/bleeding_edge/src/objects.h       Thu Oct 18 08:08:11 2012
@@ -7413,32 +7413,47 @@
                           int from,
                           int to);

-  static inline bool IsAscii(const char* chars, int length) {
+  // The return value may point to the first aligned word containing the
+  // first non-ascii character, rather than directly to the non-ascii character.
+  // If the return value is >= the passed length, the entire string was ASCII.
+  static inline int NonAsciiStart(const char* chars, int length) {
+    const char* start = chars;
     const char* limit = chars + length;
 #ifdef V8_HOST_CAN_READ_UNALIGNED
     ASSERT(kMaxAsciiCharCode == 0x7F);
     const uintptr_t non_ascii_mask = kUintptrAllBitsSet / 0xFF * 0x80;
     while (chars + sizeof(uintptr_t) <= limit) {
       if (*reinterpret_cast<const uintptr_t*>(chars) & non_ascii_mask) {
-        return false;
+        return chars - start;
       }
       chars += sizeof(uintptr_t);
     }
 #endif
     while (chars < limit) {
-      if (static_cast<uint8_t>(*chars) > kMaxAsciiCharCodeU) return false;
+      if (static_cast<uint8_t>(*chars) > kMaxAsciiCharCodeU) {
+        return chars - start;
+      }
       ++chars;
     }
-    return true;
+    return chars - start;
+  }
+
+  static inline bool IsAscii(const char* chars, int length) {
+    return NonAsciiStart(chars, length) >= length;
   }

-  static inline bool IsAscii(const uc16* chars, int length) {
+  static inline int NonAsciiStart(const uc16* chars, int length) {
     const uc16* limit = chars + length;
+    const uc16* start = chars;
     while (chars < limit) {
-      if (*chars > kMaxAsciiCharCodeU) return false;
+      if (*chars > kMaxAsciiCharCodeU) return chars - start;
       ++chars;
     }
-    return true;
+    return chars - start;
+  }
+
+  static inline bool IsAscii(const uc16* chars, int length) {
+    return NonAsciiStart(chars, length) >= length;
   }

  protected:

--
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev

Reply via email to