Revision: 12762
Author: [email protected]
Date: Thu Oct 18 08:08:11 2012
Log: Keep track of the first non-ascii word/char to avoid redoing the
work.
Review URL: https://chromiumcodereview.appspot.com/11194053
http://code.google.com/p/v8/source/detail?r=12762
Modified:
/branches/bleeding_edge/src/heap-inl.h
/branches/bleeding_edge/src/heap.cc
/branches/bleeding_edge/src/heap.h
/branches/bleeding_edge/src/objects.h
=======================================
--- /branches/bleeding_edge/src/heap-inl.h Wed Oct 17 06:04:49 2012
+++ /branches/bleeding_edge/src/heap-inl.h Thu Oct 18 08:08:11 2012
@@ -85,13 +85,16 @@
MaybeObject* Heap::AllocateStringFromUtf8(Vector<const char> str,
PretenureFlag pretenure) {
// Check for ASCII first since this is the common case.
- if (String::IsAscii(str.start(), str.length())) {
+ const char* start = str.start();
+ int length = str.length();
+ int non_ascii_start = String::NonAsciiStart(start, length);
+ if (non_ascii_start >= length) {
// If the string is ASCII, we do not need to convert the characters
// since UTF8 is backwards compatible with ASCII.
return AllocateStringFromAscii(str, pretenure);
}
// Non-ASCII and we need to decode.
- return AllocateStringFromUtf8Slow(str, pretenure);
+ return AllocateStringFromUtf8Slow(str, non_ascii_start, pretenure);
}
=======================================
--- /branches/bleeding_edge/src/heap.cc Wed Oct 17 06:04:49 2012
+++ /branches/bleeding_edge/src/heap.cc Thu Oct 18 08:08:11 2012
@@ -4428,13 +4428,14 @@
MaybeObject* Heap::AllocateStringFromUtf8Slow(Vector<const char> string,
+ int non_ascii_start,
PretenureFlag pretenure) {
- // Count the number of characters in the UTF-8 string and check if
- // it is an ASCII string.
+ // Continue counting the number of characters in the UTF-8 string, starting
+ // from the first non-ascii character or word.
+ int chars = non_ascii_start;
Access<UnicodeCache::Utf8Decoder>
decoder(isolate_->unicode_cache()->utf8_decoder());
- decoder->Reset(string.start(), string.length());
- int chars = 0;
+ decoder->Reset(string.start() + non_ascii_start, string.length() - chars);
while (decoder->has_more()) {
uint32_t r = decoder->GetNext();
if (r <= unibrow::Utf16::kMaxNonSurrogateCharCode) {
=======================================
--- /branches/bleeding_edge/src/heap.h Fri Oct 12 06:49:12 2012
+++ /branches/bleeding_edge/src/heap.h Thu Oct 18 08:08:11 2012
@@ -705,6 +705,7 @@
PretenureFlag pretenure = NOT_TENURED);
MUST_USE_RESULT MaybeObject* AllocateStringFromUtf8Slow(
Vector<const char> str,
+ int non_ascii_start,
PretenureFlag pretenure = NOT_TENURED);
MUST_USE_RESULT MaybeObject* AllocateStringFromTwoByte(
Vector<const uc16> str,
=======================================
--- /branches/bleeding_edge/src/objects.h Wed Oct 17 07:09:42 2012
+++ /branches/bleeding_edge/src/objects.h Thu Oct 18 08:08:11 2012
@@ -7413,32 +7413,47 @@
int from,
int to);
- static inline bool IsAscii(const char* chars, int length) {
+ // The return value may point to the first aligned word containing the
+ // first non-ascii character, rather than directly to the non-ascii character.
+ // If the return value is >= the passed length, the entire string was ASCII.
+ static inline int NonAsciiStart(const char* chars, int length) {
+ const char* start = chars;
const char* limit = chars + length;
#ifdef V8_HOST_CAN_READ_UNALIGNED
ASSERT(kMaxAsciiCharCode == 0x7F);
const uintptr_t non_ascii_mask = kUintptrAllBitsSet / 0xFF * 0x80;
while (chars + sizeof(uintptr_t) <= limit) {
if (*reinterpret_cast<const uintptr_t*>(chars) & non_ascii_mask) {
- return false;
+ return chars - start;
}
chars += sizeof(uintptr_t);
}
#endif
while (chars < limit) {
- if (static_cast<uint8_t>(*chars) > kMaxAsciiCharCodeU) return false;
+ if (static_cast<uint8_t>(*chars) > kMaxAsciiCharCodeU) {
+ return chars - start;
+ }
++chars;
}
- return true;
+ return chars - start;
+ }
+
+ static inline bool IsAscii(const char* chars, int length) {
+ return NonAsciiStart(chars, length) >= length;
}
- static inline bool IsAscii(const uc16* chars, int length) {
+ static inline int NonAsciiStart(const uc16* chars, int length) {
const uc16* limit = chars + length;
+ const uc16* start = chars;
while (chars < limit) {
- if (*chars > kMaxAsciiCharCodeU) return false;
+ if (*chars > kMaxAsciiCharCodeU) return chars - start;
++chars;
}
- return true;
+ return chars - start;
+ }
+
+ static inline bool IsAscii(const uc16* chars, int length) {
+ return NonAsciiStart(chars, length) >= length;
}
protected:
--
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev