Revision: 4300
Author: [email protected]
Date: Fri Mar 26 16:33:37 2010
Log: Some string optimizations:
* Faster hashing for sequential strings.
* When adding short external two-byte strings, try to convert them
back to ascii. Chances are high that the embedder uses a two-byte
representation even for ascii strings. This optimization saves
memory and makes hashing faster.
Review URL: http://codereview.chromium.org/1444001
http://code.google.com/p/v8/source/detail?r=4300
Modified:
/branches/bleeding_edge/src/heap.cc
/branches/bleeding_edge/src/objects-inl.h
/branches/bleeding_edge/src/objects.cc
/branches/bleeding_edge/src/objects.h
/branches/bleeding_edge/src/v8-counters.h
=======================================
--- /branches/bleeding_edge/src/heap.cc Thu Mar 25 08:32:58 2010
+++ /branches/bleeding_edge/src/heap.cc Fri Mar 26 16:33:37 2010
@@ -1961,8 +1961,9 @@
return MakeOrFindTwoCharacterString(c1, c2);
}
- bool is_ascii = first->IsAsciiRepresentation()
- && second->IsAsciiRepresentation();
+ bool first_is_ascii = first->IsAsciiRepresentation();
+ bool second_is_ascii = second->IsAsciiRepresentation();
+ bool is_ascii = first_is_ascii && second_is_ascii;
// Make sure that an out of memory exception is thrown if the length
// of the new cons string is too large.
@@ -1997,6 +1998,25 @@
for (int i = 0; i < second_length; i++) *dest++ = src[i];
return result;
} else {
+ // For short external two-byte strings we check whether they can
+ // be represented using ascii.
+ if (!first_is_ascii) {
+ first_is_ascii = first->IsExternalTwoByteStringWithAsciiChars();
+ }
+ if (first_is_ascii && !second_is_ascii) {
+ second_is_ascii = second->IsExternalTwoByteStringWithAsciiChars();
+ }
+ if (first_is_ascii && second_is_ascii) {
+ Object* result = AllocateRawAsciiString(length);
+ if (result->IsFailure()) return result;
+ // Copy the characters into the new object.
+ char* dest = SeqAsciiString::cast(result)->GetChars();
+ String::WriteToFlat(first, dest, 0, first_length);
+ String::WriteToFlat(second, dest + first_length, 0, second_length);
+ Counters::string_add_runtime_ext_to_ascii.Increment();
+ return result;
+ }
+
Object* result = AllocateRawTwoByteString(length);
if (result->IsFailure()) return result;
// Copy the characters into the new object.
=======================================
--- /branches/bleeding_edge/src/objects-inl.h Tue Mar 23 04:40:38 2010
+++ /branches/bleeding_edge/src/objects-inl.h Fri Mar 26 16:33:37 2010
@@ -253,6 +253,16 @@
}
return (type & kStringEncodingMask) == kTwoByteStringTag;
}
+
+
+bool String::IsExternalTwoByteStringWithAsciiChars() {
+ if (!IsExternalTwoByteString()) return false;
+ const uc16* data = ExternalTwoByteString::cast(this)->resource()->data();
+ for (int i = 0, len = length(); i < len; i++) {
+ if (data[i] > kMaxAsciiCharCode) return false;
+ }
+ return true;
+}
bool StringShape::IsCons() {
=======================================
--- /branches/bleeding_edge/src/objects.cc Tue Mar 23 04:40:38 2010
+++ /branches/bleeding_edge/src/objects.cc Fri Mar 26 16:33:37 2010
@@ -4658,15 +4658,40 @@
}
return i == slen && !decoder->has_more();
}
+
+
+template <typename schar>
+static inline uint32_t HashSequentialString(const schar* chars, int length) {
+ StringHasher hasher(length);
+ if (!hasher.has_trivial_hash()) {
+ int i;
+ for (i = 0; hasher.is_array_index() && (i < length); i++) {
+ hasher.AddCharacter(chars[i]);
+ }
+ for (; i < length; i++) {
+ hasher.AddCharacterNoIndex(chars[i]);
+ }
+ }
+ return hasher.GetHashField();
+}
uint32_t String::ComputeAndSetHash() {
// Should only be called if hash code has not yet been computed.
ASSERT(!(hash_field() & kHashComputedMask));
+ const int len = length();
+
// Compute the hash code.
- StringInputBuffer buffer(this);
- uint32_t field = ComputeHashField(&buffer, length());
+ uint32_t field = 0;
+ if (StringShape(this).IsSequentialAscii()) {
+ field = HashSequentialString(SeqAsciiString::cast(this)->GetChars(), len);
+ } else if (StringShape(this).IsSequentialTwoByte()) {
+ field = HashSequentialString(SeqTwoByteString::cast(this)->GetChars(), len);
+ } else {
+ StringInputBuffer buffer(this);
+ field = ComputeHashField(&buffer, len);
+ }
// Store the hash code in the object.
set_hash_field(field);
=======================================
--- /branches/bleeding_edge/src/objects.h Fri Mar 26 07:19:47 2010
+++ /branches/bleeding_edge/src/objects.h Fri Mar 26 16:33:37 2010
@@ -3926,6 +3926,13 @@
inline bool IsAsciiRepresentation();
inline bool IsTwoByteRepresentation();
+ // Check whether this string is an external two-byte string that in
+ // fact contains only ascii characters.
+ //
+ // Such strings may appear when the embedder prefers two-byte
+ // representations even for ascii data.
+ inline bool IsExternalTwoByteStringWithAsciiChars();
+
// Get and set individual two byte chars in the string.
inline void Set(int index, uint16_t value);
// Get individual two byte char in the string. Repeated calls
=======================================
--- /branches/bleeding_edge/src/v8-counters.h Tue Mar 23 04:40:38 2010
+++ /branches/bleeding_edge/src/v8-counters.h Fri Mar 26 16:33:37 2010
@@ -166,6 +166,7 @@
SC(generic_binary_stub_calls_regs, V8.GenericBinaryStubCallsRegs) \
SC(string_add_runtime, V8.StringAddRuntime) \
SC(string_add_native, V8.StringAddNative) \
+ SC(string_add_runtime_ext_to_ascii, V8.StringAddRuntimeExtToAscii) \
SC(sub_string_runtime, V8.SubStringRuntime) \
SC(sub_string_native, V8.SubStringNative) \
SC(string_compare_native, V8.StringCompareNative) \
--
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev
To unsubscribe from this group, send email to v8-dev+unsubscribe@googlegroups.com or reply
to this email with the words "REMOVE ME" as the subject.