Revision: 4985
Author: [email protected]
Date: Wed Jun 30 00:40:40 2010
Log: Fix Chromium issue 47824.
In rare cases a two-byte string was mistaken for an ascii-string.

Review URL: http://codereview.chromium.org/2858033
http://code.google.com/p/v8/source/detail?r=4985

Modified:
 /branches/bleeding_edge/src/handles.cc
 /branches/bleeding_edge/src/handles.h
 /branches/bleeding_edge/src/jsregexp.cc
 /branches/bleeding_edge/src/objects.cc
 /branches/bleeding_edge/src/regexp-macro-assembler.cc
 /branches/bleeding_edge/src/runtime.cc
 /branches/bleeding_edge/test/cctest/test-api.cc

=======================================
--- /branches/bleeding_edge/src/handles.cc      Tue May 25 05:14:49 2010
+++ /branches/bleeding_edge/src/handles.cc      Wed Jun 30 00:40:40 2010
@@ -197,7 +197,17 @@

 void FlattenString(Handle<String> string) {
   CALL_HEAP_FUNCTION_VOID(string->TryFlatten());
+}
+
+
+Handle<String> FlattenGetString(Handle<String> string) {
+  Handle<String> result;
+  CALL_AND_RETRY(string->TryFlatten(),
+                 { result = Handle<String>(String::cast(__object__));
+                   break; },
+                 return Handle<String>());
   ASSERT(string->IsFlat());
+  return result;
 }


=======================================
--- /branches/bleeding_edge/src/handles.h       Tue May 25 05:14:49 2010
+++ /branches/bleeding_edge/src/handles.h       Wed Jun 30 00:40:40 2010
@@ -193,8 +193,14 @@
 void NormalizeElements(Handle<JSObject> object);
 void TransformToFastProperties(Handle<JSObject> object,
                                int unused_property_fields);
+
+// Flattens a string.
 void FlattenString(Handle<String> str);

+// Flattens a string and returns the underlying external or sequential
+// string.
+Handle<String> FlattenGetString(Handle<String> str);
+
 Handle<Object> SetProperty(Handle<JSObject> object,
                            Handle<String> key,
                            Handle<Object> value,
=======================================
--- /branches/bleeding_edge/src/jsregexp.cc     Tue Jun 22 01:38:32 2010
+++ /branches/bleeding_edge/src/jsregexp.cc     Wed Jun 30 00:40:40 2010
@@ -356,7 +356,16 @@
   if (!subject->IsFlat()) {
     FlattenString(subject);
   }
-  bool is_ascii = subject->IsAsciiRepresentation();
+  // Check the asciiness of the underlying storage.
+  bool is_ascii;
+  {
+    AssertNoAllocation no_gc;
+    String* sequential_string = *subject;
+    if (subject->IsConsString()) {
+      sequential_string =  ConsString::cast(*subject)->first();
+    }
+    is_ascii = sequential_string->IsAsciiRepresentation();
+  }
   if (!EnsureCompiledIrregexp(regexp, is_ascii)) {
     return -1;
   }
@@ -380,6 +389,11 @@
   ASSERT(index >= 0);
   ASSERT(index <= subject->length());
   ASSERT(subject->IsFlat());
+
+  // A flat ASCII string might have a two-byte first part.
+  if (subject->IsConsString()) {
+    subject = Handle<String>(ConsString::cast(*subject)->first());
+  }

 #ifndef V8_INTERPRETED_REGEXP
   ASSERT(output.length() >=
@@ -407,7 +421,7 @@
     // If result is RETRY, the string has changed representation, and we
     // must restart from scratch.
     // In this case, it means we must make sure we are prepared to handle
-    // the, potentially, differen subject (the string can switch between
+    // the, potentially, different subject (the string can switch between
     // being internal and external, and even between being ASCII and UC16,
     // but the characters are always the same).
     IrregexpPrepare(regexp, subject);
=======================================
--- /branches/bleeding_edge/src/objects.cc      Thu Jun 24 06:56:35 2010
+++ /branches/bleeding_edge/src/objects.cc      Wed Jun 30 00:40:40 2010
@@ -678,7 +678,7 @@


 bool String::MakeExternal(v8::String::ExternalStringResource* resource) {
-  // Externalizing twice leaks the external resouce, so it's
+  // Externalizing twice leaks the external resource, so it's
   // prohibited by the API.
   ASSERT(!this->IsExternalString());
 #ifdef DEBUG
=======================================
--- /branches/bleeding_edge/src/regexp-macro-assembler.cc Mon Apr 19 12:30:11 2010 +++ /branches/bleeding_edge/src/regexp-macro-assembler.cc Wed Jun 30 00:40:40 2010
@@ -120,8 +120,6 @@
   int start_offset = previous_index;
   int end_offset = subject_ptr->length();

-  bool is_ascii = subject->IsAsciiRepresentation();
-
// The string has been flattened, so it it is a cons string it contains the
   // full string in the first part.
   if (StringShape(subject_ptr).IsCons()) {
@@ -129,7 +127,7 @@
     subject_ptr = ConsString::cast(subject_ptr)->first();
   }
   // Ensure that an underlying string has the same ascii-ness.
-  ASSERT(subject_ptr->IsAsciiRepresentation() == is_ascii);
+  bool is_ascii = subject_ptr->IsAsciiRepresentation();
   ASSERT(subject_ptr->IsExternalString() || subject_ptr->IsSeqString());
   // String is now either Sequential or External
   int char_size_shift = is_ascii ? 0 : 1;
=======================================
--- /branches/bleeding_edge/src/runtime.cc      Mon Jun 28 05:09:29 2010
+++ /branches/bleeding_edge/src/runtime.cc      Wed Jun 30 00:40:40 2010
@@ -2782,13 +2782,17 @@
   // algorithm is unnecessary overhead.
   if (pattern_length == 1) {
     AssertNoAllocation no_heap_allocation;  // ensure vectors stay valid
-    if (sub->IsAsciiRepresentation()) {
+    String* seq_sub = *sub;
+    if (seq_sub->IsConsString()) {
+      seq_sub = ConsString::cast(seq_sub)->first();
+    }
+    if (seq_sub->IsAsciiRepresentation()) {
       uc16 pchar = pat->Get(0);
       if (pchar > String::kMaxAsciiCharCode) {
         return -1;
       }
       Vector<const char> ascii_vector =
-        sub->ToAsciiVector().SubVector(start_index, subject_length);
+          seq_sub->ToAsciiVector().SubVector(start_index, subject_length);
       const void* pos = memchr(ascii_vector.start(),
                                static_cast<const char>(pchar),
                                static_cast<size_t>(ascii_vector.length()));
@@ -2798,7 +2802,9 @@
       return static_cast<int>(reinterpret_cast<const char*>(pos)
           - ascii_vector.start() + start_index);
     }
- return SingleCharIndexOf(sub->ToUC16Vector(), pat->Get(0), start_index);
+    return SingleCharIndexOf(seq_sub->ToUC16Vector(),
+                             pat->Get(0),
+                             start_index);
   }

   if (!pat->IsFlat()) {
@@ -2806,19 +2812,29 @@
   }

   AssertNoAllocation no_heap_allocation;  // ensure vectors stay valid
+ // Extract flattened substrings of cons strings before determining asciiness.
+  String* seq_sub = *sub;
+  if (seq_sub->IsConsString()) {
+    seq_sub = ConsString::cast(seq_sub)->first();
+  }
+  String* seq_pat = *pat;
+  if (seq_pat->IsConsString()) {
+    seq_pat = ConsString::cast(seq_pat)->first();
+  }
+
   // dispatch on type of strings
-  if (pat->IsAsciiRepresentation()) {
-    Vector<const char> pat_vector = pat->ToAsciiVector();
-    if (sub->IsAsciiRepresentation()) {
-      return StringSearch(sub->ToAsciiVector(), pat_vector, start_index);
-    }
-    return StringSearch(sub->ToUC16Vector(), pat_vector, start_index);
-  }
-  Vector<const uc16> pat_vector = pat->ToUC16Vector();
-  if (sub->IsAsciiRepresentation()) {
-    return StringSearch(sub->ToAsciiVector(), pat_vector, start_index);
-  }
-  return StringSearch(sub->ToUC16Vector(), pat_vector, start_index);
+  if (seq_pat->IsAsciiRepresentation()) {
+    Vector<const char> pat_vector = seq_pat->ToAsciiVector();
+    if (seq_sub->IsAsciiRepresentation()) {
+ return StringSearch(seq_sub->ToAsciiVector(), pat_vector, start_index);
+    }
+    return StringSearch(seq_sub->ToUC16Vector(), pat_vector, start_index);
+  }
+  Vector<const uc16> pat_vector = seq_pat->ToUC16Vector();
+  if (seq_sub->IsAsciiRepresentation()) {
+    return StringSearch(seq_sub->ToAsciiVector(), pat_vector, start_index);
+  }
+  return StringSearch(seq_sub->ToUC16Vector(), pat_vector, start_index);
 }


=======================================
--- /branches/bleeding_edge/test/cctest/test-api.cc     Mon Jun 28 02:25:09 2010
+++ /branches/bleeding_edge/test/cctest/test-api.cc     Wed Jun 30 00:40:40 2010
@@ -58,7 +58,7 @@
 using ::v8::AccessorInfo;
 using ::v8::Extension;

-namespace i = ::v8::internal;
+namespace i = ::i;


 static void ExpectString(const char* code, const char* expected) {
@@ -381,11 +381,11 @@
     CHECK(source->IsExternal());
     CHECK_EQ(resource,
static_cast<TestResource*>(source->GetExternalStringResource()));
-    v8::internal::Heap::CollectAllGarbage(false);
+    i::Heap::CollectAllGarbage(false);
     CHECK_EQ(0, TestResource::dispose_count);
   }
-  v8::internal::CompilationCache::Clear();
-  v8::internal::Heap::CollectAllGarbage(false);
+  i::CompilationCache::Clear();
+  i::Heap::CollectAllGarbage(false);
   CHECK_EQ(1, TestResource::dispose_count);
 }

@@ -402,11 +402,11 @@
     Local<Value> value = script->Run();
     CHECK(value->IsNumber());
     CHECK_EQ(7, value->Int32Value());
-    v8::internal::Heap::CollectAllGarbage(false);
+    i::Heap::CollectAllGarbage(false);
     CHECK_EQ(0, TestAsciiResource::dispose_count);
   }
-  v8::internal::CompilationCache::Clear();
-  v8::internal::Heap::CollectAllGarbage(false);
+  i::CompilationCache::Clear();
+  i::Heap::CollectAllGarbage(false);
   CHECK_EQ(1, TestAsciiResource::dispose_count);
 }

@@ -427,11 +427,11 @@
     Local<Value> value = script->Run();
     CHECK(value->IsNumber());
     CHECK_EQ(7, value->Int32Value());
-    v8::internal::Heap::CollectAllGarbage(false);
+    i::Heap::CollectAllGarbage(false);
     CHECK_EQ(0, TestResource::dispose_count);
   }
-  v8::internal::CompilationCache::Clear();
-  v8::internal::Heap::CollectAllGarbage(false);
+  i::CompilationCache::Clear();
+  i::Heap::CollectAllGarbage(false);
   CHECK_EQ(1, TestResource::dispose_count);
 }

@@ -453,11 +453,11 @@
     Local<Value> value = script->Run();
     CHECK(value->IsNumber());
     CHECK_EQ(7, value->Int32Value());
-    v8::internal::Heap::CollectAllGarbage(false);
+    i::Heap::CollectAllGarbage(false);
     CHECK_EQ(0, TestAsciiResource::dispose_count);
   }
-  v8::internal::CompilationCache::Clear();
-  v8::internal::Heap::CollectAllGarbage(false);
+  i::CompilationCache::Clear();
+  i::Heap::CollectAllGarbage(false);
   CHECK_EQ(1, TestAsciiResource::dispose_count);
 }

@@ -645,11 +645,11 @@
     Local<Value> value = script->Run();
     CHECK(value->IsNumber());
     CHECK_EQ(7, value->Int32Value());
-    v8::internal::Heap::CollectAllGarbage(false);
+    i::Heap::CollectAllGarbage(false);
     CHECK_EQ(0, TestAsciiResource::dispose_count);
   }
-  v8::internal::CompilationCache::Clear();
-  v8::internal::Heap::CollectAllGarbage(false);
+  i::CompilationCache::Clear();
+  i::Heap::CollectAllGarbage(false);
   CHECK_EQ(1, TestAsciiResourceWithDisposeControl::dispose_calls);
   CHECK_EQ(0, TestAsciiResource::dispose_count);

@@ -666,11 +666,11 @@
     Local<Value> value = script->Run();
     CHECK(value->IsNumber());
     CHECK_EQ(7, value->Int32Value());
-    v8::internal::Heap::CollectAllGarbage(false);
+    i::Heap::CollectAllGarbage(false);
     CHECK_EQ(0, TestAsciiResource::dispose_count);
   }
-  v8::internal::CompilationCache::Clear();
-  v8::internal::Heap::CollectAllGarbage(false);
+  i::CompilationCache::Clear();
+  i::Heap::CollectAllGarbage(false);
   CHECK_EQ(1, TestAsciiResourceWithDisposeControl::dispose_calls);
   CHECK_EQ(1, TestAsciiResource::dispose_count);
 }
@@ -708,7 +708,7 @@
     CHECK(value->IsNumber());
     CHECK_EQ(68, value->Int32Value());
   }
-  v8::internal::CompilationCache::Clear();
+  i::CompilationCache::Clear();
   i::Heap::CollectAllGarbage(false);
   i::Heap::CollectAllGarbage(false);
 }
@@ -1881,7 +1881,7 @@
 // that come after them so they cannot run in parallel.
 TEST(OutOfMemory) {
// It's not possible to read a snapshot into a heap with different dimensions.
-  if (v8::internal::Snapshot::IsEnabled()) return;
+  if (i::Snapshot::IsEnabled()) return;
   // Set heap limits.
   static const int K = 1024;
   v8::ResourceConstraints constraints;
@@ -1922,7 +1922,7 @@

 TEST(OutOfMemoryNested) {
// It's not possible to read a snapshot into a heap with different dimensions.
-  if (v8::internal::Snapshot::IsEnabled()) return;
+  if (i::Snapshot::IsEnabled()) return;
   // Set heap limits.
   static const int K = 1024;
   v8::ResourceConstraints constraints;
@@ -1951,7 +1951,7 @@

 TEST(HugeConsStringOutOfMemory) {
// It's not possible to read a snapshot into a heap with different dimensions.
-  if (v8::internal::Snapshot::IsEnabled()) return;
+  if (i::Snapshot::IsEnabled()) return;
   v8::HandleScope scope;
   LocalContext context;
   // Set heap limits.
@@ -6811,7 +6811,7 @@
int* call_count = reinterpret_cast<int*>(v8::External::Unwrap(info.Data()));
   ++(*call_count);
   if ((*call_count) % 20 == 0) {
-    v8::internal::Heap::CollectAllGarbage(true);
+    i::Heap::CollectAllGarbage(true);
   }
   return v8::Handle<Value>();
 }
@@ -7620,8 +7620,8 @@


 bool ApiTestFuzzer::fuzzing_ = false;
-v8::internal::Semaphore* ApiTestFuzzer::all_tests_done_=
-  v8::internal::OS::CreateSemaphore(0);
+i::Semaphore* ApiTestFuzzer::all_tests_done_=
+  i::OS::CreateSemaphore(0);
 int ApiTestFuzzer::active_tests_;
 int ApiTestFuzzer::tests_being_run_;
 int ApiTestFuzzer::current_;
@@ -7899,7 +7899,7 @@

 static int GetGlobalObjectsCount() {
   int count = 0;
-  v8::internal::HeapIterator it;
+  i::HeapIterator it;
for (i::HeapObject* object = it.next(); object != NULL; object = it.next())
     if (object->IsJSGlobalObject()) count++;
   return count;
@@ -7912,11 +7912,11 @@
   // the first garbage collection but some of the maps have already
   // been marked at that point.  Therefore some of the maps are not
   // collected until the second garbage collection.
-  v8::internal::Heap::CollectAllGarbage(false);
-  v8::internal::Heap::CollectAllGarbage(false);
+  i::Heap::CollectAllGarbage(false);
+  i::Heap::CollectAllGarbage(false);
   int count = GetGlobalObjectsCount();
 #ifdef DEBUG
-  if (count > 0) v8::internal::Heap::TracePathToGlobal();
+  if (count > 0) i::Heap::TracePathToGlobal();
 #endif
   return count;
 }
@@ -10021,7 +10021,7 @@


 THREADED_TEST(ExternalByteArray) {
-  ExternalArrayTestHelper<v8::internal::ExternalByteArray, int8_t>(
+  ExternalArrayTestHelper<i::ExternalByteArray, int8_t>(
       v8::kExternalByteArray,
       -128,
       127);
@@ -10029,7 +10029,7 @@


 THREADED_TEST(ExternalUnsignedByteArray) {
- ExternalArrayTestHelper<v8::internal::ExternalUnsignedByteArray, uint8_t>(
+  ExternalArrayTestHelper<i::ExternalUnsignedByteArray, uint8_t>(
       v8::kExternalUnsignedByteArray,
       0,
       255);
@@ -10037,7 +10037,7 @@


 THREADED_TEST(ExternalShortArray) {
-  ExternalArrayTestHelper<v8::internal::ExternalShortArray, int16_t>(
+  ExternalArrayTestHelper<i::ExternalShortArray, int16_t>(
       v8::kExternalShortArray,
       -32768,
       32767);
@@ -10045,7 +10045,7 @@


 THREADED_TEST(ExternalUnsignedShortArray) {
- ExternalArrayTestHelper<v8::internal::ExternalUnsignedShortArray, uint16_t>(
+  ExternalArrayTestHelper<i::ExternalUnsignedShortArray, uint16_t>(
       v8::kExternalUnsignedShortArray,
       0,
       65535);
@@ -10053,7 +10053,7 @@


 THREADED_TEST(ExternalIntArray) {
-  ExternalArrayTestHelper<v8::internal::ExternalIntArray, int32_t>(
+  ExternalArrayTestHelper<i::ExternalIntArray, int32_t>(
       v8::kExternalIntArray,
       INT_MIN,   // -2147483648
       INT_MAX);  //  2147483647
@@ -10061,7 +10061,7 @@


 THREADED_TEST(ExternalUnsignedIntArray) {
- ExternalArrayTestHelper<v8::internal::ExternalUnsignedIntArray, uint32_t>(
+  ExternalArrayTestHelper<i::ExternalUnsignedIntArray, uint32_t>(
       v8::kExternalUnsignedIntArray,
       0,
       UINT_MAX);  // 4294967295
@@ -10069,7 +10069,7 @@


 THREADED_TEST(ExternalFloatArray) {
-  ExternalArrayTestHelper<v8::internal::ExternalFloatArray, float>(
+  ExternalArrayTestHelper<i::ExternalFloatArray, float>(
       v8::kExternalFloatArray,
       -500,
       500);
@@ -10547,7 +10547,7 @@
     other_context->Enter();
     CompileRun(source_simple);
     other_context->Exit();
-    v8::internal::Heap::CollectAllGarbage(false);
+    i::Heap::CollectAllGarbage(false);
     if (GetGlobalObjectsCount() == 1) break;
   }
   CHECK_GE(2, gc_count);
@@ -10569,7 +10569,7 @@
     other_context->Enter();
     CompileRun(source_eval);
     other_context->Exit();
-    v8::internal::Heap::CollectAllGarbage(false);
+    i::Heap::CollectAllGarbage(false);
     if (GetGlobalObjectsCount() == 1) break;
   }
   CHECK_GE(2, gc_count);
@@ -10596,7 +10596,7 @@
     other_context->Enter();
     CompileRun(source_exception);
     other_context->Exit();
-    v8::internal::Heap::CollectAllGarbage(false);
+    i::Heap::CollectAllGarbage(false);
     if (GetGlobalObjectsCount() == 1) break;
   }
   CHECK_GE(2, gc_count);
@@ -10859,7 +10859,7 @@
" return 'Different results for ' + key1 + ': ' + r1 + ' vs. ' + r1_;"
       "  return 'PASSED';"
       "})()";
-  v8::internal::Heap::ClearJSFunctionResultCaches();
+  i::Heap::ClearJSFunctionResultCaches();
   ExpectString(code, "PASSED");
 }

@@ -10883,7 +10883,7 @@
       "    return 'FAILED: k0CacheSize is too small';"
       "  return 'PASSED';"
       "})()";
-  v8::internal::Heap::ClearJSFunctionResultCaches();
+  i::Heap::ClearJSFunctionResultCaches();
   ExpectString(code, "PASSED");
 }

@@ -10908,7 +10908,7 @@
       "  };"
       "  return 'PASSED';"
       "})()";
-  v8::internal::Heap::ClearJSFunctionResultCaches();
+  i::Heap::ClearJSFunctionResultCaches();
   ExpectString(code, "PASSED");
 }

@@ -10933,7 +10933,7 @@
       "  };"
       "  return 'PASSED';"
       "})()";
-  v8::internal::Heap::ClearJSFunctionResultCaches();
+  i::Heap::ClearJSFunctionResultCaches();
   ExpectString(code, "PASSED");
 }

@@ -10951,6 +10951,87 @@
       "  };"
       "  return 'PASSED';"
       "})()";
-  v8::internal::Heap::ClearJSFunctionResultCaches();
+  i::Heap::ClearJSFunctionResultCaches();
   ExpectString(code, "PASSED");
 }
+
+
+THREADED_TEST(TwoByteStringInAsciiCons) {
+  // See Chromium issue 47824.
+  v8::HandleScope scope;
+
+  LocalContext context;
+  const char* init_code =
+      "var str1 = 'abelspendabel';"
+      "var str2 = str1 + str1 + str1;"
+      "str2;";
+  Local<Value> result = CompileRun(init_code);
+
+  CHECK(result->IsString());
+ i::Handle<i::String> string = v8::Utils::OpenHandle(String::Cast(*result));
+  int length = string->length();
+  CHECK(string->IsAsciiRepresentation());
+
+  FlattenString(string);
+  i::Handle<i::String> flat_string = FlattenGetString(string);
+
+  CHECK(string->IsAsciiRepresentation());
+  CHECK(flat_string->IsAsciiRepresentation());
+
+  // Create external resource.
+  uint16_t* uc16_buffer = new uint16_t[length + 1];
+
+  i::String::WriteToFlat(*flat_string, uc16_buffer, 0, length);
+  uc16_buffer[length] = 0;
+
+  TestResource resource(uc16_buffer);
+
+  flat_string->MakeExternal(&resource);
+
+  CHECK(flat_string->IsTwoByteRepresentation());
+
+  // At this point, we should have a Cons string which is flat and ASCII,
+  // with a first half that is a two-byte string (although it only contains
+ // ASCII characters). This is a valid sequence of steps, and it can happen
+  // in real pages.
+
+  CHECK(string->IsAsciiRepresentation());
+  i::ConsString* cons = i::ConsString::cast(*string);
+  CHECK_EQ(0, cons->second()->length());
+  CHECK(cons->first()->IsTwoByteRepresentation());
+
+  // Check that some string operations work.
+
+  // Atom RegExp.
+  Local<Value> reresult = CompileRun("str2.match(/abel/g).length;");
+  CHECK_EQ(6, reresult->Int32Value());
+
+  // Nonatom RegExp.
+  reresult = CompileRun("str2.match(/abe./g).length;");
+  CHECK_EQ(6, reresult->Int32Value());
+
+  reresult = CompileRun("str2.search(/bel/g);");
+  CHECK_EQ(1, reresult->Int32Value());
+
+  reresult = CompileRun("str2.search(/be./g);");
+  CHECK_EQ(1, reresult->Int32Value());
+
+  ExpectTrue("/bel/g.test(str2);");
+
+  ExpectTrue("/be./g.test(str2);");
+
+  reresult = CompileRun("/bel/g.exec(str2);");
+  CHECK(!reresult->IsNull());
+
+  reresult = CompileRun("/be./g.exec(str2);");
+  CHECK(!reresult->IsNull());
+
+  ExpectString("str2.substring(2, 10);", "elspenda");
+
+  ExpectString("str2.substring(2, 20);", "elspendabelabelspe");
+
+  ExpectString("str2.charAt(2);", "e");
+
+  reresult = CompileRun("str2.charCodeAt(2);");
+  CHECK_EQ(static_cast<int32_t>('e'), reresult->Int32Value());
+}

--
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev

Reply via email to