Author: l...@chromium.org
Date: Wed Mar 25 05:14:10 2009
New Revision: 1608

Modified:
    branches/bleeding_edge/src/jsregexp.cc
    branches/bleeding_edge/src/jsregexp.h
    branches/bleeding_edge/src/platform-win32.cc
    branches/bleeding_edge/src/regexp-macro-assembler-ia32.cc
    branches/bleeding_edge/src/runtime.cc
    branches/bleeding_edge/src/runtime.h
    branches/bleeding_edge/src/string.js
    branches/bleeding_edge/test/mjsunit/regexp-indexof.js

Log:
Moved String.prototype.match implementation to C++.
Some extra runtime assertions added.


Modified: branches/bleeding_edge/src/jsregexp.cc
==============================================================================
--- branches/bleeding_edge/src/jsregexp.cc      (original)
+++ branches/bleeding_edge/src/jsregexp.cc      Wed Mar 25 05:14:10 2009
@@ -205,25 +205,6 @@
  }


-Handle<Object> RegExpImpl::ExecGlobal(Handle<JSRegExp> regexp,
-                                      Handle<String> subject,
-                                      Handle<JSArray> last_match_info) {
-  switch (regexp->TypeTag()) {
-    case JSRegExp::ATOM:
-      return AtomExecGlobal(regexp, subject, last_match_info);
-    case JSRegExp::IRREGEXP: {
-      Handle<Object> result =
-          IrregexpExecGlobal(regexp, subject, last_match_info);
-      ASSERT(!result.is_null() || Top::has_pending_exception());
-      return result;
-    }
-    default:
-      UNREACHABLE();
-      return Handle<Object>::null();
-  }
-}
-
-
  // RegExp Atom implementation: Simple string search using indexOf.


@@ -273,55 +254,6 @@
  }


-Handle<Object> RegExpImpl::AtomExecGlobal(Handle<JSRegExp> re,
-                                          Handle<String> subject,
-                                          Handle<JSArray> last_match_info) {
-  Handle<String> needle(String::cast(re->DataAt(JSRegExp::kAtomPatternIndex)));
-  ASSERT(last_match_info->HasFastElements());
-  Handle<JSArray> result = Factory::NewJSArray(1);
-  int index = 0;
-  int match_count = 0;
-  int subject_length = subject->length();
-  int needle_length = needle->length();
-  int last_value = -1;
-  while (true) {
-    HandleScope scope;
-    int value = -1;
-    if (index + needle_length <= subject_length) {
-      value = Runtime::StringMatch(subject, needle, index);
-    }
-    if (value == -1) {
-      if (last_value != -1) {
-        Handle<FixedArray> array(last_match_info->elements());
-        SetAtomLastCapture(*array,
-                           *subject,
-                           last_value,
-                           last_value + needle->length());
-      }
-      break;
-    }
-
-    int end = value + needle_length;
-
-    // Create an array that looks like the static last_match_info array
-    // that is attached to the global RegExp object.  We will be returning
-    // an array of these.
-    Handle<FixedArray> array = Factory::NewFixedArray(kFirstCapture + 2);
-    SetLastCaptureCount(*array, 2);
-    // Ignore subject and input fields.
-    SetCapture(*array, 0, value);
-    SetCapture(*array, 1, end);
-    Handle<JSArray> pair = Factory::NewJSArrayWithElements(array);
-    SetElement(result, match_count, pair);
-    match_count++;
-    index = end;
-    if (needle_length == 0) index++;
-    last_value = value;
-  }
-  return result;
-}
-
-
  // Irregexp implementation.


@@ -331,8 +263,7 @@
  // from the source pattern.
  // If compilation fails, an exception is thrown and this function
  // returns false.
-bool RegExpImpl::EnsureCompiledIrregexp(Handle<JSRegExp> re,
-                                        bool is_ascii) {
+bool RegExpImpl::EnsureCompiledIrregexp(Handle<JSRegExp> re, bool is_ascii) {
    int index;
    if (is_ascii) {
      index = JSRegExp::kIrregexpASCIICodeIndex;
@@ -460,22 +391,20 @@
  }


-Handle<Object> RegExpImpl::IrregexpExec(Handle<JSRegExp> regexp,
+Handle<Object> RegExpImpl::IrregexpExec(Handle<JSRegExp> jsregexp,
                                          Handle<String> subject,
-                                        int index,
+                                        int previous_index,
                                          Handle<JSArray> last_match_info) {
-  ASSERT_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP);
+  ASSERT_EQ(jsregexp->TypeTag(), JSRegExp::IRREGEXP);

    // Prepare space for the return values.
    int number_of_capture_registers =
-      (IrregexpNumberOfCaptures(FixedArray::cast(regexp->data())) + 1) * 2;
+      (IrregexpNumberOfCaptures(FixedArray::cast(jsregexp->data())) + 1) * 2;
    OffsetsVector offsets(number_of_capture_registers);

-  int previous_index = index;
-
  #ifdef DEBUG
    if (FLAG_trace_regexp_bytecodes) {
-    String* pattern = regexp->Pattern();
+    String* pattern = jsregexp->Pattern();
      PrintF("\n\nRegexp match:   /%s/\n\n", *(pattern->ToCString()));
      PrintF("\n\nSubject string: '%s'\n\n", *(subject->ToCString()));
    }
@@ -487,104 +416,12 @@

    last_match_info->EnsureSize(number_of_capture_registers + kLastMatchOverhead);

-  return IrregexpExecOnce(regexp,
-                          number_of_capture_registers,
-                          last_match_info,
-                          subject,
-                          previous_index,
-                          offsets.vector(),
-                          offsets.length());
-}
-
-
-Handle<Object> RegExpImpl::IrregexpExecGlobal(Handle<JSRegExp> regexp,
-                                              Handle<String> subject,
-                                              Handle<JSArray> last_match_info) {
-  ASSERT_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP);
-
-  // Prepare space for the return values.
-  int number_of_capture_registers =
-      (IrregexpNumberOfCaptures(FixedArray::cast(regexp->data())) + 1) * 2;
-  OffsetsVector offsets(number_of_capture_registers);
-
-  int previous_index = 0;
-
-  Handle<JSArray> result = Factory::NewJSArray(0);
-  int result_length = 0;
-  Handle<Object> matches;
-
-  if (!subject->IsFlat()) {
-    FlattenString(subject);
-  }
-
-  last_match_info->EnsureSize(number_of_capture_registers + kLastMatchOverhead);
-
-  while (true) {
-    if (previous_index > subject->length() || previous_index < 0) {
-      // Per ECMA-262 15.10.6.2, if the previous index is greater than the
-      // string length, there is no match.
-      return result;
-    } else {
-#ifdef DEBUG
-      if (FLAG_trace_regexp_bytecodes) {
-        String* pattern = regexp->Pattern();
-        PrintF("\n\nRegexp match:   /%s/\n\n", *(pattern->ToCString()));
-        PrintF("\n\nSubject string: '%s'\n\n", *(subject->ToCString()));
-      }
-#endif
-      HandleScope scope;
-      matches = IrregexpExecOnce(regexp,
-                                 number_of_capture_registers,
-                                 last_match_info,
-                                 subject,
-                                 previous_index,
-                                 offsets.vector(),
-                                 offsets.length());
-
-      if (matches.is_null()) {
-        ASSERT(Top::has_pending_exception());
-        return matches;
-      }
-
-      if (matches->IsJSArray()) {
-        // Create an array that looks like the static last_match_info array
-        // that is attached to the global RegExp object.  We will be returning
-        // an array of these.
-        int match_length = kFirstCapture + number_of_capture_registers;
-        Handle<JSArray> latest_match =
-            Factory::NewJSArray(match_length);
-
-        AssertNoAllocation no_allocation;
-        FixedArray* match_array = JSArray::cast(*matches)->elements();
-        match_array->CopyTo(0,
-                            latest_match->elements(),
-                            0,
-                            match_length);
-        SetElement(result, result_length, latest_match);
-        result_length++;
-        previous_index = GetCapture(match_array, 1);
-        if (GetCapture(match_array, 0) == previous_index) {
-          previous_index++;
-        }
-      } else {
-        ASSERT(matches->IsNull());
-        return result;
-      }
-    }
-  }
-}
-
-
-Handle<Object> RegExpImpl::IrregexpExecOnce(Handle<JSRegExp> jsregexp,
-                                            int number_of_capture_registers,
-                                            Handle<JSArray> last_match_info,
-                                            Handle<String> subject,
-                                            int previous_index,
-                                            int* offsets_vector,
-                                            int offsets_vector_length) {
-  ASSERT(subject->IsFlat());
+  int* offsets_vector = offsets.vector();
+  int offsets_vector_length = offsets.length();
    bool rc;

+  // Dispatch to the correct RegExp implementation.
+
    Handle<String> original_subject = subject;
    Handle<FixedArray> regexp(FixedArray::cast(jsregexp->data()));
    if (UseNativeRegexp()) {
@@ -630,6 +467,8 @@
                                      offsets_vector,
                                      previous_index);
    }
+
+  // Handle results from RegExp implementation.

    if (!rc) {
      return Factory::null_value();

Modified: branches/bleeding_edge/src/jsregexp.h
==============================================================================
--- branches/bleeding_edge/src/jsregexp.h       (original)
+++ branches/bleeding_edge/src/jsregexp.h       Wed Mar 25 05:14:10 2009
@@ -94,20 +94,15 @@
                                   int index,
                                   Handle<JSArray> lastMatchInfo);

-  static Handle<Object> AtomExecGlobal(Handle<JSRegExp> regexp,
-                                       Handle<String> subject,
-                                       Handle<JSArray> lastMatchInfo);
-
    // Execute an Irregexp bytecode pattern.
+  // On a successful match, the result is a JSArray containing
+  // captured positions. On a failure, the result is the null value.
+  // Returns an empty handle in case of an exception.
    static Handle<Object> IrregexpExec(Handle<JSRegExp> regexp,
                                       Handle<String> subject,
                                       int index,
                                       Handle<JSArray> lastMatchInfo);

-  static Handle<Object> IrregexpExecGlobal(Handle<JSRegExp> regexp,
-                                           Handle<String> subject,
-                                           Handle<JSArray> lastMatchInfo);
-
    // Offsets in the lastMatchInfo array.
    static const int kLastCaptureCount = 0;
    static const int kLastSubject = 1;
@@ -154,17 +149,6 @@

    static bool EnsureCompiledIrregexp(Handle<JSRegExp> re, bool is_ascii);

-
-  // On a successful match, the result is a JSArray containing
-  // captured positions. On a failure, the result is the null value.
-  // Returns an empty handle in case of an exception.
-  static Handle<Object> IrregexpExecOnce(Handle<JSRegExp> jsregexp,
-                                         int num_captures,
-                                         Handle<JSArray> lastMatchInfo,
-                                         Handle<String> subject16,
-                                         int previous_index,
-                                         int* ovector,
-                                         int ovector_length);

    // Set the subject cache.  The previous string buffer is not deleted, so the
    // caller should ensure that it doesn't leak.

Modified: branches/bleeding_edge/src/platform-win32.cc
==============================================================================
--- branches/bleeding_edge/src/platform-win32.cc        (original)
+++ branches/bleeding_edge/src/platform-win32.cc        Wed Mar 25 05:14:10 2009
@@ -314,7 +314,7 @@

  // Initialize timestamp from a JavaScript timestamp.
  Time::Time(double jstime) {
-  t() = static_cast<uint64_t>(jstime) * kTimeScaler + kTimeEpoc;
+  t() = static_cast<int64_t>(jstime) * kTimeScaler + kTimeEpoc;
  }



Modified: branches/bleeding_edge/src/regexp-macro-assembler-ia32.cc
==============================================================================
--- branches/bleeding_edge/src/regexp-macro-assembler-ia32.cc   (original)
+++ branches/bleeding_edge/src/regexp-macro-assembler-ia32.cc   Wed Mar 25 05:14:10 2009
@@ -962,6 +962,8 @@
      int previous_index) {

    ASSERT(subject->IsFlat());
+  ASSERT(previous_index >= 0);
+  ASSERT(previous_index <= subject->length());

    // No allocations before calling the regexp, but we can't use
    // AssertNoAllocation, since regexps might be preempted, and another thread

Modified: branches/bleeding_edge/src/runtime.cc
==============================================================================
--- branches/bleeding_edge/src/runtime.cc       (original)
+++ branches/bleeding_edge/src/runtime.cc       Wed Mar 25 05:14:10 2009
@@ -1038,7 +1038,9 @@
    CONVERT_CHECKED(Smi, index, args[2]);
    CONVERT_CHECKED(JSArray, raw_last_match_info, args[3]);
    Handle<JSArray> last_match_info(raw_last_match_info);
-  CHECK(last_match_info->HasFastElements());
+  RUNTIME_ASSERT(last_match_info->HasFastElements());
+  RUNTIME_ASSERT(index->value() >= 0);
+  RUNTIME_ASSERT(index->value() <= subject->length());
    Handle<Object> result = RegExpImpl::Exec(regexp,
                                             subject,
                                             index->value(),
@@ -1048,23 +1050,6 @@
  }


-static Object* Runtime_RegExpExecGlobal(Arguments args) {
-  HandleScope scope;
-  ASSERT(args.length() == 3);
-  CONVERT_CHECKED(JSRegExp, raw_regexp, args[0]);
-  Handle<JSRegExp> regexp(raw_regexp);
-  CONVERT_CHECKED(String, raw_subject, args[1]);
-  Handle<String> subject(raw_subject);
-  CONVERT_CHECKED(JSArray, raw_last_match_info, args[2]);
-  Handle<JSArray> last_match_info(raw_last_match_info);
-  CHECK(last_match_info->HasFastElements());
-  Handle<Object> result =
-      RegExpImpl::ExecGlobal(regexp, subject, last_match_info);
-  if (result.is_null()) return Failure::Exception();
-  return *result;
-}
-
-
  static Object* Runtime_MaterializeRegExpLiteral(Arguments args) {
    HandleScope scope;
    ASSERT(args.length() == 4);
@@ -2334,6 +2319,57 @@
    RUNTIME_ASSERT(start >= 0);
    RUNTIME_ASSERT(end <= value->length());
    return value->Slice(start, end);
+}
+
+
+static Object* Runtime_StringMatch(Arguments args) {
+  ASSERT_EQ(3, args.length());
+
+  CONVERT_ARG_CHECKED(String, subject, 0);
+  CONVERT_ARG_CHECKED(JSRegExp, regexp, 1);
+  CONVERT_ARG_CHECKED(JSArray, regexp_info, 2);
+  HandleScope handles;
+
+  Handle<Object> match = RegExpImpl::Exec(regexp, subject, 0, regexp_info);
+
+  if (match.is_null()) {
+    return Failure::Exception();
+  }
+  if (match->IsNull()) {
+    return Heap::null_value();
+  }
+  int length = subject->length();
+
+  ZoneScope zone_space(DELETE_ON_EXIT);
+  ZoneList<int> offsets(8);
+  do {
+    int start;
+    int end;
+    {
+      AssertNoAllocation no_alloc;
+      FixedArray* elements = regexp_info->elements();
+      start = Smi::cast(elements->get(RegExpImpl::kFirstCapture))->value();
+      end = Smi::cast(elements->get(RegExpImpl::kFirstCapture + 1))->value();
+    }
+    offsets.Add(start);
+    offsets.Add(end);
+    int index = start < end ? end : end + 1;
+    if (index > length) break;
+    match = RegExpImpl::Exec(regexp, subject, index, regexp_info);
+    if (match.is_null()) {
+      return Failure::Exception();
+    }
+  } while (!match->IsNull());
+  int matches = offsets.length() / 2;
+  Handle<FixedArray> elements = Factory::NewFixedArray(matches);
+  for (int i = 0; i < matches ; i++) {
+    int from = offsets.at(i * 2);
+    int to = offsets.at(i * 2 + 1);
+    elements->set(i, *Factory::NewStringSlice(subject, from, to));
+  }
+  Handle<JSArray> result = Factory::NewJSArrayWithElements(elements);
+  result->set_length(Smi::FromInt(matches));
+  return *result;
  }



Modified: branches/bleeding_edge/src/runtime.h
==============================================================================
--- branches/bleeding_edge/src/runtime.h        (original)
+++ branches/bleeding_edge/src/runtime.h        Wed Mar 25 05:14:10 2009
@@ -138,7 +138,6 @@
    /* Regular expressions */ \
    F(RegExpCompile, 3) \
    F(RegExpExec, 4) \
-  F(RegExpExecGlobal, 3) \
    \
    /* Strings */ \
    F(StringCharCodeAt, 2) \
@@ -147,6 +146,7 @@
    F(StringLocaleCompare, 2) \
    F(StringSlice, 3) \
    F(StringReplaceRegExpWithString, 4) \
+  F(StringMatch, 3) \
    \
    /* Numbers */ \
    F(NumberToRadixString, 2) \

Modified: branches/bleeding_edge/src/string.js
==============================================================================
--- branches/bleeding_edge/src/string.js        (original)
+++ branches/bleeding_edge/src/string.js        Wed Mar 25 05:14:10 2009
@@ -157,21 +157,8 @@

    if (!regexp.global) return regexp.exec(subject);
    %_Log('regexp', 'regexp-match,%0S,%1r', [subject, regexp]);
-  var matches = DoRegExpExecGlobal(regexp, subject);
-
-  // If the regexp did not match, return null.
-  if (matches.length == 0) return null;
-
-  // Build the result array.
-  var result = new $Array(match_string);
-  for (var i = 0; i < matches.length; ++i) {
-    var matchInfo = matches[i];
-    var match_string = subject.slice(matchInfo[CAPTURE0],
-                                     matchInfo[CAPTURE1]);
-    result[i] = match_string;
-  }
-
-  return result;
+  // lastMatchInfo is defined in regexp-delay.js.
+  return %StringMatch(subject, regexp, lastMatchInfo);
  }



Modified: branches/bleeding_edge/test/mjsunit/regexp-indexof.js
==============================================================================
--- branches/bleeding_edge/test/mjsunit/regexp-indexof.js       (original)
+++ branches/bleeding_edge/test/mjsunit/regexp-indexof.js       Wed Mar 25 05:14:10 2009
@@ -30,15 +30,25 @@
    var result = str.match(re);
    if (matches.length > 0) {
      assertEquals(matches.length, result.length);
-    for (idx in matches) {
+    var lastExpected;
+    var lastFrom;
+    var lastLength;
+    for (var idx = 0; idx < matches.length; idx++) {
        var from = matches[idx][0];
        var length = matches[idx][1];
        var expected = str.substr(from, length);
-      assertEquals(expected, result[idx]);
+      var name = str + "[" + from + ".." + (from+length) + "]";
+      assertEquals(expected, result[idx], name);
+      if (re.global || idx == 0) {
+        lastExpected = expected;
+        lastFrom = from;
+        lastLength = length;
+      }
      }
-    assertEquals(expected, RegExp.lastMatch);
-    assertEquals(str.substr(0, from), RegExp.leftContext);
-    assertEquals(str.substr(from + length), RegExp.rightContext);
+    assertEquals(lastExpected, RegExp.lastMatch, "lastMatch");
+    assertEquals(str.substr(0, lastFrom), RegExp.leftContext, "leftContext");
+    assertEquals(
+        str.substr(lastFrom + lastLength), RegExp.rightContext, "rightContext");
    } else {
      assertTrue(result === null);
    }
@@ -58,3 +68,10 @@
  assertEquals("o-o-oofo-ofo", "ofooofoooofofooofo".replace(/foo/g, "-"));
  assertEquals("deded", "deded".replace(/x/g, "-"));
  assertEquals("-a-b-c-d-e-f-", "abcdef".replace(new RegExp("", "g"), "-"));
+
+CheckMatch(/a(.)/, "xyzzyabxyzzzyacxyzzy", [[5, 2], [6, 1]]);
+CheckMatch(/a(.)/g, "xyzzyabxyzzyacxyzzy", [[5, 2], [12, 2]]);
+
+CheckMatch(/a|(?:)/g, "aba", [[0, 1], [1, 0], [2, 1], [3, 0]]);
+CheckMatch(/a|(?:)/g, "baba", [[0, 0], [1, 1], [2, 0], [3, 1], [4, 0]]);
+CheckMatch(/a|(?:)/g, "bab", [[0, 0], [1, 1], [2, 0], [3, 0]]);
\ No newline at end of file

--~--~---------~--~----~------------~-------~--~----~
v8-dev mailing list
v8-dev@googlegroups.com
http://groups.google.com/group/v8-dev
-~----------~----~----~----~------~----~------~--~---

Reply via email to