Author: l...@chromium.org
Date: Wed Mar 25 05:14:10 2009
New Revision: 1608
Modified: branches/bleeding_edge/src/jsregexp.cc branches/bleeding_edge/src/jsregexp.h branches/bleeding_edge/src/platform-win32.cc branches/bleeding_edge/src/regexp-macro-assembler-ia32.cc branches/bleeding_edge/src/runtime.cc branches/bleeding_edge/src/runtime.h branches/bleeding_edge/src/string.js branches/bleeding_edge/test/mjsunit/regexp-indexof.js Log: Moved String.prototype.match implementation to C++. Some extra runtime assertions added. Modified: branches/bleeding_edge/src/jsregexp.cc ============================================================================== --- branches/bleeding_edge/src/jsregexp.cc (original) +++ branches/bleeding_edge/src/jsregexp.cc Wed Mar 25 05:14:10 2009 @@ -205,25 +205,6 @@ } -Handle<Object> RegExpImpl::ExecGlobal(Handle<JSRegExp> regexp, - Handle<String> subject, - Handle<JSArray> last_match_info) { - switch (regexp->TypeTag()) { - case JSRegExp::ATOM: - return AtomExecGlobal(regexp, subject, last_match_info); - case JSRegExp::IRREGEXP: { - Handle<Object> result = - IrregexpExecGlobal(regexp, subject, last_match_info); - ASSERT(!result.is_null() || Top::has_pending_exception()); - return result; - } - default: - UNREACHABLE(); - return Handle<Object>::null(); - } -} - - // RegExp Atom implementation: Simple string search using indexOf. 
@@ -273,55 +254,6 @@ } -Handle<Object> RegExpImpl::AtomExecGlobal(Handle<JSRegExp> re, - Handle<String> subject, - Handle<JSArray> last_match_info) { - Handle<String> needle(String::cast(re->DataAt(JSRegExp::kAtomPatternIndex))); - ASSERT(last_match_info->HasFastElements()); - Handle<JSArray> result = Factory::NewJSArray(1); - int index = 0; - int match_count = 0; - int subject_length = subject->length(); - int needle_length = needle->length(); - int last_value = -1; - while (true) { - HandleScope scope; - int value = -1; - if (index + needle_length <= subject_length) { - value = Runtime::StringMatch(subject, needle, index); - } - if (value == -1) { - if (last_value != -1) { - Handle<FixedArray> array(last_match_info->elements()); - SetAtomLastCapture(*array, - *subject, - last_value, - last_value + needle->length()); - } - break; - } - - int end = value + needle_length; - - // Create an array that looks like the static last_match_info array - // that is attached to the global RegExp object. We will be returning - // an array of these. - Handle<FixedArray> array = Factory::NewFixedArray(kFirstCapture + 2); - SetLastCaptureCount(*array, 2); - // Ignore subject and input fields. - SetCapture(*array, 0, value); - SetCapture(*array, 1, end); - Handle<JSArray> pair = Factory::NewJSArrayWithElements(array); - SetElement(result, match_count, pair); - match_count++; - index = end; - if (needle_length == 0) index++; - last_value = value; - } - return result; -} - - // Irregexp implementation. @@ -331,8 +263,7 @@ // from the source pattern. // If compilation fails, an exception is thrown and this function // returns false. 
-bool RegExpImpl::EnsureCompiledIrregexp(Handle<JSRegExp> re, - bool is_ascii) { +bool RegExpImpl::EnsureCompiledIrregexp(Handle<JSRegExp> re, bool is_ascii) { int index; if (is_ascii) { index = JSRegExp::kIrregexpASCIICodeIndex; @@ -460,22 +391,20 @@ } -Handle<Object> RegExpImpl::IrregexpExec(Handle<JSRegExp> regexp, +Handle<Object> RegExpImpl::IrregexpExec(Handle<JSRegExp> jsregexp, Handle<String> subject, - int index, + int previous_index, Handle<JSArray> last_match_info) { - ASSERT_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP); + ASSERT_EQ(jsregexp->TypeTag(), JSRegExp::IRREGEXP); // Prepare space for the return values. int number_of_capture_registers = - (IrregexpNumberOfCaptures(FixedArray::cast(regexp->data())) + 1) * 2; + (IrregexpNumberOfCaptures(FixedArray::cast(jsregexp->data())) + 1) * 2; OffsetsVector offsets(number_of_capture_registers); - int previous_index = index; - #ifdef DEBUG if (FLAG_trace_regexp_bytecodes) { - String* pattern = regexp->Pattern(); + String* pattern = jsregexp->Pattern(); PrintF("\n\nRegexp match: /%s/\n\n", *(pattern->ToCString())); PrintF("\n\nSubject string: '%s'\n\n", *(subject->ToCString())); } @@ -487,104 +416,12 @@ last_match_info->EnsureSize(number_of_capture_registers + kLastMatchOverhead); - return IrregexpExecOnce(regexp, - number_of_capture_registers, - last_match_info, - subject, - previous_index, - offsets.vector(), - offsets.length()); -} - - -Handle<Object> RegExpImpl::IrregexpExecGlobal(Handle<JSRegExp> regexp, - Handle<String> subject, - Handle<JSArray> last_match_info) { - ASSERT_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP); - - // Prepare space for the return values. 
- int number_of_capture_registers = - (IrregexpNumberOfCaptures(FixedArray::cast(regexp->data())) + 1) * 2; - OffsetsVector offsets(number_of_capture_registers); - - int previous_index = 0; - - Handle<JSArray> result = Factory::NewJSArray(0); - int result_length = 0; - Handle<Object> matches; - - if (!subject->IsFlat()) { - FlattenString(subject); - } - - last_match_info->EnsureSize(number_of_capture_registers + kLastMatchOverhead); - - while (true) { - if (previous_index > subject->length() || previous_index < 0) { - // Per ECMA-262 15.10.6.2, if the previous index is greater than the - // string length, there is no match. - return result; - } else { -#ifdef DEBUG - if (FLAG_trace_regexp_bytecodes) { - String* pattern = regexp->Pattern(); - PrintF("\n\nRegexp match: /%s/\n\n", *(pattern->ToCString())); - PrintF("\n\nSubject string: '%s'\n\n", *(subject->ToCString())); - } -#endif - HandleScope scope; - matches = IrregexpExecOnce(regexp, - number_of_capture_registers, - last_match_info, - subject, - previous_index, - offsets.vector(), - offsets.length()); - - if (matches.is_null()) { - ASSERT(Top::has_pending_exception()); - return matches; - } - - if (matches->IsJSArray()) { - // Create an array that looks like the static last_match_info array - // that is attached to the global RegExp object. We will be returning - // an array of these. 
- int match_length = kFirstCapture + number_of_capture_registers; - Handle<JSArray> latest_match = - Factory::NewJSArray(match_length); - - AssertNoAllocation no_allocation; - FixedArray* match_array = JSArray::cast(*matches)->elements(); - match_array->CopyTo(0, - latest_match->elements(), - 0, - match_length); - SetElement(result, result_length, latest_match); - result_length++; - previous_index = GetCapture(match_array, 1); - if (GetCapture(match_array, 0) == previous_index) { - previous_index++; - } - } else { - ASSERT(matches->IsNull()); - return result; - } - } - } -} - - -Handle<Object> RegExpImpl::IrregexpExecOnce(Handle<JSRegExp> jsregexp, - int number_of_capture_registers, - Handle<JSArray> last_match_info, - Handle<String> subject, - int previous_index, - int* offsets_vector, - int offsets_vector_length) { - ASSERT(subject->IsFlat()); + int* offsets_vector = offsets.vector(); + int offsets_vector_length = offsets.length(); bool rc; + // Dispatch to the correct RegExp implementation. + Handle<String> original_subject = subject; Handle<FixedArray> regexp(FixedArray::cast(jsregexp->data())); if (UseNativeRegexp()) { @@ -630,6 +467,8 @@ offsets_vector, previous_index); } + + // Handle results from RegExp implementation. if (!rc) { return Factory::null_value(); Modified: branches/bleeding_edge/src/jsregexp.h ============================================================================== --- branches/bleeding_edge/src/jsregexp.h (original) +++ branches/bleeding_edge/src/jsregexp.h Wed Mar 25 05:14:10 2009 @@ -94,20 +94,15 @@ int index, Handle<JSArray> lastMatchInfo); - static Handle<Object> AtomExecGlobal(Handle<JSRegExp> regexp, - Handle<String> subject, - Handle<JSArray> lastMatchInfo); - // Execute an Irregexp bytecode pattern. + // On a successful match, the result is a JSArray containing + // captured positions. On a failure, the result is the null value. + // Returns an empty handle in case of an exception. 
static Handle<Object> IrregexpExec(Handle<JSRegExp> regexp, Handle<String> subject, int index, Handle<JSArray> lastMatchInfo); - static Handle<Object> IrregexpExecGlobal(Handle<JSRegExp> regexp, - Handle<String> subject, - Handle<JSArray> lastMatchInfo); - // Offsets in the lastMatchInfo array. static const int kLastCaptureCount = 0; static const int kLastSubject = 1; @@ -154,17 +149,6 @@ static bool EnsureCompiledIrregexp(Handle<JSRegExp> re, bool is_ascii); - - // On a successful match, the result is a JSArray containing - // captured positions. On a failure, the result is the null value. - // Returns an empty handle in case of an exception. - static Handle<Object> IrregexpExecOnce(Handle<JSRegExp> jsregexp, - int num_captures, - Handle<JSArray> lastMatchInfo, - Handle<String> subject16, - int previous_index, - int* ovector, - int ovector_length); // Set the subject cache. The previous string buffer is not deleted, so the // caller should ensure that it doesn't leak. Modified: branches/bleeding_edge/src/platform-win32.cc ============================================================================== --- branches/bleeding_edge/src/platform-win32.cc (original) +++ branches/bleeding_edge/src/platform-win32.cc Wed Mar 25 05:14:10 2009 @@ -314,7 +314,7 @@ // Initialize timestamp from a JavaScript timestamp. 
Time::Time(double jstime) { - t() = static_cast<uint64_t>(jstime) * kTimeScaler + kTimeEpoc; + t() = static_cast<int64_t>(jstime) * kTimeScaler + kTimeEpoc; } Modified: branches/bleeding_edge/src/regexp-macro-assembler-ia32.cc ============================================================================== --- branches/bleeding_edge/src/regexp-macro-assembler-ia32.cc (original) +++ branches/bleeding_edge/src/regexp-macro-assembler-ia32.cc Wed Mar 25 05:14:10 2009 @@ -962,6 +962,8 @@ int previous_index) { ASSERT(subject->IsFlat()); + ASSERT(previous_index >= 0); + ASSERT(previous_index <= subject->length()); // No allocations before calling the regexp, but we can't use // AssertNoAllocation, since regexps might be preempted, and another thread Modified: branches/bleeding_edge/src/runtime.cc ============================================================================== --- branches/bleeding_edge/src/runtime.cc (original) +++ branches/bleeding_edge/src/runtime.cc Wed Mar 25 05:14:10 2009 @@ -1038,7 +1038,9 @@ CONVERT_CHECKED(Smi, index, args[2]); CONVERT_CHECKED(JSArray, raw_last_match_info, args[3]); Handle<JSArray> last_match_info(raw_last_match_info); - CHECK(last_match_info->HasFastElements()); + RUNTIME_ASSERT(last_match_info->HasFastElements()); + RUNTIME_ASSERT(index->value() >= 0); + RUNTIME_ASSERT(index->value() <= subject->length()); Handle<Object> result = RegExpImpl::Exec(regexp, subject, index->value(), @@ -1048,23 +1050,6 @@ } -static Object* Runtime_RegExpExecGlobal(Arguments args) { - HandleScope scope; - ASSERT(args.length() == 3); - CONVERT_CHECKED(JSRegExp, raw_regexp, args[0]); - Handle<JSRegExp> regexp(raw_regexp); - CONVERT_CHECKED(String, raw_subject, args[1]); - Handle<String> subject(raw_subject); - CONVERT_CHECKED(JSArray, raw_last_match_info, args[2]); - Handle<JSArray> last_match_info(raw_last_match_info); - CHECK(last_match_info->HasFastElements()); - Handle<Object> result = - RegExpImpl::ExecGlobal(regexp, subject, last_match_info); - if 
(result.is_null()) return Failure::Exception(); - return *result; -} - - static Object* Runtime_MaterializeRegExpLiteral(Arguments args) { HandleScope scope; ASSERT(args.length() == 4); @@ -2334,6 +2319,57 @@ RUNTIME_ASSERT(start >= 0); RUNTIME_ASSERT(end <= value->length()); return value->Slice(start, end); +} + + +static Object* Runtime_StringMatch(Arguments args) { + ASSERT_EQ(3, args.length()); + + CONVERT_ARG_CHECKED(String, subject, 0); + CONVERT_ARG_CHECKED(JSRegExp, regexp, 1); + CONVERT_ARG_CHECKED(JSArray, regexp_info, 2); + HandleScope handles; + + Handle<Object> match = RegExpImpl::Exec(regexp, subject, 0, regexp_info); + + if (match.is_null()) { + return Failure::Exception(); + } + if (match->IsNull()) { + return Heap::null_value(); + } + int length = subject->length(); + + ZoneScope zone_space(DELETE_ON_EXIT); + ZoneList<int> offsets(8); + do { + int start; + int end; + { + AssertNoAllocation no_alloc; + FixedArray* elements = regexp_info->elements(); + start = Smi::cast(elements->get(RegExpImpl::kFirstCapture))->value(); + end = Smi::cast(elements->get(RegExpImpl::kFirstCapture + 1))->value(); + } + offsets.Add(start); + offsets.Add(end); + int index = start < end ? 
end : end + 1; + if (index > length) break; + match = RegExpImpl::Exec(regexp, subject, index, regexp_info); + if (match.is_null()) { + return Failure::Exception(); + } + } while (!match->IsNull()); + int matches = offsets.length() / 2; + Handle<FixedArray> elements = Factory::NewFixedArray(matches); + for (int i = 0; i < matches ; i++) { + int from = offsets.at(i * 2); + int to = offsets.at(i * 2 + 1); + elements->set(i, *Factory::NewStringSlice(subject, from, to)); + } + Handle<JSArray> result = Factory::NewJSArrayWithElements(elements); + result->set_length(Smi::FromInt(matches)); + return *result; } Modified: branches/bleeding_edge/src/runtime.h ============================================================================== --- branches/bleeding_edge/src/runtime.h (original) +++ branches/bleeding_edge/src/runtime.h Wed Mar 25 05:14:10 2009 @@ -138,7 +138,6 @@ /* Regular expressions */ \ F(RegExpCompile, 3) \ F(RegExpExec, 4) \ - F(RegExpExecGlobal, 3) \ \ /* Strings */ \ F(StringCharCodeAt, 2) \ @@ -147,6 +146,7 @@ F(StringLocaleCompare, 2) \ F(StringSlice, 3) \ F(StringReplaceRegExpWithString, 4) \ + F(StringMatch, 3) \ \ /* Numbers */ \ F(NumberToRadixString, 2) \ Modified: branches/bleeding_edge/src/string.js ============================================================================== --- branches/bleeding_edge/src/string.js (original) +++ branches/bleeding_edge/src/string.js Wed Mar 25 05:14:10 2009 @@ -157,21 +157,8 @@ if (!regexp.global) return regexp.exec(subject); %_Log('regexp', 'regexp-match,%0S,%1r', [subject, regexp]); - var matches = DoRegExpExecGlobal(regexp, subject); - - // If the regexp did not match, return null. - if (matches.length == 0) return null; - - // Build the result array. 
- var result = new $Array(match_string); - for (var i = 0; i < matches.length; ++i) { - var matchInfo = matches[i]; - var match_string = subject.slice(matchInfo[CAPTURE0], - matchInfo[CAPTURE1]); - result[i] = match_string; - } - - return result; + // lastMatchInfo is defined in regexp-delay.js. + return %StringMatch(subject, regexp, lastMatchInfo); } Modified: branches/bleeding_edge/test/mjsunit/regexp-indexof.js ============================================================================== --- branches/bleeding_edge/test/mjsunit/regexp-indexof.js (original) +++ branches/bleeding_edge/test/mjsunit/regexp-indexof.js Wed Mar 25 05:14:10 2009 @@ -30,15 +30,25 @@ var result = str.match(re); if (matches.length > 0) { assertEquals(matches.length, result.length); - for (idx in matches) { + var lastExpected; + var lastFrom; + var lastLength; + for (var idx = 0; idx < matches.length; idx++) { var from = matches[idx][0]; var length = matches[idx][1]; var expected = str.substr(from, length); - assertEquals(expected, result[idx]); + var name = str + "[" + from + ".." 
+ (from+length) + "]"; + assertEquals(expected, result[idx], name); + if (re.global || idx == 0) { + lastExpected = expected; + lastFrom = from; + lastLength = length; + } } - assertEquals(expected, RegExp.lastMatch); - assertEquals(str.substr(0, from), RegExp.leftContext); - assertEquals(str.substr(from + length), RegExp.rightContext); + assertEquals(lastExpected, RegExp.lastMatch, "lastMatch"); + assertEquals(str.substr(0, lastFrom), RegExp.leftContext, "leftContext"); + assertEquals( + str.substr(lastFrom + lastLength), RegExp.rightContext, "rightContext"); } else { assertTrue(result === null); } @@ -58,3 +68,10 @@ assertEquals("o-o-oofo-ofo", "ofooofoooofofooofo".replace(/foo/g, "-")); assertEquals("deded", "deded".replace(/x/g, "-")); assertEquals("-a-b-c-d-e-f-", "abcdef".replace(new RegExp("", "g"), "-")); + +CheckMatch(/a(.)/, "xyzzyabxyzzzyacxyzzy", [[5, 2], [6, 1]]); +CheckMatch(/a(.)/g, "xyzzyabxyzzyacxyzzy", [[5, 2], [12, 2]]); + +CheckMatch(/a|(?:)/g, "aba", [[0, 1], [1, 0], [2, 1], [3, 0]]); +CheckMatch(/a|(?:)/g, "baba", [[0, 0], [1, 1], [2, 0], [3, 1], [4, 0]]); +CheckMatch(/a|(?:)/g, "bab", [[0, 0], [1, 1], [2, 0], [3, 0]]); \ No newline at end of file --~--~---------~--~----~------------~-------~--~----~ v8-dev mailing list v8-dev@googlegroups.com http://groups.google.com/group/v8-dev -~----------~----~----~----~------~----~------~--~---