Revision: 4325
Author: [email protected]
Date: Tue Mar 30 07:14:28 2010
Log: Reapply svn r4269 plus fixes for issues 665 and 667.
Reapply svn r4269, the C++ implementation of String.prototype.replace.
Cherry pick fixes for V8 issues 665 (svn r4312) and 667 (svn r4322)
from bleeding_edge.
Review URL: http://codereview.chromium.org/1574003
http://code.google.com/p/v8/source/detail?r=4325
Modified:
/branches/2.1/src/regexp.js
/branches/2.1/src/runtime.cc
/branches/2.1/src/runtime.h
/branches/2.1/src/string.js
/branches/2.1/src/version.cc
/branches/2.1/test/mjsunit/string-replace.js
=======================================
--- /branches/2.1/src/regexp.js Tue Mar 30 01:57:48 2010
+++ /branches/2.1/src/regexp.js Tue Mar 30 07:14:28 2010
@@ -344,6 +344,7 @@
// on the captures array of the last successful match and the subject
string
// of the last successful match.
function RegExpGetLastMatch() {
+ if (lastMatchInfoOverride) { return lastMatchInfoOverride[0]; }
var regExpSubject = LAST_SUBJECT(lastMatchInfo);
return SubString(regExpSubject,
lastMatchInfo[CAPTURE0],
@@ -352,6 +353,11 @@
function RegExpGetLastParen() {
+ if (lastMatchInfoOverride) {
+ var override = lastMatchInfoOverride;
+ if (override.length <= 3) return '';
+ return override[override.length - 3];
+ }
var length = NUMBER_OF_CAPTURES(lastMatchInfo);
if (length <= 2) return ''; // There were no captures.
// We match the SpiderMonkey behavior: return the substring defined by
the
@@ -368,17 +374,32 @@
function RegExpGetLeftContext() {
- return SubString(LAST_SUBJECT(lastMatchInfo),
- 0,
- lastMatchInfo[CAPTURE0]);
+ var start_index;
+ var subject;
+ if (!lastMatchInfoOverride) {
+ start_index = lastMatchInfo[CAPTURE0];
+ subject = LAST_SUBJECT(lastMatchInfo);
+ } else {
+ var override = lastMatchInfoOverride;
+ start_index = override[override.length - 2];
+ subject = override[override.length - 1];
+ }
+ return SubString(subject, 0, start_index);
}
function RegExpGetRightContext() {
- var subject = LAST_SUBJECT(lastMatchInfo);
- return SubString(subject,
- lastMatchInfo[CAPTURE1],
- subject.length);
+ var start_index;
+ var subject;
+ if (!lastMatchInfoOverride) {
+ start_index = lastMatchInfo[CAPTURE1];
+ subject = LAST_SUBJECT(lastMatchInfo);
+ } else {
+ var override = lastMatchInfoOverride;
+ subject = override[override.length - 1];
+ start_index = override[override.length - 2] + subject.length;
+ }
+ return SubString(subject, start_index, subject.length);
}
@@ -387,6 +408,10 @@
// called with indices from 1 to 9.
function RegExpMakeCaptureGetter(n) {
return function() {
+ if (lastMatchInfoOverride) {
+ if (n < lastMatchInfoOverride.length - 2) return
lastMatchInfoOverride[n];
+ return '';
+ }
var index = n * 2;
if (index >= NUMBER_OF_CAPTURES(lastMatchInfo)) return '';
var matchStart = lastMatchInfo[CAPTURE(index)];
@@ -411,6 +436,12 @@
0, // REGEXP_FIRST_CAPTURE + 1
];
+// Override last match info with an array of actual substrings.
+// Used internally by replace regexp with function.
+// The array has the format of an "apply" argument for a replacement
+// function.
+var lastMatchInfoOverride = null;
+
// -------------------------------------------------------------------
function SetupRegExp() {
=======================================
--- /branches/2.1/src/runtime.cc Tue Mar 30 01:57:48 2010
+++ /branches/2.1/src/runtime.cc Tue Mar 30 07:14:28 2010
@@ -1566,10 +1566,92 @@
ASSERT(args.length() == 1);
return CharFromCode(args[0]);
}
+
+
+class FixedArrayBuilder {
+ public:
+ explicit FixedArrayBuilder(int initial_capacity)
+ : array_(Factory::NewFixedArrayWithHoles(initial_capacity)),
+ length_(0) {
+ // Require a non-zero initial size. Ensures that doubling the size to
+ // extend the array will work.
+ ASSERT(initial_capacity > 0);
+ }
+
+ explicit FixedArrayBuilder(Handle<FixedArray> backing_store)
+ : array_(backing_store),
+ length_(0) {
+ // Require a non-zero initial size. Ensures that doubling the size to
+ // extend the array will work.
+ ASSERT(backing_store->length() > 0);
+ }
+
+ bool HasCapacity(int elements) {
+ int length = array_->length();
+ int required_length = length_ + elements;
+ return (length >= required_length);
+ }
+
+ void EnsureCapacity(int elements) {
+ int length = array_->length();
+ int required_length = length_ + elements;
+ if (length < required_length) {
+ int new_length = length;
+ do {
+ new_length *= 2;
+ } while (new_length < required_length);
+ Handle<FixedArray> extended_array =
+ Factory::NewFixedArrayWithHoles(new_length);
+ array_->CopyTo(0, *extended_array, 0, length_);
+ array_ = extended_array;
+ }
+ }
+
+ void Add(Object* value) {
+ ASSERT(length_ < capacity());
+ array_->set(length_, value);
+ length_++;
+ }
+
+ void Add(Smi* value) {
+ ASSERT(length_ < capacity());
+ array_->set(length_, value);
+ length_++;
+ }
+
+ Handle<FixedArray> array() {
+ return array_;
+ }
+
+ int length() {
+ return length_;
+ }
+
+ int capacity() {
+ return array_->length();
+ }
+
+ Handle<JSArray> ToJSArray() {
+ Handle<JSArray> result_array = Factory::NewJSArrayWithElements(array_);
+ result_array->set_length(Smi::FromInt(length_));
+ return result_array;
+ }
+
+ Handle<JSArray> ToJSArray(Handle<JSArray> target_array) {
+ target_array->set_elements(*array_);
+ target_array->set_length(Smi::FromInt(length_));
+ return target_array;
+ }
+
+ private:
+ Handle<FixedArray> array_;
+ int length_;
+};
+
// Forward declarations.
-static const int kStringBuilderConcatHelperLengthBits = 11;
-static const int kStringBuilderConcatHelperPositionBits = 19;
+const int kStringBuilderConcatHelperLengthBits = 11;
+const int kStringBuilderConcatHelperPositionBits = 19;
template <typename schar>
static inline void StringBuilderConcatHelper(String*,
@@ -1577,15 +1659,19 @@
FixedArray*,
int);
-typedef BitField<int, 0, 11> StringBuilderSubstringLength;
-typedef BitField<int, 11, 19> StringBuilderSubstringPosition;
+typedef BitField<int, 0, kStringBuilderConcatHelperLengthBits>
+ StringBuilderSubstringLength;
+typedef BitField<int,
+ kStringBuilderConcatHelperLengthBits,
+ kStringBuilderConcatHelperPositionBits>
+ StringBuilderSubstringPosition;
+
class ReplacementStringBuilder {
public:
ReplacementStringBuilder(Handle<String> subject, int
estimated_part_count)
- : subject_(subject),
- parts_(Factory::NewFixedArray(estimated_part_count)),
- part_count_(0),
+ : array_builder_(estimated_part_count),
+ subject_(subject),
character_count_(0),
is_ascii_(subject->IsAsciiRepresentation()) {
// Require a non-zero initial size. Ensures that doubling the size to
@@ -1593,38 +1679,35 @@
ASSERT(estimated_part_count > 0);
}
- void EnsureCapacity(int elements) {
- int length = parts_->length();
- int required_length = part_count_ + elements;
- if (length < required_length) {
- int new_length = length;
- do {
- new_length *= 2;
- } while (new_length < required_length);
- Handle<FixedArray> extended_array =
- Factory::NewFixedArray(new_length);
- parts_->CopyTo(0, *extended_array, 0, part_count_);
- parts_ = extended_array;
- }
- }
-
- void AddSubjectSlice(int from, int to) {
+ static inline void AddSubjectSlice(FixedArrayBuilder* builder,
+ int from,
+ int to) {
ASSERT(from >= 0);
int length = to - from;
ASSERT(length > 0);
- // Can we encode the slice in 11 bits for length and 19 bits for
- // start position - as used by StringBuilderConcatHelper?
if (StringBuilderSubstringLength::is_valid(length) &&
StringBuilderSubstringPosition::is_valid(from)) {
int encoded_slice = StringBuilderSubstringLength::encode(length) |
StringBuilderSubstringPosition::encode(from);
- AddElement(Smi::FromInt(encoded_slice));
+ builder->Add(Smi::FromInt(encoded_slice));
} else {
// Otherwise encode as two smis.
- AddElement(Smi::FromInt(-length));
- AddElement(Smi::FromInt(from));
- }
- IncrementCharacterCount(length);
+ builder->Add(Smi::FromInt(-length));
+ builder->Add(Smi::FromInt(from));
+ }
+ }
+
+
+ void EnsureCapacity(int elements) {
+ array_builder_.EnsureCapacity(elements);
+ }
+
+
+ void AddSubjectSlice(int from, int to) {
+ AddSubjectSlice(&array_builder_, from, to);
+ // Can we encode the slice in 11 bits for length and 19 bits for
+ // start position - as used by StringBuilderConcatHelper?
+ IncrementCharacterCount(to - from);
}
@@ -1640,7 +1723,7 @@
Handle<String> ToString() {
- if (part_count_ == 0) {
+ if (array_builder_.length() == 0) {
return Factory::empty_string();
}
@@ -1652,8 +1735,8 @@
char* char_buffer = seq->GetChars();
StringBuilderConcatHelper(*subject_,
char_buffer,
- *parts_,
- part_count_);
+ *array_builder_.array(),
+ array_builder_.length());
} else {
// Non-ASCII.
joined_string = NewRawTwoByteString(character_count_);
@@ -1662,8 +1745,8 @@
uc16* char_buffer = seq->GetChars();
StringBuilderConcatHelper(*subject_,
char_buffer,
- *parts_,
- part_count_);
+ *array_builder_.array(),
+ array_builder_.length());
}
return joined_string;
}
@@ -1676,8 +1759,14 @@
character_count_ += by;
}
- private:
-
+ Handle<JSArray> GetParts() {
+ Handle<JSArray> result =
+ Factory::NewJSArrayWithElements(array_builder_.array());
+ result->set_length(Smi::FromInt(array_builder_.length()));
+ return result;
+ }
+
+ private:
Handle<String> NewRawAsciiString(int size) {
CALL_HEAP_FUNCTION(Heap::AllocateRawAsciiString(size), String);
}
@@ -1690,14 +1779,12 @@
void AddElement(Object* element) {
ASSERT(element->IsSmi() || element->IsString());
- ASSERT(parts_->length() > part_count_);
- parts_->set(part_count_, element);
- part_count_++;
+ ASSERT(array_builder_.capacity() > array_builder_.length());
+ array_builder_.Add(element);
}
+ FixedArrayBuilder array_builder_;
Handle<String> subject_;
- Handle<FixedArray> parts_;
- int part_count_;
int character_count_;
bool is_ascii_;
};
@@ -2103,7 +2190,6 @@
replacement,
last_match_info);
}
-
// Cap on the maximal shift in the Boyer-Moore implementation. By setting a
@@ -2867,6 +2953,474 @@
result->set_length(Smi::FromInt(matches));
return *result;
}
+
+
+// Two smis before and after the match, for very long strings.
+const int kMaxBuilderEntriesPerRegExpMatch = 5;
+
+
+static void SetLastMatchInfoNoCaptures(Handle<String> subject,
+ Handle<JSArray> last_match_info,
+ int match_start,
+ int match_end) {
+ // Fill last_match_info with a single capture.
+ last_match_info->EnsureSize(2 + RegExpImpl::kLastMatchOverhead);
+ AssertNoAllocation no_gc;
+ FixedArray* elements = FixedArray::cast(last_match_info->elements());
+ RegExpImpl::SetLastCaptureCount(elements, 2);
+ RegExpImpl::SetLastInput(elements, *subject);
+ RegExpImpl::SetLastSubject(elements, *subject);
+ RegExpImpl::SetCapture(elements, 0, match_start);
+ RegExpImpl::SetCapture(elements, 1, match_end);
+}
+
+
+template <typename schar>
+static bool SearchCharMultiple(Vector<schar> subject,
+ String* pattern,
+ schar pattern_char,
+ FixedArrayBuilder* builder,
+ int* match_pos) {
+ // Position of last match.
+ int pos = *match_pos;
+ int subject_length = subject.length();
+ while (pos < subject_length) {
+ int match_end = pos + 1;
+ if (!builder->HasCapacity(kMaxBuilderEntriesPerRegExpMatch)) {
+ *match_pos = pos;
+ return false;
+ }
+ int new_pos = SingleCharIndexOf(subject, pattern_char, match_end);
+ if (new_pos >= 0) {
+ // Match has been found.
+ if (new_pos > match_end) {
+ ReplacementStringBuilder::AddSubjectSlice(builder, match_end,
new_pos);
+ }
+ pos = new_pos;
+ builder->Add(pattern);
+ } else {
+ break;
+ }
+ }
+ if (pos + 1 < subject_length) {
+ ReplacementStringBuilder::AddSubjectSlice(builder, pos + 1,
subject_length);
+ }
+ *match_pos = pos;
+ return true;
+}
+
+
+static bool SearchCharMultiple(Handle<String> subject,
+ Handle<String> pattern,
+ Handle<JSArray> last_match_info,
+ FixedArrayBuilder* builder) {
+ ASSERT(subject->IsFlat());
+ ASSERT_EQ(1, pattern->length());
+ uc16 pattern_char = pattern->Get(0);
+ // Treating position before first as initial "previous match position".
+ int match_pos = -1;
+
+ for (;;) { // Break when search complete.
+ builder->EnsureCapacity(kMaxBuilderEntriesPerRegExpMatch);
+ AssertNoAllocation no_gc;
+ if (subject->IsAsciiRepresentation()) {
+ if (pattern_char > String::kMaxAsciiCharCode) {
+ break;
+ }
+ Vector<const char> subject_vector = subject->ToAsciiVector();
+ char pattern_ascii_char = static_cast<char>(pattern_char);
+ bool complete = SearchCharMultiple<const char>(subject_vector,
+ *pattern,
+ pattern_ascii_char,
+ builder,
+ &match_pos);
+ if (complete) break;
+ } else {
+ Vector<const uc16> subject_vector = subject->ToUC16Vector();
+ bool complete = SearchCharMultiple<const uc16>(subject_vector,
+ *pattern,
+ pattern_char,
+ builder,
+ &match_pos);
+ if (complete) break;
+ }
+ }
+
+ if (match_pos >= 0) {
+ SetLastMatchInfoNoCaptures(subject,
+ last_match_info,
+ match_pos,
+ match_pos + 1);
+ return true;
+ }
+ return false; // No matches at all.
+}
+
+
+template <typename schar, typename pchar>
+static bool SearchStringMultiple(Vector<schar> subject,
+ String* pattern,
+ Vector<pchar> pattern_string,
+ FixedArrayBuilder* builder,
+ int* match_pos) {
+ int pos = *match_pos;
+ int subject_length = subject.length();
+ int pattern_length = pattern_string.length();
+ int max_search_start = subject_length - pattern_length;
+ bool is_ascii = (sizeof(schar) == 1);
+ StringSearchStrategy strategy =
+ InitializeStringSearch(pattern_string, is_ascii);
+ switch (strategy) {
+ case SEARCH_FAIL: return false;
+ case SEARCH_SHORT:
+ while (pos <= max_search_start) {
+ if (!builder->HasCapacity(kMaxBuilderEntriesPerRegExpMatch)) {
+ *match_pos = pos;
+ return false;
+ }
+ // Position of end of previous match.
+ int match_end = pos + pattern_length;
+ int new_pos = SimpleIndexOf(subject, pattern_string, match_end);
+ if (new_pos >= 0) {
+ // A match.
+ if (new_pos > match_end) {
+ ReplacementStringBuilder::AddSubjectSlice(builder,
+ match_end,
+ new_pos);
+ }
+ pos = new_pos;
+ builder->Add(pattern);
+ } else {
+ break;
+ }
+ }
+ break;
+ case SEARCH_LONG:
+ while (pos <= max_search_start) {
+ if (!builder->HasCapacity(kMaxBuilderEntriesPerRegExpMatch)) {
+ *match_pos = pos;
+ return false;
+ }
+ int new_pos = ComplexIndexOf(subject,
+ pattern_string,
+ pos + pattern_length);
+ if (new_pos >= 0) {
+ // A match has been found.
+ if (new_pos > pos) {
+ ReplacementStringBuilder::AddSubjectSlice(builder, pos,
new_pos);
+ }
+ pos = new_pos;
+ builder->Add(pattern);
+ } else {
+ break;
+ }
+ }
+ break;
+ }
+ if (pos < max_search_start) {
+ ReplacementStringBuilder::AddSubjectSlice(builder,
+ pos + pattern_length,
+ subject_length);
+ }
+ *match_pos = pos;
+ return true;
+}
+
+
+static bool SearchStringMultiple(Handle<String> subject,
+ Handle<String> pattern,
+ Handle<JSArray> last_match_info,
+ FixedArrayBuilder* builder) {
+ ASSERT(subject->IsFlat());
+ ASSERT(pattern->IsFlat());
+ ASSERT(pattern->length() > 1);
+
+ // Treating as if a previous match was before first character.
+ int match_pos = -pattern->length();
+
+ for (;;) { // Break when search complete.
+ builder->EnsureCapacity(kMaxBuilderEntriesPerRegExpMatch);
+ AssertNoAllocation no_gc;
+ if (subject->IsAsciiRepresentation()) {
+ Vector<const char> subject_vector = subject->ToAsciiVector();
+ if (pattern->IsAsciiRepresentation()) {
+ if (SearchStringMultiple(subject_vector,
+ *pattern,
+ pattern->ToAsciiVector(),
+ builder,
+ &match_pos)) break;
+ } else {
+ if (SearchStringMultiple(subject_vector,
+ *pattern,
+ pattern->ToUC16Vector(),
+ builder,
+ &match_pos)) break;
+ }
+ } else {
+ Vector<const uc16> subject_vector = subject->ToUC16Vector();
+ if (pattern->IsAsciiRepresentation()) {
+ if (SearchStringMultiple(subject_vector,
+ *pattern,
+ pattern->ToAsciiVector(),
+ builder,
+ &match_pos)) break;
+ } else {
+ if (SearchStringMultiple(subject_vector,
+ *pattern,
+ pattern->ToUC16Vector(),
+ builder,
+ &match_pos)) break;
+ }
+ }
+ }
+
+ if (match_pos >= 0) {
+ SetLastMatchInfoNoCaptures(subject,
+ last_match_info,
+ match_pos,
+ match_pos + pattern->length());
+ return true;
+ }
+ return false; // No matches at all.
+}
+
+
+static RegExpImpl::IrregexpResult SearchRegExpNoCaptureMultiple(
+ Handle<String> subject,
+ Handle<JSRegExp> regexp,
+ Handle<JSArray> last_match_array,
+ FixedArrayBuilder* builder) {
+ ASSERT(subject->IsFlat());
+ int match_start = -1;
+ int match_end = 0;
+ int pos = 0;
+ int required_registers = RegExpImpl::IrregexpPrepare(regexp, subject);
+ if (required_registers < 0) return RegExpImpl::RE_EXCEPTION;
+
+ OffsetsVector registers(required_registers);
+ Vector<int> register_vector(registers.vector(), registers.length());
+ int subject_length = subject->length();
+
+ for (;;) { // Break on failure, return on exception.
+ RegExpImpl::IrregexpResult result =
+ RegExpImpl::IrregexpExecOnce(regexp,
+ subject,
+ pos,
+ register_vector);
+ if (result == RegExpImpl::RE_SUCCESS) {
+ match_start = register_vector[0];
+ builder->EnsureCapacity(kMaxBuilderEntriesPerRegExpMatch);
+ if (match_end < match_start) {
+ ReplacementStringBuilder::AddSubjectSlice(builder,
+ match_end,
+ match_start);
+ }
+ match_end = register_vector[1];
+ HandleScope loop_scope;
+ builder->Add(*Factory::NewSubString(subject, match_start,
match_end));
+ if (match_start != match_end) {
+ pos = match_end;
+ } else {
+ pos = match_end + 1;
+ if (pos > subject_length) break;
+ }
+ } else if (result == RegExpImpl::RE_FAILURE) {
+ break;
+ } else {
+ ASSERT_EQ(result, RegExpImpl::RE_EXCEPTION);
+ return result;
+ }
+ }
+
+ if (match_start >= 0) {
+ if (match_end < subject_length) {
+ ReplacementStringBuilder::AddSubjectSlice(builder,
+ match_end,
+ subject_length);
+ }
+ SetLastMatchInfoNoCaptures(subject,
+ last_match_array,
+ match_start,
+ match_end);
+ return RegExpImpl::RE_SUCCESS;
+ } else {
+ return RegExpImpl::RE_FAILURE; // No matches at all.
+ }
+}
+
+
+static RegExpImpl::IrregexpResult SearchRegExpMultiple(
+ Handle<String> subject,
+ Handle<JSRegExp> regexp,
+ Handle<JSArray> last_match_array,
+ FixedArrayBuilder* builder) {
+
+ ASSERT(subject->IsFlat());
+ int required_registers = RegExpImpl::IrregexpPrepare(regexp, subject);
+ if (required_registers < 0) return RegExpImpl::RE_EXCEPTION;
+
+ OffsetsVector registers(required_registers);
+ Vector<int> register_vector(registers.vector(), registers.length());
+
+ RegExpImpl::IrregexpResult result =
+ RegExpImpl::IrregexpExecOnce(regexp,
+ subject,
+ 0,
+ register_vector);
+
+ int capture_count = regexp->CaptureCount();
+ int subject_length = subject->length();
+
+ // Position to search from.
+ int pos = 0;
+ // End of previous match. Differs from pos if match was empty.
+ int match_end = 0;
+ if (result == RegExpImpl::RE_SUCCESS) {
+ // Need to keep a copy of the previous match for creating
last_match_info
+ // at the end, so we have two vectors that we swap between.
+ OffsetsVector registers2(required_registers);
+ Vector<int> prev_register_vector(registers2.vector(),
registers2.length());
+
+ do {
+ int match_start = register_vector[0];
+ builder->EnsureCapacity(kMaxBuilderEntriesPerRegExpMatch);
+ if (match_end < match_start) {
+ ReplacementStringBuilder::AddSubjectSlice(builder,
+ match_end,
+ match_start);
+ }
+ match_end = register_vector[1];
+
+ {
+ // Avoid accumulating new handles inside loop.
+ HandleScope temp_scope;
+ // Arguments array to replace function is match, captures, index
and
+ // subject, i.e., 3 + capture count in total.
+ Handle<FixedArray> elements = Factory::NewFixedArray(3 +
capture_count);
+ elements->set(0, *Factory::NewSubString(subject,
+ match_start,
+ match_end));
+ for (int i = 1; i <= capture_count; i++) {
+ int start = register_vector[i * 2];
+ if (start >= 0) {
+ int end = register_vector[i * 2 + 1];
+ ASSERT(start <= end);
+ Handle<String> substring =
+ Factory::NewSubString(subject, start, end);
+ elements->set(i, *substring);
+ } else {
+ ASSERT(register_vector[i * 2 + 1] < 0);
+ elements->set(i, Heap::undefined_value());
+ }
+ }
+ elements->set(capture_count + 1, Smi::FromInt(match_start));
+ elements->set(capture_count + 2, *subject);
+ builder->Add(*Factory::NewJSArrayWithElements(elements));
+ }
+ // Swap register vectors, so the last successful match is in
+ // prev_register_vector.
+ Vector<int> tmp = prev_register_vector;
+ prev_register_vector = register_vector;
+ register_vector = tmp;
+
+ if (match_end > match_start) {
+ pos = match_end;
+ } else {
+ pos = match_end + 1;
+ if (pos > subject_length) {
+ break;
+ }
+ }
+
+ result = RegExpImpl::IrregexpExecOnce(regexp,
+ subject,
+ pos,
+ register_vector);
+ } while (result == RegExpImpl::RE_SUCCESS);
+
+ if (result != RegExpImpl::RE_EXCEPTION) {
+ // Finished matching, with at least one match.
+ if (match_end < subject_length) {
+ ReplacementStringBuilder::AddSubjectSlice(builder,
+ match_end,
+ subject_length);
+ }
+
+ int last_match_capture_count = (capture_count + 1) * 2;
+ int last_match_array_size =
+ last_match_capture_count + RegExpImpl::kLastMatchOverhead;
+ last_match_array->EnsureSize(last_match_array_size);
+ AssertNoAllocation no_gc;
+ FixedArray* elements =
FixedArray::cast(last_match_array->elements());
+ RegExpImpl::SetLastCaptureCount(elements, last_match_capture_count);
+ RegExpImpl::SetLastSubject(elements, *subject);
+ RegExpImpl::SetLastInput(elements, *subject);
+ for (int i = 0; i < last_match_capture_count; i++) {
+ RegExpImpl::SetCapture(elements, i, prev_register_vector[i]);
+ }
+ return RegExpImpl::RE_SUCCESS;
+ }
+ }
+ // No matches at all, return failure or exception result directly.
+ return result;
+}
+
+
+static Object* Runtime_RegExpExecMultiple(Arguments args) {
+ ASSERT(args.length() == 4);
+ HandleScope handles;
+
+ CONVERT_ARG_CHECKED(String, subject, 1);
+ if (!subject->IsFlat()) { FlattenString(subject); }
+ CONVERT_ARG_CHECKED(JSRegExp, regexp, 0);
+ CONVERT_ARG_CHECKED(JSArray, last_match_info, 2);
+ CONVERT_ARG_CHECKED(JSArray, result_array, 3);
+
+ ASSERT(last_match_info->HasFastElements());
+ ASSERT(regexp->GetFlags().is_global());
+ Handle<FixedArray> result_elements;
+ if (result_array->HasFastElements()) {
+ result_elements =
+ Handle<FixedArray>(FixedArray::cast(result_array->elements()));
+ } else {
+ result_elements = Factory::NewFixedArrayWithHoles(16);
+ }
+ FixedArrayBuilder builder(result_elements);
+
+ if (regexp->TypeTag() == JSRegExp::ATOM) {
+ Handle<String> pattern(
+ String::cast(regexp->DataAt(JSRegExp::kAtomPatternIndex)));
+ int pattern_length = pattern->length();
+ if (pattern_length == 1) {
+ if (SearchCharMultiple(subject, pattern, last_match_info, &builder))
{
+ return *builder.ToJSArray(result_array);
+ }
+ return Heap::null_value();
+ }
+
+ if (!pattern->IsFlat()) FlattenString(pattern);
+ if (SearchStringMultiple(subject, pattern, last_match_info, &builder))
{
+ return *builder.ToJSArray(result_array);
+ }
+ return Heap::null_value();
+ }
+
+ ASSERT_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP);
+
+ RegExpImpl::IrregexpResult result;
+ if (regexp->CaptureCount() == 0) {
+ result = SearchRegExpNoCaptureMultiple(subject,
+ regexp,
+ last_match_info,
+ &builder);
+ } else {
+ result = SearchRegExpMultiple(subject, regexp, last_match_info,
&builder);
+ }
+ if (result == RegExpImpl::RE_SUCCESS) return
*builder.ToJSArray(result_array);
+ if (result == RegExpImpl::RE_FAILURE) return Heap::null_value();
+ ASSERT_EQ(result, RegExpImpl::RE_EXCEPTION);
+ return Failure::Exception();
+}
static Object* Runtime_NumberToRadixString(Arguments args) {
=======================================
--- /branches/2.1/src/runtime.h Tue Mar 30 01:57:48 2010
+++ /branches/2.1/src/runtime.h Tue Mar 30 07:14:28 2010
@@ -153,6 +153,7 @@
/* Regular expressions */ \
F(RegExpCompile, 3, 1) \
F(RegExpExec, 4, 1) \
+ F(RegExpExecMultiple, 4, 1) \
\
/* Strings */ \
F(StringCharCodeAt, 2, 1) \
=======================================
--- /branches/2.1/src/string.js Tue Mar 30 01:57:48 2010
+++ /branches/2.1/src/string.js Tue Mar 30 07:14:28 2010
@@ -405,97 +405,95 @@
builder.addSpecialSlice(start, end);
};
+// TODO(lrn): This array will survive indefinitely if replace is never
+// called again. However, it will be empty, since the contents are cleared
+// in the finally block.
+var reusableReplaceArray = $Array(16);
// Helper function for replacing regular expressions with the result of a
-// function application in String.prototype.replace. The function
application
-// must be interleaved with the regexp matching (contrary to ECMA-262
-// 15.5.4.11) to mimic SpiderMonkey and KJS behavior when the function uses
-// the static properties of the RegExp constructor. Example:
-// 'abcd'.replace(/(.)/g, function() { return RegExp.$1; }
-// should be 'abcd' and not 'dddd' (or anything else).
+// function application in String.prototype.replace.
function StringReplaceRegExpWithFunction(subject, regexp, replace) {
- var matchInfo = DoRegExpExec(regexp, subject, 0);
- if (IS_NULL(matchInfo)) return subject;
-
- var result = new ReplaceResultBuilder(subject);
- // There's at least one match. If the regexp is global, we have to loop
- // over all matches. The loop is not in C++ code here like the one in
- // RegExp.prototype.exec, because of the interleaved function
application.
- // Unfortunately, that means this code is nearly duplicated, here and in
- // jsregexp.cc.
if (regexp.global) {
- var previous = 0;
- var startOfMatch;
- if (NUMBER_OF_CAPTURES(matchInfo) == 2) {
- // Both branches contain essentially the same loop except for the
call
- // to the replace function. The branch is put outside of the loop for
- // speed
- do {
- startOfMatch = matchInfo[CAPTURE0];
- result.addSpecialSlice(previous, startOfMatch);
- previous = matchInfo[CAPTURE1];
- var match = SubString(subject, startOfMatch, previous);
- // Don't call directly to avoid exposing the built-in global
object.
- result.add(replace.call(null, match, startOfMatch, subject));
- // Can't use matchInfo any more from here, since the function could
- // overwrite it.
- // Continue with the next match.
- // Increment previous if we matched an empty string, as per
ECMA-262
- // 15.5.4.10.
- if (previous == startOfMatch) {
- // Add the skipped character to the output, if any.
- if (previous < subject.length) {
- result.addSpecialSlice(previous, previous + 1);
- }
- previous++;
- // Per ECMA-262 15.10.6.2, if the previous index is greater than
the
- // string length, there is no match
- if (previous > subject.length) {
- return result.generate();
- }
- }
- matchInfo = DoRegExpExec(regexp, subject, previous);
- } while (!IS_NULL(matchInfo));
+ var resultArray = reusableReplaceArray;
+ if (resultArray) {
+ reusableReplaceArray = null;
} else {
- do {
- startOfMatch = matchInfo[CAPTURE0];
- result.addSpecialSlice(previous, startOfMatch);
- previous = matchInfo[CAPTURE1];
- result.add(ApplyReplacementFunction(replace, matchInfo, subject));
- // Can't use matchInfo any more from here, since the function could
- // overwrite it.
- // Continue with the next match.
- // Increment previous if we matched an empty string, as per
ECMA-262
- // 15.5.4.10.
- if (previous == startOfMatch) {
- // Add the skipped character to the output, if any.
- if (previous < subject.length) {
- result.addSpecialSlice(previous, previous + 1);
- }
- previous++;
- // Per ECMA-262 15.10.6.2, if the previous index is greater than
the
- // string length, there is no match
- if (previous > subject.length) {
- return result.generate();
- }
- }
- matchInfo = DoRegExpExec(regexp, subject, previous);
- } while (!IS_NULL(matchInfo));
- }
-
- // Tack on the final right substring after the last match.
- result.addSpecialSlice(previous, subject.length);
-
+ // Inside a nested replace (replace called from the replacement
function
+ // of another replace) or we have failed to set the reusable array
+ // back due to an exception in a replacement function. Create a new
+ // array to use in the future, or until the original is written back.
+ resultArray = $Array(16);
+ }
+ try {
+ // Must handle exceptions thrown by the replace functions correctly,
+ // including unregistering global regexps.
+ var res = %RegExpExecMultiple(regexp,
+ subject,
+ lastMatchInfo,
+ resultArray);
+ regexp.lastIndex = 0;
+ if (IS_NULL(res)) {
+ // No matches at all.
+ return subject;
+ }
+ var len = res.length;
+ var i = 0;
+ if (NUMBER_OF_CAPTURES(lastMatchInfo) == 2) {
+ var match_start = 0;
+ while (i < len) {
+ var elem = res[i];
+ if (%_IsSmi(elem)) {
+ if (elem > 0) {
+ match_start = (elem >> 11) + (elem & 0x7ff);
+ } else {
+ match_start = res[++i] - elem;
+ }
+ } else {
+ var func_result = replace.call(null, elem, match_start,
subject);
+ if (!IS_STRING(func_result)) {
+ func_result = NonStringToString(func_result);
+ }
+ res[i] = func_result;
+ match_start += elem.length;
+ }
+ i++;
+ }
+ } else {
+ while (i < len) {
+ var elem = res[i];
+ if (!%_IsSmi(elem)) {
+ // elem must be an Array.
+ // Use the apply argument as backing for global RegExp
properties.
+ lastMatchInfoOverride = elem;
+ var func_result = replace.apply(null, elem);
+ if (!IS_STRING(func_result)) {
+ func_result = NonStringToString(func_result);
+ }
+ res[i] = func_result;
+ }
+ i++;
+ }
+ }
+ var result = new ReplaceResultBuilder(subject, res);
+ return result.generate();
+ } finally {
+ lastMatchInfoOverride = null;
+ resultArray.length = 0;
+ reusableReplaceArray = resultArray;
+ }
} else { // Not a global regexp, no need to loop.
+ var matchInfo = DoRegExpExec(regexp, subject, 0);
+ if (IS_NULL(matchInfo)) return subject;
+
+ var result = new ReplaceResultBuilder(subject);
result.addSpecialSlice(0, matchInfo[CAPTURE0]);
var endOfMatch = matchInfo[CAPTURE1];
result.add(ApplyReplacementFunction(replace, matchInfo, subject));
// Can't use matchInfo any more from here, since the function could
// overwrite it.
result.addSpecialSlice(endOfMatch, subject.length);
- }
-
- return result.generate();
+ return result.generate();
+ }
}
@@ -894,8 +892,11 @@
// ReplaceResultBuilder support.
function ReplaceResultBuilder(str) {
- this.__proto__ = void 0;
- this.elements = new $Array();
+ if (%_ArgumentsLength() > 1) {
+ this.elements = %_Arguments(1);
+ } else {
+ this.elements = new $Array();
+ }
this.special_string = str;
}
=======================================
--- /branches/2.1/src/version.cc Tue Mar 30 01:57:48 2010
+++ /branches/2.1/src/version.cc Tue Mar 30 07:14:28 2010
@@ -35,7 +35,7 @@
#define MAJOR_VERSION 2
#define MINOR_VERSION 1
#define BUILD_NUMBER 10
-#define PATCH_LEVEL 1
+#define PATCH_LEVEL 2
#define CANDIDATE_VERSION false
// Define SONAME to have the SCons build the put a specific SONAME into the
=======================================
--- /branches/2.1/test/mjsunit/string-replace.js Wed Apr 22 04:54:55 2009
+++ /branches/2.1/test/mjsunit/string-replace.js Tue Mar 30 07:14:28 2010
@@ -178,5 +178,16 @@
longstring = longstring + longstring;
// longstring.length == 5 << 11
-replaceTest(longstring + longstring,
+replaceTest(longstring + longstring,
"<" + longstring + ">", /<(.*)>/g, "$1$1");
+
+replaceTest("string 42", "string x", /x/g, function() { return 42; });
+replaceTest("string 42", "string x", /x/, function() { return 42; });
+replaceTest("string 42", "string x", /[xy]/g, function() { return 42; });
+replaceTest("string 42", "string x", /[xy]/, function() { return 42; });
+replaceTest("string true", "string x", /x/g, function() { return true; });
+replaceTest("string null", "string x", /x/g, function() { return null; });
+replaceTest("string undefined", "string x", /x/g, function() { return
undefined; });
+
+replaceTest("aundefinedbundefinedcundefined",
+ "abc", /(.)|(.)/g, function(m, m1, m2, i, s) { return m1+m2;
});
--
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev
To unsubscribe from this group, send email to v8-dev+unsubscribe@googlegroups.com or reply
to this email with the words "REMOVE ME" as the subject.