Author: [EMAIL PROTECTED]
Date: Wed Nov 19 02:16:41 2008
New Revision: 793
Modified:
branches/experimental/regexp2000/src/heap.cc
branches/experimental/regexp2000/src/heap.h
branches/experimental/regexp2000/src/jsregexp.cc
branches/experimental/regexp2000/src/objects-inl.h
branches/experimental/regexp2000/src/objects.cc
branches/experimental/regexp2000/src/objects.h
branches/experimental/regexp2000/src/parser.cc
branches/experimental/regexp2000/src/parser.h
branches/experimental/regexp2000/test/cctest/test-regexp.cc
Log:
Introduced a flat string reader abstraction that reads directly from a
flat string independent of the character width. Replaced the
stream-based input to the regexp parser with a flat string reader and
dropped the 'next' field; Next() now reads directly from the string.
Modified: branches/experimental/regexp2000/src/heap.cc
==============================================================================
--- branches/experimental/regexp2000/src/heap.cc (original)
+++ branches/experimental/regexp2000/src/heap.cc Wed Nov 19 02:16:41 2008
@@ -390,8 +390,7 @@
}
Counters::objs_since_last_young.Set(0);
- // Process weak handles post gc.
- GlobalHandles::PostGarbageCollectionProcessing();
+ PostGarbageCollectionProcessing();
if (collector == MARK_COMPACTOR) {
// Register the amount of external allocated memory.
@@ -403,6 +402,14 @@
ASSERT(!allocation_allowed_);
global_gc_epilogue_callback_();
}
+}
+
+
+void Heap::PostGarbageCollectionProcessing() {
+ // Process weak handles post gc.
+ GlobalHandles::PostGarbageCollectionProcessing();
+ // Update flat string readers.
+ FlatStringReader::PostGarbageCollectionProcessing();
}
Modified: branches/experimental/regexp2000/src/heap.h
==============================================================================
--- branches/experimental/regexp2000/src/heap.h (original)
+++ branches/experimental/regexp2000/src/heap.h Wed Nov 19 02:16:41 2008
@@ -578,6 +578,9 @@
static void GarbageCollectionPrologue();
static void GarbageCollectionEpilogue();
+ // Code that should be executed after the garbage collection proper.
+ static void PostGarbageCollectionProcessing();
+
// Performs garbage collection operation.
// Returns whether required_space bytes are available after the collection.
static bool CollectGarbage(int required_space, AllocationSpace space);
Modified: branches/experimental/regexp2000/src/jsregexp.cc
==============================================================================
--- branches/experimental/regexp2000/src/jsregexp.cc (original)
+++ branches/experimental/regexp2000/src/jsregexp.cc Wed Nov 19 02:16:41 2008
@@ -203,14 +203,14 @@
Handle<FixedArray> cached = CompilationCache::LookupRegExp(pattern,
flags);
bool in_cache = !cached.is_null();
Handle<Object> result;
- StringShape shape(*pattern);
if (in_cache) {
re->set_data(*cached);
result = re;
} else {
- SafeStringInputBuffer buffer(pattern.location());
+ FlattenString(pattern);
RegExpParseResult parse_result;
- if (!ParseRegExp(&buffer, &parse_result)) {
+ FlatStringReader reader(pattern);
+ if (!ParseRegExp(&reader, &parse_result)) {
// Throw an exception if we fail to parse the pattern.
ThrowRegExpException(re,
pattern,
Modified: branches/experimental/regexp2000/src/objects-inl.h
==============================================================================
--- branches/experimental/regexp2000/src/objects-inl.h (original)
+++ branches/experimental/regexp2000/src/objects-inl.h Wed Nov 19 02:16:41 2008
@@ -279,6 +279,16 @@
}
+uc32 FlatStringReader::Get(int index) {
+ ASSERT(0 <= index && index <= length_);
+ if (is_ascii_) {
+ return static_cast<const byte*>(start_)[index];
+ } else {
+ return static_cast<const uc16*>(start_)[index];
+ }
+}
+
+
bool Object::IsNumber() {
return IsSmi() || IsHeapNumber();
}
Modified: branches/experimental/regexp2000/src/objects.cc
==============================================================================
--- branches/experimental/regexp2000/src/objects.cc (original)
+++ branches/experimental/regexp2000/src/objects.cc Wed Nov 19 02:16:41 2008
@@ -3501,6 +3501,57 @@
}
+FlatStringReader* FlatStringReader::top_ = NULL;
+
+
+FlatStringReader::FlatStringReader(Handle<String> str)
+ : str_(str.location()),
+ length_(str->length()),
+ prev_(top_) {
+ top_ = this;
+ RefreshState();
+}
+
+
+FlatStringReader::FlatStringReader(Vector<const char> input)
+ : str_(NULL),
+ is_ascii_(true),
+ length_(input.length()),
+ start_(input.start()),
+ prev_(top_) {
+ top_ = this;
+}
+
+
+FlatStringReader::~FlatStringReader() {
+ ASSERT_EQ(top_, this);
+ top_ = prev_;
+}
+
+
+void FlatStringReader::RefreshState() {
+ if (str_ == NULL) return;
+ Handle<String> str(str_);
+ StringShape shape(*str);
+ ASSERT(str->IsFlat(shape));
+ is_ascii_ = shape.IsAsciiRepresentation();
+ if (is_ascii_) {
+ start_ = str->ToAsciiVector().start();
+ } else {
+ start_ = str->ToUC16Vector().start();
+ }
+}
+
+
+void FlatStringReader::PostGarbageCollectionProcessing() {
+ FlatStringReader* current = top_;
+ while (current != NULL) {
+ current->RefreshState();
+ current = current->prev_;
+ }
+}
+
+
void StringInputBuffer::Seek(unsigned pos) {
Reset(pos, input_);
}
Modified: branches/experimental/regexp2000/src/objects.h
==============================================================================
--- branches/experimental/regexp2000/src/objects.h (original)
+++ branches/experimental/regexp2000/src/objects.h Wed Nov 19 02:16:41 2008
@@ -3590,6 +3590,28 @@
};
+// A flat string reader provides random access to the contents of a
+// string independent of the character width of the string. The handle
+// must be valid as long as the reader is being used.
+class FlatStringReader BASE_EMBEDDED {
+ public:
+ explicit FlatStringReader(Handle<String> str);
+ explicit FlatStringReader(Vector<const char> input);
+ ~FlatStringReader();
+ void RefreshState();
+ inline uc32 Get(int index);
+ int length() { return length_; }
+ static void PostGarbageCollectionProcessing();
+ private:
+ String** str_;
+ bool is_ascii_;
+ int length_;
+ const void* start_;
+ FlatStringReader* prev_;
+ static FlatStringReader* top_;
+};
+
+
// Note that StringInputBuffers are not valid across a GC! To fix this
// it would have to store a String Handle instead of a String* and
// AsciiStringReadBlock would have to be modified to use memcpy.
Modified: branches/experimental/regexp2000/src/parser.cc
==============================================================================
--- branches/experimental/regexp2000/src/parser.cc (original)
+++ branches/experimental/regexp2000/src/parser.cc Wed Nov 19 02:16:41 2008
@@ -496,7 +496,7 @@
class RegExpParser {
public:
- RegExpParser(unibrow::CharacterStream* in,
+ RegExpParser(FlatStringReader* in,
Handle<String>* error,
bool multiline_mode);
RegExpTree* ParsePattern(bool* ok);
@@ -531,36 +531,26 @@
RegExpTree* ReportError(Vector<const char> message, bool* ok);
void Advance();
void Advance(int dist);
- // Pushes a read character (or potentially some other character) back
- // on the input stream. After pushing it back, it becomes the character
- // returned by current(). There is a limited amount of push-back buffer.
- // A function using PushBack should check that it doesn't push back more
- // than kMaxPushback characters, and it should not push back more characters
- // than it has read.
- void PushBack(uc32 character);
- bool CanPushBack();
+ void Reset(int pos);
bool HasCharacterEscapes();
int captures_started() { return captures_ == NULL ? 0 :
captures_->length(); }
+ int position() { return next_pos_ - 1; }
static const uc32 kEndMarker = unibrow::Utf8::kBadChar;
private:
uc32 current() { return current_; }
- uc32 next() { return next_; }
bool has_more() { return has_more_; }
- bool has_next() { return has_next_; }
- unibrow::CharacterStream* in() { return in_; }
+ bool has_next() { return next_pos_ < in()->length(); }
+ uc32 Next();
+ FlatStringReader* in() { return in_; }
uc32 current_;
- uc32 next_;
bool has_more_;
- bool has_next_;
bool multiline_mode_;
- unibrow::CharacterStream* in_;
+ int next_pos_;
+ FlatStringReader* in_;
Handle<String>* error_;
- static const int kMaxPushback = 5;
- int pushback_count_;
- uc32 pushback_buffer_[kMaxPushback];
bool has_character_escapes_;
ZoneList<RegExpCapture*>* captures_;
};
@@ -3506,63 +3496,53 @@
// Regular expressions
-RegExpParser::RegExpParser(unibrow::CharacterStream* in,
+RegExpParser::RegExpParser(FlatStringReader* in,
Handle<String>* error,
bool multiline_mode)
: current_(kEndMarker),
- next_(kEndMarker),
has_more_(true),
- has_next_(true),
multiline_mode_(multiline_mode),
+ next_pos_(0),
in_(in),
error_(error),
- pushback_count_(0),
has_character_escapes_(false),
captures_(NULL) {
- Advance(2);
+ Advance(1);
}
-void RegExpParser::Advance() {
- current_ = next_;
- has_more_ = has_next_;
- if (pushback_count_ > 0) {
- pushback_count_--;
- next_ = pushback_buffer_[pushback_count_];
- } else if (in()->has_more()) {
- next_ = in()->GetNext();
+uc32 RegExpParser::Next() {
+ if (has_next()) {
+ return in()->Get(next_pos_);
} else {
- next_ = kEndMarker;
- has_next_ = false;
+ return kEndMarker;
}
}
-void RegExpParser::Advance(int dist) {
- for (int i = 0; i < dist; i++)
- Advance();
-}
-
-
-void RegExpParser::PushBack(uc32 character) {
- if (has_next_) {
- ASSERT(pushback_count_ < kMaxPushback);
- pushback_buffer_[pushback_count_] = next_;
- pushback_count_++;
+void RegExpParser::Advance() {
+ if (next_pos_ < in()->length()) {
+ current_ = in()->Get(next_pos_);
+ next_pos_++;
+ } else {
+ current_ = kEndMarker;
+ has_more_ = false;
}
+}
- next_ = current_;
- has_next_ = has_more_;
- current_ = character;
- has_more_ = true;
+void RegExpParser::Reset(int pos) {
+ next_pos_ = pos;
+ Advance();
}
-bool RegExpParser::CanPushBack() {
- return (pushback_count_ < kMaxPushback);
+void RegExpParser::Advance(int dist) {
+ for (int i = 0; i < dist; i++)
+ Advance();
}
+
// Reports whether the parsed string atoms contain any characters that were
// escaped in the original pattern. If not, all atoms are proper substrings
// of the original pattern.
@@ -3662,7 +3642,7 @@
// Atom ::
// \ AtomEscape
case '\\':
- switch (next()) {
+ switch (Next()) {
case kEndMarker:
ReportError(CStrVector("\\ at end of pattern"), CHECK_OK);
case 'b':
@@ -3681,7 +3661,7 @@
// CharacterClassEscape :: one of
// d D s S w W
case 'd': case 'D': case 's': case 'S': case 'w': case 'W': {
- uc32 c = next();
+ uc32 c = Next();
Advance(2);
ZoneList<CharacterRange>* ranges = new ZoneList<CharacterRange>(2);
CharacterRange::AddClassEscape(c, ranges);
@@ -3703,7 +3683,7 @@
builder.AddAtom(atom);
goto has_read_atom; // Avoid setting has_character_escapes_.
}
- uc32 first_digit = next();
+ uc32 first_digit = Next();
if (first_digit == '8' || first_digit == '9') {
// Treat as identity escape
builder.AddCharacter(first_digit);
@@ -3768,7 +3748,7 @@
}
default:
// Identity escape.
- builder.AddCharacter(next());
+ builder.AddCharacter(Next());
Advance(2);
break;
}
@@ -3861,8 +3841,7 @@
bool RegExpParser::ParseBackreferenceIndex(int* index_out) {
ASSERT_EQ('\\', current());
- ASSERT('1' <= next() && next() <= '9');
- ASSERT_EQ(0, pushback_count_);
+ ASSERT('1' <= Next() && Next() <= '9');
// Try to parse a decimal literal that is no greater than the number
// of previously encountered left capturing parentheses.
// This is not according to the ECMAScript specification. According to
@@ -3870,30 +3849,19 @@
// parentheses in the entire input, even if they are meaningless.
if (captures_ == NULL)
return false;
- int value = next() - '0';
+ int start = position();
+ int value = Next() - '0';
if (value > captures_->length())
return false;
- static const int kMaxChars = kMaxPushback - 2;
- EmbeddedVector<uc32, kMaxChars> chars_seen;
- chars_seen[0] = next();
- int char_count = 1;
Advance(2);
while (true) {
uc32 c = current();
if (IsDecimalDigit(c)) {
value = 10 * value + (c - '0');
- // To avoid reading past the end of the stack-allocated pushback
- // buffers we only read kMaxChars before giving up.
- if (value > captures_->length() || char_count > kMaxChars) {
- // If we give up we have to push the characters we read back
- // onto the pushback buffer in the reverse order.
- for (int i = 0; i < char_count; i++) {
- PushBack(chars_seen[char_count - i - 1]);
- }
- PushBack('\\');
+ if (value > captures_->length()) {
+ Reset(start);
return false;
}
- chars_seen[char_count++] = current();
Advance();
} else {
break;
@@ -3992,26 +3960,19 @@
bool RegExpParser::ParseHexEscape(int length, uc32 *value) {
- static const int kMaxChars = kMaxPushback;
- EmbeddedVector<uc32, kMaxChars> chars_seen;
- ASSERT(length <= kMaxChars);
+ int start = position();
uc32 val = 0;
bool done = false;
for (int i = 0; !done; i++) {
uc32 c = current();
int d = HexValue(c);
if (d < 0) {
- while (i > 0) {
- i--;
- PushBack(chars_seen[i]);
- }
+ Reset(start);
return false;
}
val = val * 16 + d;
Advance();
- if (i < length - 1) {
- chars_seen[i] = c;
- } else {
+ if (i == length - 1) {
done = true;
}
}
@@ -4022,7 +3983,7 @@
uc32 RegExpParser::ParseClassCharacterEscape(bool* ok) {
ASSERT(current() == '\\');
- ASSERT(has_next() && !IsSpecialClassEscape(next()));
+ ASSERT(has_next() && !IsSpecialClassEscape(Next()));
Advance();
switch (current()) {
case 'b':
@@ -4091,9 +4052,9 @@
char type = '(';
Advance();
if (current() == '?') {
- switch (next()) {
+ switch (Next()) {
case ':': case '=': case '!':
- type = next();
+ type = Next();
Advance(2);
break;
default:
@@ -4153,10 +4114,10 @@
ASSERT_EQ(false, *is_char_class);
uc32 first = current();
if (first == '\\') {
- switch (next()) {
+ switch (Next()) {
case 'w': case 'W': case 'd': case 'D': case 's': case 'S': {
*is_char_class = true;
- uc32 c = next();
+ uc32 c = Next();
CharacterRange::AddClassEscape(c, ranges);
Advance(2);
return NULL;
@@ -4270,10 +4231,10 @@
}
-bool ParseRegExp(unibrow::CharacterStream* stream, RegExpParseResult*
result) {
+bool ParseRegExp(FlatStringReader* input, RegExpParseResult* result) {
ASSERT(result != NULL);
// Get multiline flag somehow
- RegExpParser parser(stream, &result->error, false);
+ RegExpParser parser(input, &result->error, false);
bool ok = true;
result->tree = parser.ParsePattern(&ok);
if (!ok) {
Modified: branches/experimental/regexp2000/src/parser.h
==============================================================================
--- branches/experimental/regexp2000/src/parser.h (original)
+++ branches/experimental/regexp2000/src/parser.h Wed Nov 19 02:16:41 2008
@@ -145,7 +145,7 @@
v8::Extension* extension);
-bool ParseRegExp(unibrow::CharacterStream* stream, RegExpParseResult*
result);
+bool ParseRegExp(FlatStringReader* input, RegExpParseResult* result);
// Support for doing lazy compilation. The script is the script containing full
Modified: branches/experimental/regexp2000/test/cctest/test-regexp.cc
==============================================================================
--- branches/experimental/regexp2000/test/cctest/test-regexp.cc (original)
+++ branches/experimental/regexp2000/test/cctest/test-regexp.cc Wed Nov 19 02:16:41 2008
@@ -47,10 +47,10 @@
static SmartPointer<const char> Parse(const char* input) {
v8::HandleScope scope;
- unibrow::Utf8InputBuffer<> buffer(input, strlen(input));
ZoneScope zone_scope(DELETE_ON_EXIT);
+ FlatStringReader reader(CStrVector(input));
RegExpParseResult result;
- CHECK(v8::internal::ParseRegExp(&buffer, &result));
+ CHECK(v8::internal::ParseRegExp(&reader, &result));
CHECK(result.tree != NULL);
CHECK(result.error.is_null());
SmartPointer<const char> output = result.tree->ToString();
@@ -61,8 +61,9 @@
v8::HandleScope scope;
unibrow::Utf8InputBuffer<> buffer(input, strlen(input));
ZoneScope zone_scope(DELETE_ON_EXIT);
+ FlatStringReader reader(CStrVector(input));
RegExpParseResult result;
- CHECK(v8::internal::ParseRegExp(&buffer, &result));
+ CHECK(v8::internal::ParseRegExp(&reader, &result));
CHECK(result.tree != NULL);
CHECK(result.error.is_null());
return result.has_character_escapes;
@@ -227,10 +228,10 @@
static void ExpectError(const char* input,
const char* expected) {
v8::HandleScope scope;
- unibrow::Utf8InputBuffer<> buffer(input, strlen(input));
ZoneScope zone_scope(DELETE_ON_EXIT);
+ FlatStringReader reader(CStrVector(input));
RegExpParseResult result;
- CHECK_EQ(false, v8::internal::ParseRegExp(&buffer, &result));
+ CHECK_EQ(false, v8::internal::ParseRegExp(&reader, &result));
CHECK(result.tree == NULL);
CHECK(!result.error.is_null());
SmartPointer<char> str = result.error->ToCString(ALLOW_NULLS);
@@ -343,9 +344,9 @@
static RegExpNode* Compile(const char* input) {
- unibrow::Utf8InputBuffer<> buffer(input, strlen(input));
+ FlatStringReader reader(CStrVector(input));
RegExpParseResult result;
- if (!v8::internal::ParseRegExp(&buffer, &result))
+ if (!v8::internal::ParseRegExp(&reader, &result))
return NULL;
RegExpNode* node = NULL;
RegExpEngine::Compile(&result, &node, false);
--~--~---------~--~----~------------~-------~--~----~
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev
-~----------~----~----~----~------~----~------~--~---