Author: [EMAIL PROTECTED] Date: Mon Dec 1 07:32:20 2008 New Revision: 879
Modified: branches/bleeding_edge/src/parser.cc branches/bleeding_edge/src/top.cc branches/bleeding_edge/src/top.h branches/bleeding_edge/src/zone-inl.h branches/bleeding_edge/src/zone.cc branches/bleeding_edge/src/zone.h branches/bleeding_edge/test/cctest/test-regexp.cc Log: Added checking in the regular expression parser that we're below the stack limit and that the zone allocation limit hasn't been met. Modified: branches/bleeding_edge/src/parser.cc ============================================================================== --- branches/bleeding_edge/src/parser.cc (original) +++ branches/bleeding_edge/src/parser.cc Mon Dec 1 07:32:20 2008 @@ -495,10 +495,10 @@ RegExpParser(FlatStringReader* in, Handle<String>* error, bool multiline_mode); - RegExpTree* ParsePattern(bool* ok); - RegExpTree* ParseDisjunction(bool* ok); - RegExpTree* ParseGroup(bool* ok); - RegExpTree* ParseCharacterClass(bool* ok); + RegExpTree* ParsePattern(); + RegExpTree* ParseDisjunction(); + RegExpTree* ParseGroup(); + RegExpTree* ParseCharacterClass(); // Parses a {...,...} quantifier and stores the range in the given // out parameters. @@ -506,13 +506,13 @@ // Parses and returns a single escaped character. The character // must not be 'b' or 'B' since they are usually handle specially. - uc32 ParseClassCharacterEscape(bool* ok); + uc32 ParseClassCharacterEscape(); // Checks whether the following is a length-digit hexadecimal number, // and sets the value if it is. bool ParseHexEscape(int length, uc32* value); - uc32 ParseControlLetterEscape(bool* ok); + uc32 ParseControlLetterEscape(); uc32 ParseOctalLiteral(); // Tries to parse the input as a back reference. If successful it @@ -521,9 +521,8 @@ // can be reparsed. bool ParseBackReferenceIndex(int* index_out); - CharacterRange ParseClassAtom(uc16* char_class, - bool* ok); - RegExpTree* ReportError(Vector<const char> message, bool* ok); + CharacterRange ParseClassAtom(uc16* char_class); + RegExpTree* ReportError(Vector<const char> message); void Advance(); void Advance(int dist); void Reset(int pos); @@ -532,6 +531,7 @@ int captures_started() { return captures_ == NULL ? 0 : captures_->length(); } int position() { return next_pos_ - 1; } + bool failed() { return failed_; } static const uc32 kEndMarker = (1 << 21); private: @@ -553,6 +553,7 @@ bool is_scanned_for_captures_; // The capture count is only valid after we have scanned for captures. int capture_count_; + bool failed_; }; @@ -1027,6 +1028,11 @@ #define DUMMY ) // to make indentation work #undef DUMMY +#define CHECK_FAILED ); \ + if (failed_) return NULL; \ + ((void)0 +#define DUMMY ) // to make indentation work +#undef DUMMY // ---------------------------------------------------------------------------- // Implementation of Parser @@ -3499,7 +3505,8 @@ has_character_escapes_(false), captures_(NULL), is_scanned_for_captures_(false), - capture_count_(0) { + capture_count_(0), + failed_(false) { Advance(1); } @@ -3515,8 +3522,15 @@ void RegExpParser::Advance() { if (next_pos_ < in()->length()) { - current_ = in()->Get(next_pos_); - next_pos_++; + StackLimitCheck check; + if (check.HasOverflowed()) { + ReportError(CStrVector(Top::kStackOverflowMessage)); + } else if (Zone::excess_allocation()) { + ReportError(CStrVector("Regular expression too large")); + } else { + current_ = in()->Get(next_pos_); + next_pos_++; + } } else { current_ = kEndMarker; has_more_ = false; @@ -3543,19 +3557,22 @@ return has_character_escapes_; } -RegExpTree* RegExpParser::ReportError(Vector<const char> message, bool* ok) { - *ok = false; +RegExpTree* RegExpParser::ReportError(Vector<const char> message) { + failed_ = true; *error_ = Factory::NewStringFromAscii(message, NOT_TENURED); + // Zip to the end to make sure the no more input is read. + current_ = kEndMarker; + next_pos_ = in()->length(); return NULL; } // Pattern :: // Disjunction -RegExpTree* RegExpParser::ParsePattern(bool* ok) { - RegExpTree* result = ParseDisjunction(CHECK_OK); +RegExpTree* RegExpParser::ParsePattern() { + RegExpTree* result = ParseDisjunction(CHECK_FAILED); if (has_more()) { - ReportError(CStrVector("Unmatched ')'"), CHECK_OK); + ReportError(CStrVector("Unmatched ')'") CHECK_FAILED); } return result; } @@ -3579,7 +3596,7 @@ // Assertion // Atom // Atom Quantifier -RegExpTree* RegExpParser::ParseDisjunction(bool* ok) { +RegExpTree* RegExpParser::ParseDisjunction() { RegExpBuilder builder; int capture_start_index = captures_started(); while (true) { @@ -3603,7 +3620,7 @@ case '*': case '+': case '?': - ReportError(CStrVector("Nothing to repeat"), CHECK_OK); + ReportError(CStrVector("Nothing to repeat") CHECK_FAILED); case '^': { Advance(); RegExpAssertion::Type type = @@ -3630,12 +3647,12 @@ break; } case '(': { - RegExpTree* atom = ParseGroup(CHECK_OK); + RegExpTree* atom = ParseGroup(CHECK_FAILED); builder.AddAtom(atom); break; } case '[': { - RegExpTree* atom = ParseCharacterClass(CHECK_OK); + RegExpTree* atom = ParseCharacterClass(CHECK_FAILED); builder.AddAtom(atom); break; } @@ -3644,7 +3661,7 @@ case '\\': switch (Next()) { case kEndMarker: - ReportError(CStrVector("\\ at end of pattern"), CHECK_OK); + ReportError(CStrVector("\\ at end of pattern") CHECK_FAILED); case 'b': Advance(2); builder.AddAssertion( @@ -3722,7 +3739,7 @@ break; case 'c': { Advance(2); - uc32 control = ParseControlLetterEscape(ok); + uc32 control = ParseControlLetterEscape(); builder.AddCharacter(control); break; } @@ -3757,7 +3774,7 @@ case '{': { int dummy; if (ParseIntervalQuantifier(&dummy, &dummy)) { - ReportError(CStrVector("Nothing to repeat"), CHECK_OK); + ReportError(CStrVector("Nothing to repeat") CHECK_FAILED); } // fallthrough } @@ -3974,9 +3991,9 @@ // Upper and lower case letters differ by one bit. STATIC_CHECK(('a' ^ 'A') == 0x20); -uc32 RegExpParser::ParseControlLetterEscape(bool* ok) { +uc32 RegExpParser::ParseControlLetterEscape() { if (!has_more()) { - ReportError(CStrVector("\\c at end of pattern"), ok); + ReportError(CStrVector("\\c at end of pattern")); return '\0'; } uc32 letter = current() & ~(0x20); // Collapse upper and lower case letters. @@ -4030,7 +4047,7 @@ } -uc32 RegExpParser::ParseClassCharacterEscape(bool* ok) { +uc32 RegExpParser::ParseClassCharacterEscape() { ASSERT(current() == '\\'); ASSERT(has_next() && !IsSpecialClassEscape(Next())); Advance(); @@ -4056,7 +4073,7 @@ Advance(); return '\v'; case 'c': - return ParseControlLetterEscape(ok); + return ParseControlLetterEscape(); case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': // For compatibility, we interpret a decimal escape that isn't @@ -4096,7 +4113,7 @@ } -RegExpTree* RegExpParser::ParseGroup(bool* ok) { +RegExpTree* RegExpParser::ParseGroup() { ASSERT_EQ(current(), '('); char type = '('; Advance(); @@ -4107,7 +4124,7 @@ Advance(2); break; default: - ReportError(CStrVector("Invalid group"), CHECK_OK); + ReportError(CStrVector("Invalid group") CHECK_FAILED); break; } } else { @@ -4117,9 +4134,9 @@ captures_->Add(NULL); } int capture_index = captures_started(); - RegExpTree* body = ParseDisjunction(CHECK_OK); + RegExpTree* body = ParseDisjunction(CHECK_FAILED); if (current() != ')') { - ReportError(CStrVector("Unterminated group"), CHECK_OK); + ReportError(CStrVector("Unterminated group") CHECK_FAILED); } Advance(); @@ -4157,7 +4174,7 @@ } -CharacterRange RegExpParser::ParseClassAtom(uc16* char_class, bool* ok) { +CharacterRange RegExpParser::ParseClassAtom(uc16* char_class) { ASSERT_EQ(0, *char_class); uc32 first = current(); if (first == '\\') { @@ -4168,7 +4185,7 @@ return CharacterRange::Singleton(0); // Return dummy value. } default: - uc32 c = ParseClassCharacterEscape(CHECK_OK); + uc32 c = ParseClassCharacterEscape(CHECK_FAILED); return CharacterRange::Singleton(c); } } else { @@ -4178,7 +4195,7 @@ } -RegExpTree* RegExpParser::ParseCharacterClass(bool* ok) { +RegExpTree* RegExpParser::ParseCharacterClass() { static const char* kUnterminated = "Unterminated character class"; static const char* kRangeOutOfOrder = "Range out of order in character class"; @@ -4192,7 +4209,7 @@ ZoneList<CharacterRange>* ranges = new ZoneList<CharacterRange>(2); while (has_more() && current() != ']') { uc16 char_class = 0; - CharacterRange first = ParseClassAtom(&char_class, CHECK_OK); + CharacterRange first = ParseClassAtom(&char_class CHECK_FAILED); if (char_class) { CharacterRange::AddClassEscape(char_class, ranges); continue; @@ -4208,7 +4225,7 @@ ranges->Add(CharacterRange::Singleton('-')); break; } - CharacterRange next = ParseClassAtom(&char_class, CHECK_OK); + CharacterRange next = ParseClassAtom(&char_class CHECK_FAILED); if (char_class) { ranges->Add(first); ranges->Add(CharacterRange::Singleton('-')); @@ -4216,7 +4233,7 @@ continue; } if (first.from() > next.to()) { - return ReportError(CStrVector(kRangeOutOfOrder), CHECK_OK); + return ReportError(CStrVector(kRangeOutOfOrder) CHECK_FAILED); } ranges->Add(CharacterRange::Range(first.from(), next.to())); } else { @@ -4224,7 +4241,7 @@ } } if (!has_more()) { - return ReportError(CStrVector(kUnterminated), CHECK_OK); + return ReportError(CStrVector(kUnterminated) CHECK_FAILED); } Advance(); if (ranges->length() == 0) { @@ -4287,21 +4304,20 @@ bool multiline, RegExpParseResult* result) { ASSERT(result != NULL); + // Make sure we have a stack guard. + StackGuard guard; RegExpParser parser(input, &result->error, multiline); - bool ok = true; - result->tree = parser.ParsePattern(&ok); - if (!ok) { + result->tree = parser.ParsePattern(); + if (parser.failed()) { ASSERT(result->tree == NULL); ASSERT(!result->error.is_null()); } else { ASSERT(result->tree != NULL); ASSERT(result->error.is_null()); - } - if (ok) { result->has_character_escapes = parser.HasCharacterEscapes(); result->capture_count = parser.captures_started(); } - return ok; + return !parser.failed(); } Modified: branches/bleeding_edge/src/top.cc ============================================================================== --- branches/bleeding_edge/src/top.cc (original) +++ branches/bleeding_edge/src/top.cc Mon Dec 1 07:32:20 2008 @@ -603,6 +603,10 @@ } +const char* Top::kStackOverflowMessage = + "Uncaught RangeError: Maximum call stack size exceeded"; + + Failure* Top::StackOverflow() { HandleScope scope; Handle<String> key = Factory::stack_overflow_symbol(); @@ -616,9 +620,7 @@ // doesn't use ReportUncaughtException to determine the location // from where the exception occurred. It should probably be // reworked. - static const char* kMessage = - "Uncaught RangeError: Maximum call stack size exceeded"; - DoThrow(*exception, NULL, kMessage); + DoThrow(*exception, NULL, kStackOverflowMessage); return Failure::Exception(); } Modified: branches/bleeding_edge/src/top.h ============================================================================== --- branches/bleeding_edge/src/top.h (original) +++ branches/bleeding_edge/src/top.h Mon Dec 1 07:32:20 2008 @@ -271,6 +271,8 @@ static char* ArchiveThread(char* to); static char* RestoreThread(char* from); + static const char* kStackOverflowMessage; + private: // The context that initiated this JS execution. static ThreadLocalTop thread_local_; Modified: branches/bleeding_edge/src/zone-inl.h ============================================================================== --- branches/bleeding_edge/src/zone-inl.h (original) +++ branches/bleeding_edge/src/zone-inl.h Mon Dec 1 07:32:20 2008 @@ -48,6 +48,11 @@ } +bool Zone::excess_allocation() { + return segment_bytes_allocated_ > zone_excess_limit_; +} + + } } // namespace v8::internal #endif // V8_ZONE_INL_H_ Modified: branches/bleeding_edge/src/zone.cc ============================================================================== --- branches/bleeding_edge/src/zone.cc (original) +++ branches/bleeding_edge/src/zone.cc Mon Dec 1 07:32:20 2008 @@ -34,6 +34,8 @@ Address Zone::position_ = 0; Address Zone::limit_ = 0; +int Zone::zone_excess_limit_ = 256 * MB; +int Zone::segment_bytes_allocated_ = 0; bool AssertNoZoneAllocation::allow_allocation_ = true; @@ -63,6 +65,7 @@ // of the segment chain. Returns the new segment. static Segment* New(int size) { Segment* result = reinterpret_cast<Segment*>(Malloced::New(size)); + Zone::segment_bytes_allocated_ += size; if (result != NULL) { result->next_ = head_; result->size_ = size; @@ -72,10 +75,13 @@ } // Deletes the given segment. Does not touch the segment chain. - static void Delete(Segment* segment) { + static void Delete(Segment* segment, int size) { + Zone::segment_bytes_allocated_ -= size; Malloced::Delete(segment); } + static int bytes_allocated() { return bytes_allocated_; } + private: // Computes the address of the nth byte in this segment. Address address(int n) const { @@ -83,12 +89,14 @@ } static Segment* head_; + static int bytes_allocated_; Segment* next_; int size_; }; Segment* Segment::head_ = NULL; +int Segment::bytes_allocated_ = 0; void Zone::DeleteAll() { @@ -112,11 +120,12 @@ // Unlink the segment we wish to keep from the list. current->clear_next(); } else { + int size = current->size(); #ifdef DEBUG // Zap the entire current segment (including the header). - memset(current, kZapDeadByte, current->size()); + memset(current, kZapDeadByte, size); #endif - Segment::Delete(current); + Segment::Delete(current, size); } current = next; } Modified: branches/bleeding_edge/src/zone.h ============================================================================== --- branches/bleeding_edge/src/zone.h (original) +++ branches/bleeding_edge/src/zone.h Mon Dec 1 07:32:20 2008 @@ -61,7 +61,13 @@ // Delete all objects and free all memory allocated in the Zone. static void DeleteAll(); + // Returns true if more memory has been allocated in zones than + // the limit allows. + static inline bool excess_allocation(); + private: + friend class Segment; + // All pointers returned from New() have this alignment. static const int kAlignment = kPointerSize; @@ -71,6 +77,13 @@ // Never keep segments larger than this size in bytes around. static const int kMaximumKeptSegmentSize = 64 * KB; + // Report zone excess when allocation exceeds this limit. + static int zone_excess_limit_; + + // The number of bytes allocated in segments. Note that this number + // includes memory allocated from the OS but not yet allocated from + // the zone. + static int segment_bytes_allocated_; // The Zone is intentionally a singleton; you should not try to // allocate instances of the class. Modified: branches/bleeding_edge/test/cctest/test-regexp.cc ============================================================================== --- branches/bleeding_edge/test/cctest/test-regexp.cc (original) +++ branches/bleeding_edge/test/cctest/test-regexp.cc Mon Dec 1 07:32:20 2008 @@ -51,6 +51,7 @@ static SmartPointer<const char> Parse(const char* input) { + V8::Initialize(NULL); v8::HandleScope scope; ZoneScope zone_scope(DELETE_ON_EXIT); FlatStringReader reader(CStrVector(input)); @@ -63,6 +64,7 @@ } static bool ParseEscapes(const char* input) { + V8::Initialize(NULL); v8::HandleScope scope; unibrow::Utf8InputBuffer<> buffer(input, strlen(input)); ZoneScope zone_scope(DELETE_ON_EXIT); @@ -253,6 +255,7 @@ static void ExpectError(const char* input, const char* expected) { + V8::Initialize(NULL); v8::HandleScope scope; ZoneScope zone_scope(DELETE_ON_EXIT); FlatStringReader reader(CStrVector(input)); @@ -372,6 +375,7 @@ static RegExpNode* Compile(const char* input, bool multiline) { + V8::Initialize(NULL); FlatStringReader reader(CStrVector(input)); RegExpParseResult result; if (!v8::internal::ParseRegExp(&reader, multiline, &result)) --~--~---------~--~----~------------~-------~--~----~ v8-dev mailing list v8-dev@googlegroups.com http://groups.google.com/group/v8-dev -~----------~----~----~----~------~----~------~--~---