Reviewers: ,

Message:
Committed patchset #2 manually as r19989 (tree was closed).

Description:
Experimental parser: more correct utf8 handling

[email protected]

BUG=

Committed: https://code.google.com/p/v8/source/detail?r=19989

Please review this at https://codereview.chromium.org/201693003/

SVN Base: https://v8.googlecode.com/svn/branches/experimental/parser

Affected files (+26, -4 lines):
  M src/lexer/lexer.cc


Index: src/lexer/lexer.cc
diff --git a/src/lexer/lexer.cc b/src/lexer/lexer.cc
index 6e74bb8032b05821ac797caff727e933788e7c64..9140f99b5be1f9e801af2961e5f7b953cbff18f4 100644
--- a/src/lexer/lexer.cc
+++ b/src/lexer/lexer.cc
@@ -164,6 +164,30 @@ Token::Value LexerBase::Next() {
 }


+static uint32_t Advance(const int8_t** buffer, const int8_t* end) {
+  unsigned bytes_read = 0;
+  uint32_t c = unibrow::Utf8::ValueOf(reinterpret_cast<const uint8_t*>(*buffer),
+                                      end - *buffer,
+                                      &bytes_read);
+  *buffer += bytes_read;
+  return c;
+}
+
+
+static inline uint32_t Advance(const uint8_t** buffer, const uint8_t* end) {
+  uint32_t c = **buffer;
+  (*buffer)++;
+  return c;
+}
+
+
+static inline uint32_t Advance(const uint16_t** buffer, const uint16_t* end) {
+  uint32_t c = **buffer;
+  (*buffer)++;
+  return c;
+}
+
+
 template<typename Char>
 Lexer<Char>::Lexer(UnicodeCache* unicode_cache,
                    const Char* source_ptr,
@@ -654,7 +678,7 @@ bool Lexer<Char>::CopyToLiteralBuffer(const TokenDesc& token,
   if (token.has_escapes) {
     for (const Char* cursor = start; cursor != end;) {
       if (*cursor != '\\') {
-        literal->buffer.AddChar(*cursor++);
+        literal->buffer.AddChar(Advance(&cursor, end));
       } else if (token.token == Token::IDENTIFIER) {
         uc32 c;
         cursor = ScanIdentifierUnicodeEscape(cursor, end, &c);
@@ -668,10 +692,8 @@ bool Lexer<Char>::CopyToLiteralBuffer(const TokenDesc& token,
       }
     }
   } else {
-    // TODO(dcarney): This can only happen for utf8 strings
-    // use a helper function.
     for (const Char* cursor = start; cursor != end;) {
-        literal->buffer.AddChar(*cursor++);
+      literal->buffer.AddChar(Advance(&cursor, end));
     }
   }
   literal->SetStringFromLiteralBuffer();


--
--
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev
---
You received this message because you are subscribed to the Google Groups "v8-dev" group.
To unsubscribe from this group and stop receiving emails from it, send an email
to [email protected].
For more options, visit https://groups.google.com/d/optout.

Reply via email to