Revision: 18903
Author:   [email protected]
Date:     Wed Jan 29 13:19:19 2014 UTC
Log:      Experimental parser: remove bom handling

[email protected]

BUG=

Review URL: https://codereview.chromium.org/148283007
http://code.google.com/p/v8/source/detail?r=18903

Modified:
 /branches/experimental/parser/src/scanner.h
 /branches/experimental/parser/tools/lexer_generator/code_generator.jinja
 /branches/experimental/parser/tools/lexer_generator/code_generator.py
 /branches/experimental/parser/tools/lexer_generator/transition_keys.py

=======================================
--- /branches/experimental/parser/src/scanner.h Mon Nov 25 10:36:04 2013 UTC
+++ /branches/experimental/parser/src/scanner.h Wed Jan 29 13:19:19 2014 UTC
@@ -139,8 +139,10 @@
   bool IsIdentifierPart(unibrow::uchar c) { return kIsIdentifierPart.get(c); }
   bool IsLineTerminator(unibrow::uchar c) { return kIsLineTerminator.get(c); }
   bool IsWhiteSpace(unibrow::uchar c) { return kIsWhiteSpace.get(c); }
+  bool IsByteOrderMark(unibrow::uchar c) { return c == 0xfffe || c == 0xfeff; }
   bool IsWhiteSpaceNotLineTerminator(unibrow::uchar c) {
-    return !kIsLineTerminator.get(c) && kIsWhiteSpace.get(c);
+    return (kIsWhiteSpace.get(c) && !kIsLineTerminator.get(c)) ||
+        IsByteOrderMark(c);
   }
   bool IsLetter(unibrow::uchar c) { return kIsLetter.get(c); }
   bool IsIdentifierPartNotLetter(unibrow::uchar c) {
=======================================
--- /branches/experimental/parser/tools/lexer_generator/code_generator.jinja Fri Jan 24 08:28:11 2014 UTC
+++ /branches/experimental/parser/tools/lexer_generator/code_generator.jinja Wed Jan 29 13:19:19 2014 UTC
@@ -47,9 +47,7 @@
       {%- endif -%}
     {# These classes require long_char and to be outside the primary range #}
     {%- elif r[0] == 'LONG_CHAR_CLASS' and encoding in ['utf16', 'utf8'] -%}
-      {%- if r[1] == 'byte_order_mark' -%}
-        (long_char == 0xfffe || long_char == 0xfeff)
-      {%- elif r[1] == 'call' -%}
+      {%- if r[1] == 'call' -%}
         unicode_cache_->{{r[2]}}(long_char)
       {%- elif r[1] == 'invert' -%}
         !({{do_key(r[2])}})
=======================================
--- /branches/experimental/parser/tools/lexer_generator/code_generator.py Fri Jan 24 08:36:03 2014 UTC
+++ /branches/experimental/parser/tools/lexer_generator/code_generator.py Wed Jan 29 13:19:19 2014 UTC
@@ -205,15 +205,12 @@
     if not transitions:
       return
     encoding = self.__dfa.encoding()
-    bom = 'byte_order_mark'
     catch_all = 'non_primary_everything_else'
     all_classes = set(encoding.class_name_iter())
-    call_classes = all_classes - set([bom, catch_all])
+    call_classes = all_classes - set([catch_all])
     def remap_transition(class_name):
       if class_name in call_classes:
         return ('LONG_CHAR_CLASS', 'call', self.__call_map[class_name])
-      if class_name == bom:
-        return ('LONG_CHAR_CLASS', class_name)
       raise Exception(class_name)
     long_class_transitions = []
     long_class_map = {}
=======================================
--- /branches/experimental/parser/tools/lexer_generator/transition_keys.py Thu Jan 23 11:39:04 2014 UTC
+++ /branches/experimental/parser/tools/lexer_generator/transition_keys.py Wed Jan 29 13:19:19 2014 UTC
@@ -493,8 +493,7 @@
     super(Utf16Encoding, self).__init__(
       'utf16',
       (0, 255),
-      ['byte_order_mark',
-       'non_primary_whitespace',
+      ['non_primary_whitespace',
        'non_primary_letter',
        'non_primary_identifier_part_not_letter',
        'non_primary_line_terminator',
@@ -502,7 +501,6 @@
     self.add_predefined_range(
       'whitespace',
       [(9, 9), (11, 12), (32, 32), (133, 133), (160, 160),
-       self.class_range('byte_order_mark'),
        self.class_range('non_primary_whitespace')])
     self.add_predefined_range(
       'letter', [
@@ -523,8 +521,7 @@
     super(Utf8Encoding, self).__init__(
       'utf8',
       (0, 127),
-      ['byte_order_mark',
-       'non_primary_whitespace',
+      ['non_primary_whitespace',
        'non_primary_letter',
        'non_primary_identifier_part_not_letter',
        'non_primary_line_terminator',
@@ -532,7 +529,6 @@
     self.add_predefined_range(
       'whitespace',
       [(9, 9), (11, 12), (32, 32),
-        self.class_range('byte_order_mark'),
         self.class_range('non_primary_whitespace')])
     self.add_predefined_range(
       'letter', [(65, 90), (97, 122), self.class_range('non_primary_letter')])

--
--
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev
---
You received this message because you are subscribed to the Google Groups "v8-dev" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to [email protected].
For more options, visit https://groups.google.com/groups/opt_out.

Reply via email to