Reviewers: marja
Message:
Committed patchset #1 manually as r18903 (presubmit successful).
Description:
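Experimental parser: remove BOM (byte order mark) special-casing from the lexer generator; the check now lives in UnicodeCache::IsWhiteSpaceNotLineTerminator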
[email protected]
BUG=
Committed: https://code.google.com/p/v8/source/detail?r=18903
Please review this at https://codereview.chromium.org/148283007/
SVN Base: https://v8.googlecode.com/svn/branches/experimental/parser
Affected files (+7, -14 lines):
M src/scanner.h
M tools/lexer_generator/code_generator.jinja
M tools/lexer_generator/code_generator.py
M tools/lexer_generator/transition_keys.py
Index: src/scanner.h
diff --git a/src/scanner.h b/src/scanner.h
index
f6f797e22f1e08dce696b9141fb7d7440195a85d..f19689755c0c3bcf215e35d49b56f6a23d6487fb
100644
--- a/src/scanner.h
+++ b/src/scanner.h
@@ -139,8 +139,10 @@ class UnicodeCache {
bool IsIdentifierPart(unibrow::uchar c) { return
kIsIdentifierPart.get(c); }
bool IsLineTerminator(unibrow::uchar c) { return
kIsLineTerminator.get(c); }
bool IsWhiteSpace(unibrow::uchar c) { return kIsWhiteSpace.get(c); }
+ bool IsByteOrderMark(unibrow::uchar c) { return c == 0xfffe || c ==
0xfeff; }
bool IsWhiteSpaceNotLineTerminator(unibrow::uchar c) {
- return !kIsLineTerminator.get(c) && kIsWhiteSpace.get(c);
+ return (kIsWhiteSpace.get(c) && !kIsLineTerminator.get(c)) ||
+ IsByteOrderMark(c);
}
bool IsLetter(unibrow::uchar c) { return kIsLetter.get(c); }
bool IsIdentifierPartNotLetter(unibrow::uchar c) {
Index: tools/lexer_generator/code_generator.jinja
diff --git a/tools/lexer_generator/code_generator.jinja
b/tools/lexer_generator/code_generator.jinja
index
ab1c05720a086b2c524931c40b79f1c190202fd2..a7b759a307b3d2b8a9671e39b8b2e602ef3ea255
100644
--- a/tools/lexer_generator/code_generator.jinja
+++ b/tools/lexer_generator/code_generator.jinja
@@ -47,9 +47,7 @@
{%- endif -%}
{# These classes require long_char and to be outside the primary range
#}
{%- elif r[0] == 'LONG_CHAR_CLASS' and encoding in ['utf16', 'utf8']
-%}
- {%- if r[1] == 'byte_order_mark' -%}
- (long_char == 0xfffe || long_char == 0xfeff)
- {%- elif r[1] == 'call' -%}
+ {%- if r[1] == 'call' -%}
unicode_cache_->{{r[2]}}(long_char)
{%- elif r[1] == 'invert' -%}
!({{do_key(r[2])}})
Index: tools/lexer_generator/code_generator.py
diff --git a/tools/lexer_generator/code_generator.py
b/tools/lexer_generator/code_generator.py
index
c98110fa2b8d84add2cf890956f4d35067b1da29..907d7e6c22c6f94e7a5ce4b6b39d70aa07a2a859
100644
--- a/tools/lexer_generator/code_generator.py
+++ b/tools/lexer_generator/code_generator.py
@@ -205,15 +205,12 @@ class CodeGenerator:
if not transitions:
return
encoding = self.__dfa.encoding()
- bom = 'byte_order_mark'
catch_all = 'non_primary_everything_else'
all_classes = set(encoding.class_name_iter())
- call_classes = all_classes - set([bom, catch_all])
+ call_classes = all_classes - set([catch_all])
def remap_transition(class_name):
if class_name in call_classes:
return ('LONG_CHAR_CLASS', 'call', self.__call_map[class_name])
- if class_name == bom:
- return ('LONG_CHAR_CLASS', class_name)
raise Exception(class_name)
long_class_transitions = []
long_class_map = {}
Index: tools/lexer_generator/transition_keys.py
diff --git a/tools/lexer_generator/transition_keys.py
b/tools/lexer_generator/transition_keys.py
index
e3664d4ea7e1415555c73ed783cd8dd9c1506c5e..33019b99a09ba92c78244a84553debdc265ec1d4
100644
--- a/tools/lexer_generator/transition_keys.py
+++ b/tools/lexer_generator/transition_keys.py
@@ -493,8 +493,7 @@ class Utf16Encoding(KeyEncoding):
super(Utf16Encoding, self).__init__(
'utf16',
(0, 255),
- ['byte_order_mark',
- 'non_primary_whitespace',
+ ['non_primary_whitespace',
'non_primary_letter',
'non_primary_identifier_part_not_letter',
'non_primary_line_terminator',
@@ -502,7 +501,6 @@ class Utf16Encoding(KeyEncoding):
self.add_predefined_range(
'whitespace',
[(9, 9), (11, 12), (32, 32), (133, 133), (160, 160),
- self.class_range('byte_order_mark'),
self.class_range('non_primary_whitespace')])
self.add_predefined_range(
'letter', [
@@ -523,8 +521,7 @@ class Utf8Encoding(KeyEncoding):
super(Utf8Encoding, self).__init__(
'utf8',
(0, 127),
- ['byte_order_mark',
- 'non_primary_whitespace',
+ ['non_primary_whitespace',
'non_primary_letter',
'non_primary_identifier_part_not_letter',
'non_primary_line_terminator',
@@ -532,7 +529,6 @@ class Utf8Encoding(KeyEncoding):
self.add_predefined_range(
'whitespace',
[(9, 9), (11, 12), (32, 32),
- self.class_range('byte_order_mark'),
self.class_range('non_primary_whitespace')])
self.add_predefined_range(
'letter', [(65, 90), (97, 122),
self.class_range('non_primary_letter')])
--
--
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev
---
You received this message because you are subscribed to the Google Groups "v8-dev" group.
To unsubscribe from this group and stop receiving emails from it, send an email
to [email protected].
For more options, visit https://groups.google.com/groups/opt_out.