Revision: 19181
Author: [email protected]
Date: Fri Feb 7 08:31:41 2014 UTC
Log: Experimental parser: clean up RegexParser
[email protected]
BUG=
Review URL: https://codereview.chromium.org/157013003
http://code.google.com/p/v8/source/detail?r=19181
Modified:
/branches/experimental/parser/tools/lexer_generator/nfa_builder.py
/branches/experimental/parser/tools/lexer_generator/regex_parser.py
/branches/experimental/parser/tools/lexer_generator/transition_keys.py
=======================================
--- /branches/experimental/parser/tools/lexer_generator/nfa_builder.py Thu Feb 6 16:38:00 2014 UTC
+++ /branches/experimental/parser/tools/lexer_generator/nfa_builder.py Fri Feb 7 08:31:41 2014 UTC
@@ -295,16 +295,6 @@
def join_subtree(tree, subtree_name):
return Term('JOIN', tree, subtree_name)
- @staticmethod
- def or_terms(terms):
- if len(terms) == 1: return terms[0]
- return Term('OR', *terms) if terms else Term.empty()
-
- @staticmethod
- def cat_terms(terms):
- if len(terms) == 1: return terms[0]
- return Term('CAT', *terms) if terms else Term.empty()
-
__modifer_map = {
'+': 'ONE_OR_MORE',
'?': 'ZERO_OR_ONE',
@@ -314,3 +304,50 @@
@staticmethod
def apply_modifier(modifier, term):
return Term(NfaBuilder.__modifer_map[modifier], term)
+
+ @staticmethod
+ def __flatten_terms(terms, name):
+ for term in terms:
+ assert isinstance(term, Term)
+ if not term:
+ continue
+ if term.name() == name:
+ for arg in term.args():
+ if arg:
+ yield arg
+ else:
+ yield term
+
+ @staticmethod
+ def __flatten_literals(terms):
+ literal = None
+ for term in terms:
+ assert isinstance(term, Term)
+ if not term:
+ continue
+ if term.name() == 'LITERAL':
+ if literal:
+ literal += term.args()[0]
+ else:
+ literal = term.args()[0]
+ else:
+ if literal:
+ yield Term('LITERAL', literal)
+ literal = None
+ if term:
+ yield term
+ if literal:
+ yield Term('LITERAL', literal)
+
+ @staticmethod
+ def or_terms(terms):
+ terms = list(NfaBuilder.__flatten_terms(terms, 'OR'))
+ assert terms
+ return terms[0] if len(terms) == 1 else Term('OR', *terms)
+
+ @staticmethod
+ def cat_terms(terms):
+ terms = NfaBuilder.__flatten_terms(terms, 'CAT')
+ terms = list(NfaBuilder.__flatten_literals(terms))
+ assert terms
+ return terms[0] if len(terms) == 1 else Term('CAT', *terms)
=======================================
--- /branches/experimental/parser/tools/lexer_generator/regex_parser.py Thu Feb 6 19:35:17 2014 UTC
+++ /branches/experimental/parser/tools/lexer_generator/regex_parser.py Fri Feb 7 08:31:41 2014 UTC
@@ -27,9 +27,8 @@
import ply.lex as lex
import ply.yacc as yacc
-from types import ListType, TupleType
-from regex_lexer import RegexLexer
from action import Term
+from nfa_builder import NfaBuilder
class ParserBuilder:
@@ -41,11 +40,10 @@
pass
def warning(self,msg,*args,**kwargs):
- pass
- # assert False, "warning: "+ (msg % args) + "\n"
+ raise Exception("warning: "+ (msg % args) + "\n")
def error(self,msg,*args,**kwargs):
- assert False, "error: "+ (msg % args) + "\n"
+ raise Exception("error: "+ (msg % args) + "\n")
__static_instances = {}
@staticmethod
@@ -105,7 +103,6 @@
'RANGE',
'NOT',
'CLASS_LITERAL',
- 'CLASS_LITERAL_AS_OCTAL',
'CHARACTER_CLASS',
)
@@ -150,6 +147,8 @@
def t_class_CLASS_LITERAL_AS_OCTAL(self, t):
r'\\\d+'
+ t.type = 'CLASS_LITERAL'
+ t.value = chr(int(t.value[1:], 8))
return t
__escaped_class_literals = build_escape_map("^[]-:\\")
@@ -188,7 +187,6 @@
'+': 'ONE_OR_MORE',
'?': 'ZERO_OR_ONE',
'*': 'ZERO_OR_MORE',
- '|': 'OR',
'.': 'ANY',
}
@@ -198,7 +196,7 @@
if len(p) == 2:
p[0] = p[1]
else:
- p[0] = Term(self.token_map[p[2]], p[1], p[3])
+ p[0] = NfaBuilder.or_terms([p[1], p[3]])
def p_fragments(self, p):
'''fragments : fragment
@@ -209,16 +207,16 @@
p[0] = self.__cat(p[1], p[2])
def p_fragment(self, p):
- '''fragment : literal_array maybe_modifier
+ '''fragment : literal maybe_modifier
| class maybe_modifier
| group maybe_modifier
| any maybe_modifier
'''
- if p[2] != None:
- if isinstance(p[2], tuple) and p[2][0] == 'REPEAT':
+ if p[2]:
+ if p[2][0] == 'REPEAT':
p[0] = Term(p[2][0], p[2][1], p[2][2], p[1])
else:
- p[0] = Term(p[2], p[1])
+ p[0] = Term(p[2][0], p[1])
else:
p[0] = p[1]
@@ -226,36 +224,21 @@
'''maybe_modifier : ONE_OR_MORE
| ZERO_OR_ONE
| ZERO_OR_MORE
- | repetition
+ | REPEAT_BEGIN NUMBER REPEAT_END
+ | REPEAT_BEGIN NUMBER COMMA NUMBER REPEAT_END
| empty'''
- p[0] = p[1]
- if p[1] in self.token_map:
- p[0] = self.token_map[p[1]]
-
- def p_repetition(self, p):
- '''repetition : REPEAT_BEGIN NUMBER REPEAT_END
- | REPEAT_BEGIN NUMBER COMMA NUMBER REPEAT_END'''
if len(p) == 4:
p[0] = ("REPEAT", p[2], p[2])
- else:
+ elif len(p) == 5:
p[0] = ("REPEAT", p[2], p[4])
-
- def p_literal_array(self, p):
- '''literal_array : literals'''
- p[0] = Term('LITERAL', ''.join(reversed(p[1])))
-
- def p_literals(self, p):
- '''literals : LITERAL maybe_literals'''
- if not p[2]:
- p[0] = [p[1]]
+ elif p[1]:
+ p[0] = (self.token_map[p[1]],)
else:
- p[2].append(p[1])
- p[0] = p[2]
+ p[0] = None
- def p_maybe_literals(self, p):
- '''maybe_literals : literals
- | empty'''
- p[0] = p[1]
+ def p_literal(self, p):
+ '''literal : LITERAL'''
+ p[0] = Term('LITERAL', p[1])
def p_any(self, p):
'''any : ANY'''
@@ -277,15 +260,12 @@
'''class_content : CLASS_LITERAL RANGE CLASS_LITERAL maybe_class_content
| CLASS_LITERAL maybe_class_content
| CHARACTER_CLASS maybe_class_content
- | CLASS_LITERAL_AS_OCTAL maybe_class_content
'''
if len(p) == 5:
left = Term("RANGE", p[1], p[3])
else:
if len(p[1]) == 1:
left = Term('LITERAL', p[1])
- elif p[1][0] == '\\':
- left = Term('LITERAL', chr(int(p[1][1:], 8)))
else:
left = Term('CHARACTER_CLASS', p[1][1:-1])
p[0] = self.__cat(left, p[len(p)-1])
@@ -304,7 +284,7 @@
@staticmethod
def __cat(left, right):
assert left
- return left if not right else Term('CAT', left, right)
+ return NfaBuilder.cat_terms([left] if not right else [left, right])
@staticmethod
def parse(string):
=======================================
--- /branches/experimental/parser/tools/lexer_generator/transition_keys.py Mon Feb 3 21:28:33 2014 UTC
+++ /branches/experimental/parser/tools/lexer_generator/transition_keys.py Fri Feb 7 08:31:41 2014 UTC
@@ -188,9 +188,10 @@
if key == 'RANGE':
ranges.append((ord(args[0]), ord(args[1])))
elif key == 'LITERAL':
- ranges.append((ord(args[0]), ord(args[0])))
+ for char in args[0]:
+ ranges.append((ord(char), ord(char)))
elif key == 'CAT':
- for x in [args[0], args[1]]:
+ for x in args:
TransitionKey.__process_term(encoding, x, ranges, key_map)
elif key == 'CHARACTER_CLASS':
class_name = args[0]
--
--
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev
---
You received this message because you are subscribed to the Google Groups "v8-dev" group.
To unsubscribe from this group and stop receiving emails from it, send an email
to [email protected].
For more options, visit https://groups.google.com/groups/opt_out.