Revision: 19055
Author: [email protected]
Date: Tue Feb 4 10:36:32 2014 UTC
Log: Experimental parser: cleanup NfaBuilder a bit
[email protected]
BUG=
Review URL: https://codereview.chromium.org/144643008
http://code.google.com/p/v8/source/detail?r=19055
Modified:
/branches/experimental/parser/tools/lexer_generator/nfa_builder.py
/branches/experimental/parser/tools/lexer_generator/rule_parser.py
=======================================
--- /branches/experimental/parser/tools/lexer_generator/nfa_builder.py Tue
Feb 4 08:58:24 2014 UTC
+++ /branches/experimental/parser/tools/lexer_generator/nfa_builder.py Tue
Feb 4 10:36:32 2014 UTC
@@ -41,14 +41,14 @@
class NfaBuilder(object):
- def __init__(self, encoding, character_classes):
+ def __init__(self, encoding, character_classes, subtree_map):
self.__node_number = 0
- self.__operation_map = {}
- self.__members = getmembers(self)
self.__encoding = encoding
self.__character_classes = character_classes
self.__states = []
self.__global_end_node = None
+ self.__operation_map = None
+ self.__subtree_map = subtree_map
def __new_state(self):
self.__node_number += 1
@@ -89,16 +89,18 @@
return (start, ends + [start])
def __repeat(self, param_min, param_max, subtree):
- param_min = int(param_min)
- param_max = int(param_max)
+ 'process regex of form subtree{param_min, param_max}'
+ (param_min, param_max) = (int(param_min), int(param_max))
+ assert param_min > 1 and param_min <= param_max
(start, ends) = self.__process(subtree)
+ # create a chain of param_min subtrees
for i in xrange(1, param_min):
(start2, ends2) = self.__process(subtree)
self.__patch_ends(ends, start2)
ends = ends2
if param_min == param_max:
return (start, ends)
-
+ # join in (param_max - param_min) optional subtrees
midpoints = []
for i in xrange(param_min, param_max):
midpoint = self.__new_state()
@@ -106,7 +108,6 @@
(start2, ends) = self.__process(subtree)
midpoint.add_epsilon_transition(start2)
midpoints.append(midpoint)
-
return (start, ends + midpoints)
def __cat(self, left_tree, right_tree):
@@ -140,12 +141,14 @@
end = self.__new_state()
self.__patch_ends(ends, end)
end.set_action(action)
+ # Force all match actions to be terminal.
if action.match_action():
global_end = self.__global_end()
end.add_epsilon_transition(global_end)
return (start, [end])
def __continue(self, subtree):
+ 'add an epsilon transitions to the start node of the current subtree'
(start, ends) = self.__process(subtree)
state = self.__peek_state()
if not state['start_node']:
@@ -156,8 +159,8 @@
def __unique_key(self, name):
return self.__key_state(TransitionKey.unique(name))
- def __join(self, tree, subtree):
- (subtree_start, subtree_end, nodes_in_subtree) = self.__nfa(subtree)
+ def __join(self, tree, name):
+ (subtree_start, subtree_end, nodes_in_subtree) = self.__nfa(name)
(start, ends) = self.__process(tree)
self.__patch_ends(ends, subtree_start)
if subtree_end:
@@ -165,22 +168,29 @@
else:
return (start, [])
+ def __get_method(self, name):
+ 'lazily build map of all private bound methods and return the one
requested'
+ if not self.__operation_map:
+ prefix = "_NfaBuilder__"
+ self.__operation_map = {name[len(prefix):] : f for (name, f) in
+ filter(lambda (name, f): name.startswith(prefix),
getmembers(self))}
+ return self.__operation_map[name]
+
def __process(self, term):
assert isinstance(term, Term)
- method = "_NfaBuilder__" + term.name().lower()
- if not method in self.__operation_map:
- matches = filter(lambda (name, func): name == method, self.__members)
- assert len(matches) == 1
- self.__operation_map[method] = matches[0][1]
- return self.__operation_map[method](*term.args())
+ method = self.__get_method(term.name().lower())
+ return method(*term.args())
def __patch_ends(self, ends, new_end):
for end in ends:
end.close(new_end)
- def __push_state(self):
+ def __push_state(self, name):
+ for state in self.__states:
+ assert state['name'] != name, 'recursive subgraph'
self.__states.append({
'start_node' : None,
+ 'name' : name
})
def __pop_state(self):
@@ -189,11 +199,13 @@
def __peek_state(self):
return self.__states[-1]
- def __nfa(self, term):
+ def __nfa(self, name):
+ tree = self.__subtree_map[name]
start_node_number = self.__node_number
- self.__push_state()
- (start, ends) = self.__process(term)
+ self.__push_state(name)
+ (start, ends) = self.__process(tree)
state = self.__pop_state()
+ # move saved start node into start
if state['start_node']:
state['start_node'].close(start)
start = state['start_node']
@@ -239,22 +251,19 @@
del transitions[catch_all]
@staticmethod
- def nfa(encoding, character_classes, rule_term):
-
- self = NfaBuilder(encoding, character_classes)
- (start, end, nodes_created) = self.__nfa(rule_term)
-
+ def nfa(encoding, character_classes, subtree_map, name):
+ self = NfaBuilder(encoding, character_classes, subtree_map)
+ (start, end, nodes_created) = self.__nfa(name)
+ # close all ends
global_end_to_return = self.__global_end_node or end
- assert global_end_to_return
+ assert global_end_to_return, "no terminal nodes, default tree
continues"
if end and self.__global_end_node:
end.close(self.__global_end_node)
-
global_end_to_return.close(None)
-
+ # Prepare the nfa
self.__compute_epsilon_closures(start)
f = lambda node, state: self.__replace_catch_all(self.__encoding, node)
Automaton.visit_states(set([start]), f)
-
return Nfa(self.__encoding, start, global_end_to_return, nodes_created)
@staticmethod
@@ -270,8 +279,8 @@
return Term('UNIQUE_KEY', name)
@staticmethod
- def join_subgraph(tree, subtree):
- return Term('JOIN', tree, subtree)
+ def join_subtree(tree, subtree_name):
+ return Term('JOIN', tree, subtree_name)
@staticmethod
def or_terms(terms):
=======================================
--- /branches/experimental/parser/tools/lexer_generator/rule_parser.py Tue
Feb 4 08:58:24 2014 UTC
+++ /branches/experimental/parser/tools/lexer_generator/rule_parser.py Tue
Feb 4 10:36:32 2014 UTC
@@ -249,7 +249,6 @@
def __init__(self, parser_state):
self.__automata = {}
- self.__rule_trees = {}
self.__default_action = None
self.__process_parser_state(parser_state)
@@ -269,22 +268,28 @@
return self.__default_action
class Automata(object):
+ 'a container for the resulting automata, which are lazily built'
- def __init__(self, encoding, character_classes, rule_term):
+ def __init__(self, encoding, character_classes, rule_map, name):
self.__encoding = encoding
self.__character_classes = character_classes
- self.__rule_term = rule_term
+ self.__rule_map = rule_map
+ self.__name = name
self.__nfa = None
self.__dfa = None
self.__minimial_dfa = None
+ def name(self):
+ return self.__name
+
def rule_term(self):
- return self.__rule_term
+ return self.__rule_map[self.__name]
def nfa(self):
if not self.__nfa:
self.__nfa = NfaBuilder.nfa(
- self.__encoding, self.__character_classes, self.__rule_term)
+ self.__encoding, self.__character_classes,
+ self.__rule_map, self.__name)
return self.__nfa
def dfa(self):
@@ -305,37 +310,30 @@
return self.__minimial_dfa
def __process_parser_state(self, parser_state):
+ assert 'default' in parser_state.rules
rule_map = {}
- assert 'default' in parser_state.rules
- def process(subgraph, v):
- graphs = []
- for graph, precedence, action in v['regex']:
+ def process(tree_name, v):
+ trees = []
+ for tree, precedence, action in v['regex']:
(entry_action, match_action, transition) = action
if entry_action or match_action:
- graph = NfaBuilder.add_action(
- graph, Action(entry_action, match_action, precedence))
+ tree = NfaBuilder.add_action(
+ tree, Action(entry_action, match_action, precedence))
if not transition:
pass
elif transition == 'continue':
- assert not subgraph == 'default', 'unimplemented'
- graph = NfaBuilder.add_continue(graph)
+ tree = NfaBuilder.add_continue(tree)
else:
- assert subgraph == 'default', 'unimplemented'
- graph = NfaBuilder.join_subgraph(
- graph, rule_map[transition])
- graphs.append(graph)
- graph = NfaBuilder.or_terms(graphs)
- rule_map[subgraph] = graph
- # process first the subgraphs, then the default graph
+ tree = NfaBuilder.join_subtree(tree, transition)
+ trees.append(tree)
+ rule_map[tree_name] = NfaBuilder.or_terms(trees)
+ # process all subgraphs
for k, v in parser_state.rules.items():
- if k == 'default': continue
process(k, v)
- process('default', parser_state.rules['default'])
# build the automata
- for rule_name, graph in rule_map.items():
- self.__automata[rule_name] = RuleProcessor.Automata(
- parser_state.encoding, parser_state.character_classes, graph)
- self.__rule_trees[rule_name] = graph
+ for name, tree in rule_map.items():
+ self.__automata[name] = RuleProcessor.Automata(
+ parser_state.encoding, parser_state.character_classes, rule_map,
name)
# process default_action
default_action = parser_state.rules['default']['default_action']
self.__default_action = Action(Term.empty_term(), default_action)
--
--
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev
---
You received this message because you are subscribed to the Google Groups "v8-dev" group.
To unsubscribe from this group and stop receiving emails from it, send an email
to [email protected].
For more options, visit https://groups.google.com/groups/opt_out.