Revision: 19055
Author:   [email protected]
Date:     Tue Feb  4 10:36:32 2014 UTC
Log:      Experimental parser: cleanup NfaBuilder a bit

[email protected]

BUG=

Review URL: https://codereview.chromium.org/144643008
http://code.google.com/p/v8/source/detail?r=19055

Modified:
 /branches/experimental/parser/tools/lexer_generator/nfa_builder.py
 /branches/experimental/parser/tools/lexer_generator/rule_parser.py

=======================================
--- /branches/experimental/parser/tools/lexer_generator/nfa_builder.py Tue Feb 4 08:58:24 2014 UTC +++ /branches/experimental/parser/tools/lexer_generator/nfa_builder.py Tue Feb 4 10:36:32 2014 UTC
@@ -41,14 +41,14 @@

 class NfaBuilder(object):

-  def __init__(self, encoding, character_classes):
+  def __init__(self, encoding, character_classes, subtree_map):
     self.__node_number = 0
-    self.__operation_map = {}
-    self.__members = getmembers(self)
     self.__encoding = encoding
     self.__character_classes = character_classes
     self.__states = []
     self.__global_end_node = None
+    self.__operation_map = None
+    self.__subtree_map = subtree_map

   def __new_state(self):
     self.__node_number += 1
@@ -89,16 +89,18 @@
     return (start, ends + [start])

   def __repeat(self, param_min, param_max, subtree):
-    param_min = int(param_min)
-    param_max = int(param_max)
+    'process regex of form subtree{param_min, param_max}'
+    (param_min, param_max) = (int(param_min), int(param_max))
+    assert param_min > 1 and param_min <= param_max
     (start, ends) = self.__process(subtree)
+    # create a chain of param_min subtrees
     for i in xrange(1, param_min):
       (start2, ends2) = self.__process(subtree)
       self.__patch_ends(ends, start2)
       ends = ends2
     if param_min == param_max:
       return (start, ends)
-
+    # join in (param_max - param_min) optional subtrees
     midpoints = []
     for i in xrange(param_min, param_max):
       midpoint =  self.__new_state()
@@ -106,7 +108,6 @@
       (start2, ends) = self.__process(subtree)
       midpoint.add_epsilon_transition(start2)
       midpoints.append(midpoint)
-
     return (start, ends + midpoints)

   def __cat(self, left_tree, right_tree):
@@ -140,12 +141,14 @@
     end = self.__new_state()
     self.__patch_ends(ends, end)
     end.set_action(action)
+    # Force all match actions to be terminal.
     if action.match_action():
       global_end = self.__global_end()
       end.add_epsilon_transition(global_end)
     return (start, [end])

   def __continue(self, subtree):
+    'add an epsilon transitions to the start node of the current subtree'
     (start, ends) = self.__process(subtree)
     state = self.__peek_state()
     if not state['start_node']:
@@ -156,8 +159,8 @@
   def __unique_key(self, name):
     return self.__key_state(TransitionKey.unique(name))

-  def __join(self, tree, subtree):
-    (subtree_start, subtree_end, nodes_in_subtree) = self.__nfa(subtree)
+  def __join(self, tree, name):
+    (subtree_start, subtree_end, nodes_in_subtree) = self.__nfa(name)
     (start, ends) = self.__process(tree)
     self.__patch_ends(ends, subtree_start)
     if subtree_end:
@@ -165,22 +168,29 @@
     else:
       return (start, [])

+  def __get_method(self, name):
+ 'lazily build map of all private bound methods and return the one requested'
+    if not self.__operation_map:
+      prefix = "_NfaBuilder__"
+      self.__operation_map = {name[len(prefix):] : f for (name, f) in
+ filter(lambda (name, f): name.startswith(prefix), getmembers(self))}
+    return self.__operation_map[name]
+
   def __process(self, term):
     assert isinstance(term, Term)
-    method = "_NfaBuilder__" + term.name().lower()
-    if not method in self.__operation_map:
-      matches = filter(lambda (name, func): name == method, self.__members)
-      assert len(matches) == 1
-      self.__operation_map[method] = matches[0][1]
-    return self.__operation_map[method](*term.args())
+    method = self.__get_method(term.name().lower())
+    return method(*term.args())

   def __patch_ends(self, ends, new_end):
     for end in ends:
       end.close(new_end)

-  def __push_state(self):
+  def __push_state(self, name):
+    for state in self.__states:
+      assert state['name'] != name, 'recursive subgraph'
     self.__states.append({
       'start_node' : None,
+      'name' : name
     })

   def __pop_state(self):
@@ -189,11 +199,13 @@
   def __peek_state(self):
     return self.__states[-1]

-  def __nfa(self, term):
+  def __nfa(self, name):
+    tree = self.__subtree_map[name]
     start_node_number = self.__node_number
-    self.__push_state()
-    (start, ends) = self.__process(term)
+    self.__push_state(name)
+    (start, ends) = self.__process(tree)
     state = self.__pop_state()
+    # move saved start node into start
     if state['start_node']:
       state['start_node'].close(start)
       start = state['start_node']
@@ -239,22 +251,19 @@
     del transitions[catch_all]

   @staticmethod
-  def nfa(encoding, character_classes, rule_term):
-
-    self = NfaBuilder(encoding, character_classes)
-    (start, end, nodes_created) = self.__nfa(rule_term)
-
+  def nfa(encoding, character_classes, subtree_map, name):
+    self = NfaBuilder(encoding, character_classes, subtree_map)
+    (start, end, nodes_created) = self.__nfa(name)
+    # close all ends
     global_end_to_return = self.__global_end_node or end
-    assert global_end_to_return
+ assert global_end_to_return, "no terminal nodes, default tree continues"
     if end and self.__global_end_node:
       end.close(self.__global_end_node)
-
     global_end_to_return.close(None)
-
+    # Prepare the nfa
     self.__compute_epsilon_closures(start)
     f = lambda node, state: self.__replace_catch_all(self.__encoding, node)
     Automaton.visit_states(set([start]), f)
-
     return Nfa(self.__encoding, start, global_end_to_return, nodes_created)

   @staticmethod
@@ -270,8 +279,8 @@
     return Term('UNIQUE_KEY', name)

   @staticmethod
-  def join_subgraph(tree, subtree):
-    return Term('JOIN', tree, subtree)
+  def join_subtree(tree, subtree_name):
+    return Term('JOIN', tree, subtree_name)

   @staticmethod
   def or_terms(terms):
=======================================
--- /branches/experimental/parser/tools/lexer_generator/rule_parser.py Tue Feb 4 08:58:24 2014 UTC +++ /branches/experimental/parser/tools/lexer_generator/rule_parser.py Tue Feb 4 10:36:32 2014 UTC
@@ -249,7 +249,6 @@

   def __init__(self, parser_state):
     self.__automata = {}
-    self.__rule_trees = {}
     self.__default_action = None
     self.__process_parser_state(parser_state)

@@ -269,22 +268,28 @@
     return self.__default_action

   class Automata(object):
+    'a container for the resulting automata, which are lazily built'

-    def __init__(self, encoding, character_classes, rule_term):
+    def __init__(self, encoding, character_classes, rule_map, name):
       self.__encoding = encoding
       self.__character_classes = character_classes
-      self.__rule_term = rule_term
+      self.__rule_map = rule_map
+      self.__name = name
       self.__nfa = None
       self.__dfa = None
       self.__minimial_dfa = None

+    def name(self):
+      return self.__name
+
     def rule_term(self):
-      return self.__rule_term
+      return self.__rule_map[self.__name]

     def nfa(self):
       if not self.__nfa:
         self.__nfa = NfaBuilder.nfa(
-          self.__encoding, self.__character_classes, self.__rule_term)
+          self.__encoding, self.__character_classes,
+          self.__rule_map, self.__name)
       return self.__nfa

     def dfa(self):
@@ -305,37 +310,30 @@
       return self.__minimial_dfa

   def __process_parser_state(self, parser_state):
+    assert 'default' in parser_state.rules
     rule_map = {}
-    assert 'default' in parser_state.rules
-    def process(subgraph, v):
-      graphs = []
-      for graph, precedence, action in v['regex']:
+    def process(tree_name, v):
+      trees = []
+      for tree, precedence, action in v['regex']:
         (entry_action, match_action, transition) = action
         if entry_action or match_action:
-          graph = NfaBuilder.add_action(
-            graph, Action(entry_action, match_action, precedence))
+          tree = NfaBuilder.add_action(
+            tree, Action(entry_action, match_action, precedence))
         if not transition:
           pass
         elif transition == 'continue':
-          assert not subgraph == 'default', 'unimplemented'
-          graph = NfaBuilder.add_continue(graph)
+          tree = NfaBuilder.add_continue(tree)
         else:
-          assert subgraph == 'default', 'unimplemented'
-          graph = NfaBuilder.join_subgraph(
-            graph, rule_map[transition])
-        graphs.append(graph)
-      graph = NfaBuilder.or_terms(graphs)
-      rule_map[subgraph] = graph
-    # process first the subgraphs, then the default graph
+          tree = NfaBuilder.join_subtree(tree, transition)
+        trees.append(tree)
+      rule_map[tree_name] = NfaBuilder.or_terms(trees)
+    # process all subgraphs
     for k, v in parser_state.rules.items():
-      if k == 'default': continue
       process(k, v)
-    process('default', parser_state.rules['default'])
     # build the automata
-    for rule_name, graph in rule_map.items():
-      self.__automata[rule_name] = RuleProcessor.Automata(
-        parser_state.encoding, parser_state.character_classes, graph)
-      self.__rule_trees[rule_name] = graph
+    for name, tree in rule_map.items():
+      self.__automata[name] = RuleProcessor.Automata(
+ parser_state.encoding, parser_state.character_classes, rule_map, name)
     # process default_action
     default_action = parser_state.rules['default']['default_action']
     self.__default_action = Action(Term.empty_term(), default_action)

--
--
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev
--- You received this message because you are subscribed to the Google Groups "v8-dev" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to [email protected].
For more options, visit https://groups.google.com/groups/opt_out.

Reply via email to