Revision: 19181
Author:   [email protected]
Date:     Fri Feb  7 08:31:41 2014 UTC
Log:      Experimental parser: cleanup RegexParser

[email protected]

BUG=

Review URL: https://codereview.chromium.org/157013003
http://code.google.com/p/v8/source/detail?r=19181

Modified:
 /branches/experimental/parser/tools/lexer_generator/nfa_builder.py
 /branches/experimental/parser/tools/lexer_generator/regex_parser.py
 /branches/experimental/parser/tools/lexer_generator/transition_keys.py

=======================================
--- /branches/experimental/parser/tools/lexer_generator/nfa_builder.py Thu Feb 6 16:38:00 2014 UTC
+++ /branches/experimental/parser/tools/lexer_generator/nfa_builder.py Fri Feb 7 08:31:41 2014 UTC
@@ -295,16 +295,6 @@
   def join_subtree(tree, subtree_name):
     return Term('JOIN', tree, subtree_name)

-  @staticmethod
-  def or_terms(terms):
-    if len(terms) == 1: return terms[0]
-    return Term('OR', *terms) if terms else Term.empty()
-
-  @staticmethod
-  def cat_terms(terms):
-    if len(terms) == 1: return terms[0]
-    return Term('CAT', *terms) if terms else Term.empty()
-
   __modifer_map = {
     '+': 'ONE_OR_MORE',
     '?': 'ZERO_OR_ONE',
@@ -314,3 +304,50 @@
   @staticmethod
   def apply_modifier(modifier, term):
     return Term(NfaBuilder.__modifer_map[modifier], term)
+
+  @staticmethod
+  def __flatten_terms(terms, name):
+    for term in terms:
+      assert isinstance(term, Term)
+      if not term:
+        continue
+      if term.name() == name:
+        for arg in term.args():
+          if arg:
+            yield arg
+      else:
+        yield term
+
+  @staticmethod
+  def __flatten_literals(terms):
+    literal = None
+    for term in terms:
+      assert isinstance(term, Term)
+      if not term:
+        continue
+      if term.name() == 'LITERAL':
+        if literal:
+          literal += term.args()[0]
+        else:
+          literal = term.args()[0]
+      else:
+        if literal:
+          yield Term('LITERAL', literal)
+          literal = None
+        if term:
+          yield term
+    if literal:
+      yield Term('LITERAL', literal)
+
+  @staticmethod
+  def or_terms(terms):
+    terms = list(NfaBuilder.__flatten_terms(terms, 'OR'))
+    assert terms
+    return terms[0] if len(terms) == 1 else Term('OR', *terms)
+
+  @staticmethod
+  def cat_terms(terms):
+    terms = NfaBuilder.__flatten_terms(terms, 'CAT')
+    terms = list(NfaBuilder.__flatten_literals(terms))
+    assert terms
+    return terms[0] if len(terms) == 1 else Term('CAT', *terms)
=======================================
--- /branches/experimental/parser/tools/lexer_generator/regex_parser.py Thu Feb 6 19:35:17 2014 UTC
+++ /branches/experimental/parser/tools/lexer_generator/regex_parser.py Fri Feb 7 08:31:41 2014 UTC
@@ -27,9 +27,8 @@

 import ply.lex as lex
 import ply.yacc as yacc
-from types import ListType, TupleType
-from regex_lexer import RegexLexer
 from action import Term
+from nfa_builder import NfaBuilder

 class ParserBuilder:

@@ -41,11 +40,10 @@
       pass

     def warning(self,msg,*args,**kwargs):
-      pass
-      # assert False, "warning: "+ (msg % args) + "\n"
+      raise Exception("warning: "+ (msg % args) + "\n")

     def error(self,msg,*args,**kwargs):
-      assert False, "error: "+ (msg % args) + "\n"
+      raise Exception("error: "+ (msg % args) + "\n")

   __static_instances = {}
   @staticmethod
@@ -105,7 +103,6 @@
     'RANGE',
     'NOT',
     'CLASS_LITERAL',
-    'CLASS_LITERAL_AS_OCTAL',
     'CHARACTER_CLASS',
   )

@@ -150,6 +147,8 @@

   def t_class_CLASS_LITERAL_AS_OCTAL(self, t):
     r'\\\d+'
+    t.type = 'CLASS_LITERAL'
+    t.value = chr(int(t.value[1:], 8))
     return t

   __escaped_class_literals = build_escape_map("^[]-:\\")
@@ -188,7 +187,6 @@
     '+': 'ONE_OR_MORE',
     '?': 'ZERO_OR_ONE',
     '*': 'ZERO_OR_MORE',
-    '|': 'OR',
     '.': 'ANY',
   }

@@ -198,7 +196,7 @@
     if len(p) == 2:
       p[0] = p[1]
     else:
-      p[0] = Term(self.token_map[p[2]], p[1], p[3])
+      p[0] = NfaBuilder.or_terms([p[1], p[3]])

   def p_fragments(self, p):
     '''fragments : fragment
@@ -209,16 +207,16 @@
       p[0] = self.__cat(p[1], p[2])

   def p_fragment(self, p):
-    '''fragment : literal_array maybe_modifier
+    '''fragment : literal maybe_modifier
                 | class maybe_modifier
                 | group maybe_modifier
                 | any maybe_modifier
     '''
-    if p[2] != None:
-      if isinstance(p[2], tuple) and p[2][0] == 'REPEAT':
+    if p[2]:
+      if p[2][0] == 'REPEAT':
         p[0] = Term(p[2][0], p[2][1], p[2][2], p[1])
       else:
-        p[0] = Term(p[2], p[1])
+        p[0] = Term(p[2][0], p[1])
     else:
       p[0] = p[1]

@@ -226,36 +224,21 @@
     '''maybe_modifier : ONE_OR_MORE
                       | ZERO_OR_ONE
                       | ZERO_OR_MORE
-                      | repetition
+                      | REPEAT_BEGIN NUMBER REPEAT_END
+                      | REPEAT_BEGIN NUMBER COMMA NUMBER REPEAT_END
                       | empty'''
-    p[0] = p[1]
-    if p[1] in self.token_map:
-      p[0] = self.token_map[p[1]]
-
-  def p_repetition(self, p):
-    '''repetition : REPEAT_BEGIN NUMBER REPEAT_END
-                  | REPEAT_BEGIN NUMBER COMMA NUMBER REPEAT_END'''
     if len(p) == 4:
       p[0] = ("REPEAT", p[2], p[2])
-    else:
+    elif len(p) == 5:
       p[0] = ("REPEAT", p[2], p[4])
-
-  def p_literal_array(self, p):
-    '''literal_array : literals'''
-    p[0] = Term('LITERAL', ''.join(reversed(p[1])))
-
-  def p_literals(self, p):
-    '''literals : LITERAL maybe_literals'''
-    if not p[2]:
-      p[0] = [p[1]]
+    elif p[1]:
+      p[0] = (self.token_map[p[1]],)
     else:
-      p[2].append(p[1])
-      p[0] = p[2]
+      p[0] = None

-  def p_maybe_literals(self, p):
-    '''maybe_literals : literals
-                      |  empty'''
-    p[0] = p[1]
+  def p_literal(self, p):
+    '''literal : LITERAL'''
+    p[0] = Term('LITERAL', p[1])

   def p_any(self, p):
     '''any : ANY'''
@@ -277,15 +260,12 @@
     '''class_content : CLASS_LITERAL RANGE CLASS_LITERAL maybe_class_content
                      | CLASS_LITERAL maybe_class_content
                      | CHARACTER_CLASS maybe_class_content
-                     | CLASS_LITERAL_AS_OCTAL maybe_class_content
     '''
     if len(p) == 5:
       left = Term("RANGE", p[1], p[3])
     else:
       if len(p[1]) == 1:
         left = Term('LITERAL', p[1])
-      elif p[1][0] == '\\':
-        left = Term('LITERAL', chr(int(p[1][1:], 8)))
       else:
         left = Term('CHARACTER_CLASS', p[1][1:-1])
     p[0] = self.__cat(left, p[len(p)-1])
@@ -304,7 +284,7 @@
   @staticmethod
   def __cat(left, right):
     assert left
-    return left if not right else Term('CAT', left, right)
+    return NfaBuilder.cat_terms([left] if not right else [left, right])

   @staticmethod
   def parse(string):
=======================================
--- /branches/experimental/parser/tools/lexer_generator/transition_keys.py Mon Feb 3 21:28:33 2014 UTC
+++ /branches/experimental/parser/tools/lexer_generator/transition_keys.py Fri Feb 7 08:31:41 2014 UTC
@@ -188,9 +188,10 @@
     if key == 'RANGE':
       ranges.append((ord(args[0]), ord(args[1])))
     elif key == 'LITERAL':
-      ranges.append((ord(args[0]), ord(args[0])))
+      for char in args[0]:
+        ranges.append((ord(char), ord(char)))
     elif key == 'CAT':
-      for x in [args[0], args[1]]:
+      for x in args:
         TransitionKey.__process_term(encoding, x, ranges, key_map)
     elif key == 'CHARACTER_CLASS':
       class_name = args[0]

--
--
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev
---
You received this message because you are subscribed to the Google Groups "v8-dev" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to [email protected].
For more options, visit https://groups.google.com/groups/opt_out.

Reply via email to