Reviewers: ,

Message:
Committed patchset #3 manually as r17882.

Description:
Experimental lexer generator: refactor(TransitionKey) += comments.

- Add __init__
- Add comments
- Rename key_matches -> is_superset_of
- reduce -> max

BUG=

Committed: https://code.google.com/p/v8/source/detail?r=17882

Please review this at https://codereview.chromium.org/63943005/

SVN Base: https://v8.googlecode.com/svn/branches/experimental/parser

Affected files (+55, -22 lines):
  M tools/lexer_generator/dfa.py
  M tools/lexer_generator/nfa.py
  M tools/lexer_generator/transition_keys.py


Index: tools/lexer_generator/dfa.py
diff --git a/tools/lexer_generator/dfa.py b/tools/lexer_generator/dfa.py
index d648cf6343977379331f05d7bd47ef4039abd4a2..d191e0518550f014d15076cf259b102dfd77786b 100644
--- a/tools/lexer_generator/dfa.py
+++ b/tools/lexer_generator/dfa.py
@@ -66,7 +66,7 @@ class DfaState(AutomatonState):
     return self.__matches(lambda k, v : k.matches_char(v), value)

   def key_matches(self, value):
-    return self.__matches(lambda k, v : k.matches_key(v), value)
+    return self.__matches(lambda k, v : k.is_superset_of_key(v), value)

 class Dfa(Automaton):

Index: tools/lexer_generator/nfa.py
diff --git a/tools/lexer_generator/nfa.py b/tools/lexer_generator/nfa.py
index 73ff0ceb4f6bdcf05a7dda4adf2910138d6f8c0b..24870e5e9036c90b8264b28526842bfa5a5b867f 100644
--- a/tools/lexer_generator/nfa.py
+++ b/tools/lexer_generator/nfa.py
@@ -105,7 +105,7 @@ class NfaState(AutomatonState):
     return self.__matches(lambda k, v : k.matches_char(v), value)

   def key_matches(self, value):
-    return self.__matches(lambda k, v : k.matches_key(v), value)
+    return self.__matches(lambda k, v : k.is_superset_of_key(v), value)

 class Nfa(Automaton):

Index: tools/lexer_generator/transition_keys.py
diff --git a/tools/lexer_generator/transition_keys.py b/tools/lexer_generator/transition_keys.py index 98332ec846573ddd05dd6dc7ce19963da01895e1..d0eb5b1f16d069ebe5862b749aa907f41ab02ba0 100644
--- a/tools/lexer_generator/transition_keys.py
+++ b/tools/lexer_generator/transition_keys.py
@@ -28,6 +28,13 @@
 from string import printable

 class TransitionKey:
+  '''Represents a transition from a state in DFA or NFA to another state.
+
+ A transition key has a list of character ranges and a list of class ranges
+  (e.g., "whitespace"), defining for which characters the transition
+  happens. When we generate code based on the transition key, the character
+ ranges generate simple checks and the class ranges generate more complicated
+  conditions, e.g., function calls.'''

   __class_bounds = {
     'latin_1' : (1, 255),
@@ -39,7 +46,7 @@ class TransitionKey:
     'zero' : (259, 259),
   }
   __lower_bound = 1
- __upper_bound = reduce(lambda acc, (k, v): max(acc, v[1]), __class_bounds.items(), 0)
+  __upper_bound = max(__class_bounds.values(), key=lambda item: item[1])[1]

   __cached_keys = {}

@@ -78,19 +85,6 @@ class TransitionKey:
         assert r_is_class
       last = r

-  @staticmethod
-  def __create(ranges, name = None):
-    if not ranges:
-      assert name == 'epsilon'
-      assert not name in TransitionKey.__cached_keys
-    else:
-      TransitionKey.__verify_ranges(ranges, True)
-    key = TransitionKey()
-    key.__ranges = tuple(ranges) # immutable
-    key.__name = name
-    key.__cached_hash = None
-    return key
-
   def __is_unique(self):
return len(self.__ranges) == 1 and self.__is_unique_range(self.__ranges[0])

@@ -98,15 +92,17 @@ class TransitionKey:
   def __cached_key(name, bounds_getter):
     if not name in TransitionKey.__cached_keys:
       bounds = bounds_getter(name)
- TransitionKey.__cached_keys[name] = TransitionKey.__create(bounds, name)
+      TransitionKey.__cached_keys[name] = TransitionKey(bounds, name)
     return TransitionKey.__cached_keys[name]

   @staticmethod
   def epsilon():
+    '''Returns a TransitionKey for the epsilon (empty) transition.'''
     return TransitionKey.__cached_key('epsilon', lambda name : [])

   @staticmethod
   def any():
+    '''Returns a TransitionKey which matches any character.'''
     def bounds_getter(name):
       bounds = TransitionKey.__class_bounds.values()
       bounds.sort()
@@ -115,12 +111,17 @@ class TransitionKey:

   @staticmethod
   def single_char(char):
+    '''Returns a TransitionKey for a single-character transition.'''
     char = ord(char)
     assert TransitionKey.__in_latin_1(char)
-    return TransitionKey.__create([(char, char)])
+    return TransitionKey([(char, char)])

   @staticmethod
   def unique(name):
+ '''Returns a unique TransitionKey for the given name (and creates it if it + doesn't exist yet). The TransitionKey won't have any real character range, + but a newly-created "mock" character range which is separate from all other
+    character ranges.'''
     def get_bounds(name):
       bound = TransitionKey.__unique_key_counter
       TransitionKey.__unique_key_counter -= 1
@@ -157,6 +158,13 @@ class TransitionKey:

   @staticmethod
   def character_class(graph, key_map):
+    '''Processes 'graph' (a representation of a character class in the rule
+    file), and constructs a TransitionKey based on it. 'key_map' contains
+ already constructed aliases for character classes (they can be used in the + new character class). It is a map from strings (character class names) to
+    TransitionKeys. For example, graph might represent the character class
+ [a-z:digit:] where 'digit' is a previously constructed characte class found
+    in "key_map".'''
     ranges = []
     assert graph[0] == 'CLASS' or graph[0] == 'NOT_CLASS'
     invert = graph[0] == 'NOT_CLASS'
@@ -170,7 +178,8 @@ class TransitionKey:
       if r[0] <= char and char <= r[1]: return True
     return False

-  def matches_key(self, key):
+  def is_superset_of_key(self, key):
+    '''Returns true if 'key' is a sub-key of this TransitionKey.'''
     assert isinstance(key, self.__class__)
     assert key != TransitionKey.epsilon() and not key.__is_unique()
     assert len(key.__ranges) == 1
@@ -247,6 +256,16 @@ class TransitionKey:
     else:
       return '[%s-%s]' % (to_str(r[0]), to_str(r[1]))

+  def __init__(self, ranges, name = None):
+    if not ranges:
+      assert name == 'epsilon'
+      assert not name in TransitionKey.__cached_keys
+    else:
+      TransitionKey.__verify_ranges(ranges, True)
+    self.__name = name
+    self.__ranges = tuple(ranges) # immutable
+    self.__cached_hash = None
+
   def __str__(self):
     if self.__name:
       return self.__name
@@ -254,6 +273,9 @@ class TransitionKey:

   @staticmethod
   def __disjoint_keys(range_map):
+    '''Takes a set of possibly overlapping ranges, returns a list of ranges
+    which don't overlap and which cover the same points as the original
+    set. range_map is a map from lower bounds to a list of upper bounds.'''
     sort = lambda x : sorted(set(x))
     range_map = sorted(map(lambda (k, v): (k, sort(v)), range_map.items()))
     ranges = []
@@ -294,16 +316,24 @@ class TransitionKey:

   @staticmethod
   def disjoint_keys(key_set):
+ '''Takes a set of possibly overlapping TransitionKeys, returns a list of + TransitionKeys which don't overlap and whose union is the same as the union
+    of the original key_set. key_set is a set of TransitionKeys.'''
     ranges = TransitionKey.__disjoint_ranges_from_key_set(key_set)
-    return map(lambda x : TransitionKey.__create([x]), ranges)
+    return map(lambda x : TransitionKey([x]), ranges)

   @staticmethod
   def inverse_key(key_set):
+ '''Returns a TransitionKey which matches represents the inverse of the union + of 'key_set'. The TransitionKeys contain a set of character ranges and a set + of classes. The character ranges are inverted in relation to the latin_1 + character range, and the character classes are inverted in relation to all
+    character classes in __class_bounds.'''
     ranges = TransitionKey.__disjoint_ranges_from_key_set(key_set)
     inverse = TransitionKey.__invert_ranges(ranges)
     if not inverse:
       return None
-    return TransitionKey.__create(inverse)
+    return TransitionKey(inverse)

   @staticmethod
   def __key_from_ranges(invert, ranges):
@@ -316,7 +346,7 @@ class TransitionKey:
     ranges = TransitionKey.__merge_ranges(ranges)
     if invert:
       ranges = TransitionKey.__invert_ranges(ranges)
-    return TransitionKey.__create(ranges)
+    return TransitionKey(ranges)

   @staticmethod
   def __merge_ranges(ranges):
@@ -344,6 +374,9 @@ class TransitionKey:
   def __invert_ranges(ranges):
     inverted = []
     last = None
+    # Extract character classes (as opposed to character ranges) from
+ # __class_bounds. Since latin_1 is the only real character range, we can do
+    # this.
     classes = set(TransitionKey.__class_bounds.values())
     latin_1 = TransitionKey.__class_bounds['latin_1']
     classes.remove(latin_1)


--
--
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev
--- You received this message because you are subscribed to the Google Groups "v8-dev" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to [email protected].
For more options, visit https://groups.google.com/groups/opt_out.

Reply via email to