[pypy-commit] pypy py3.5-fstring-pep498: in-progress

arigo Sun, 22 Jan 2017 11:29:38 -0800

Author: Armin Rigo <[email protected]>
Branch: py3.5-fstring-pep498
Changeset: r89692:533c8eeffd5b
Date: 2017-01-22 20:26 +0100
http://bitbucket.org/pypy/pypy/changeset/533c8eeffd5b/


Log:    in-progress

diff --git a/pypy/interpreter/astcompiler/ast.py b/pypy/interpreter/astcompiler/ast.py
--- a/pypy/interpreter/astcompiler/ast.py
+++ b/pypy/interpreter/astcompiler/ast.py
@@ -1670,6 +1670,10 @@
             return Num.from_object(space, w_node)
         if space.isinstance_w(w_node, get(space).w_Str):
             return Str.from_object(space, w_node)
+        if space.isinstance_w(w_node, get(space).w_FormattedValue):
+            return FormattedValue.from_object(space, w_node)
+        if space.isinstance_w(w_node, get(space).w_JoinedStr):
+            return JoinedStr.from_object(space, w_node)
         if space.isinstance_w(w_node, get(space).w_Bytes):
             return Bytes.from_object(space, w_node)
         if space.isinstance_w(w_node, get(space).w_NameConstant):
@@ -2554,6 +2558,98 @@
 State.ast_type('Str', 'expr', ['s'])
 
 
+class FormattedValue(expr):
+
+    def __init__(self, value, conversion, format_spec, lineno, col_offset):
+        self.value = value
+        self.conversion = conversion
+        self.format_spec = format_spec
+        expr.__init__(self, lineno, col_offset)
+
+    def walkabout(self, visitor):
+        visitor.visit_FormattedValue(self)
+
+    def mutate_over(self, visitor):
+        self.value = self.value.mutate_over(visitor)
+        if self.format_spec:
+            self.format_spec = self.format_spec.mutate_over(visitor)
+        return visitor.visit_FormattedValue(self)
+
+    def to_object(self, space):
+        w_node = space.call_function(get(space).w_FormattedValue)
+        w_value = self.value.to_object(space)  # expr
+        space.setattr(w_node, space.wrap('value'), w_value)
+        w_conversion = space.wrap(self.conversion)  # int
+        space.setattr(w_node, space.wrap('conversion'), w_conversion)
+        w_format_spec = self.format_spec.to_object(space) if self.format_spec is not None else space.w_None  # expr
+        space.setattr(w_node, space.wrap('format_spec'), w_format_spec)
+        w_lineno = space.wrap(self.lineno)  # int
+        space.setattr(w_node, space.wrap('lineno'), w_lineno)
+        w_col_offset = space.wrap(self.col_offset)  # int
+        space.setattr(w_node, space.wrap('col_offset'), w_col_offset)
+        return w_node
+
+    @staticmethod
+    def from_object(space, w_node):
+        w_value = get_field(space, w_node, 'value', False)
+        w_conversion = get_field(space, w_node, 'conversion', True)
+        w_format_spec = get_field(space, w_node, 'format_spec', True)
+        w_lineno = get_field(space, w_node, 'lineno', False)
+        w_col_offset = get_field(space, w_node, 'col_offset', False)
+        _value = expr.from_object(space, w_value)
+        if _value is None:
+            raise_required_value(space, w_node, 'value')
+        _conversion = space.int_w(w_conversion)
+        _format_spec = expr.from_object(space, w_format_spec)
+        _lineno = space.int_w(w_lineno)
+        _col_offset = space.int_w(w_col_offset)
+        return FormattedValue(_value, _conversion, _format_spec, _lineno, _col_offset)
+
+State.ast_type('FormattedValue', 'expr', ['value', 'conversion', 'format_spec'])
+
+
+class JoinedStr(expr):
+
+    def __init__(self, values, lineno, col_offset):
+        self.values = values
+        expr.__init__(self, lineno, col_offset)
+
+    def walkabout(self, visitor):
+        visitor.visit_JoinedStr(self)
+
+    def mutate_over(self, visitor):
+        if self.values:
+            visitor._mutate_sequence(self.values)
+        return visitor.visit_JoinedStr(self)
+
+    def to_object(self, space):
+        w_node = space.call_function(get(space).w_JoinedStr)
+        if self.values is None:
+            values_w = []
+        else:
+            values_w = [node.to_object(space) for node in self.values] # expr
+        w_values = space.newlist(values_w)
+        space.setattr(w_node, space.wrap('values'), w_values)
+        w_lineno = space.wrap(self.lineno)  # int
+        space.setattr(w_node, space.wrap('lineno'), w_lineno)
+        w_col_offset = space.wrap(self.col_offset)  # int
+        space.setattr(w_node, space.wrap('col_offset'), w_col_offset)
+        return w_node
+
+    @staticmethod
+    def from_object(space, w_node):
+        w_values = get_field(space, w_node, 'values', False)
+        w_lineno = get_field(space, w_node, 'lineno', False)
+        w_col_offset = get_field(space, w_node, 'col_offset', False)
+        values_w = space.unpackiterable(w_values)
+        _values = [expr.from_object(space, w_item) for w_item in values_w]
+        _lineno = space.int_w(w_lineno)
+        _col_offset = space.int_w(w_col_offset)
+        return JoinedStr(_values, _lineno, _col_offset)
+
+State.ast_type('JoinedStr', 'expr', ['values'])
+
+
 class Bytes(expr):
 
     def __init__(self, s, lineno, col_offset):
@@ -3924,6 +4020,10 @@
         return self.default_visitor(node)
     def visit_Str(self, node):
         return self.default_visitor(node)
+    def visit_FormattedValue(self, node):
+        return self.default_visitor(node)
+    def visit_JoinedStr(self, node):
+        return self.default_visitor(node)
     def visit_Bytes(self, node):
         return self.default_visitor(node)
     def visit_NameConstant(self, node):
@@ -4153,6 +4253,14 @@
     def visit_Str(self, node):
         pass
 
+    def visit_FormattedValue(self, node):
+        node.value.walkabout(self)
+        if node.format_spec:
+            node.format_spec.walkabout(self)
+
+    def visit_JoinedStr(self, node):
+        self.visit_sequence(node.values)
+
     def visit_Bytes(self, node):
         pass
 
diff --git a/pypy/interpreter/astcompiler/astbuilder.py b/pypy/interpreter/astcompiler/astbuilder.py
--- a/pypy/interpreter/astcompiler/astbuilder.py
+++ b/pypy/interpreter/astcompiler/astbuilder.py
@@ -1189,7 +1189,58 @@
             value = self.handle_expr(node.get_child(i+2))
             i += 3
         return (i,key,value)
-    
+
+    def _add_constant_string(self, joined_pieces, w_string, atom_node):
+        space = self.space
+        is_unicode = space.isinstance_w(w_string, space.w_unicode)
+        # Implement implicit string concatenation.
+        if joined_pieces:
+            prev = joined_pieces[-1]
+            if is_unicode and isinstance(prev, ast.Str):
+                w_string = space.add(prev.s, w_string)
+                del joined_pieces[-1]
+            elif not is_unicode and isinstance(prev, ast.Bytes):
+                w_string = space.add(prev.s, w_string)
+                del joined_pieces[-1]
+        node = ast.Str if is_unicode else ast.Bytes
+        joined_pieces.append(node(w_string, atom_node.get_lineno(),
+                                            atom_node.get_column()))
+
+    def _f_string_expr(self, joined_pieces, u, start, atom_node):
+        # Note: a f-string is kept as a single literal up to here.
+        # At this point only, we recursively call the AST compiler
+        # on all the '{expr}' parts.  The 'expr' part is not parsed
+        # or even tokenized together with the rest of the source code!
+        ...
+
+    def _parse_f_string(self, joined_pieces, w_string, atom_node):
+        space = self.space
+        u = space.unicode_w(w_string)
+        conversion = -1     # the conversion char.  -1 if not specified.
+        nested_depth = 0    # nesting level for braces/parens/brackets in exprs
+        start = 0
+        p1 = u.find(u'{')
+        p2 = u.find(u'}')
+        while p1 >= 0 or p2 >= 0:
+            if p1 >= 0 and (p2 < 0 or p1 < p2):
+                pn = p1 + 1
+                if pn < len(u) and u[pn] == u'{':    # '{{' => single '{'
+                    self._add_constant_string(space.newunicode(u[start:pn]))
+                    start = pn + 1
+                else:
+                    start = self._f_string_expr(joined_pieces, u, pn, atom_node)
+                p1 = u.find(u'{', start)
+            else:
+                assert p2 >= 0 and (p1 < 0 or p2 < p1)
+                pn = p2 + 1
+                if pn < len(u) and u[pn] == u'}':    # '}}' => single '}'
+                    self._add_constant_string(space.newunicode(u[start:pn]))
+                    start = pn + 1
+                else:
+                    self.error("unexpected '}' in f-string", atom_node)
+                p2 = u.find(u'}', start)
+        self._add_constant_string(space.newunicode(u[start:]))
+
     def handle_atom(self, atom_node):
         first_child = atom_node.get_child(0)
         first_child_type = first_child.type
@@ -1207,35 +1258,45 @@
                                 first_child.get_column())
             return ast.NameConstant(w_singleton, first_child.get_lineno(),
                                 first_child.get_column())
+        #
         elif first_child_type == tokens.STRING:
             space = self.space
             encoding = self.compile_info.encoding
-            try:
-                sub_strings_w = [
-                    parsestring.parsestr(
+            joined_pieces = []
+            for i in range(atom_node.num_children()):
+                try:
+                    w_next, saw_f = parsestring.parsestr(
                             space, encoding, atom_node.get_child(i).get_value())
-                        for i in range(atom_node.num_children())]
-            except error.OperationError as e:
-                if not (e.match(space, space.w_UnicodeError) or
-                        e.match(space, space.w_ValueError)):
-                    raise
-                # Unicode/ValueError in literal: turn into SyntaxError
-                self.error(e.errorstr(space), atom_node)
-                sub_strings_w = [] # please annotator
-            # Implement implicit string concatenation.
-            w_string = sub_strings_w[0]
-            for i in range(1, len(sub_strings_w)):
-                try:
-                    w_string = space.add(w_string, sub_strings_w[i])
                 except error.OperationError as e:
-                    if not e.match(space, space.w_TypeError):
+                    if not (e.match(space, space.w_UnicodeError) or
+                            e.match(space, space.w_ValueError)):
                         raise
+                    # Unicode/ValueError in literal: turn into SyntaxError
+                    raise self.error(e.errorstr(space), atom_node)
+                if not saw_f:
+                    self._add_constant_string(joined_pieces, w_next, atom_node)
+                else:
+                    self._parse_f_string(joined_pieces, w_next, atom_node)
+            if len(joined_pieces) == 1:   # <= the common path
+                return joined_pieces[0]   # ast.Str, Bytes or FormattedValue
+            # with more than one piece, it is a combination of Str and
+            # FormattedValue pieces---if there is a Bytes, then we got
+            # an invalid mixture of bytes and unicode literals
+            for node in joined_pieces:
+                if isinstance(node, ast.Bytes):
                     self.error("cannot mix bytes and nonbytes literals",
-                              atom_node)
-                # UnicodeError in literal: turn into SyntaxError
-            strdata = space.isinstance_w(w_string, space.w_unicode)
-            node = ast.Str if strdata else ast.Bytes
-            return node(w_string, atom_node.get_lineno(), atom_node.get_column())
+                               atom_node)
+            # remove empty Strs
+            values = [node for node in joined_pieces
+                           if not (isinstance(node, ast.Str) and not node.s)]
+            if len(values) > 1:
+                return ast.JoinedStr(values)
+            elif len(values) == 1:
+                return values[0]
+            else:
+                assert len(joined_pieces) > 0    # but all empty strings
+                return joined_pieces[0]
+        #
         elif first_child_type == tokens.NUMBER:
             num_value = self.parse_number(first_child.get_value())
             return ast.Num(num_value, atom_node.get_lineno(), atom_node.get_column())
diff --git a/pypy/interpreter/astcompiler/test/test_compiler.py b/pypy/interpreter/astcompiler/test/test_compiler.py
--- a/pypy/interpreter/astcompiler/test/test_compiler.py
+++ b/pypy/interpreter/astcompiler/test/test_compiler.py
@@ -1384,3 +1384,9 @@
         code, blocks = generate_function_code(source, self.space)
         # there is a stack computation error
         assert blocks[0].instructions[3].arg == 0
+
+    def test_fstring(self):
+        source = """def f(x):
+            return f'ab{x}cd'
+        """
+        code, blocks = generate_function_code(source, self.space)
diff --git a/pypy/interpreter/astcompiler/tools/Python.asdl b/pypy/interpreter/astcompiler/tools/Python.asdl
--- a/pypy/interpreter/astcompiler/tools/Python.asdl
+++ b/pypy/interpreter/astcompiler/tools/Python.asdl
@@ -70,6 +70,8 @@
          | Call(expr func, expr* args, keyword* keywords)
          | Num(object n) -- a number as a PyObject.
          | Str(string s) -- need to specify raw, unicode, etc?
+         | FormattedValue(expr value, int? conversion, expr? format_spec)
+         | JoinedStr(expr* values)
          | Bytes(bytes s)
          -- PyPy mod. first argument name must not be value
          | NameConstant(singleton single)
diff --git a/pypy/interpreter/pyparser/dfa_generated.py b/pypy/interpreter/pyparser/dfa_generated.py
--- a/pypy/interpreter/pyparser/dfa_generated.py
+++ b/pypy/interpreter/pyparser/dfa_generated.py
@@ -23,7 +23,7 @@
      '8': 6, '9': 6, ':': 15, ';': 15,
      '<': 10, '=': 14, '>': 9, '@': 14,
      'A': 1, 'B': 2, 'C': 1, 'D': 1,
-     'E': 1, 'F': 1, 'G': 1, 'H': 1,
+     'E': 1, 'F': 2, 'G': 1, 'H': 1,
      'I': 1, 'J': 1, 'K': 1, 'L': 1,
      'M': 1, 'N': 1, 'O': 1, 'P': 1,
      'Q': 1, 'R': 3, 'S': 1, 'T': 1,
@@ -31,7 +31,7 @@
      'Y': 1, 'Z': 1, '[': 15, '\\': 19,
      ']': 15, '^': 14, '_': 1, '`': 15,
      'a': 1, 'b': 2, 'c': 1, 'd': 1,
-     'e': 1, 'f': 1, 'g': 1, 'h': 1,
+     'e': 1, 'f': 2, 'g': 1, 'h': 1,
      'i': 1, 'j': 1, 'k': 1, 'l': 1,
      'm': 1, 'n': 1, 'o': 1, 'p': 1,
      'q': 1, 'r': 3, 's': 1, 't': 1,
@@ -78,14 +78,14 @@
      '2': 1, '3': 1, '4': 1, '5': 1,
      '6': 1, '7': 1, '8': 1, '9': 1,
      'A': 1, 'B': 4, 'C': 1, 'D': 1,
-     'E': 1, 'F': 1, 'G': 1, 'H': 1,
+     'E': 1, 'F': 4, 'G': 1, 'H': 1,
      'I': 1, 'J': 1, 'K': 1, 'L': 1,
      'M': 1, 'N': 1, 'O': 1, 'P': 1,
      'Q': 1, 'R': 1, 'S': 1, 'T': 1,
      'U': 1, 'V': 1, 'W': 1, 'X': 1,
      'Y': 1, 'Z': 1, '_': 1, 'a': 1,
      'b': 4, 'c': 1, 'd': 1, 'e': 1,
-     'f': 1, 'g': 1, 'h': 1, 'i': 1,
+     'f': 4, 'g': 1, 'h': 1, 'i': 1,
      'j': 1, 'k': 1, 'l': 1, 'm': 1,
      'n': 1, 'o': 1, 'p': 1, 'q': 1,
      'r': 1, 's': 1, 't': 1, 'u': 1,
diff --git a/pypy/interpreter/pyparser/gendfa.py b/pypy/interpreter/pyparser/gendfa.py
--- a/pypy/interpreter/pyparser/gendfa.py
+++ b/pypy/interpreter/pyparser/gendfa.py
@@ -152,9 +152,9 @@
         return group(states,
                      chain(states,
                            maybe(states, groupStr(states, "rR")),
-                           maybe(states, groupStr(states, "bB"))),
+                           maybe(states, groupStr(states, "bBfF"))),
                      chain(states,
-                           maybe(states, groupStr(states, "bB")),
+                           maybe(states, groupStr(states, "bBfF")),
                            maybe(states, groupStr(states, "rR"))),
                      maybe(states, groupStr(states, "uU")))
     # ____________________________________________________________
diff --git a/pypy/interpreter/pyparser/parsestring.py b/pypy/interpreter/pyparser/parsestring.py
--- a/pypy/interpreter/pyparser/parsestring.py
+++ b/pypy/interpreter/pyparser/parsestring.py
@@ -5,7 +5,8 @@
 
 
 def parsestr(space, encoding, s):
-    """Parses a string or unicode literal, and return a wrapped value.
+    """Parses a string or unicode literal, and return a pair
+    (wrapped value, f_string_flag).
 
     If encoding=None, the source string is ascii only.
     In other cases, the source string is in utf-8 encoding.
@@ -23,6 +24,7 @@
     rawmode = False
     unicode_literal = True
     saw_u = False
+    saw_f = False
 
     # string decoration handling
     if quote == 'b' or quote == 'B':
@@ -37,6 +39,10 @@
         ps += 1
         quote = s[ps]
         rawmode = True
+    elif quote == 'f' or quote == 'F':
+        ps += 1
+        quote = s[ps]
+        saw_f = True
 
     if not saw_u:
         if quote == 'r' or quote == 'R':
@@ -47,6 +53,10 @@
             ps += 1
             quote = s[ps]
             unicode_literal = False
+        elif quote == 'f' or quote == 'F':
+            ps += 1
+            quote = s[ps]
+            saw_f = True
 
     if quote != "'" and quote != '"':
         raise_app_valueerror(space,
@@ -64,6 +74,10 @@
                                         'unmatched triple quotes in literal')
         q -= 2
 
+    if saw_f:
+        # forbid any '\' inside '{' and '}' pairs
+        pass # XXX DO IT
+
     if unicode_literal and not rawmode: # XXX Py_UnicodeFlag is ignored for now
         if encoding is None:
             assert 0 <= ps <= q
@@ -71,7 +85,7 @@
         else:
             substr = decode_unicode_utf8(space, s, ps, q)
         v = unicodehelper.decode_unicode_escape(space, substr)
-        return space.wrap(v)
+        return space.wrap(v), saw_f
 
     assert 0 <= ps <= q
     substr = s[ps : q]
@@ -85,13 +99,13 @@
 
     if rawmode or '\\' not in substr:
         if not unicode_literal:
-            return space.newbytes(substr)
+            return space.newbytes(substr), saw_f
         else:
             v = unicodehelper.decode_utf8(space, substr)
-            return space.wrap(v)
+            return space.wrap(v), saw_f
 
     v = PyString_DecodeEscape(space, substr, 'strict', encoding)
-    return space.newbytes(v)
+    return space.newbytes(v), saw_f
 
 def decode_unicode_utf8(space, s, ps, q):
     # ****The Python 2.7 version, producing UTF-32 escapes****
diff --git a/pypy/interpreter/pyparser/pytokenize.py b/pypy/interpreter/pyparser/pytokenize.py
--- a/pypy/interpreter/pyparser/pytokenize.py
+++ b/pypy/interpreter/pyparser/pytokenize.py
@@ -27,10 +27,12 @@
            'R' : None,
            "u" : None,
            "U" : None,
+           'f' : None,
+           'F' : None,
            'b' : None,
            'B' : None}
 
-for uniPrefix in ("", "b", "B"):
+for uniPrefix in ("", "b", "B", "f", "F"):
     for rawPrefix in ("", "r", "R"):
         prefix_1 = uniPrefix + rawPrefix
         prefix_2 = rawPrefix + uniPrefix
@@ -55,6 +57,11 @@
 for t in ("'''", '"""',
           "r'''", 'r"""', "R'''", 'R"""',
           "u'''", 'u"""', "U'''", 'U"""',
+          "f'''", 'f"""', "F'''", 'F"""',
+          "fr'''", 'fr"""', "Fr'''", 'Fr"""',
+          "fR'''", 'fR"""', "FR'''", 'FR"""',
+          "rf'''", 'rf"""', "rF'''", 'rF"""',
+          "Rf'''", 'Rf"""', "RF'''", 'RF"""',
           "b'''", 'b"""', "B'''", 'B"""',
           "br'''", 'br"""', "Br'''", 'Br"""',
           "bR'''", 'bR"""', "BR'''", 'BR"""',
@@ -65,6 +72,11 @@
 for t in ("'", '"',
           "r'", 'r"', "R'", 'R"',
           "u'", 'u"', "U'", 'U"',
+          "f'", 'f"', "F'", 'F"',
+          "fr'", 'fr"', "Fr'", 'Fr"',
+          "fR'", 'fR"', "FR'", 'FR"',
+          "rf'", 'rf"', "rF'", 'rF"',
+          "Rf'", 'Rf"', "RF'", 'RF"',
           "b'", 'b"', "B'", 'B"',
           "br'", 'br"', "Br'", 'Br"',
           "bR'", 'bR"', "BR'", 'BR"',
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit

[pypy-commit] pypy py3.5-fstring-pep498: in-progress

Reply via email to