Author: Armin Rigo <[email protected]>
Branch: py3.5-fstring-pep498
Changeset: r89692:533c8eeffd5b
Date: 2017-01-22 20:26 +0100
http://bitbucket.org/pypy/pypy/changeset/533c8eeffd5b/
Log: in-progress
diff --git a/pypy/interpreter/astcompiler/ast.py
b/pypy/interpreter/astcompiler/ast.py
--- a/pypy/interpreter/astcompiler/ast.py
+++ b/pypy/interpreter/astcompiler/ast.py
@@ -1670,6 +1670,10 @@
return Num.from_object(space, w_node)
if space.isinstance_w(w_node, get(space).w_Str):
return Str.from_object(space, w_node)
+ if space.isinstance_w(w_node, get(space).w_FormattedValue):
+ return FormattedValue.from_object(space, w_node)
+ if space.isinstance_w(w_node, get(space).w_JoinedStr):
+ return JoinedStr.from_object(space, w_node)
if space.isinstance_w(w_node, get(space).w_Bytes):
return Bytes.from_object(space, w_node)
if space.isinstance_w(w_node, get(space).w_NameConstant):
@@ -2554,6 +2558,98 @@
State.ast_type('Str', 'expr', ['s'])
+# AST node class for the 'FormattedValue' expression kind added for
+# f-strings. It carries three fields -- 'value', 'conversion' and
+# 'format_spec' -- in addition to the position (lineno, col_offset) that
+# is passed on to the 'expr' base class.
+class FormattedValue(expr):
+
+ def __init__(self, value, conversion, format_spec, lineno, col_offset):
+ self.value = value
+ self.conversion = conversion
+ self.format_spec = format_spec
+ expr.__init__(self, lineno, col_offset)
+
+ def walkabout(self, visitor):
+ # Dispatch to the visitor method dedicated to this node type.
+ visitor.visit_FormattedValue(self)
+
+ def mutate_over(self, visitor):
+ # Replace the children by whatever their own mutate_over() returns
+ # ('format_spec' only when it is set), then let the visitor handle
+ # this node; the visitor's result is what the caller gets back.
+ self.value = self.value.mutate_over(visitor)
+ if self.format_spec:
+ self.format_spec = self.format_spec.mutate_over(visitor)
+ return visitor.visit_FormattedValue(self)
+
+ def to_object(self, space):
+ # Build the wrapped counterpart of this node: instantiate
+ # get(space).w_FormattedValue without arguments, then set every field
+ # as an attribute. A 'format_spec' of None is stored as space.w_None.
+ w_node = space.call_function(get(space).w_FormattedValue)
+ w_value = self.value.to_object(space) # expr
+ space.setattr(w_node, space.wrap('value'), w_value)
+ w_conversion = space.wrap(self.conversion) # int
+ space.setattr(w_node, space.wrap('conversion'), w_conversion)
+ w_format_spec = self.format_spec.to_object(space) if self.format_spec
is not None else space.w_None # expr
+ space.setattr(w_node, space.wrap('format_spec'), w_format_spec)
+ w_lineno = space.wrap(self.lineno) # int
+ space.setattr(w_node, space.wrap('lineno'), w_lineno)
+ w_col_offset = space.wrap(self.col_offset) # int
+ space.setattr(w_node, space.wrap('col_offset'), w_col_offset)
+ return w_node
+
+ @staticmethod
+ def from_object(space, w_node):
+ # Reverse of to_object(): read the fields back from the wrapped node
+ # and build a FormattedValue from them.
+ # The last argument of get_field() is False for 'value', 'lineno' and
+ # 'col_offset' and True for 'conversion' and 'format_spec' --
+ # presumably an "optional" flag; confirm against get_field().
+ w_value = get_field(space, w_node, 'value', False)
+ w_conversion = get_field(space, w_node, 'conversion', True)
+ w_format_spec = get_field(space, w_node, 'format_spec', True)
+ w_lineno = get_field(space, w_node, 'lineno', False)
+ w_col_offset = get_field(space, w_node, 'col_offset', False)
+ _value = expr.from_object(space, w_value)
+ # 'value' must convert to an actual node
+ if _value is None:
+ raise_required_value(space, w_node, 'value')
+ # NOTE(review): 'conversion' is fetched with the True flag above but is
+ # unconditionally passed to space.int_w() here -- verify what happens
+ # when the attribute is missing or None on w_node.
+ _conversion = space.int_w(w_conversion)
+ _format_spec = expr.from_object(space, w_format_spec)
+ _lineno = space.int_w(w_lineno)
+ _col_offset = space.int_w(w_col_offset)
+ return FormattedValue(_value, _conversion, _format_spec, _lineno,
_col_offset)
+
+# Register the node type with its base type name and its list of fields.
+State.ast_type('FormattedValue', 'expr', ['value', 'conversion',
'format_spec'])
+
+
+# AST node class for the 'JoinedStr' expression kind added for f-strings.
+# Its single field 'values' is a list of expression nodes; the position
+# (lineno, col_offset) is passed on to the 'expr' base class.
+class JoinedStr(expr):
+
+ def __init__(self, values, lineno, col_offset):
+ self.values = values
+ expr.__init__(self, lineno, col_offset)
+
+ def walkabout(self, visitor):
+ # Dispatch to the visitor method dedicated to this node type.
+ visitor.visit_JoinedStr(self)
+
+ def mutate_over(self, visitor):
+ # Hand a non-empty 'values' list to visitor._mutate_sequence(); its
+ # return value is not used, so the list is presumably updated in
+ # place. The visitor's result for this node is returned to the caller.
+ if self.values:
+ visitor._mutate_sequence(self.values)
+ return visitor.visit_JoinedStr(self)
+
+ def to_object(self, space):
+ # Build the wrapped counterpart of this node: instantiate
+ # get(space).w_JoinedStr without arguments, then set every field as an
+ # attribute. A 'values' of None is exported as an empty list.
+ w_node = space.call_function(get(space).w_JoinedStr)
+ if self.values is None:
+ values_w = []
+ else:
+ values_w = [node.to_object(space) for node in self.values] # expr
+ w_values = space.newlist(values_w)
+ space.setattr(w_node, space.wrap('values'), w_values)
+ w_lineno = space.wrap(self.lineno) # int
+ space.setattr(w_node, space.wrap('lineno'), w_lineno)
+ w_col_offset = space.wrap(self.col_offset) # int
+ space.setattr(w_node, space.wrap('col_offset'), w_col_offset)
+ return w_node
+
+ @staticmethod
+ def from_object(space, w_node):
+ # Reverse of to_object(): read the fields back from the wrapped node.
+ # 'values' is unpacked with space.unpackiterable() and every item is
+ # converted with expr.from_object().
+ w_values = get_field(space, w_node, 'values', False)
+ w_lineno = get_field(space, w_node, 'lineno', False)
+ w_col_offset = get_field(space, w_node, 'col_offset', False)
+ values_w = space.unpackiterable(w_values)
+ _values = [expr.from_object(space, w_item) for w_item in values_w]
+ _lineno = space.int_w(w_lineno)
+ _col_offset = space.int_w(w_col_offset)
+ return JoinedStr(_values, _lineno, _col_offset)
+
+# Register the node type with its base type name and its list of fields.
+State.ast_type('JoinedStr', 'expr', ['values'])
+
+
class Bytes(expr):
def __init__(self, s, lineno, col_offset):
@@ -3924,6 +4020,10 @@
return self.default_visitor(node)
def visit_Str(self, node):
return self.default_visitor(node)
+ def visit_FormattedValue(self, node):
+ # No node-specific behaviour here: defer to default_visitor().
+ return self.default_visitor(node)
+ def visit_JoinedStr(self, node):
+ # No node-specific behaviour here: defer to default_visitor().
+ return self.default_visitor(node)
def visit_Bytes(self, node):
return self.default_visitor(node)
def visit_NameConstant(self, node):
@@ -4153,6 +4253,14 @@
def visit_Str(self, node):
pass
+ def visit_FormattedValue(self, node):
+ # Recurse into the child expressions: always 'value', and
+ # 'format_spec' only when it is set.
+ node.value.walkabout(self)
+ if node.format_spec:
+ node.format_spec.walkabout(self)
+
+ def visit_JoinedStr(self, node):
+ # Recurse into the pieces of the joined string via visit_sequence().
+ self.visit_sequence(node.values)
+
def visit_Bytes(self, node):
pass
diff --git a/pypy/interpreter/astcompiler/astbuilder.py
b/pypy/interpreter/astcompiler/astbuilder.py
--- a/pypy/interpreter/astcompiler/astbuilder.py
+++ b/pypy/interpreter/astcompiler/astbuilder.py
@@ -1189,7 +1189,58 @@
value = self.handle_expr(node.get_child(i+2))
i += 3
return (i,key,value)
-
+
+    def _add_constant_string(self, joined_pieces, w_string, atom_node):
+        """Append the literal 'w_string' to 'joined_pieces' as an AST node.
+
+        A unicode object becomes an ast.Str, anything else an ast.Bytes.
+        When the last entry of 'joined_pieces' is already a literal of the
+        same kind, it is removed and its content is prepended to
+        'w_string', so that adjacent literals end up in a single node
+        (implicit string concatenation).  The new node takes its line and
+        column from 'atom_node'.
+        """
+        space = self.space
+        last = joined_pieces[-1] if joined_pieces else None
+        if space.isinstance_w(w_string, space.w_unicode):
+            if isinstance(last, ast.Str):
+                # str + str: fold into the previous piece
+                w_string = space.add(last.s, w_string)
+                joined_pieces.pop()
+            piece = ast.Str(w_string, atom_node.get_lineno(),
+                            atom_node.get_column())
+        else:
+            if isinstance(last, ast.Bytes):
+                # bytes + bytes: fold into the previous piece
+                w_string = space.add(last.s, w_string)
+                joined_pieces.pop()
+            piece = ast.Bytes(w_string, atom_node.get_lineno(),
+                              atom_node.get_column())
+        joined_pieces.append(piece)
+
+ def _f_string_expr(self, joined_pieces, u, start, atom_node):
+ # Note: an f-string is kept as a single literal up to here.
+ # At this point only, we recursively call the AST compiler
+ # on all the '{expr}' parts. The 'expr' part is not parsed
+ # or even tokenized together with the rest of the source code!
+ #
+ # TODO: not implemented yet -- the body below is a bare '...'
+ # placeholder, so the method currently returns None.
+ # _parse_f_string() calls it with 'start' set to the index just after
+ # the opening '{' and stores the result as its next scanning position,
+ # so the finished version presumably has to return the index in 'u'
+ # just past the closing '}' of the field -- confirm when implementing.
+ ...
+
+    def _parse_f_string(self, joined_pieces, w_string, atom_node):
+        """Split the f-string literal 'w_string' into its pieces.
+
+        Literal text is appended to 'joined_pieces' through
+        _add_constant_string(), with '{{' and '}}' each reduced to a
+        single brace.  Every '{...}' field is handed to _f_string_expr(),
+        called with the index just after the opening '{'.  A single '}'
+        outside of a field is reported with self.error().
+        """
+        space = self.space
+        u = space.unicode_w(w_string)
+        start = 0     # index of the first char of 'u' not emitted yet
+        p1 = u.find(u'{')
+        p2 = u.find(u'}')
+        while p1 >= 0 or p2 >= 0:
+            if p1 >= 0 and (p2 < 0 or p1 < p2):
+                pn = p1 + 1
+                if pn < len(u) and u[pn] == u'{':    # '{{' => single '{'
+                    self._add_constant_string(joined_pieces,
+                                              space.newunicode(u[start:pn]),
+                                              atom_node)
+                    start = pn + 1
+                else:
+                    # emit the literal text in front of the field first,
+                    # otherwise it would be lost
+                    self._add_constant_string(joined_pieces,
+                                              space.newunicode(u[start:p1]),
+                                              atom_node)
+                    # assumed to return the index just past the closing
+                    # '}' of the field -- TODO confirm once
+                    # _f_string_expr() is implemented
+                    start = self._f_string_expr(joined_pieces, u, pn,
+                                                atom_node)
+                    # 'p2' may point inside the field just consumed:
+                    # search again from the new position so that the
+                    # field's own '}' is not reported as unexpected
+                    p2 = u.find(u'}', start)
+                p1 = u.find(u'{', start)
+            else:
+                assert p2 >= 0 and (p1 < 0 or p2 < p1)
+                pn = p2 + 1
+                if pn < len(u) and u[pn] == u'}':    # '}}' => single '}'
+                    self._add_constant_string(joined_pieces,
+                                              space.newunicode(u[start:pn]),
+                                              atom_node)
+                    start = pn + 1
+                else:
+                    # self.error() is expected to raise; 'start' is not
+                    # advanced on this path
+                    self.error("unexpected '}' in f-string", atom_node)
+                p2 = u.find(u'}', start)
+        # whatever is left after the last brace is plain literal text
+        self._add_constant_string(joined_pieces,
+                                  space.newunicode(u[start:]),
+                                  atom_node)
+
def handle_atom(self, atom_node):
first_child = atom_node.get_child(0)
first_child_type = first_child.type
@@ -1207,35 +1258,45 @@
first_child.get_column())
return ast.NameConstant(w_singleton, first_child.get_lineno(),
first_child.get_column())
+ #
elif first_child_type == tokens.STRING:
space = self.space
encoding = self.compile_info.encoding
- try:
- sub_strings_w = [
- parsestring.parsestr(
+ joined_pieces = []
+ for i in range(atom_node.num_children()):
+ try:
+ w_next, saw_f = parsestring.parsestr(
space, encoding,
atom_node.get_child(i).get_value())
- for i in range(atom_node.num_children())]
- except error.OperationError as e:
- if not (e.match(space, space.w_UnicodeError) or
- e.match(space, space.w_ValueError)):
- raise
- # Unicode/ValueError in literal: turn into SyntaxError
- self.error(e.errorstr(space), atom_node)
- sub_strings_w = [] # please annotator
- # Implement implicit string concatenation.
- w_string = sub_strings_w[0]
- for i in range(1, len(sub_strings_w)):
- try:
- w_string = space.add(w_string, sub_strings_w[i])
except error.OperationError as e:
- if not e.match(space, space.w_TypeError):
+ if not (e.match(space, space.w_UnicodeError) or
+ e.match(space, space.w_ValueError)):
raise
+ # Unicode/ValueError in literal: turn into SyntaxError
+ raise self.error(e.errorstr(space), atom_node)
+ if not saw_f:
+ self._add_constant_string(joined_pieces, w_next, atom_node)
+ else:
+ self._parse_f_string(joined_pieces, w_next, atom_node)
+ if len(joined_pieces) == 1: # <= the common path
+ return joined_pieces[0] # ast.Str, Bytes or FormattedValue
+ # with more than one piece, it is a combination of Str and
+ # FormattedValue pieces---if there is a Bytes, then we got
+ # an invalid mixture of bytes and unicode literals
+ for node in joined_pieces:
+ if isinstance(node, ast.Bytes):
self.error("cannot mix bytes and nonbytes literals",
- atom_node)
- # UnicodeError in literal: turn into SyntaxError
- strdata = space.isinstance_w(w_string, space.w_unicode)
- node = ast.Str if strdata else ast.Bytes
- return node(w_string, atom_node.get_lineno(),
atom_node.get_column())
+ atom_node)
+ # remove empty Strs
+ values = [node for node in joined_pieces
+ if not (isinstance(node, ast.Str) and not node.s)]
+ if len(values) > 1:
+ return ast.JoinedStr(values)
+ elif len(values) == 1:
+ return values[0]
+ else:
+ assert len(joined_pieces) > 0 # but all empty strings
+ return joined_pieces[0]
+ #
elif first_child_type == tokens.NUMBER:
num_value = self.parse_number(first_child.get_value())
return ast.Num(num_value, atom_node.get_lineno(),
atom_node.get_column())
diff --git a/pypy/interpreter/astcompiler/test/test_compiler.py
b/pypy/interpreter/astcompiler/test/test_compiler.py
--- a/pypy/interpreter/astcompiler/test/test_compiler.py
+++ b/pypy/interpreter/astcompiler/test/test_compiler.py
@@ -1384,3 +1384,9 @@
code, blocks = generate_function_code(source, self.space)
# there is a stack computation error
assert blocks[0].instructions[3].arg == 0
+
+ def test_fstring(self):
+ # Smoke test only: feeds a function whose body returns an f-string to
+ # generate_function_code(). Nothing is asserted about the returned
+ # 'code' and 'blocks' yet.
+ source = """def f(x):
+ return f'ab{x}cd'
+ """
+ code, blocks = generate_function_code(source, self.space)
diff --git a/pypy/interpreter/astcompiler/tools/Python.asdl
b/pypy/interpreter/astcompiler/tools/Python.asdl
--- a/pypy/interpreter/astcompiler/tools/Python.asdl
+++ b/pypy/interpreter/astcompiler/tools/Python.asdl
@@ -70,6 +70,8 @@
| Call(expr func, expr* args, keyword* keywords)
| Num(object n) -- a number as a PyObject.
| Str(string s) -- need to specify raw, unicode, etc?
+ | FormattedValue(expr value, int? conversion, expr? format_spec)
+ | JoinedStr(expr* values)
| Bytes(bytes s)
-- PyPy mod. first argument name must not be value
| NameConstant(singleton single)
diff --git a/pypy/interpreter/pyparser/dfa_generated.py
b/pypy/interpreter/pyparser/dfa_generated.py
--- a/pypy/interpreter/pyparser/dfa_generated.py
+++ b/pypy/interpreter/pyparser/dfa_generated.py
@@ -23,7 +23,7 @@
'8': 6, '9': 6, ':': 15, ';': 15,
'<': 10, '=': 14, '>': 9, '@': 14,
'A': 1, 'B': 2, 'C': 1, 'D': 1,
- 'E': 1, 'F': 1, 'G': 1, 'H': 1,
+ 'E': 1, 'F': 2, 'G': 1, 'H': 1,
'I': 1, 'J': 1, 'K': 1, 'L': 1,
'M': 1, 'N': 1, 'O': 1, 'P': 1,
'Q': 1, 'R': 3, 'S': 1, 'T': 1,
@@ -31,7 +31,7 @@
'Y': 1, 'Z': 1, '[': 15, '\\': 19,
']': 15, '^': 14, '_': 1, '`': 15,
'a': 1, 'b': 2, 'c': 1, 'd': 1,
- 'e': 1, 'f': 1, 'g': 1, 'h': 1,
+ 'e': 1, 'f': 2, 'g': 1, 'h': 1,
'i': 1, 'j': 1, 'k': 1, 'l': 1,
'm': 1, 'n': 1, 'o': 1, 'p': 1,
'q': 1, 'r': 3, 's': 1, 't': 1,
@@ -78,14 +78,14 @@
'2': 1, '3': 1, '4': 1, '5': 1,
'6': 1, '7': 1, '8': 1, '9': 1,
'A': 1, 'B': 4, 'C': 1, 'D': 1,
- 'E': 1, 'F': 1, 'G': 1, 'H': 1,
+ 'E': 1, 'F': 4, 'G': 1, 'H': 1,
'I': 1, 'J': 1, 'K': 1, 'L': 1,
'M': 1, 'N': 1, 'O': 1, 'P': 1,
'Q': 1, 'R': 1, 'S': 1, 'T': 1,
'U': 1, 'V': 1, 'W': 1, 'X': 1,
'Y': 1, 'Z': 1, '_': 1, 'a': 1,
'b': 4, 'c': 1, 'd': 1, 'e': 1,
- 'f': 1, 'g': 1, 'h': 1, 'i': 1,
+ 'f': 4, 'g': 1, 'h': 1, 'i': 1,
'j': 1, 'k': 1, 'l': 1, 'm': 1,
'n': 1, 'o': 1, 'p': 1, 'q': 1,
'r': 1, 's': 1, 't': 1, 'u': 1,
diff --git a/pypy/interpreter/pyparser/gendfa.py
b/pypy/interpreter/pyparser/gendfa.py
--- a/pypy/interpreter/pyparser/gendfa.py
+++ b/pypy/interpreter/pyparser/gendfa.py
@@ -152,9 +152,9 @@
return group(states,
chain(states,
maybe(states, groupStr(states, "rR")),
- maybe(states, groupStr(states, "bB"))),
+ maybe(states, groupStr(states, "bBfF"))),
chain(states,
- maybe(states, groupStr(states, "bB")),
+ maybe(states, groupStr(states, "bBfF")),
maybe(states, groupStr(states, "rR"))),
maybe(states, groupStr(states, "uU")))
# ____________________________________________________________
diff --git a/pypy/interpreter/pyparser/parsestring.py
b/pypy/interpreter/pyparser/parsestring.py
--- a/pypy/interpreter/pyparser/parsestring.py
+++ b/pypy/interpreter/pyparser/parsestring.py
@@ -5,7 +5,8 @@
def parsestr(space, encoding, s):
- """Parses a string or unicode literal, and return a wrapped value.
+ """Parses a string or unicode literal, and return a pair
+ (wrapped value, f_string_flag).
If encoding=None, the source string is ascii only.
In other cases, the source string is in utf-8 encoding.
@@ -23,6 +24,7 @@
rawmode = False
unicode_literal = True
saw_u = False
+ saw_f = False
# string decoration handling
if quote == 'b' or quote == 'B':
@@ -37,6 +39,10 @@
ps += 1
quote = s[ps]
rawmode = True
+ elif quote == 'f' or quote == 'F':
+ ps += 1
+ quote = s[ps]
+ saw_f = True
if not saw_u:
if quote == 'r' or quote == 'R':
@@ -47,6 +53,10 @@
ps += 1
quote = s[ps]
unicode_literal = False
+ elif quote == 'f' or quote == 'F':
+ ps += 1
+ quote = s[ps]
+ saw_f = True
if quote != "'" and quote != '"':
raise_app_valueerror(space,
@@ -64,6 +74,10 @@
'unmatched triple quotes in literal')
q -= 2
+ if saw_f:
+ # forbid any '\' inside '{' and '}' pairs
+ pass # XXX DO IT
+
if unicode_literal and not rawmode: # XXX Py_UnicodeFlag is ignored for now
if encoding is None:
assert 0 <= ps <= q
@@ -71,7 +85,7 @@
else:
substr = decode_unicode_utf8(space, s, ps, q)
v = unicodehelper.decode_unicode_escape(space, substr)
- return space.wrap(v)
+ return space.wrap(v), saw_f
assert 0 <= ps <= q
substr = s[ps : q]
@@ -85,13 +99,13 @@
if rawmode or '\\' not in substr:
if not unicode_literal:
- return space.newbytes(substr)
+ return space.newbytes(substr), saw_f
else:
v = unicodehelper.decode_utf8(space, substr)
- return space.wrap(v)
+ return space.wrap(v), saw_f
v = PyString_DecodeEscape(space, substr, 'strict', encoding)
- return space.newbytes(v)
+ return space.newbytes(v), saw_f
def decode_unicode_utf8(space, s, ps, q):
# ****The Python 2.7 version, producing UTF-32 escapes****
diff --git a/pypy/interpreter/pyparser/pytokenize.py
b/pypy/interpreter/pyparser/pytokenize.py
--- a/pypy/interpreter/pyparser/pytokenize.py
+++ b/pypy/interpreter/pyparser/pytokenize.py
@@ -27,10 +27,12 @@
'R' : None,
"u" : None,
"U" : None,
+ 'f' : None,
+ 'F' : None,
'b' : None,
'B' : None}
-for uniPrefix in ("", "b", "B"):
+for uniPrefix in ("", "b", "B", "f", "F"):
for rawPrefix in ("", "r", "R"):
prefix_1 = uniPrefix + rawPrefix
prefix_2 = rawPrefix + uniPrefix
@@ -55,6 +57,11 @@
for t in ("'''", '"""',
"r'''", 'r"""', "R'''", 'R"""',
"u'''", 'u"""', "U'''", 'U"""',
+ "f'''", 'f"""', "F'''", 'F"""',
+ "fr'''", 'fr"""', "Fr'''", 'Fr"""',
+ "fR'''", 'fR"""', "FR'''", 'FR"""',
+ "rf'''", 'rf"""', "rF'''", 'rF"""',
+ "Rf'''", 'Rf"""', "RF'''", 'RF"""',
"b'''", 'b"""', "B'''", 'B"""',
"br'''", 'br"""', "Br'''", 'Br"""',
"bR'''", 'bR"""', "BR'''", 'BR"""',
@@ -65,6 +72,11 @@
for t in ("'", '"',
"r'", 'r"', "R'", 'R"',
"u'", 'u"', "U'", 'U"',
+ "f'", 'f"', "F'", 'F"',
+ "fr'", 'fr"', "Fr'", 'Fr"',
+ "fR'", 'fR"', "FR'", 'FR"',
+ "rf'", 'rf"', "rF'", 'rF"',
+ "Rf'", 'Rf"', "RF'", 'RF"',
"b'", 'b"', "B'", 'B"',
"br'", 'br"', "Br'", 'Br"',
"bR'", 'bR"', "BR'", 'BR"',
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit