Author: Amaury Forgeot d'Arc <[email protected]>
Branch: merge-2.7.2
Changeset: r51662:693b08144e00
Date: 2012-01-22 20:24 +0100
http://bitbucket.org/pypy/pypy/changeset/693b08144e00/
Log: Implement CPython issue5057: do not const-fold a unicode.__getitem__
operation which returns a non-BMP character; doing so would produce .pyc
files which depend on the unicode width
diff --git a/pypy/interpreter/astcompiler/optimize.py
b/pypy/interpreter/astcompiler/optimize.py
--- a/pypy/interpreter/astcompiler/optimize.py
+++ b/pypy/interpreter/astcompiler/optimize.py
@@ -5,6 +5,7 @@
from pypy.tool import stdlib_opcode as ops
from pypy.interpreter.error import OperationError
from pypy.rlib.unroll import unrolling_iterable
+from pypy.rlib.runicode import MAXUNICODE
def optimize_ast(space, tree, compile_info):
@@ -289,8 +290,30 @@
w_idx = subs.slice.as_constant()
if w_idx is not None:
try:
- return ast.Const(self.space.getitem(w_obj, w_idx),
subs.lineno, subs.col_offset)
+ w_const = self.space.getitem(w_obj, w_idx)
except OperationError:
- # Let exceptions propgate at runtime.
- pass
+ # Let exceptions propagate at runtime.
+ return subs
+
+ # CPython issue5057: if w_obj is unicode, there might
+ # be differences between wide and narrow builds in
+ # cases like u'\U00012345'[0].
+ # Wide builds will return a non-BMP char, whereas
+ # narrow builds will return a surrogate. In both
+ # cases, skip the optimization in order to
+ # produce compatible pycs.
+ if (self.space.isinstance_w(w_obj, self.space.w_unicode)
+ and
+ self.space.isinstance_w(w_const,
self.space.w_unicode)):
+ unistr = self.space.unicode_w(w_const)
+ if len(unistr) == 1:
+ ch = ord(unistr[0])
+ else:
+ ch = 0
+ if (ch > 0xFFFF or
+ (MAXUNICODE == 0xFFFF and 0xD800 <= ch <=
0xDFFF)):
+ return subs
+
+ return ast.Const(w_const, subs.lineno, subs.col_offset)
+
return subs
diff --git a/pypy/interpreter/astcompiler/test/test_compiler.py
b/pypy/interpreter/astcompiler/test/test_compiler.py
--- a/pypy/interpreter/astcompiler/test/test_compiler.py
+++ b/pypy/interpreter/astcompiler/test/test_compiler.py
@@ -838,6 +838,30 @@
# Just checking this doesn't crash out
self.count_instructions(source)
+ def test_const_fold_unicode_subscr(self):
+ source = """def f():
+ return u"abc"[0]
+ """
+ counts = self.count_instructions(source)
+ assert counts == {ops.LOAD_CONST: 1, ops.RETURN_VALUE: 1}
+
+ # getitem outside of the BMP should not be optimized
+ source = """def f():
+ return u"\U00012345"[0]
+ """
+ counts = self.count_instructions(source)
+ assert counts == {ops.LOAD_CONST: 2, ops.BINARY_SUBSCR: 1,
+ ops.RETURN_VALUE: 1}
+
+ # getslice is not yet optimized.
+ # Still, check a case which yields the empty string.
+ source = """def f():
+ return u"abc"[:0]
+ """
+ counts = self.count_instructions(source)
+ assert counts == {ops.LOAD_CONST: 2, ops.SLICE+2: 1,
+ ops.RETURN_VALUE: 1}
+
def test_remove_dead_code(self):
source = """def f(x):
return 5
_______________________________________________
pypy-commit mailing list
[email protected]
http://mail.python.org/mailman/listinfo/pypy-commit