Author: Armin Rigo <ar...@tunes.org>
Branch:
Changeset: r85137:418b05f95db5
Date: 2016-06-13 19:07 +0200
http://bitbucket.org/pypy/pypy/changeset/418b05f95db5/
Log: Improve CPython compatibility of 'is'. Now 'x is y' is guaranteed to
return True if x == y and x, y are:

  * empty strings; empty unicode strings
  * single-character (unicode) strings
  * empty tuples

This is in addition to all other special cases (ints, etc.)

diff --git a/pypy/objspace/std/bytesobject.py b/pypy/objspace/std/bytesobject.py --- a/pypy/objspace/std/bytesobject.py +++ b/pypy/objspace/std/bytesobject.py @@ -18,6 +18,7 @@ from pypy.objspace.std.unicodeobject import ( decode_object, unicode_from_encoded_object, unicode_from_string, getdefaultencoding) +from pypy.objspace.std.util import IDTAG_SPECIAL, IDTAG_SHIFT class W_AbstractBytesObject(W_Root): @@ -30,12 +31,26 @@ return True if self.user_overridden_class or w_other.user_overridden_class: return False - return space.str_w(self) is space.str_w(w_other) + s1 = space.str_w(self) + s2 = space.str_w(w_other) + if len(s2) > 1: + return s1 is s2 + else: # strings of len <= 1 are unique-ified + return s1 == s2 def immutable_unique_id(self, space): if self.user_overridden_class: return None - return space.wrap(compute_unique_id(space.str_w(self))) + s = space.str_w(self) + if len(s) > 1: + uid = compute_unique_id(s) + else: # strings of len <= 1 are unique-ified + if len(s) == 1: + base = ord(s[0]) # base values 0-255 + else: + base = 256 # empty string: base value 256 + uid = (base << IDTAG_SHIFT) | IDTAG_SPECIAL + return space.wrap(uid) def unicode_w(self, space): # Use the default encoding.
diff --git a/pypy/objspace/std/test/test_obj.py b/pypy/objspace/std/test/test_obj.py --- a/pypy/objspace/std/test/test_obj.py +++ b/pypy/objspace/std/test/test_obj.py @@ -186,17 +186,28 @@ def test_id_on_strs(self): if self.appdirect: skip("cannot run this test as apptest") - u = u"a" - assert id(self.unwrap_wrap_unicode(u)) == id(u) - s = "a" - assert id(self.unwrap_wrap_str(s)) == id(s) + for u in [u"", u"a", u"aa"]: + assert id(self.unwrap_wrap_unicode(u)) == id(u) + s = str(u) + assert id(self.unwrap_wrap_str(s)) == id(s) + # + assert id('') == (256 << 4) | 11 # always + assert id(u'') == (257 << 4) | 11 + assert id('a') == (ord('a') << 4) | 11 + assert id(u'\u1234') == ((~0x1234) << 4) | 11 + + def test_id_of_tuples(self): + l = [] + x = (l,) + assert id(x) != id((l,)) # no caching at all + if self.appdirect: + skip("cannot run this test as apptest") + assert id(()) == (258 << 4) | 11 # always def test_identity_vs_id_primitives(self): - if self.cpython_apptest: - skip("cpython behaves differently") import sys - l = range(-10, 10) - for i in range(10): + l = range(-10, 10, 2) + for i in [0, 1, 3]: l.append(float(i)) l.append(i + 0.1) l.append(long(i)) @@ -206,18 +217,13 @@ l.append(i - 1j) l.append(1 + i * 1j) l.append(1 - i * 1j) - s = str(i) - l.append(s) - u = unicode(s) - l.append(u) + l.append((i,)) l.append(-0.0) l.append(None) l.append(True) l.append(False) - s = "s" - l.append(s) - s = u"s" - l.append(s) + l.append(()) + l.append(tuple([])) for i, a in enumerate(l): for b in l[i:]: @@ -228,21 +234,18 @@ def test_identity_vs_id_str(self): if self.appdirect: skip("cannot run this test as apptest") - import sys - l = range(-10, 10) - for i in range(10): - s = str(i) + l = [] + def add(s, u): l.append(s) l.append(self.unwrap_wrap_str(s)) - u = unicode(s) + l.append(s[:1] + s[1:]) l.append(u) l.append(self.unwrap_wrap_unicode(u)) - s = "s" - l.append(s) - l.append(self.unwrap_wrap_str(s)) - s = u"s" - l.append(s) - l.append(self.unwrap_wrap_unicode(s)) + 
l.append(u[:1] + u[1:]) + for i in range(3, 18): + add(str(i), unicode(i)) + add("s", u"s") + add("", u"") for i, a in enumerate(l): for b in l[i:]: diff --git a/pypy/objspace/std/tupleobject.py b/pypy/objspace/std/tupleobject.py --- a/pypy/objspace/std/tupleobject.py +++ b/pypy/objspace/std/tupleobject.py @@ -9,7 +9,7 @@ from pypy.interpreter.typedef import TypeDef from pypy.objspace.std.sliceobject import (W_SliceObject, unwrap_start_stop, normalize_simple_slice) -from pypy.objspace.std.util import negate +from pypy.objspace.std.util import negate, IDTAG_SPECIAL, IDTAG_SHIFT from rpython.rlib import jit from rpython.rlib.debug import make_sure_not_resized from rpython.rlib.rarithmetic import intmask @@ -38,6 +38,23 @@ class W_AbstractTupleObject(W_Root): __slots__ = () + def is_w(self, space, w_other): + if not isinstance(w_other, W_AbstractTupleObject): + return False + if self is w_other: + return True + if self.user_overridden_class or w_other.user_overridden_class: + return False + # empty tuples are unique-ified + return 0 == w_other.length() == self.length() + + def immutable_unique_id(self, space): + if self.user_overridden_class or self.length() > 0: + return None + # empty tuple: base value 258 + uid = (258 << IDTAG_SHIFT) | IDTAG_SPECIAL + return space.wrap(uid) + def __repr__(self): """representation for debugging purposes""" reprlist = [repr(w_item) for w_item in self.tolist()] diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py --- a/pypy/objspace/std/unicodeobject.py +++ b/pypy/objspace/std/unicodeobject.py @@ -18,6 +18,7 @@ from pypy.objspace.std.basestringtype import basestring_typedef from pypy.objspace.std.formatting import mod_format from pypy.objspace.std.stringmethods import StringMethods +from pypy.objspace.std.util import IDTAG_SPECIAL, IDTAG_SHIFT __all__ = ['W_UnicodeObject', 'wrapunicode', 'plain_str2unicode', 'encode_object', 'decode_object', 'unicode_from_object', @@ -52,12 +53,26 @@ return True if 
self.user_overridden_class or w_other.user_overridden_class: return False - return space.unicode_w(self) is space.unicode_w(w_other) + s1 = space.unicode_w(self) + s2 = space.unicode_w(w_other) + if len(s2) > 1: + return s1 is s2 + else: # strings of len <= 1 are unique-ified + return s1 == s2 def immutable_unique_id(self, space): if self.user_overridden_class: return None - return space.wrap(compute_unique_id(space.unicode_w(self))) + s = space.unicode_w(self) + if len(s) > 1: + uid = compute_unique_id(s) + else: # strings of len <= 1 are unique-ified + if len(s) == 1: + base = ~ord(s[0]) # negative base values + else: + base = 257 # empty unicode string: base value 257 + uid = (base << IDTAG_SHIFT) | IDTAG_SPECIAL + return space.wrap(uid) def str_w(self, space): return space.str_w(space.str(self)) diff --git a/pypy/objspace/std/util.py b/pypy/objspace/std/util.py --- a/pypy/objspace/std/util.py +++ b/pypy/objspace/std/util.py @@ -9,6 +9,11 @@ IDTAG_FLOAT = 5 IDTAG_COMPLEX = 7 IDTAG_METHOD = 9 +IDTAG_SPECIAL = 11 # -1 - (-maxunicode-1): unichar + # 0 - 255: char + # 256: empty string + # 257: empty unicode + # 258: empty tuple CMP_OPS = dict(lt='<', le='<=', eq='==', ne='!=', gt='>', ge='>=') BINARY_BITWISE_OPS = {'and': '&', 'lshift': '<<', 'or': '|', 'rshift': '>>', _______________________________________________ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit