[pypy-commit] pypy default: Improve CPython compatibility of 'is'. Now 'x is y' is guaranteed

arigo Mon, 13 Jun 2016 10:30:28 -0700

Author: Armin Rigo <ar...@tunes.org>
Branch: 
Changeset: r85137:418b05f95db5
Date: 2016-06-13 19:07 +0200
http://bitbucket.org/pypy/pypy/changeset/418b05f95db5/


Log:    Improve CPython compatibility of 'is'. Now 'x is y' is guaranteed to
        return True if x == y and x, y are:

         * empty strings; empty unicode strings

         * single-character (unicode) strings

         * empty tuples

        This is in addition to all other special cases (ints, etc.)

diff --git a/pypy/objspace/std/bytesobject.py b/pypy/objspace/std/bytesobject.py
--- a/pypy/objspace/std/bytesobject.py
+++ b/pypy/objspace/std/bytesobject.py
@@ -18,6 +18,7 @@
 from pypy.objspace.std.unicodeobject import (
     decode_object, unicode_from_encoded_object,
     unicode_from_string, getdefaultencoding)
+from pypy.objspace.std.util import IDTAG_SPECIAL, IDTAG_SHIFT
 
 
 class W_AbstractBytesObject(W_Root):
@@ -30,12 +31,26 @@
             return True
         if self.user_overridden_class or w_other.user_overridden_class:
             return False
-        return space.str_w(self) is space.str_w(w_other)
+        s1 = space.str_w(self)
+        s2 = space.str_w(w_other)
+        if len(s2) > 1:
+            return s1 is s2
+        else:            # strings of len <= 1 are unique-ified
+            return s1 == s2
 
     def immutable_unique_id(self, space):
         if self.user_overridden_class:
             return None
-        return space.wrap(compute_unique_id(space.str_w(self)))
+        s = space.str_w(self)
+        if len(s) > 1:
+            uid = compute_unique_id(s)
+        else:            # strings of len <= 1 are unique-ified
+            if len(s) == 1:
+                base = ord(s[0])     # base values 0-255
+            else:
+                base = 256           # empty string: base value 256
+            uid = (base << IDTAG_SHIFT) | IDTAG_SPECIAL
+        return space.wrap(uid)
 
     def unicode_w(self, space):
         # Use the default encoding.
diff --git a/pypy/objspace/std/test/test_obj.py b/pypy/objspace/std/test/test_obj.py
--- a/pypy/objspace/std/test/test_obj.py
+++ b/pypy/objspace/std/test/test_obj.py
@@ -186,17 +186,28 @@
     def test_id_on_strs(self):
         if self.appdirect:
             skip("cannot run this test as apptest")
-        u = u"a"
-        assert id(self.unwrap_wrap_unicode(u)) == id(u)
-        s = "a"
-        assert id(self.unwrap_wrap_str(s)) == id(s)
+        for u in [u"", u"a", u"aa"]:
+            assert id(self.unwrap_wrap_unicode(u)) == id(u)
+            s = str(u)
+            assert id(self.unwrap_wrap_str(s)) == id(s)
+        #
+        assert id('') == (256 << 4) | 11     # always
+        assert id(u'') == (257 << 4) | 11
+        assert id('a') == (ord('a') << 4) | 11
+        assert id(u'\u1234') == ((~0x1234) << 4) | 11
+
+    def test_id_of_tuples(self):
+        l = []
+        x = (l,)
+        assert id(x) != id((l,))          # no caching at all
+        if self.appdirect:
+            skip("cannot run this test as apptest")
+        assert id(()) == (258 << 4) | 11     # always
 
     def test_identity_vs_id_primitives(self):
-        if self.cpython_apptest:
-            skip("cpython behaves differently")
         import sys
-        l = range(-10, 10)
-        for i in range(10):
+        l = range(-10, 10, 2)
+        for i in [0, 1, 3]:
             l.append(float(i))
             l.append(i + 0.1)
             l.append(long(i))
@@ -206,18 +217,13 @@
             l.append(i - 1j)
             l.append(1 + i * 1j)
             l.append(1 - i * 1j)
-            s = str(i)
-            l.append(s)
-            u = unicode(s)
-            l.append(u)
+            l.append((i,))
         l.append(-0.0)
         l.append(None)
         l.append(True)
         l.append(False)
-        s = "s"
-        l.append(s)
-        s = u"s"
-        l.append(s)
+        l.append(())
+        l.append(tuple([]))
 
         for i, a in enumerate(l):
             for b in l[i:]:
@@ -228,21 +234,18 @@
     def test_identity_vs_id_str(self):
         if self.appdirect:
             skip("cannot run this test as apptest")
-        import sys
-        l = range(-10, 10)
-        for i in range(10):
-            s = str(i)
+        l = []
+        def add(s, u):
             l.append(s)
             l.append(self.unwrap_wrap_str(s))
-            u = unicode(s)
+            l.append(s[:1] + s[1:])
             l.append(u)
             l.append(self.unwrap_wrap_unicode(u))
-        s = "s"
-        l.append(s)
-        l.append(self.unwrap_wrap_str(s))
-        s = u"s"
-        l.append(s)
-        l.append(self.unwrap_wrap_unicode(s))
+            l.append(u[:1] + u[1:])
+        for i in range(3, 18):
+            add(str(i), unicode(i))
+        add("s", u"s")
+        add("", u"")
 
         for i, a in enumerate(l):
             for b in l[i:]:
diff --git a/pypy/objspace/std/tupleobject.py b/pypy/objspace/std/tupleobject.py
--- a/pypy/objspace/std/tupleobject.py
+++ b/pypy/objspace/std/tupleobject.py
@@ -9,7 +9,7 @@
 from pypy.interpreter.typedef import TypeDef
 from pypy.objspace.std.sliceobject import (W_SliceObject, unwrap_start_stop,
     normalize_simple_slice)
-from pypy.objspace.std.util import negate
+from pypy.objspace.std.util import negate, IDTAG_SPECIAL, IDTAG_SHIFT
 from rpython.rlib import jit
 from rpython.rlib.debug import make_sure_not_resized
 from rpython.rlib.rarithmetic import intmask
@@ -38,6 +38,23 @@
 class W_AbstractTupleObject(W_Root):
     __slots__ = ()
 
+    def is_w(self, space, w_other):
+        if not isinstance(w_other, W_AbstractTupleObject):
+            return False
+        if self is w_other:
+            return True
+        if self.user_overridden_class or w_other.user_overridden_class:
+            return False
+        # empty tuples are unique-ified
+        return 0 == w_other.length() == self.length()
+
+    def immutable_unique_id(self, space):
+        if self.user_overridden_class or self.length() > 0:
+            return None
+        # empty tuple: base value 258
+        uid = (258 << IDTAG_SHIFT) | IDTAG_SPECIAL
+        return space.wrap(uid)
+
     def __repr__(self):
         """representation for debugging purposes"""
         reprlist = [repr(w_item) for w_item in self.tolist()]
diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py
--- a/pypy/objspace/std/unicodeobject.py
+++ b/pypy/objspace/std/unicodeobject.py
@@ -18,6 +18,7 @@
 from pypy.objspace.std.basestringtype import basestring_typedef
 from pypy.objspace.std.formatting import mod_format
 from pypy.objspace.std.stringmethods import StringMethods
+from pypy.objspace.std.util import IDTAG_SPECIAL, IDTAG_SHIFT
 
 __all__ = ['W_UnicodeObject', 'wrapunicode', 'plain_str2unicode',
            'encode_object', 'decode_object', 'unicode_from_object',
@@ -52,12 +53,26 @@
             return True
         if self.user_overridden_class or w_other.user_overridden_class:
             return False
-        return space.unicode_w(self) is space.unicode_w(w_other)
+        s1 = space.unicode_w(self)
+        s2 = space.unicode_w(w_other)
+        if len(s2) > 1:
+            return s1 is s2
+        else:            # strings of len <= 1 are unique-ified
+            return s1 == s2
 
     def immutable_unique_id(self, space):
         if self.user_overridden_class:
             return None
-        return space.wrap(compute_unique_id(space.unicode_w(self)))
+        s = space.unicode_w(self)
+        if len(s) > 1:
+            uid = compute_unique_id(s)
+        else:            # strings of len <= 1 are unique-ified
+            if len(s) == 1:
+                base = ~ord(s[0])      # negative base values
+            else:
+                base = 257       # empty unicode string: base value 257
+            uid = (base << IDTAG_SHIFT) | IDTAG_SPECIAL
+        return space.wrap(uid)
 
     def str_w(self, space):
         return space.str_w(space.str(self))
diff --git a/pypy/objspace/std/util.py b/pypy/objspace/std/util.py
--- a/pypy/objspace/std/util.py
+++ b/pypy/objspace/std/util.py
@@ -9,6 +9,11 @@
 IDTAG_FLOAT   = 5
 IDTAG_COMPLEX = 7
 IDTAG_METHOD  = 9
+IDTAG_SPECIAL = 11    # -1 - (-maxunicode-1): unichar
+                      # 0 - 255: char
+                      # 256: empty string
+                      # 257: empty unicode
+                      # 258: empty tuple
 
 CMP_OPS = dict(lt='<', le='<=', eq='==', ne='!=', gt='>', ge='>=')
 BINARY_BITWISE_OPS = {'and': '&', 'lshift': '<<', 'or': '|', 'rshift': '>>',
_______________________________________________
pypy-commit mailing list
pypy-commit@python.org
https://mail.python.org/mailman/listinfo/pypy-commit

[pypy-commit] pypy default: Improve CPython compatibility of 'is'. Now 'x is y' is guaranteed

Reply via email to