Author: Armin Rigo <ar...@tunes.org>
Branch: unicode-utf8-py3
Changeset: r95069:f9566e8f8110
Date: 2018-09-02 11:14 +0200
http://bitbucket.org/pypy/pypy/changeset/f9566e8f8110/

Log:    Fix for id(unicode)

diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py
--- a/pypy/objspace/std/unicodeobject.py
+++ b/pypy/objspace/std/unicodeobject.py
@@ -22,7 +22,7 @@
 from pypy.objspace.std.sliceobject import (W_SliceObject,
     unwrap_start_stop, normalize_simple_slice)
 from pypy.objspace.std.stringmethods import StringMethods
-from pypy.objspace.std.util import IDTAG_SPECIAL, IDTAG_SHIFT
+from pypy.objspace.std.util import IDTAG_SPECIAL, IDTAG_SHIFT, IDTAG_ALT_UID
 
 __all__ = ['W_UnicodeObject', 'encode_object', 'decode_object',
            'unicode_from_object', 'unicode_to_decimal_w']
@@ -68,7 +68,7 @@
             return False
         s1 = space.utf8_w(self)
         s2 = space.utf8_w(w_other)
-        if len(s2) > 2:
+        if self._len() > 1:
             return s1 is s2
         else:            # strings of len <= 1 are unique-ified
             return s1 == s2
@@ -76,14 +76,16 @@
     def immutable_unique_id(self, space):
         if self.user_overridden_class:
             return None
-        s = space.utf8_w(self)
-        if len(s) > 2:
-            uid = compute_unique_id(s)
-        else:            # strings of len <= 1 are unique-ified
-            if len(s) == 1:
-                base = ~ord(s[0])      # negative base values
-            elif len(s) == 2:
-                base = ~((ord(s[1]) << 8) | ord(s[0]))
+        l = self._len()
+        if l > 1:
+            # return the uid plus 2, to make sure we don't get
+            # conflicts with W_BytesObject, whose id() might be
+            # identical
+            uid = compute_unique_id(self._utf8) + IDTAG_ALT_UID
+        else:   # strings of len <= 1 are unique-ified
+            if l == 1:
+                base = rutf8.codepoint_at_pos(self._utf8, 0)
+                base = ~base     # negative base values
             else:
                 base = 257       # empty unicode string: base value 257
             uid = (base << IDTAG_SHIFT) | IDTAG_SPECIAL
diff --git a/pypy/objspace/std/util.py b/pypy/objspace/std/util.py
--- a/pypy/objspace/std/util.py
+++ b/pypy/objspace/std/util.py
@@ -4,6 +4,7 @@
 from pypy.interpreter import gateway
 
 IDTAG_SHIFT   = 4
+IDTAG_ALT_UID = 2     # gives an alternate id() from the same real uid
 
 IDTAG_INT     = 1
 IDTAG_LONG    = 3
_______________________________________________
pypy-commit mailing list
pypy-commit@python.org
https://mail.python.org/mailman/listinfo/pypy-commit

Reply via email to