[pypy-commit] pypy rpython-hash: fix a bug found by the test (and also fix and improve the test itself)

arigo Sun, 29 Jan 2017 13:52:45 -0800

Author: Armin Rigo <[email protected]>
Branch: rpython-hash
Changeset: r89830:bf4314421560
Date: 2017-01-29 22:51 +0100
http://bitbucket.org/pypy/pypy/changeset/bf4314421560/


Log:    fix a bug found by the test (and also fix and improve the test
        itself)

diff --git a/rpython/rlib/rsiphash.py b/rpython/rlib/rsiphash.py
--- a/rpython/rlib/rsiphash.py
+++ b/rpython/rlib/rsiphash.py
@@ -74,7 +74,7 @@
                                           rffi.cast(rffi.ULONG, -1))):
             os.write(2,
                 "PYTHONHASHSEED must be \"random\" or an integer "
-                "in range [0; 4294967295]")
+                "in range [0; 4294967295]\n")
             os._exit(1)
         if not seed:
             # disable the randomized hash
@@ -149,7 +149,9 @@
         # NOTE: a latin-1 unicode string must have the same hash as the
         # corresponding byte string.  If the unicode is all within
         # 0-255, then we need to allocate a byte buffer and copy the
-        # latin-1 encoding in it manually.
+        # latin-1 encoding in it manually.  Note also that we give a
+        # different hash result than CPython on ucs4 platforms, for
+        # unicode strings where CPython uses 2 bytes per character.
         for i in range(length):
             if ord(ll_s.chars[i]) > 0xFF:
                 addr = rstr._get_raw_buf_unicode(rstr.UNICODE, ll_s, 0)
diff --git a/rpython/rlib/test/test_rsiphash.py b/rpython/rlib/test/test_rsiphash.py
--- a/rpython/rlib/test/test_rsiphash.py
+++ b/rpython/rlib/test/test_rsiphash.py
@@ -74,38 +74,72 @@
 
 def test_translated():
     d1 = {"foo": 123}
-    d2 = {u"foo": 456, u"\u1234": 789}
+    d2 = {u"foo": 456, u"\u1234\u5678": 789}
+    class G:
+        pass
+    g = G()
+    g.v1 = d1.copy()
+    g.v2 = d2.copy()
 
-    def entrypoint():
+    def fetch(n):
+        if n == 0: return d1.get("foo", -1)
+        if n == 1: return g.v1.get("foo", -1)
+        if n == 2: return compute_hash("foo")
+        if n == 3: return d2.get(u"foo", -1)
+        if n == 4: return g.v2.get(u"foo", -1)
+        if n == 5: return compute_hash(u"foo")
+        if n == 6: return d2.get(u"\u1234\u5678", -1)
+        if n == 7: return g.v2.get(u"\u1234\u5678", -1)
+        if n == 8: return compute_hash(u"\u1234\u5678")
+        assert 0
+
+    def entrypoint(n):
         enable_siphash24()
-        return '%d %d %d %d %d %d' % (
-            d1.get("foo", -1),     compute_hash("bar"),
-            d2.get(u"foo", -1),    compute_hash(u"foo"),
-            d2.get(u"\u1234", -1), compute_hash(u"\u1234"))
+        g.v1["bar"] = -2
+        g.v2[u"bar"] = -2
+        if n >= 0:    # get items one by one, because otherwise it may
+                      # be the case that one line influences the next
+            return str(fetch(n))
+        else:
+            # ...except in random mode, because we want all results
+            # to be computed with the same seed
+            return ' '.join([str(fetch(n)) for n in range(9)])
 
-    fn = compile(entrypoint, [])
+    fn = compile(entrypoint, [int])
+
+    def getall():
+        return [int(fn(i)) for i in range(9)]
 
     old_val = os.environ.get('PYTHONHASHSEED', None)
     try:
         os.environ['PYTHONHASHSEED'] = '0'
-        s1 = fn()
-        assert map(int, s1.split()) == [
-            123, intmask(15988776847138518036),
-            456, intmask(15988776847138518036),
-            789, intmask(16003099094427356855)]
+        s1 = getall()
+        assert s1[:8] == [
+            123, 123, intmask(15988776847138518036),
+            456, 456, intmask(15988776847138518036),
+            789, 789]
+        assert s1[8] in [intmask(17593683438421985039),    # ucs2 mode
+                         intmask(94801584261658677)]       # ucs4 mode
 
         os.environ['PYTHONHASHSEED'] = '3987654321'
-        s1 = fn()
-        assert map(int, s1.split()) == [
-            123, intmask(5890804383681474441),
-            456, intmask(5890804383681474441),
-            789, intmask(10331001347733193222)]
+        s1 = getall()
+        assert s1[:8] == [
+            123, 123, intmask(5890804383681474441),
+            456, 456, intmask(5890804383681474441),
+            789, 789]
+        assert s1[8] in [intmask(4192582507672183374),     # ucs2 mode
+                         intmask(7179255293164649778)]     # ucs4 mode
 
         for env in ['', 'random']:
             os.environ['PYTHONHASHSEED'] = env
-            s1 = fn()
-            s2 = fn()
-            assert s1 != s2
+            s1 = map(int, fn(-1).split())
+            s2 = map(int, fn(-1).split())
+            assert s1[0:2]+s1[3:5]+s1[6:8] == [123, 123, 456, 456, 789, 789]
+            assert s1[2] == s1[5]
+            assert s2[0:2]+s2[3:5]+s2[6:8] == [123, 123, 456, 456, 789, 789]
+            assert s2[2] == s2[5]
+            #
+            assert len(set([s1[2], s2[2], s1[8], s2[8]])) == 4
 
     finally:
         if old_val is None:
diff --git a/rpython/rtyper/lltypesystem/rordereddict.py b/rpython/rtyper/lltypesystem/rordereddict.py
--- a/rpython/rtyper/lltypesystem/rordereddict.py
+++ b/rpython/rtyper/lltypesystem/rordereddict.py
@@ -888,13 +888,18 @@
     assert d.num_live_items == d.num_ever_used_items
     assert not d.indexes
     #
-    # recompute all hashes, if they are stored in d.entries
+    # recompute all hashes.  Needed if they are stored in d.entries,
+    # but do it anyway: otherwise, e.g. a string-keyed dictionary
+    # won't have a fasthash on its strings if their hash is still
+    # uncomputed.
     ENTRY = lltype.typeOf(d.entries).TO.OF
-    if hasattr(ENTRY, 'f_hash'):
-        for i in range(d.num_ever_used_items):
-            assert d.entries.valid(i)
-            d_entry = d.entries[i]
-            d_entry.f_hash = d.keyhash(d_entry.key)
+    for i in range(d.num_ever_used_items):
+        assert d.entries.valid(i)
+        d_entry = d.entries[i]
+        h = d.keyhash(d_entry.key)
+        if hasattr(ENTRY, 'f_hash'):
+            d_entry.f_hash = h
+        #else: purely for the side-effect it can have on d_entry.key
     #
     # Use the smallest acceptable size for ll_dict_reindex
     new_size = DICT_INITSIZE
diff --git a/rpython/rtyper/lltypesystem/rstr.py b/rpython/rtyper/lltypesystem/rstr.py
--- a/rpython/rtyper/lltypesystem/rstr.py
+++ b/rpython/rtyper/lltypesystem/rstr.py
@@ -3,7 +3,7 @@
 from rpython.annotator import model as annmodel
 from rpython.rlib import jit, types, objectmodel
 from rpython.rlib.objectmodel import (malloc_zero_filled, we_are_translated,
-    ll_hash_string, keepalive_until_here, specialize, enforceargs)
+    ll_hash_string, keepalive_until_here, specialize, enforceargs, dont_inline)
 from rpython.rlib.signature import signature
 from rpython.rlib.rarithmetic import ovfcheck
 from rpython.rtyper.error import TyperError
@@ -383,6 +383,7 @@
             return 0
 
     @staticmethod
+    @dont_inline
     def _ll_strhash(s):
         # unlike CPython, there is no reason to avoid to return -1
         # but our malloc initializes the memory to zero, so we use zero as the
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit

[pypy-commit] pypy rpython-hash: fix a bug found by the test (and also fix and improve the test itself)

Reply via email to