Author: Armin Rigo <ar...@tunes.org> Branch: Changeset: r89856:109091b4f12c Date: 2017-01-31 17:36 +0100 http://bitbucket.org/pypy/pypy/changeset/109091b4f12c/
Log: hg merge rpython-hash Support for choosing a different hash for strings and unicodes, with "siphash24" having a runtime randomized seed (the same as CPython 3.5). diff too long, truncating to 2000 out of 2907 lines diff --git a/pypy/config/pypyoption.py b/pypy/config/pypyoption.py --- a/pypy/config/pypyoption.py +++ b/pypy/config/pypyoption.py @@ -196,6 +196,13 @@ default=False, requires=[("objspace.usemodules.cpyext", False)]), + ChoiceOption("hash", + "The hash function to use for strings: fnv from CPython 2.7" + " or siphash24 from CPython >= 3.4", + ["fnv", "siphash24"], + default="fnv", + cmdline="--hash"), + OptionDescription("std", "Standard Object Space Options", [ BoolOption("withtproxy", "support transparent proxies", default=True), diff --git a/pypy/goal/targetpypystandalone.py b/pypy/goal/targetpypystandalone.py --- a/pypy/goal/targetpypystandalone.py +++ b/pypy/goal/targetpypystandalone.py @@ -35,6 +35,7 @@ w_run_toplevel = space.getitem(w_dict, space.wrap('run_toplevel')) w_initstdio = space.getitem(w_dict, space.wrap('initstdio')) withjit = space.config.objspace.usemodules.pypyjit + hashfunc = space.config.objspace.hash else: w_initstdio = space.appexec([], """(): return lambda unbuffered: None @@ -45,6 +46,10 @@ from rpython.jit.backend.hlinfo import highleveljitinfo highleveljitinfo.sys_executable = argv[0] + if hashfunc == "siphash24": + from rpython.rlib import rsiphash + rsiphash.enable_siphash24() + #debug("entry point starting") #for arg in argv: # debug(" argv -> " + arg) diff --git a/pypy/module/_cffi_backend/newtype.py b/pypy/module/_cffi_backend/newtype.py --- a/pypy/module/_cffi_backend/newtype.py +++ b/pypy/module/_cffi_backend/newtype.py @@ -23,13 +23,34 @@ # ____________________________________________________________ class UniqueCache: + for_testing = False # set to True on the class level in test_c.py + def __init__(self, space): self.ctvoid = None # Cache for the 'void' type self.ctvoidp = None # Cache for the 'void *' type self.ctchara = None # Cache for the 'char[]' type self.primitives = {} # Cache for {name: primitive_type} self.functions = [] # see _new_function_type() - self.for_testing = False + self.functions_packed = None # only across translation + + def _cleanup_(self): + import gc + assert self.functions_packed is None + # Note: a full PyPy translation may still have + # 'self.functions == []' at this point, possibly depending + # on details. Code tested directly in test_ffi_obj + gc.collect() + funcs = [] + for weakdict in self.functions: + funcs += weakdict._dict.values() + del self.functions[:] + self.functions_packed = funcs if len(funcs) > 0 else None + + def unpack_functions(self): + for fct in self.functions_packed: + _record_function_type(self, fct) + self.functions_packed = None + def _clean_cache(space): "NOT_RPYTHON" @@ -622,7 +643,7 @@ for w_arg in fargs: y = compute_identity_hash(w_arg) x = intmask((1000003 * x) ^ y) - x ^= (ellipsis - abi) + x ^= ellipsis + 2 * abi if unique_cache.for_testing: # constant-folded to False in translation; x &= 3 # but for test, keep only 2 bits of hash return x @@ -646,6 +667,8 @@ # one such dict, but in case of hash collision, there might be # more. unique_cache = space.fromcache(UniqueCache) + if unique_cache.functions_packed is not None: + unique_cache.unpack_functions() func_hash = _func_key_hash(unique_cache, fargs, fresult, ellipsis, abi) for weakdict in unique_cache.functions: ctype = weakdict.get(func_hash) @@ -674,13 +697,18 @@ # fct = ctypefunc.W_CTypeFunc(space, fargs, fresult, ellipsis, abi) unique_cache = space.fromcache(UniqueCache) - func_hash = _func_key_hash(unique_cache, fargs, fresult, ellipsis, abi) + _record_function_type(unique_cache, fct) + return fct + +def _record_function_type(unique_cache, fct): + from pypy.module._cffi_backend import ctypefunc + # + func_hash = _func_key_hash(unique_cache, fct.fargs, fct.ctitem, + fct.ellipsis, fct.abi) for weakdict in unique_cache.functions: if weakdict.get(func_hash) is None: - weakdict.set(func_hash, fct) break else: weakdict = rweakref.RWeakValueDictionary(int, ctypefunc.W_CTypeFunc) unique_cache.functions.append(weakdict) - weakdict.set(func_hash, fct) - return fct + weakdict.set(func_hash, fct) diff --git a/pypy/module/_cffi_backend/test/test_c.py b/pypy/module/_cffi_backend/test/test_c.py --- a/pypy/module/_cffi_backend/test/test_c.py +++ b/pypy/module/_cffi_backend/test/test_c.py @@ -36,6 +36,7 @@ def setup_class(cls): testfuncs_w = [] keepalive_funcs = [] + UniqueCache.for_testing = True def find_and_load_library_for_test(space, w_name, w_is_global=None): if w_is_global is None: @@ -86,11 +87,12 @@ _all_test_c.find_and_load_library = func _all_test_c._testfunc = testfunc """) - UniqueCache.for_testing = True def teardown_method(self, method): + _clean_cache(self.space) + + def teardown_class(cls): UniqueCache.for_testing = False - _clean_cache(self.space) all_names = ', '.join(Module.interpleveldefs.keys()) diff --git a/pypy/module/_cffi_backend/test/test_ffi_obj.py b/pypy/module/_cffi_backend/test/test_ffi_obj.py --- a/pypy/module/_cffi_backend/test/test_ffi_obj.py +++ b/pypy/module/_cffi_backend/test/test_ffi_obj.py @@ -1,5 +1,23 @@ +from pypy.module._cffi_backend import newtype from pypy.module._cffi_backend.newtype import _clean_cache + +class TestFFIObj: + spaceconfig = dict(usemodules=('_cffi_backend', 'array')) + + def teardown_method(self, meth): + _clean_cache(self.space) + + def test_new_function_type_during_translation(self): + space = self.space + BInt = newtype.new_primitive_type(space, "int") + BFunc = newtype.new_function_type(space, space.wrap([BInt]), BInt) + assert BFunc is newtype.new_function_type(space,space.wrap([BInt]),BInt) + unique_cache = space.fromcache(newtype.UniqueCache) + unique_cache._cleanup_() + assert BFunc is newtype.new_function_type(space,space.wrap([BInt]),BInt) + + class AppTestFFIObj: spaceconfig = dict(usemodules=('_cffi_backend', 'array')) diff --git a/pypy/module/_weakref/interp__weakref.py b/pypy/module/_weakref/interp__weakref.py --- a/pypy/module/_weakref/interp__weakref.py +++ b/pypy/module/_weakref/interp__weakref.py @@ -193,6 +193,15 @@ W_WeakrefBase.__init__(self, space, w_obj, w_callable) self.w_hash = None + def _cleanup_(self): + # When a prebuilt weakref is frozen inside a translation, if + # this weakref has got an already-cached w_hash, then throw it + # away. That's because the hash value will change after + # translation. It will be recomputed the first time we ask for + # it. Note that such a frozen weakref, if not dead, will point + # to a frozen object, so it will never die. + self.w_hash = None + def descr__init__weakref(self, space, w_obj, w_callable=None, __args__=None): if __args__.arguments_w: diff --git a/pypy/module/posix/interp_posix.py b/pypy/module/posix/interp_posix.py --- a/pypy/module/posix/interp_posix.py +++ b/pypy/module/posix/interp_posix.py @@ -1324,6 +1324,12 @@ raise wrap_oserror(space, e) return space.wrap(res) +class SigCheck: + pass +_sigcheck = SigCheck() +def _signal_checker(): + _sigcheck.space.getexecutioncontext().checksignals() + @unwrap_spec(n=int) def urandom(space, n): """urandom(n) -> str @@ -1331,9 +1337,12 @@ Return a string of n random bytes suitable for cryptographic use. """ context = get(space).random_context - signal_checker = space.getexecutioncontext().checksignals try: - return space.wrap(rurandom.urandom(context, n, signal_checker)) + # urandom() takes a final argument that should be a regular function, + # not a bound method like 'getexecutioncontext().checksignals'. + # Otherwise, we can't use it from several independent places. + _sigcheck.space = space + return space.wrap(rurandom.urandom(context, n, _signal_checker)) except OSError as e: raise wrap_oserror(space, e) diff --git a/pypy/objspace/std/setobject.py b/pypy/objspace/std/setobject.py --- a/pypy/objspace/std/setobject.py +++ b/pypy/objspace/std/setobject.py @@ -576,6 +576,11 @@ class W_FrozensetObject(W_BaseSetObject): hash = 0 + def _cleanup_(self): + # in case there are frozenset objects existing during + # translation, make sure we don't translate a cached hash + self.hash = 0 + def is_w(self, space, w_other): if not isinstance(w_other, W_FrozensetObject): return False diff --git a/rpython/annotator/bookkeeper.py b/rpython/annotator/bookkeeper.py --- a/rpython/annotator/bookkeeper.py +++ b/rpython/annotator/bookkeeper.py @@ -287,7 +287,7 @@ for ek, ev in items: result.dictdef.generalize_key(self.immutablevalue(ek)) result.dictdef.generalize_value(self.immutablevalue(ev)) - result.dictdef.seen_prebuilt_key(ek) + #dictdef.seen_prebuilt_key(ek)---not needed any more seen_elements = len(items) # if the dictionary grew during the iteration, # start over again diff --git a/rpython/annotator/dictdef.py b/rpython/annotator/dictdef.py --- a/rpython/annotator/dictdef.py +++ b/rpython/annotator/dictdef.py @@ -115,13 +115,5 @@ def generalize_value(self, s_value): self.dictvalue.generalize(s_value) - def seen_prebuilt_key(self, x): - # In case we are an r_dict, we don't ask for the hash ourselves. - # Note that if the custom hashing function ends up asking for - # the hash of x, then it must use compute_hash() itself, so it - # works out. - if not self.dictkey.custom_eq_hash: - compute_hash(x) - def __repr__(self): return '<{%r: %r}>' % (self.dictkey.s_value, self.dictvalue.s_value) diff --git a/rpython/annotator/test/test_annrpython.py b/rpython/annotator/test/test_annrpython.py --- a/rpython/annotator/test/test_annrpython.py +++ b/rpython/annotator/test/test_annrpython.py @@ -3704,25 +3704,6 @@ s = a.build_types(f, [int]) assert s.const == 0 - def test_hash_sideeffect(self): - class X: - pass - x1 = X() - x2 = X() - x3 = X() - d = {(2, x1): 5, (3, x2): 7} - def f(n, m): - if m == 1: x = x1 - elif m == 2: x = x2 - else: x = x3 - return d[n, x] - a = self.RPythonAnnotator() - s = a.build_types(f, [int, int]) - assert s.knowntype == int - assert hasattr(x1, '__precomputed_identity_hash') - assert hasattr(x2, '__precomputed_identity_hash') - assert not hasattr(x3, '__precomputed_identity_hash') - def test_contains_of_empty_dict(self): class A(object): def meth(self): diff --git a/rpython/config/translationoption.py b/rpython/config/translationoption.py --- a/rpython/config/translationoption.py +++ b/rpython/config/translationoption.py @@ -201,10 +201,6 @@ StrOption("icon", "Path to the (Windows) icon to use for the executable"), StrOption("libname", "Windows: name and possibly location of the lib file to create"), - ChoiceOption("hash", - "The hash to use for strings", - ["rpython", "siphash24"], - default="rpython", cmdline="--hash"), OptionDescription("backendopt", "Backend Optimization Options", [ # control inlining @@ -394,12 +390,6 @@ if sys.platform == "darwin" or sys.platform =="win32": raise ConfigError("'asmgcc' not supported on this platform") -def apply_extra_settings(config): - # make the setting of config.hash definitive - from rpython.rlib.objectmodel import set_hash_algorithm - config.translation.hash = config.translation.hash - set_hash_algorithm(config.translation.hash) - # ---------------------------------------------------------------- def set_platform(config): diff --git a/rpython/memory/gc/incminimark.py b/rpython/memory/gc/incminimark.py --- a/rpython/memory/gc/incminimark.py +++ b/rpython/memory/gc/incminimark.py @@ -117,9 +117,7 @@ # The following flag is set on nursery objects of which we asked the id # or the identityhash. It means that a space of the size of the object -# has already been allocated in the nonmovable part. The same flag is -# abused to mark prebuilt objects whose hash has been taken during -# translation and is statically recorded. +# has already been allocated in the nonmovable part. GCFLAG_HAS_SHADOW = first_gcflag << 3 # The following flag is set temporarily on some objects during a major @@ -208,10 +206,6 @@ # by GCFLAG_xxx above. HDR = lltype.Struct('header', ('tid', lltype.Signed)) typeid_is_in_field = 'tid' - withhash_flag_is_in_field = 'tid', GCFLAG_HAS_SHADOW - # ^^^ prebuilt objects may have the flag GCFLAG_HAS_SHADOW; - # then they are one word longer, the extra word storing the hash. - # During a minor collection, the objects in the nursery that are # moved outside are changed in-place: their header is replaced with @@ -2640,40 +2634,22 @@ return shadow _find_shadow._dont_inline_ = True - @specialize.arg(2) - def id_or_identityhash(self, gcobj, is_hash): + def id_or_identityhash(self, gcobj): """Implement the common logic of id() and identityhash() of an object, given as a GCREF. """ obj = llmemory.cast_ptr_to_adr(gcobj) - # if self.is_valid_gc_object(obj): if self.is_in_nursery(obj): obj = self._find_shadow(obj) - elif is_hash: - if self.header(obj).tid & GCFLAG_HAS_SHADOW: - # - # For identityhash(), we need a special case for some - # prebuilt objects: their hash must be the same before - # and after translation. It is stored as an extra word - # after the object. But we cannot use it for id() - # because the stored value might clash with a real one. - size = self.get_size(obj) - i = (obj + size).signed[0] - # Important: the returned value is not mangle_hash()ed! - return i - # - i = llmemory.cast_adr_to_int(obj) - if is_hash: - i = mangle_hash(i) - return i + return llmemory.cast_adr_to_int(obj) id_or_identityhash._always_inline_ = True def id(self, gcobj): - return self.id_or_identityhash(gcobj, False) + return self.id_or_identityhash(gcobj) def identityhash(self, gcobj): - return self.id_or_identityhash(gcobj, True) + return mangle_hash(self.id_or_identityhash(gcobj)) # ---------- # Finalizers diff --git a/rpython/memory/gc/minimark.py b/rpython/memory/gc/minimark.py --- a/rpython/memory/gc/minimark.py +++ b/rpython/memory/gc/minimark.py @@ -104,9 +104,7 @@ # The following flag is set on nursery objects of which we asked the id # or the identityhash. It means that a space of the size of the object -# has already been allocated in the nonmovable part. The same flag is -# abused to mark prebuilt objects whose hash has been taken during -# translation and is statically recorded. +# has already been allocated in the nonmovable part. GCFLAG_HAS_SHADOW = first_gcflag << 3 # The following flag is set temporarily on some objects during a major @@ -149,9 +147,6 @@ # by GCFLAG_xxx above. HDR = lltype.Struct('header', ('tid', lltype.Signed)) typeid_is_in_field = 'tid' - withhash_flag_is_in_field = 'tid', GCFLAG_HAS_SHADOW - # ^^^ prebuilt objects may have the flag GCFLAG_HAS_SHADOW; - # then they are one word longer, the extra word storing the hash. _ADDRARRAY = lltype.Array(llmemory.Address, hints={'nolength': True}) @@ -1868,40 +1863,22 @@ return shadow _find_shadow._dont_inline_ = True - @specialize.arg(2) - def id_or_identityhash(self, gcobj, is_hash): + def id_or_identityhash(self, gcobj): """Implement the common logic of id() and identityhash() of an object, given as a GCREF. """ obj = llmemory.cast_ptr_to_adr(gcobj) - # if self.is_valid_gc_object(obj): if self.is_in_nursery(obj): obj = self._find_shadow(obj) - elif is_hash: - if self.header(obj).tid & GCFLAG_HAS_SHADOW: - # - # For identityhash(), we need a special case for some - # prebuilt objects: their hash must be the same before - # and after translation. It is stored as an extra word - # after the object. But we cannot use it for id() - # because the stored value might clash with a real one. - size = self.get_size(obj) - i = (obj + size).signed[0] - # Important: the returned value is not mangle_hash()ed! - return i - # - i = llmemory.cast_adr_to_int(obj) - if is_hash: - i = mangle_hash(i) - return i + return llmemory.cast_adr_to_int(obj) id_or_identityhash._always_inline_ = True def id(self, gcobj): - return self.id_or_identityhash(gcobj, False) + return self.id_or_identityhash(gcobj) def identityhash(self, gcobj): - return self.id_or_identityhash(gcobj, True) + return mangle_hash(self.id_or_identityhash(gcobj)) # ---------- # Finalizers diff --git a/rpython/memory/gc/semispace.py b/rpython/memory/gc/semispace.py --- a/rpython/memory/gc/semispace.py +++ b/rpython/memory/gc/semispace.py @@ -48,9 +48,6 @@ HDR = lltype.Struct('header', ('tid', lltype.Signed)) # XXX or rffi.INT? typeid_is_in_field = 'tid' - withhash_flag_is_in_field = 'tid', _GCFLAG_HASH_BASE * 0x2 - # ^^^ prebuilt objects either have GC_HASH_TAKEN_ADDR or they - # have GC_HASH_HASFIELD (and then they are one word longer). FORWARDSTUB = lltype.GcStruct('forwarding_stub', ('forw', llmemory.Address)) FORWARDSTUBPTR = lltype.Ptr(FORWARDSTUB) diff --git a/rpython/memory/gctransform/boehm.py b/rpython/memory/gctransform/boehm.py --- a/rpython/memory/gctransform/boehm.py +++ b/rpython/memory/gctransform/boehm.py @@ -11,7 +11,7 @@ class BoehmGCTransformer(GCTransformer): malloc_zero_filled = True FINALIZER_PTR = lltype.Ptr(lltype.FuncType([llmemory.Address], lltype.Void)) - HDR = lltype.Struct("header", ("hash", lltype.Signed)) + NO_HEADER = True def __init__(self, translator, inline=False): super(BoehmGCTransformer, self).__init__(translator, inline=inline) @@ -29,13 +29,8 @@ ll_malloc_varsize_no_length = mh.ll_malloc_varsize_no_length ll_malloc_varsize = mh.ll_malloc_varsize - HDRPTR = lltype.Ptr(self.HDR) - def ll_identityhash(addr): - obj = llmemory.cast_adr_to_ptr(addr, HDRPTR) - h = obj.hash - if h == 0: - obj.hash = h = ~llmemory.cast_adr_to_int(addr) + h = ~llmemory.cast_adr_to_int(addr) return h if self.translator: @@ -194,11 +189,6 @@ resulttype = lltype.Signed) hop.genop('int_invert', [v_int], resultvar=hop.spaceop.result) - def gcheader_initdata(self, obj): - hdr = lltype.malloc(self.HDR, immortal=True) - hdr.hash = lltype.identityhash_nocache(obj._as_ptr()) - return hdr._obj - ########## weakrefs ########## # Boehm: weakref objects are small structures containing only a Boehm diff --git a/rpython/memory/gctransform/framework.py b/rpython/memory/gctransform/framework.py --- a/rpython/memory/gctransform/framework.py +++ b/rpython/memory/gctransform/framework.py @@ -610,25 +610,6 @@ def special_funcptr_for_type(self, TYPE): return self.layoutbuilder.special_funcptr_for_type(TYPE) - def gc_header_for(self, obj, needs_hash=False): - hdr = self.gcdata.gc.gcheaderbuilder.header_of_object(obj) - withhash, flag = self.gcdata.gc.withhash_flag_is_in_field - x = getattr(hdr, withhash) - TYPE = lltype.typeOf(x) - x = lltype.cast_primitive(lltype.Signed, x) - if needs_hash: - x |= flag # set the flag in the header - else: - x &= ~flag # clear the flag in the header - x = lltype.cast_primitive(TYPE, x) - setattr(hdr, withhash, x) - return hdr - - def get_hash_offset(self, T): - type_id = self.get_type_id(T) - assert not self.gcdata.q_is_varsize(type_id) - return self.gcdata.q_fixed_size(type_id) - def finish_tables(self): group = self.layoutbuilder.close_table() log.info("assigned %s typeids" % (len(group.members), )) @@ -1514,22 +1495,9 @@ def gcheader_initdata(self, obj): o = lltype.top_container(obj) - needs_hash = self.get_prebuilt_hash(o) is not None - hdr = self.gc_header_for(o, needs_hash) + hdr = self.gcdata.gc.gcheaderbuilder.header_of_object(o) return hdr._obj - def get_prebuilt_hash(self, obj): - # for prebuilt objects that need to have their hash stored and - # restored. Note that only structures that are StructNodes all - # the way have their hash stored (and not e.g. structs with var- - # sized arrays at the end). 'obj' must be the top_container. - TYPE = lltype.typeOf(obj) - if not isinstance(TYPE, lltype.GcStruct): - return None - if TYPE._is_varsize(): - return None - return getattr(obj, '_hash_cache_', None) - def get_finalizer_queue_index(self, hop): fq_tag = hop.spaceop.args[0].value assert 'FinalizerQueue TAG' in fq_tag.expr diff --git a/rpython/memory/gctransform/refcounting.py b/rpython/memory/gctransform/refcounting.py --- a/rpython/memory/gctransform/refcounting.py +++ b/rpython/memory/gctransform/refcounting.py @@ -18,8 +18,7 @@ class RefcountingGCTransformer(GCTransformer): malloc_zero_filled = True - HDR = lltype.Struct("header", ("refcount", lltype.Signed), - ("hash", lltype.Signed)) + HDR = lltype.Struct("header", ("refcount", lltype.Signed)) def __init__(self, translator): super(RefcountingGCTransformer, self).__init__(translator, inline=True) @@ -77,10 +76,7 @@ ll_malloc_varsize = mh.ll_malloc_varsize def ll_identityhash(addr): - obj = llmemory.cast_adr_to_ptr(addr, HDRPTR) - h = obj.hash - if h == 0: - obj.hash = h = llmemory.cast_adr_to_int(addr) + h = llmemory.cast_adr_to_int(addr) return h if self.translator: @@ -178,7 +174,6 @@ if not self.gcheaderbuilder.get_header(p): hdr = self.gcheaderbuilder.new_header(p) hdr.refcount = sys.maxint // 2 - hdr.hash = lltype.identityhash_nocache(p) def static_deallocation_funcptr_for_type(self, TYPE): if TYPE in self.static_deallocator_funcptrs: diff --git a/rpython/memory/gctransform/transform.py b/rpython/memory/gctransform/transform.py --- a/rpython/memory/gctransform/transform.py +++ b/rpython/memory/gctransform/transform.py @@ -374,9 +374,6 @@ return hop.cast_result(rmodel.inputconst(lltype.Ptr(ARRAY_TYPEID_MAP), lltype.nullptr(ARRAY_TYPEID_MAP))) - def get_prebuilt_hash(self, obj): - return None - class MinimalGCTransformer(BaseGCTransformer): def __init__(self, parenttransformer): diff --git a/rpython/rlib/_rweakvaldict.py b/rpython/rlib/_rweakvaldict.py --- a/rpython/rlib/_rweakvaldict.py +++ b/rpython/rlib/_rweakvaldict.py @@ -76,12 +76,16 @@ bk = self.rtyper.annotator.bookkeeper classdef = bk.getuniqueclassdef(weakdict._valueclass) r_value = getinstancerepr(self.rtyper, classdef) + any_value = False for dictkey, dictvalue in weakdict._dict.items(): llkey = self.r_key.convert_const(dictkey) llvalue = r_value.convert_const(dictvalue) if llvalue: llvalue = lltype.cast_pointer(rclass.OBJECTPTR, llvalue) self.ll_set_nonnull(l_dict, llkey, llvalue) + any_value = True + if any_value: + l_dict.resize_counter = -1 return l_dict def rtype_method_get(self, hop): @@ -114,6 +118,8 @@ @jit.dont_look_inside def ll_get(self, d, llkey): + if d.resize_counter < 0: + self.ll_weakdict_resize(d) # initialize prebuilt dicts at runtime hash = self.ll_keyhash(llkey) i = rdict.ll_dict_lookup(d, llkey, hash) & rdict.MASK #llop.debug_print(lltype.Void, i, 'get') @@ -132,6 +138,8 @@ @jit.dont_look_inside def ll_set_nonnull(self, d, llkey, llvalue): + if d.resize_counter < 0: + self.ll_weakdict_resize(d) # initialize prebuilt dicts at runtime hash = self.ll_keyhash(llkey) valueref = weakref_create(llvalue) # GC effects here, before the rest i = rdict.ll_dict_lookup(d, llkey, hash) & rdict.MASK @@ -147,6 +155,8 @@ @jit.dont_look_inside def ll_set_null(self, d, llkey): + if d.resize_counter < 0: + self.ll_weakdict_resize(d) # initialize prebuilt dicts at runtime hash = self.ll_keyhash(llkey) i = rdict.ll_dict_lookup(d, llkey, hash) & rdict.MASK if d.entries.everused(i): diff --git a/rpython/rlib/debug.py b/rpython/rlib/debug.py --- a/rpython/rlib/debug.py +++ b/rpython/rlib/debug.py @@ -441,7 +441,7 @@ except OSError as e: os.write(2, "Could not start GDB: %s" % ( os.strerror(e.errno))) - raise SystemExit + os._exit(1) else: time.sleep(1) # give the GDB time to attach diff --git a/rpython/rlib/objectmodel.py b/rpython/rlib/objectmodel.py --- a/rpython/rlib/objectmodel.py +++ b/rpython/rlib/objectmodel.py @@ -465,8 +465,14 @@ Note that this can return 0 or -1 too. - It returns the same number, both before and after translation. - Dictionaries don't need to be rehashed after translation. + NOTE: It returns a different number before and after translation! + Dictionaries will be rehashed when the translated program starts. + Be careful about other places that store or depend on a hash value: + if such a place can exist before translation, you should add for + example a _cleanup_() method to clear this cache during translation. + + (Nowadays we could completely remove compute_hash() and decide that + hash(x) is valid RPython instead, at least for the types listed here.) """ if isinstance(x, (str, unicode)): return _hash_string(x) @@ -484,17 +490,11 @@ """RPython equivalent of object.__hash__(x). This returns the so-called 'identity hash', which is the non-overridable default hash of Python. Can be called for any RPython-level object that turns - into a GC object, but not NULL. The value is not guaranteed to be the - same before and after translation, except for RPython instances on the - lltypesystem. + into a GC object, but not NULL. The value will be different before + and after translation (WARNING: this is a change with older RPythons!) """ assert x is not None - result = object.__hash__(x) - try: - x.__dict__['__precomputed_identity_hash'] = result - except (TypeError, AttributeError): - pass - return result + return object.__hash__(x) def compute_unique_id(x): """RPython equivalent of id(x). The 'x' must be an RPython-level @@ -519,21 +519,17 @@ # ---------- -HASH_ALGORITHM = "rpython" # XXX Is there a better name? -HASH_ALGORITHM_FIXED = False +def _hash_string(s): + """The default algorithm behind compute_hash() for a string or a unicode. + This is a modified Fowler-Noll-Vo (FNV) hash. According to Wikipedia, + FNV needs carefully-computed constants called FNV primes and FNV offset + basis, which are absent from the present algorithm. Nevertheless, + this matches CPython 2.7 without -R, which has proven a good hash in + practice (even if not crypographical nor randomizable). -@not_rpython -def set_hash_algorithm(algo): - """Must be called very early, before any string is hashed with - compute_hash()!""" - global HASH_ALGORITHM - if HASH_ALGORITHM != algo: - assert not HASH_ALGORITHM_FIXED, "compute_hash() already called!" - assert algo in ("rpython", "siphash24") - HASH_ALGORITHM = algo - - -def _hash_string_rpython(s): + There is a mechanism to use another one in programs after translation. + See rsiphash.py, which implements the algorithm of CPython >= 3.4. + """ from rpython.rlib.rarithmetic import intmask length = len(s) @@ -547,100 +543,8 @@ x ^= length return intmask(x) - -@not_rpython -def _hash_string_siphash24(s): - """This version is called when untranslated only.""" - import array - from rpython.rlib.rsiphash import siphash24 - from rpython.rtyper.lltypesystem import lltype, rffi - from rpython.rlib.rarithmetic import intmask - - if not isinstance(s, str): - if isinstance(s, unicode): - lst = map(ord, s) - else: - lst = map(ord, s.chars) # for rstr.STR or UNICODE - # NOTE: a latin-1 unicode string must have the same hash as the - # corresponding byte string. - if all(n <= 0xFF for n in lst): - kind = "B" - elif rffi.sizeof(lltype.UniChar) == 4: - kind = "I" - else: - kind = "H" - s = array.array(kind, lst).tostring() - ptr = rffi.str2charp(s) - x = siphash24(ptr, len(s)) - rffi.free_charp(ptr) - return intmask(x) - -def ll_hash_string_siphash24(ll_s): - """Called from lltypesystem/rstr.py. 'll_s' is a rstr.STR or UNICODE.""" - from rpython.rlib.rsiphash import siphash24 - from rpython.rtyper.lltypesystem import lltype, llmemory, rffi, rstr - from rpython.rlib.rarithmetic import intmask - - length = len(ll_s.chars) - if lltype.typeOf(ll_s).TO.chars.OF == lltype.Char: - # no GC operation from here! - addr = rstr._get_raw_buf_string(rstr.STR, ll_s, 0) - else: - # NOTE: a latin-1 unicode string must have the same hash as the - # corresponding byte string. If the unicode is all within - # 0-255, then we need to allocate a byte buffer and copy the - # latin-1 encoding in it manually. - for i in range(length): - if ord(ll_s.chars[i]) > 0xFF: - # no GC operation from here! - addr = rstr._get_raw_buf_unicode(rstr.UNICODE, ll_s, 0) - length *= rffi.sizeof(rstr.UNICODE.chars.OF) - break - else: - p = lltype.malloc(rffi.CCHARP.TO, length, flavor='raw') - i = 0 - while i < length: - p[i] = chr(ord(ll_s.chars[i])) - i += 1 - x = siphash24(llmemory.cast_ptr_to_adr(p), length) - lltype.free(p, flavor='raw') - return intmask(x) - x = siphash24(addr, length) - keepalive_until_here(ll_s) - return intmask(x) -ll_hash_string_siphash24._jit_look_inside_ = False - - -@not_rpython -def _hash_string(s): - """The algorithm behind compute_hash() for a string or a unicode. - This version is only for untranslated usage, and 's' is a str or unicode. - """ - global HASH_ALGORITHM_FIXED - HASH_ALGORITHM_FIXED = True - if HASH_ALGORITHM == "rpython": - return _hash_string_rpython(s) - if HASH_ALGORITHM == "siphash24": - return _hash_string_siphash24(s) - raise NotImplementedError - def ll_hash_string(ll_s): - """The algorithm behind compute_hash() for a string or a unicode. - This version is called from lltypesystem/rstr.py, and 'll_s' is a - rstr.STR or rstr.UNICODE. - """ - if not we_are_translated(): - global HASH_ALGORITHM_FIXED - HASH_ALGORITHM_FIXED = True - if HASH_ALGORITHM == "rpython": - return _hash_string_rpython(ll_s.chars) - if HASH_ALGORITHM == "siphash24": - if we_are_translated(): - return ll_hash_string_siphash24(ll_s) - else: - return _hash_string_siphash24(ll_s) - raise NotImplementedError - + return _hash_string(ll_s.chars) def _hash_float(f): """The algorithm behind compute_hash() for a float. @@ -698,6 +602,21 @@ return hop.gendirectcall(ll_fn, v_obj) class Entry(ExtRegistryEntry): + _about_ = ll_hash_string + # this is only used when annotating the code in rstr.py, and so + # it always occurs after the RPython program signalled its intent + # to use a different hash. The code below overwrites the use of + # ll_hash_string() to make the annotator think a possibly different + # function was called. + + def compute_annotation(self): + from rpython.annotator import model as annmodel + bk = self.bookkeeper + translator = bk.annotator.translator + fn = getattr(translator, 'll_hash_string', ll_hash_string) + return annmodel.SomePBC([bk.getdesc(fn)]) + +class Entry(ExtRegistryEntry): _about_ = compute_identity_hash def compute_result_annotation(self, s_x): diff --git a/rpython/rlib/rsiphash.py b/rpython/rlib/rsiphash.py --- a/rpython/rlib/rsiphash.py +++ b/rpython/rlib/rsiphash.py @@ -1,12 +1,24 @@ -import sys, os, struct +""" +This module implements siphash-2-4, the hashing algorithm for strings +and unicodes. You can use it explicitly by calling siphash24() with +a byte string, or you can use enable_siphash24() to enable the use +of siphash-2-4 on all RPython strings and unicodes in your program +after translation. +""" +import sys, os, errno from contextlib import contextmanager -from rpython.rlib import rarithmetic +from rpython.rlib import rarithmetic, rurandom from rpython.rlib.objectmodel import not_rpython, always_inline -from rpython.rlib.rgc import no_collect -from rpython.rlib.rarithmetic import r_uint64 +from rpython.rlib.objectmodel import we_are_translated, dont_inline +from rpython.rlib.objectmodel import keepalive_until_here +from rpython.rlib import rgc, jit, rposix +from rpython.rlib.rarithmetic import r_uint64, r_uint32, r_uint from rpython.rlib.rawstorage import misaligned_is_fine -from rpython.rtyper.lltypesystem import lltype, llmemory, rffi +from rpython.rlib.nonconst import NonConstant +from rpython.rtyper.lltypesystem import lltype, llmemory, rffi, rstr from rpython.rtyper.lltypesystem.lloperation import llop +from rpython.rtyper.extregistry import ExtRegistryEntry +from rpython.rtyper.annlowlevel import llhelper if sys.byteorder == 'little': @@ -16,37 +28,164 @@ _le64toh = rarithmetic.byteswap -# Initialize the values of the secret seed: two 64-bit constants. -# CPython picks a new seed every time 'python' starts. PyPy cannot do -# that as easily because many details may rely on getting the same hash -# value before and after translation. We can, however, pick a random -# seed once per translation, which should already be quite good. -# -# XXX no, it is not: e.g. all Ubuntu installations of the same Ubuntu -# would get the same seed. That's not good enough. +class Seed: + k0l = k1l = r_uint64(0) +seed = Seed() -@not_rpython -def select_random_seed(): - global k0, k1 # note: the globals k0, k1 are already byte-swapped - v0, v1 = struct.unpack("QQ", os.urandom(16)) - k0 = r_uint64(v0) - k1 = r_uint64(v1) -select_random_seed() +def _decode64(s): + return (r_uint64(ord(s[0])) | + r_uint64(ord(s[1])) << 8 | + r_uint64(ord(s[2])) << 16 | + r_uint64(ord(s[3])) << 24 | + r_uint64(ord(s[4])) << 32 | + r_uint64(ord(s[5])) << 40 | + r_uint64(ord(s[6])) << 48 | + r_uint64(ord(s[7])) << 56) + +def select_random_seed(s): + """'s' is a string of length 16""" + seed.k0l = _decode64(s) + seed.k1l = _decode64(s[8:16]) + + +random_ctx = rurandom.init_urandom() +strtoul = rffi.llexternal("strtoul", [rffi.CCHARP, rffi.CCHARPP, rffi.INT], + rffi.ULONG, save_err=rffi.RFFI_SAVE_ERRNO) + +env_var_name = "PYTHONHASHSEED" + +def initialize_from_env(): + # This uses the same algorithms as CPython 3.5. The environment + # variable we read also defaults to "PYTHONHASHSEED". If needed, + # a different RPython interpreter can patch the value of the + # global variable 'env_var_name', or just patch the whole + # initialize_from_env() function. + value = os.environ.get(env_var_name) + if value and value != "random": + with rffi.scoped_view_charp(value) as ptr: + with lltype.scoped_alloc(rffi.CCHARPP.TO, 1) as endptr: + endptr[0] = ptr + seed = strtoul(ptr, endptr, 10) + full = endptr[0][0] == '\x00' + seed = lltype.cast_primitive(lltype.Unsigned, seed) + if not full or seed > r_uint(4294967295) or ( + rposix.get_saved_errno() == errno.ERANGE and + seed == lltype.cast_primitive(lltype.Unsigned, + rffi.cast(rffi.ULONG, -1))): + os.write(2, + "%s must be \"random\" or an integer " + "in range [0; 4294967295]\n" % (env_var_name,)) + os._exit(1) + if not seed: + # disable the randomized hash + s = '\x00' * 16 + else: + s = lcg_urandom(seed) + else: + try: + s = rurandom.urandom(random_ctx, 16) + except Exception as e: + os.write(2, + "%s: failed to get random numbers to initialize Python\n" % + (str(e),)) + os._exit(1) + raise # makes the annotator happy + select_random_seed(s) + +def lcg_urandom(x): + s = '' + for index in range(16): + x *= 214013 + x += 2531011 + s += chr((x >> 16) & 0xff) + return s + + +_FUNC = lltype.Ptr(lltype.FuncType([], lltype.Void)) + +def enable_siphash24(): + """ + Enable the use of siphash-2-4 for all RPython strings and unicodes + in the translated program. You must call this function anywhere + from your interpreter (from a place that is annotated). Don't call + more than once. + """ + +class Entry(ExtRegistryEntry): + _about_ = enable_siphash24 + + def compute_result_annotation(self): + translator = self.bookkeeper.annotator.translator + if hasattr(translator, 'll_hash_string'): + assert translator.ll_hash_string == ll_hash_string_siphash24 + else: + translator.ll_hash_string = ll_hash_string_siphash24 + bk = self.bookkeeper + s_callable = bk.immutablevalue(initialize_from_env) + key = (enable_siphash24,) + bk.emulate_pbc_call(key, s_callable, []) + + def specialize_call(self, hop): + hop.exception_cannot_occur() + bk = hop.rtyper.annotator.bookkeeper + s_callable = bk.immutablevalue(initialize_from_env) + r_callable = hop.rtyper.getrepr(s_callable) + ll_init = r_callable.get_unique_llfn().value + bk.annotator.translator._call_at_startup.append(ll_init) + + +@rgc.no_collect +def ll_hash_string_siphash24(ll_s): + """Called indirectly from lltypesystem/rstr.py, by redirection from + objectmodel.ll_string_hash(). + """ + from rpython.rlib.rarithmetic import intmask + + # This function is entirely @rgc.no_collect. + length = len(ll_s.chars) + if lltype.typeOf(ll_s).TO.chars.OF == lltype.Char: # regular STR + addr = rstr._get_raw_buf_string(rstr.STR, ll_s, 0) + else: + # NOTE: a latin-1 unicode string must have the same hash as the + # corresponding byte string. If the unicode is all within + # 0-255, then we need to allocate a byte buffer and copy the + # latin-1 encoding in it manually. Note also that we give a + # different hash result than CPython on ucs4 platforms, for + # unicode strings where CPython uses 2 bytes per character. + for i in range(length): + if ord(ll_s.chars[i]) > 0xFF: + addr = rstr._get_raw_buf_unicode(rstr.UNICODE, ll_s, 0) + length *= rffi.sizeof(rstr.UNICODE.chars.OF) + break + else: + p = lltype.malloc(rffi.CCHARP.TO, length, flavor='raw') + i = 0 + while i < length: + p[i] = chr(ord(ll_s.chars[i])) + i += 1 + x = _siphash24(llmemory.cast_ptr_to_adr(p), length) + lltype.free(p, flavor='raw') + return intmask(x) + x = _siphash24(addr, length) + keepalive_until_here(ll_s) + return intmask(x) + @contextmanager def choosen_seed(new_k0, new_k1, test_misaligned_path=False): - global k0, k1, misaligned_is_fine - old = k0, k1, misaligned_is_fine - k0 = _le64toh(r_uint64(new_k0)) - k1 = _le64toh(r_uint64(new_k1)) + """For tests.""" + global misaligned_is_fine + old = seed.k0l, seed.k1l, misaligned_is_fine + seed.k0l = _le64toh(r_uint64(new_k0)) + seed.k1l = _le64toh(r_uint64(new_k1)) if test_misaligned_path: misaligned_is_fine = False yield - k0, k1, misaligned_is_fine = old + seed.k0l, seed.k1l, misaligned_is_fine = old def get_current_seed(): - return _le64toh(k0), _le64toh(k1) + return _le64toh(seed.k0l), _le64toh(seed.k1l) magic0 = r_uint64(0x736f6d6570736575) @@ -77,20 +216,21 @@ return v0, v1, v2, v3 -@no_collect -def siphash24(addr_in, size): +@rgc.no_collect +def _siphash24(addr_in, size): """Takes an address pointer and a size. Returns the hash as a r_uint64, which can then be casted to the expected type.""" - direct = (misaligned_is_fine or - (rffi.cast(lltype.Signed, addr_in) & 7) == 0) - + k0 = seed.k0l + k1 = seed.k1l b = r_uint64(size) << 56 v0 = k0 ^ magic0 v1 = k1 ^ magic1 v2 = k0 ^ magic2 v3 = k1 ^ magic3 + direct = (misaligned_is_fine or + (rffi.cast(lltype.Signed, addr_in) & 7) == 0) index = 0 if direct: while size >= 8: @@ -113,7 +253,6 @@ r_uint64(llop.raw_load(rffi.UCHAR, addr_in, index + 6)) << 48 | r_uint64(llop.raw_load(rffi.UCHAR, addr_in, index + 7)) << 56 ) - mi = _le64toh(mi) size -= 8 index += 8 v3 ^= mi @@ -158,3 +297,13 @@ v0, v1, v2, v3 = _double_round(v0, v1, v2, v3) return (v0 ^ v1) ^ (v2 ^ v3) + + +@jit.dont_look_inside +def siphash24(s): + """'s' is a normal string. Returns its siphash-2-4 as a r_uint64. + Don't forget to cast the result to a regular integer if needed, + e.g. with rarithmetic.intmask(). + """ + with rffi.scoped_nonmovingbuffer(s) as p: + return _siphash24(llmemory.cast_ptr_to_adr(p), len(s)) diff --git a/rpython/rlib/rurandom.py b/rpython/rlib/rurandom.py --- a/rpython/rlib/rurandom.py +++ b/rpython/rlib/rurandom.py @@ -57,6 +57,8 @@ immortal=True, zero=True) def urandom(context, n, signal_checker=None): + # NOTE: no dictionaries here: rsiphash24 calls this to + # initialize the random seed of string hashes provider = context[0] if not provider: # This handle is never explicitly released. The operating @@ -139,6 +141,8 @@ def urandom(context, n, signal_checker=None): "Read n bytes from /dev/urandom." + # NOTE: no dictionaries here: rsiphash24 calls this to + # initialize the random seed of string hashes result = [] if SYS_getrandom is not None: n = _getrandom(n, result, signal_checker) diff --git a/rpython/rlib/test/test_objectmodel.py b/rpython/rlib/test/test_objectmodel.py --- a/rpython/rlib/test/test_objectmodel.py +++ b/rpython/rlib/test/test_objectmodel.py @@ -166,7 +166,6 @@ foo = Foo() h = compute_hash(foo) assert h == object.__hash__(foo) - assert h == getattr(foo, '__precomputed_identity_hash') assert compute_hash(None) == 0 def test_compute_hash_float(): @@ -182,7 +181,6 @@ foo = Foo() h = compute_identity_hash(foo) assert h == object.__hash__(foo) - assert h == getattr(foo, '__precomputed_identity_hash') def test_compute_unique_id(): from rpython.rlib.rarithmetic import intmask @@ -410,36 +408,6 @@ res = self.interpret(f, []) assert res == 1 - def test_compute_hash_across_translation(self): - class Foo(object): - pass - q = Foo() - - def f(i): - assert compute_hash(None) == 0 - assert compute_hash(i) == h_42 - assert compute_hash(i + 1.0) == h_43_dot_0 - assert compute_hash((i + 3) / 6.0) == h_7_dot_5 - assert compute_hash("Hello" + str(i)) == h_Hello42 - if i == 42: - p = None - else: - p = Foo() - assert compute_hash(p) == h_None - assert compute_hash(("world", None, i, 7.5)) == h_tuple - assert compute_hash(q) == h_q - return i * 2 - h_42 = compute_hash(42) - h_43_dot_0 = compute_hash(43.0) - h_7_dot_5 = compute_hash(7.5) - h_Hello42 = compute_hash("Hello42") - h_None = compute_hash(None) - h_tuple = compute_hash(("world", None, 42, 7.5)) - h_q = compute_hash(q) - - res = self.interpret(f, [42]) - assert res == 84 - def test_fetch_translated_config(self): assert fetch_translated_config() is None def f(): diff --git a/rpython/rlib/test/test_rsiphash.py b/rpython/rlib/test/test_rsiphash.py --- a/rpython/rlib/test/test_rsiphash.py +++ b/rpython/rlib/test/test_rsiphash.py @@ -1,5 +1,10 @@ -from rpython.rlib.rsiphash import siphash24, choosen_seed +import os +from rpython.rlib.rsiphash import siphash24, _siphash24, choosen_seed +from rpython.rlib.rsiphash import initialize_from_env, enable_siphash24 +from rpython.rlib.objectmodel import compute_hash +from rpython.rlib.rarithmetic import intmask from rpython.rtyper.lltypesystem import llmemory, rffi +from rpython.translator.c.test.test_genc import compile CASES = [ @@ -28,13 +33,11 @@ ] def check(s): - p = rffi.str2charp(s) q = rffi.str2charp('?' + s) with choosen_seed(0x8a9f065a358479f4, 0x11cb1e9ee7f40e1f, test_misaligned_path=True): - x = siphash24(llmemory.cast_ptr_to_adr(p), len(s)) - y = siphash24(llmemory.cast_ptr_to_adr(rffi.ptradd(q, 1)), len(s)) - rffi.free_charp(p) + x = siphash24(s) + y = _siphash24(llmemory.cast_ptr_to_adr(rffi.ptradd(q, 1)), len(s)) rffi.free_charp(q) assert x == y return x @@ -42,3 +45,104 @@ def test_siphash24(): for expected, string in CASES: assert check(string) == expected + +def test_fix_seed(): + old_val = os.environ.get('PYTHONHASHSEED', None) + try: + os.environ['PYTHONHASHSEED'] = '0' + initialize_from_env() + assert siphash24("foo") == 15988776847138518036 + # value checked with CPython 3.5 + + os.environ['PYTHONHASHSEED'] = '4000000000' + initialize_from_env() + assert siphash24("foo") == 13829150778707464258 + # value checked with CPython 3.5 + + for env in ['', 'random']: + os.environ['PYTHONHASHSEED'] = env + initialize_from_env() + hash1 = siphash24("foo") + initialize_from_env() + hash2 = siphash24("foo") + assert hash1 != hash2 # extremely unlikely + finally: + if old_val is None: + del os.environ['PYTHONHASHSEED'] + else: + os.environ['PYTHONHASHSEED'] = old_val + +def test_translated(): + d1 = {"foo": 123} + d2 = {u"foo": 456, u"\u1234\u5678": 789} + class G: + pass + g = G() + g.v1 = d1.copy() + g.v2 = d2.copy() + + def fetch(n): + if n == 0: return d1.get("foo", -1) + if n == 1: return g.v1.get("foo", -1) + if n == 2: return compute_hash("foo") + if n == 3: return d2.get(u"foo", -1) + if n == 4: return g.v2.get(u"foo", -1) + if n == 5: return compute_hash(u"foo") + if n == 6: return d2.get(u"\u1234\u5678", -1) + if n == 7: return g.v2.get(u"\u1234\u5678", -1) + if n == 8: return compute_hash(u"\u1234\u5678") + assert 0 + + def entrypoint(n): + enable_siphash24() + g.v1["bar"] = -2 + g.v2[u"bar"] = -2 + if n >= 0: # get items one by one, because otherwise it may + # be the case that one line influences the next + return str(fetch(n)) + else: + # ...except in random mode, because we want all results + # to be computed with the same seed + return ' '.join([str(fetch(n)) for n in range(9)]) + + fn = compile(entrypoint, [int]) + + def getall(): + return [int(fn(i)) for i in range(9)] + + old_val = os.environ.get('PYTHONHASHSEED', None) + try: + os.environ['PYTHONHASHSEED'] = '0' + s1 = getall() + assert s1[:8] == [ + 123, 123, intmask(15988776847138518036), + 456, 456, intmask(15988776847138518036), + 789, 789] + assert s1[8] in [intmask(17593683438421985039), # ucs2 mode + intmask(94801584261658677)] # ucs4 mode + + os.environ['PYTHONHASHSEED'] = '3987654321' + s1 = getall() + assert s1[:8] == [ + 123, 123, intmask(5890804383681474441), + 456, 456, intmask(5890804383681474441), + 789, 789] + assert s1[8] in [intmask(4192582507672183374), # ucs2 mode + intmask(7179255293164649778)] # ucs4 mode + + for env in ['', 'random']: + os.environ['PYTHONHASHSEED'] = env + s1 = map(int, fn(-1).split()) + s2 = map(int, fn(-1).split()) + assert s1[0:2]+s1[3:5]+s1[6:8] == [123, 123, 456, 456, 789, 789] + assert s1[2] == s1[5] + assert s2[0:2]+s2[3:5]+s2[6:8] == [123, 123, 456, 456, 789, 789] + assert s2[2] == s2[5] + # + assert len(set([s1[2], s2[2], s1[8], s2[8]])) == 4 + + finally: + if old_val is None: + del os.environ['PYTHONHASHSEED'] + else: + os.environ['PYTHONHASHSEED'] = old_val diff --git a/rpython/rlib/test/test_rweakvaldict.py b/rpython/rlib/test/test_rweakvaldict.py --- a/rpython/rlib/test/test_rweakvaldict.py +++ b/rpython/rlib/test/test_rweakvaldict.py @@ -1,8 +1,9 @@ import py from rpython.annotator.model import UnionError -from rpython.rlib import rgc +from rpython.rlib import rgc, nonconst from rpython.rlib.rweakref import RWeakValueDictionary from rpython.rtyper.test.test_llinterp import interpret +from rpython.translator.c.test.test_genc import compile class X(object): pass @@ -213,3 +214,33 @@ assert d.get(keys[3]) is None f() interpret(f, []) + +def test_translation_prebuilt_1(): + class K: + pass + d = RWeakValueDictionary(K, X) + k1 = K(); k2 = K() + x1 = X(); x2 = X() + d.set(k1, x1) + d.set(k2, x2) + def f(): + assert d.get(k1) is x1 + assert d.get(k2) is x2 + f() + fc = compile(f, [], gcpolicy="boehm", rweakref=True) + fc() + +def _test_translation_prebuilt_2(): + from rpython.rlib import rsiphash + d = RWeakValueDictionary(str, X) + k1 = "key1"; k2 = "key2" + x1 = X(); x2 = X() + d.set(k1, x1) + d.set(k2, x2) + def f(): + rsiphash.enable_siphash24() + i = nonconst.NonConstant(1) + assert d.get("key%d" % (i,)) is x1 + assert d.get("key%d" % (i+1,)) is x2 + fc = compile(f, [], gcpolicy="boehm", rweakref=True) + fc() diff --git a/rpython/rtyper/lltypesystem/lloperation.py b/rpython/rtyper/lltypesystem/lloperation.py --- a/rpython/rtyper/lltypesystem/lloperation.py +++ b/rpython/rtyper/lltypesystem/lloperation.py @@ -539,6 +539,7 @@ 'decode_arg_def': LLOp(canraise=(Exception,)), 'getslice': LLOp(canraise=(Exception,)), 'check_and_clear_exc': LLOp(), + 'call_at_startup': LLOp(canrun=True), 'threadlocalref_addr': LLOp(), # get (or make) addr of tl 'threadlocalref_get': LLOp(sideeffects=False), # read field (no check) diff --git a/rpython/rtyper/lltypesystem/lltype.py b/rpython/rtyper/lltypesystem/lltype.py --- a/rpython/rtyper/lltypesystem/lltype.py +++ b/rpython/rtyper/lltypesystem/lltype.py @@ -1380,20 +1380,11 @@ return callb(*args) raise TypeError("%r instance is not a function" % (self._T,)) - def _identityhash(self, cache=True): + def _identityhash(self): p = normalizeptr(self) - try: - return p._obj._hash_cache_ - except AttributeError: - assert self._T._gckind == 'gc' - assert self # not for NULL - result = hash(p._obj) - if cache: - try: - p._obj._hash_cache_ = result - except AttributeError: - pass - return result + assert self._T._gckind == 'gc' + assert self # not for NULL + return hash(p._obj) class _ptr(_abstract_ptr): __slots__ = ('_TYPE', @@ -1759,7 +1750,7 @@ class _struct(_parentable): _kind = "structure" - __slots__ = ('_hash_cache_', '_compilation_info') + __slots__ = ('_compilation_info',) def __new__(self, TYPE, n=None, initialization=None, parent=None, parentindex=None): @@ -2442,24 +2433,6 @@ return SomeInteger() -def identityhash_nocache(p): - """Version of identityhash() to use from backends that don't care about - caching.""" - assert p - return p._identityhash(cache=False) - -def init_identity_hash(p, value): - """For a prebuilt object p, initialize its hash value to 'value'.""" - assert isinstance(typeOf(p), Ptr) - p = normalizeptr(p) - if not p: - raise ValueError("cannot change hash(NULL)!") - if hasattr(p._obj, '_hash_cache_'): - raise ValueError("the hash of %r was already computed" % (p,)) - if typeOf(p).TO._is_varsize(): - raise ValueError("init_identity_hash(): not for varsized types") - p._obj._hash_cache_ = intmask(value) - def isCompatibleType(TYPE1, TYPE2): return TYPE1._is_compatible(TYPE2) diff --git a/rpython/rtyper/lltypesystem/opimpl.py b/rpython/rtyper/lltypesystem/opimpl.py --- a/rpython/rtyper/lltypesystem/opimpl.py +++ b/rpython/rtyper/lltypesystem/opimpl.py @@ -742,6 +742,9 @@ def op_gc_move_out_of_nursery(obj): return obj +def op_call_at_startup(init_func): + pass # do nothing + # ____________________________________________________________ def get_op_impl(opname): diff --git a/rpython/rtyper/lltypesystem/rdict.py b/rpython/rtyper/lltypesystem/rdict.py --- a/rpython/rtyper/lltypesystem/rdict.py +++ b/rpython/rtyper/lltypesystem/rdict.py @@ -236,21 +236,14 @@ if self.r_rdict_hashfn.lowleveltype != lltype.Void: l_fn = self.r_rdict_hashfn.convert_const(dictobj.key_hash) l_dict.fnkeyhash = l_fn - - for dictkeycontainer, dictvalue in dictobj._dict.items(): - llkey = r_key.convert_const(dictkeycontainer.key) - llvalue = r_value.convert_const(dictvalue) - ll_dict_insertclean(l_dict, llkey, llvalue, - dictkeycontainer.hash) - return l_dict - + any_items = dictobj._dict.items() else: - for dictkey, dictvalue in dictobj.items(): - llkey = r_key.convert_const(dictkey) - llvalue = r_value.convert_const(dictvalue) - ll_dict_insertclean(l_dict, llkey, llvalue, - l_dict.keyhash(llkey)) - return l_dict + any_items = dictobj.items() + if any_items: + raise TyperError("found a prebuilt, explicitly non-ordered, " + "non-empty dict. it would require additional" + " support to rehash it at program start-up") + return l_dict def rtype_len(self, hop): v_dict, = hop.inputargs(self) diff --git a/rpython/rtyper/lltypesystem/rffi.py b/rpython/rtyper/lltypesystem/rffi.py --- a/rpython/rtyper/lltypesystem/rffi.py +++ b/rpython/rtyper/lltypesystem/rffi.py @@ -829,7 +829,7 @@ return assert_str0(charpsize2str(cp, size)) charp2str._annenforceargs_ = [lltype.SomePtr(TYPEP)] - # str -> char*, bool, bool + # str -> char*, flag # Can't inline this because of the raw address manipulation. @jit.dont_look_inside def get_nonmovingbuffer(data): diff --git a/rpython/rtyper/lltypesystem/rordereddict.py b/rpython/rtyper/lltypesystem/rordereddict.py --- a/rpython/rtyper/lltypesystem/rordereddict.py +++ b/rpython/rtyper/lltypesystem/rordereddict.py @@ -5,7 +5,7 @@ from rpython.rtyper.lltypesystem import lltype, llmemory, rffi from rpython.rlib import objectmodel, jit, rgc, types from rpython.rlib.signature import signature -from rpython.rlib.objectmodel import specialize, likely +from rpython.rlib.objectmodel import specialize, likely, not_rpython from rpython.rtyper.debug import ll_assert from rpython.rlib.rarithmetic import r_uint, intmask from rpython.rtyper import rmodel @@ -46,20 +46,23 @@ @jit.look_inside_iff(lambda d, key, hash, flag: jit.isvirtual(d)) @jit.oopspec('ordereddict.lookup(d, key, hash, flag)') def ll_call_lookup_function(d, key, hash, flag): - fun = d.lookup_function_no & FUNC_MASK - # This likely() here forces gcc to compile the check for fun == FUNC_BYTE - # first. Otherwise, this is a regular switch and gcc (at least 4.7) - # compiles this as a series of checks, with the FUNC_BYTE case last. - # It sounds minor, but it is worth 6-7% on a PyPy microbenchmark. - if likely(fun == FUNC_BYTE): - return ll_dict_lookup(d, key, hash, flag, TYPE_BYTE) - elif fun == FUNC_SHORT: - return ll_dict_lookup(d, key, hash, flag, TYPE_SHORT) - elif IS_64BIT and fun == FUNC_INT: - return ll_dict_lookup(d, key, hash, flag, TYPE_INT) - elif fun == FUNC_LONG: - return ll_dict_lookup(d, key, hash, flag, TYPE_LONG) - assert False + while True: + fun = d.lookup_function_no & FUNC_MASK + # This likely() here forces gcc to compile the check for fun==FUNC_BYTE + # first. Otherwise, this is a regular switch and gcc (at least 4.7) + # compiles this as a series of checks, with the FUNC_BYTE case last. + # It sounds minor, but it is worth 6-7% on a PyPy microbenchmark. + if likely(fun == FUNC_BYTE): + return ll_dict_lookup(d, key, hash, flag, TYPE_BYTE) + elif fun == FUNC_SHORT: + return ll_dict_lookup(d, key, hash, flag, TYPE_SHORT) + elif IS_64BIT and fun == FUNC_INT: + return ll_dict_lookup(d, key, hash, flag, TYPE_INT) + elif fun == FUNC_LONG: + return ll_dict_lookup(d, key, hash, flag, TYPE_LONG) + else: + ll_dict_create_initial_index(d) + # then, retry def get_ll_dict(DICTKEY, DICTVALUE, get_custom_eq_hash=None, DICT=None, ll_fasthash_function=None, ll_hash_function=None, @@ -235,6 +238,7 @@ self.setup() self.setup_final() l_dict = ll_newdict_size(self.DICT, len(dictobj)) + ll_no_initial_index(l_dict) self.dict_cache[key] = l_dict r_key = self.key_repr if r_key.lowleveltype == llmemory.Address: @@ -252,16 +256,14 @@ for dictkeycontainer, dictvalue in dictobj._dict.items(): llkey = r_key.convert_const(dictkeycontainer.key) llvalue = r_value.convert_const(dictvalue) - _ll_dict_insertclean(l_dict, llkey, llvalue, - dictkeycontainer.hash) + _ll_dict_insert_no_index(l_dict, llkey, llvalue) return l_dict else: for dictkey, dictvalue in dictobj.items(): llkey = r_key.convert_const(dictkey) llvalue = r_value.convert_const(dictvalue) - _ll_dict_insertclean(l_dict, llkey, llvalue, - l_dict.keyhash(llkey)) + _ll_dict_insert_no_index(l_dict, llkey, llvalue) return l_dict def rtype_len(self, hop): @@ -336,11 +338,15 @@ return DictIteratorRepr(self, "items").newiter(hop) def rtype_method_iterkeys_with_hash(self, hop): - hop.exception_cannot_occur() + v_dic, = hop.inputargs(self) + hop.exception_is_here() + hop.gendirectcall(ll_ensure_indexes, v_dic) return DictIteratorRepr(self, "keys_with_hash").newiter(hop) def rtype_method_iteritems_with_hash(self, hop): - hop.exception_cannot_occur() + v_dic, = hop.inputargs(self) + hop.exception_is_here() + hop.gendirectcall(ll_ensure_indexes, v_dic) return DictIteratorRepr(self, "items_with_hash").newiter(hop) def rtype_method_clear(self, hop): @@ -458,17 +464,30 @@ IS_64BIT = sys.maxint != 2 ** 31 - 1 -FUNC_SHIFT = 2 -FUNC_MASK = 0x03 # two bits if IS_64BIT: - FUNC_BYTE, FUNC_SHORT, FUNC_INT, FUNC_LONG = range(4) + FUNC_SHIFT = 3 + FUNC_MASK = 0x07 # three bits + FUNC_BYTE, FUNC_SHORT, FUNC_INT, FUNC_LONG, FUNC_MUST_REINDEX = range(5) else: - FUNC_BYTE, FUNC_SHORT, FUNC_LONG = range(3) + FUNC_SHIFT = 2 + FUNC_MASK = 0x03 # two bits + FUNC_BYTE, FUNC_SHORT, FUNC_LONG, FUNC_MUST_REINDEX = range(4) TYPE_BYTE = rffi.UCHAR TYPE_SHORT = rffi.USHORT TYPE_INT = rffi.UINT TYPE_LONG = lltype.Unsigned +def ll_no_initial_index(d): + # Used when making new empty dicts, and when translating prebuilt dicts. + # Remove the index completely. A dictionary must always have an + # index unless it is freshly created or freshly translated. Most + # dict operations start with ll_call_lookup_function(), which will + # recompute the hashes and create the index. + ll_assert(d.num_live_items == d.num_ever_used_items, + "ll_no_initial_index(): dict already in use") + d.lookup_function_no = FUNC_MUST_REINDEX + d.indexes = lltype.nullptr(llmemory.GCREF.TO) + def ll_malloc_indexes_and_choose_lookup(d, n): # keep in sync with ll_clear_indexes() below if n <= 256: @@ -508,6 +527,7 @@ @jit.dont_look_inside def ll_call_insert_clean_function(d, hash, i): + assert i >= 0 fun = d.lookup_function_no & FUNC_MASK if fun == FUNC_BYTE: ll_dict_store_clean(d, hash, i, TYPE_BYTE) @@ -518,6 +538,8 @@ elif fun == FUNC_LONG: ll_dict_store_clean(d, hash, i, TYPE_LONG) else: + # can't be still FUNC_MUST_REINDEX here + ll_assert(False, "ll_call_insert_clean_function(): invalid lookup_fun") assert False def ll_call_delete_by_entry_index(d, hash, i): @@ -531,6 +553,8 @@ elif fun == FUNC_LONG: ll_dict_delete_by_entry_index(d, hash, i, TYPE_LONG) else: + # can't be still FUNC_MUST_REINDEX here + ll_assert(False, "ll_call_delete_by_entry_index(): invalid lookup_fun") assert False def ll_valid_from_flag(entries, i): @@ -648,15 +672,14 @@ ll_dict_reindex(d, _ll_len_of_d_indexes(d)) _ll_dict_rescue._dont_inline_ = True -def _ll_dict_insertclean(d, key, value, hash): +@not_rpython +def _ll_dict_insert_no_index(d, key, value): # never translated ENTRY = lltype.typeOf(d.entries).TO.OF - ll_call_insert_clean_function(d, hash, d.num_ever_used_items) entry = d.entries[d.num_ever_used_items] entry.key = key entry.value = value - if hasattr(ENTRY, 'f_hash'): - entry.f_hash = hash + # note that f_hash is left uninitialized in prebuilt dicts if hasattr(ENTRY, 'f_valid'): entry.f_valid = True d.num_ever_used_items += 1 @@ -811,12 +834,13 @@ # also possible that there are more dead items immediately behind the # last one, we reclaim all the dead items at the end of the ordereditem # at the same point. - i = d.num_ever_used_items - 2 - while i >= 0 and not d.entries.valid(i): + i = index + while True: i -= 1 - j = i + 1 - assert j >= 0 - d.num_ever_used_items = j + assert i >= 0 + if d.entries.valid(i): # must be at least one + break + d.num_ever_used_items = i + 1 # If the dictionary is at least 87.5% dead items, then consider shrinking # it. @@ -844,6 +868,50 @@ else: ll_dict_reindex(d, new_size) +def ll_ensure_indexes(d): + num = d.lookup_function_no + if num == FUNC_MUST_REINDEX: + ll_dict_create_initial_index(d) + else: + ll_assert((num & FUNC_MASK) != FUNC_MUST_REINDEX, + "bad combination in lookup_function_no") + +def ll_dict_create_initial_index(d): + """Create the initial index for a dictionary. The common case is + that 'd' is empty. The uncommon case is that it is a prebuilt + dictionary frozen by translation, in which case we must rehash all + entries. The common case must be seen by the JIT. + """ + if d.num_live_items == 0: + ll_malloc_indexes_and_choose_lookup(d, DICT_INITSIZE) + d.resize_counter = DICT_INITSIZE * 2 + else: + ll_dict_rehash_after_translation(d) + +@jit.dont_look_inside +def ll_dict_rehash_after_translation(d): + assert d.num_live_items == d.num_ever_used_items + assert not d.indexes + # + # recompute all hashes. Needed if they are stored in d.entries, + # but do it anyway: otherwise, e.g. a string-keyed dictionary + # won't have a fasthash on its strings if their hash is still + # uncomputed. + ENTRY = lltype.typeOf(d.entries).TO.OF + for i in range(d.num_ever_used_items): + assert d.entries.valid(i) + d_entry = d.entries[i] + h = d.keyhash(d_entry.key) + if hasattr(ENTRY, 'f_hash'): + d_entry.f_hash = h + #else: purely for the side-effect it can have on d_entry.key + # + # Use the smallest acceptable size for ll_dict_reindex + new_size = DICT_INITSIZE + while new_size * 2 - d.num_live_items * 3 <= 0: + new_size *= 2 + ll_dict_reindex(d, new_size) + def ll_dict_reindex(d, new_size): if bool(d.indexes) and _ll_len_of_d_indexes(d) == new_size: ll_clear_indexes(d, new_size) # hack: we can reuse the same array @@ -857,12 +925,33 @@ entries = d.entries i = 0 ibound = d.num_ever_used_items - while i < ibound: - if entries.valid(i): - hash = entries.hash(i) - ll_call_insert_clean_function(d, hash, i) - i += 1 - #old_entries.delete() XXXX! + # + # Write four loops, moving the check for the value of 'fun' out of + # the loops. A small speed-up over ll_call_insert_clean_function(). + fun = d.lookup_function_no # == lookup_function_no & FUNC_MASK + if fun == FUNC_BYTE: + while i < ibound: + if entries.valid(i): + ll_dict_store_clean(d, entries.hash(i), i, TYPE_BYTE) + i += 1 + elif fun == FUNC_SHORT: + while i < ibound: + if entries.valid(i): + ll_dict_store_clean(d, entries.hash(i), i, TYPE_SHORT) + i += 1 + elif IS_64BIT and fun == FUNC_INT: + while i < ibound: + if entries.valid(i): + ll_dict_store_clean(d, entries.hash(i), i, TYPE_INT) + i += 1 + elif fun == FUNC_LONG: + while i < ibound: + if entries.valid(i): + ll_dict_store_clean(d, entries.hash(i), i, TYPE_LONG) + i += 1 + else: + assert False + # ------- a port of CPython's dictobject.c's lookdict implementation ------- PERTURB_SHIFT = 5 @@ -1013,10 +1102,11 @@ def ll_newdict(DICT): d = DICT.allocate() d.entries = _ll_empty_array(DICT) - ll_malloc_indexes_and_choose_lookup(d, DICT_INITSIZE) + # Don't allocate an 'indexes' for empty dict. It seems a typical + # program contains tons of empty dicts, so this might be a memory win. d.num_live_items = 0 d.num_ever_used_items = 0 - d.resize_counter = DICT_INITSIZE * 2 + ll_no_initial_index(d) return d OrderedDictRepr.ll_newdict = staticmethod(ll_newdict) @@ -1101,6 +1191,10 @@ # as soon as we do something like ll_dict_reindex(). if index == (dict.lookup_function_no >> FUNC_SHIFT): dict.lookup_function_no += (1 << FUNC_SHIFT) + # note that we can't have modified a FUNC_MUST_REINDEX + # dict here because such dicts have no invalid entries + ll_assert((dict.lookup_function_no & FUNC_MASK) != + FUNC_MUST_REINDEX, "bad combination in _ll_dictnext") index = nextindex # clear the reference to the dict and prevent restarts iter.dict = lltype.nullptr(lltype.typeOf(iter).TO.dict.TO) @@ -1146,6 +1240,8 @@ return dict.entries[index].value def ll_dict_copy(dict): + ll_ensure_indexes(dict) + DICT = lltype.typeOf(dict).TO newdict = DICT.allocate() newdict.entries = DICT.entries.TO.allocate(len(dict.entries)) @@ -1180,6 +1276,10 @@ DICT = lltype.typeOf(d).TO old_entries = d.entries d.entries = _ll_empty_array(DICT) + # note: we can't remove the index here, because it is possible that + # crazy Python code calls d.clear() from the method __eq__() called + # from ll_dict_lookup(d). Instead, stick to the rule that once a + # dictionary has got an index, it will always have one. ll_malloc_indexes_and_choose_lookup(d, DICT_INITSIZE) d.num_live_items = 0 d.num_ever_used_items = 0 @@ -1190,6 +1290,7 @@ def ll_dict_update(dic1, dic2): if dic1 == dic2: return + ll_ensure_indexes(dic2) # needed for entries.hash() below ll_prepare_dict_update(dic1, dic2.num_live_items) i = 0 while i < dic2.num_ever_used_items: @@ -1216,6 +1317,7 @@ # the case where dict.update() actually has a lot of collisions. # If num_extra is much greater than d.num_live_items the conditional_call # will trigger anyway, which is really the goal. + ll_ensure_indexes(d) x = num_extra - d.num_live_items jit.conditional_call(d.resize_counter <= x * 3, _ll_dict_resize_to, d, num_extra) @@ -1275,6 +1377,7 @@ if dic.num_live_items == 0: raise KeyError + ll_ensure_indexes(dic) entries = dic.entries # find the last entry. It's unclear if the loop below is still diff --git a/rpython/rtyper/lltypesystem/rstr.py b/rpython/rtyper/lltypesystem/rstr.py --- a/rpython/rtyper/lltypesystem/rstr.py +++ b/rpython/rtyper/lltypesystem/rstr.py @@ -1,9 +1,9 @@ from weakref import WeakValueDictionary from rpython.annotator import model as annmodel -from rpython.rlib import jit, types +from rpython.rlib import jit, types, objectmodel from rpython.rlib.objectmodel import (malloc_zero_filled, we_are_translated, - ll_hash_string, keepalive_until_here, specialize, enforceargs) + ll_hash_string, keepalive_until_here, specialize, enforceargs, dont_inline) from rpython.rlib.signature import signature from rpython.rlib.rarithmetic import ovfcheck from rpython.rtyper.error import TyperError @@ -383,6 +383,8 @@ return 0 @staticmethod + @dont_inline + @jit.dont_look_inside def _ll_strhash(s): # unlike CPython, there is no reason to avoid to return -1 # but our malloc initializes the memory to zero, so we use zero as the @@ -400,6 +402,7 @@ @staticmethod def ll_strfasthash(s): + ll_assert(s.hash != 0, "ll_strfasthash: hash==0") return s.hash # assumes that the hash is already computed @staticmethod @@ -1258,7 +1261,8 @@ 'gethash': LLHelpers.ll_strhash, 'length': LLHelpers.ll_length, 'find': LLHelpers.ll_find, - 'rfind': LLHelpers.ll_rfind})) + 'rfind': LLHelpers.ll_rfind}, + hints={'remove_hash': True})) UNICODE.become(GcStruct('rpy_unicode', ('hash', Signed), ('chars', Array(UniChar, hints={'immutable': True})), adtmeths={'malloc' : staticAdtMethod(mallocunicode), @@ -1266,8 +1270,8 @@ 'copy_contents' : staticAdtMethod(copy_unicode_contents), 'copy_contents_from_str' : staticAdtMethod(copy_unicode_contents), 'gethash': LLHelpers.ll_strhash, - 'length': LLHelpers.ll_length} - )) + 'length': LLHelpers.ll_length}, + hints={'remove_hash': True})) # TODO: make the public interface of the rstr module cleaner diff --git a/rpython/rtyper/lltypesystem/test/test_lltype.py b/rpython/rtyper/lltypesystem/test/test_lltype.py --- a/rpython/rtyper/lltypesystem/test/test_lltype.py +++ b/rpython/rtyper/lltypesystem/test/test_lltype.py @@ -749,22 +749,10 @@ assert hash3 == identityhash(s3) assert hash3 == identityhash(s3.super) assert hash3 == identityhash(s3.super.super) - py.test.raises(ValueError, init_identity_hash, s3, hash3^1) - py.test.raises(ValueError, init_identity_hash, s3.super, hash3^4) - py.test.raises(ValueError, init_identity_hash, s3.super.super, hash3^9) - - s3 = malloc(S3) - init_identity_hash(s3.super, -123) - assert -123 == identityhash(s3) - assert -123 == identityhash(s3.super) - assert -123 == identityhash(s3.super.super) - py.test.raises(ValueError, init_identity_hash, s3, 4313) - py.test.raises(ValueError, init_identity_hash, s3.super, 0) - py.test.raises(ValueError, init_identity_hash, s3.super.super, -124) from rpython.rtyper.lltypesystem import llmemory p3 = cast_opaque_ptr(llmemory.GCREF, s3) - assert -123 == identityhash(p3) + assert hash3 == identityhash(p3) A = GcArray(Signed) a = malloc(A, 3) diff --git a/rpython/rtyper/rclass.py b/rpython/rtyper/rclass.py --- a/rpython/rtyper/rclass.py +++ b/rpython/rtyper/rclass.py @@ -170,7 +170,6 @@ ('subclassrange_max', Signed), ('rtti', Ptr(RuntimeTypeInfo)), ('name', Ptr(rstr.STR)), - ('hash', Signed), ('instantiate', Ptr(FuncType([], OBJECTPTR))), hints={'immutable': True})) # non-gc case @@ -338,7 +337,6 @@ def fill_vtable_root(self, vtable): """Initialize the head of the vtable.""" - vtable.hash = hash(self) # initialize the 'subclassrange_*' and 'name' fields if self.classdef is not None: #vtable.parenttypeptr = self.rbase.getvtable() @@ -785,7 +783,6 @@ def initialize_prebuilt_instance(self, value, classdef, result): # must fill in the hash cache before the other ones # (see test_circular_hash_initialization) - self.initialize_prebuilt_hash(value, result) self._initialize_data_flattenrec(self.initialize_prebuilt_data, value, classdef, result) @@ -943,11 +940,6 @@ rclass = getclassrepr(self.rtyper, classdef) result.typeptr = rclass.getvtable() - def initialize_prebuilt_hash(self, value, result): - llattrvalue = getattr(value, '__precomputed_identity_hash', None) - if llattrvalue is not None: - lltype.init_identity_hash(result, llattrvalue) - def getfieldrepr(self, attr): """Return the repr used for the given attribute.""" if attr in self.fields: _______________________________________________ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit