Author: Carl Friedrich Bolz-Tereick <cfb...@gmx.de> Branch: Changeset: r93386:a00ca61351ba Date: 2017-12-12 18:27 +0100 http://bitbucket.org/pypy/pypy/changeset/a00ca61351ba/
Log: merge rdict-fast-hash: make it possible to declare that the hash and eq functions used in an objectmodel.r_dict are "simple", which means that they will not change the dict, and that the hash function is fast enough so that caching the hash is not necessary. diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst --- a/pypy/doc/whatsnew-head.rst +++ b/pypy/doc/whatsnew-head.rst @@ -36,3 +36,6 @@ .. branch: win32-vcvars +.. branch rdict-fast-hash + +Make it possible to declare that the hash function of an r_dict is fast in RPython. diff --git a/pypy/module/_pypyjson/interp_decoder.py b/pypy/module/_pypyjson/interp_decoder.py --- a/pypy/module/_pypyjson/interp_decoder.py +++ b/pypy/module/_pypyjson/interp_decoder.py @@ -71,7 +71,7 @@ self.ll_chars = rffi.str2charp(s) self.end_ptr = lltype.malloc(rffi.CCHARPP.TO, 1, flavor='raw') self.pos = 0 - self.cache = r_dict(slice_eq, slice_hash) + self.cache = r_dict(slice_eq, slice_hash, simple_hash_eq=True) def close(self): rffi.free_charp(self.ll_chars) diff --git a/rpython/annotator/bookkeeper.py b/rpython/annotator/bookkeeper.py --- a/rpython/annotator/bookkeeper.py +++ b/rpython/annotator/bookkeeper.py @@ -194,13 +194,14 @@ listdef.generalize_range_step(flags['range_step']) return SomeList(listdef) - def getdictdef(self, is_r_dict=False, force_non_null=False): + def getdictdef(self, is_r_dict=False, force_non_null=False, simple_hash_eq=False): """Get the DictDef associated with the current position.""" try: dictdef = self.dictdefs[self.position_key] except KeyError: dictdef = DictDef(self, is_r_dict=is_r_dict, - force_non_null=force_non_null) + force_non_null=force_non_null, + simple_hash_eq=simple_hash_eq) self.dictdefs[self.position_key] = dictdef return dictdef diff --git a/rpython/annotator/builtin.py b/rpython/annotator/builtin.py --- a/rpython/annotator/builtin.py +++ b/rpython/annotator/builtin.py @@ -237,22 +237,30 @@ return SomeInstance(clsdef) @analyzer_for(rpython.rlib.objectmodel.r_dict) -def robjmodel_r_dict(s_eqfn, s_hashfn, s_force_non_null=None): +def robjmodel_r_dict(s_eqfn, s_hashfn, s_force_non_null=None, s_simple_hash_eq=None): + return _r_dict_helper(SomeDict, s_eqfn, s_hashfn, s_force_non_null, s_simple_hash_eq) + +@analyzer_for(rpython.rlib.objectmodel.r_ordereddict) +def robjmodel_r_ordereddict(s_eqfn, s_hashfn, s_force_non_null=None, s_simple_hash_eq=None): + return _r_dict_helper(SomeOrderedDict, s_eqfn, s_hashfn, + s_force_non_null, s_simple_hash_eq) + +def _r_dict_helper(cls, s_eqfn, s_hashfn, s_force_non_null, s_simple_hash_eq): if s_force_non_null is None: force_non_null = False else: assert s_force_non_null.is_constant() force_non_null = s_force_non_null.const + if s_simple_hash_eq is None: + simple_hash_eq = False + else: + assert s_simple_hash_eq.is_constant() + simple_hash_eq = s_simple_hash_eq.const dictdef = getbookkeeper().getdictdef(is_r_dict=True, - force_non_null=force_non_null) + force_non_null=force_non_null, + simple_hash_eq=simple_hash_eq) dictdef.dictkey.update_rdict_annotations(s_eqfn, s_hashfn) - return SomeDict(dictdef) - -@analyzer_for(rpython.rlib.objectmodel.r_ordereddict) -def robjmodel_r_ordereddict(s_eqfn, s_hashfn): - dictdef = getbookkeeper().getdictdef(is_r_dict=True) - dictdef.dictkey.update_rdict_annotations(s_eqfn, s_hashfn) - return SomeOrderedDict(dictdef) + return cls(dictdef) @analyzer_for(rpython.rlib.objectmodel.hlinvoke) def robjmodel_hlinvoke(s_repr, s_llcallable, *args_s): diff --git a/rpython/annotator/dictdef.py b/rpython/annotator/dictdef.py --- a/rpython/annotator/dictdef.py +++ b/rpython/annotator/dictdef.py @@ -81,12 +81,14 @@ def __init__(self, bookkeeper, s_key = s_ImpossibleValue, s_value = s_ImpossibleValue, is_r_dict = False, - force_non_null = False): + force_non_null = False, + simple_hash_eq = False): self.dictkey = DictKey(bookkeeper, s_key, is_r_dict) self.dictkey.itemof[self] = True self.dictvalue = DictValue(bookkeeper, s_value) self.dictvalue.itemof[self] = True self.force_non_null = force_non_null + self.simple_hash_eq = simple_hash_eq def read_key(self, position_key): self.dictkey.read_locations.add(position_key) diff --git a/rpython/jit/metainterp/typesystem.py b/rpython/jit/metainterp/typesystem.py --- a/rpython/jit/metainterp/typesystem.py +++ b/rpython/jit/metainterp/typesystem.py @@ -106,11 +106,11 @@ # It is an r_dict on lltype. Two copies, to avoid conflicts with # the value type. Note that NULL is not allowed as a key. def new_ref_dict(self): - return r_dict(rd_eq, rd_hash) + return r_dict(rd_eq, rd_hash, simple_hash_eq=True) def new_ref_dict_2(self): - return r_dict(rd_eq, rd_hash) + return r_dict(rd_eq, rd_hash, simple_hash_eq=True) def new_ref_dict_3(self): - return r_dict(rd_eq, rd_hash) + return r_dict(rd_eq, rd_hash, simple_hash_eq=True) def cast_vtable_to_hashable(self, cpu, ptr): adr = llmemory.cast_ptr_to_adr(ptr) diff --git a/rpython/rlib/objectmodel.py b/rpython/rlib/objectmodel.py --- a/rpython/rlib/objectmodel.py +++ b/rpython/rlib/objectmodel.py @@ -748,11 +748,19 @@ def _newdict(self): return {} - def __init__(self, key_eq, key_hash, force_non_null=False): + def __init__(self, key_eq, key_hash, force_non_null=False, simple_hash_eq=False): + """ force_non_null=True means that the key can never be None (even if + the annotator things it could be) + + simple_hash_eq=True means that the hash function is very fast, meaning it's + efficient enough that the dict does not have to store the hash per key. + It also implies that neither the hash nor the eq function will mutate + the dictionary. """ self._dict = self._newdict() self.key_eq = key_eq self.key_hash = key_hash self.force_non_null = force_non_null + self.simple_hash_eq = simple_hash_eq def __getitem__(self, key): return self._dict[_r_dictkey(self, key)] diff --git a/rpython/rlib/test/test_objectmodel.py b/rpython/rlib/test/test_objectmodel.py --- a/rpython/rlib/test/test_objectmodel.py +++ b/rpython/rlib/test/test_objectmodel.py @@ -330,6 +330,13 @@ res = self.interpret(g, [3]) assert res == 77 + def test_r_dict_fast_functions(self): + def fn(): + d1 = r_dict(strange_key_eq, strange_key_hash, simple_hash_eq=True) + return play_with_r_dict(d1) + res = self.interpret(fn, []) + assert res + def test_prepare_dict_update(self): def g(n): d = {} diff --git a/rpython/rtyper/lltypesystem/rdict.py b/rpython/rtyper/lltypesystem/rdict.py --- a/rpython/rtyper/lltypesystem/rdict.py +++ b/rpython/rtyper/lltypesystem/rdict.py @@ -42,7 +42,8 @@ class DictRepr(AbstractDictRepr): def __init__(self, rtyper, key_repr, value_repr, dictkey, dictvalue, - custom_eq_hash=None, force_non_null=False): + custom_eq_hash=None, force_non_null=False, fast_hash=False): + # fast_hash is ignored (only implemented in rordereddict.py) self.rtyper = rtyper self.DICT = lltype.GcForwardReference() self.lowleveltype = lltype.Ptr(self.DICT) diff --git a/rpython/rtyper/lltypesystem/rordereddict.py b/rpython/rtyper/lltypesystem/rordereddict.py --- a/rpython/rtyper/lltypesystem/rordereddict.py +++ b/rpython/rtyper/lltypesystem/rordereddict.py @@ -66,7 +66,7 @@ def get_ll_dict(DICTKEY, DICTVALUE, get_custom_eq_hash=None, DICT=None, ll_fasthash_function=None, ll_hash_function=None, - ll_eq_function=None, method_cache={}, + ll_eq_function=None, method_cache={}, simple_hash_eq=False, dummykeyobj=None, dummyvalueobj=None, rtyper=None): # get the actual DICT type. if DICT is None, it's created, otherwise # forward reference is becoming DICT @@ -114,11 +114,14 @@ # * the value entryfields.append(("value", DICTVALUE)) - if ll_fasthash_function is None: + if simple_hash_eq: + assert get_custom_eq_hash is not None + entrymeths['entry_hash'] = ll_hash_custom_fast + elif ll_fasthash_function is None: entryfields.append(("f_hash", lltype.Signed)) - entrymeths['hash'] = ll_hash_from_cache + entrymeths['entry_hash'] = ll_hash_from_cache else: - entrymeths['hash'] = ll_hash_recomputed + entrymeths['entry_hash'] = ll_hash_recomputed entrymeths['fasthashfn'] = ll_fasthash_function # Build the lltype data structures @@ -140,7 +143,7 @@ 'keyeq': ll_keyeq_custom, 'r_rdict_eqfn': r_rdict_eqfn, 'r_rdict_hashfn': r_rdict_hashfn, - 'paranoia': True, + 'paranoia': not simple_hash_eq, } else: # figure out which functions must be used to hash and compare @@ -167,13 +170,14 @@ class OrderedDictRepr(AbstractDictRepr): def __init__(self, rtyper, key_repr, value_repr, dictkey, dictvalue, - custom_eq_hash=None, force_non_null=False): + custom_eq_hash=None, force_non_null=False, simple_hash_eq=False): #assert not force_non_null self.rtyper = rtyper self.finalized = False self.DICT = lltype.GcForwardReference() self.lowleveltype = lltype.Ptr(self.DICT) self.custom_eq_hash = custom_eq_hash is not None + self.simple_hash_eq = simple_hash_eq if not isinstance(key_repr, rmodel.Repr): # not computed yet, done by setup() assert callable(key_repr) self._key_repr_computer = key_repr @@ -211,6 +215,7 @@ self.r_rdict_eqfn, self.r_rdict_hashfn = ( self._custom_eq_hash_repr()) kwd['get_custom_eq_hash'] = self._custom_eq_hash_repr + kwd['simple_hash_eq'] = self.simple_hash_eq else: kwd['ll_hash_function'] = self.key_repr.get_ll_hash_function() kwd['ll_eq_function'] = self.key_repr.get_ll_eq_function() @@ -600,15 +605,21 @@ dummy = ENTRIES.dummy_obj.ll_dummy_value entries[i].value = dummy -@signature(types.any(), types.int(), returns=types.any()) -def ll_hash_from_cache(entries, i): +@signature(types.any(), types.any(), types.int(), returns=types.any()) +def ll_hash_from_cache(entries, d, i): return entries[i].f_hash -@signature(types.any(), types.int(), returns=types.any()) -def ll_hash_recomputed(entries, i): +@signature(types.any(), types.any(), types.int(), returns=types.any()) +def ll_hash_recomputed(entries, d, i): ENTRIES = lltype.typeOf(entries).TO return ENTRIES.fasthashfn(entries[i].key) +@signature(types.any(), types.any(), types.int(), returns=types.any()) +def ll_hash_custom_fast(entries, d, i): + DICT = lltype.typeOf(d).TO + key = entries[i].key + return objectmodel.hlinvoke(DICT.r_rdict_hashfn, d.fnkeyhash, key) + def ll_keyhash_custom(d, key): DICT = lltype.typeOf(d).TO return objectmodel.hlinvoke(DICT.r_rdict_hashfn, d.fnkeyhash, key) @@ -962,22 +973,22 @@ if fun == FUNC_BYTE: while i < ibound: if entries.valid(i): - ll_dict_store_clean(d, entries.hash(i), i, TYPE_BYTE) + ll_dict_store_clean(d, entries.entry_hash(d, i), i, TYPE_BYTE) i += 1 elif fun == FUNC_SHORT: while i < ibound: if entries.valid(i): - ll_dict_store_clean(d, entries.hash(i), i, TYPE_SHORT) + ll_dict_store_clean(d, entries.entry_hash(d, i), i, TYPE_SHORT) i += 1 elif IS_64BIT and fun == FUNC_INT: while i < ibound: if entries.valid(i): - ll_dict_store_clean(d, entries.hash(i), i, TYPE_INT) + ll_dict_store_clean(d, entries.entry_hash(d, i), i, TYPE_INT) i += 1 elif fun == FUNC_LONG: while i < ibound: if entries.valid(i): - ll_dict_store_clean(d, entries.hash(i), i, TYPE_LONG) + ll_dict_store_clean(d, entries.entry_hash(d, i), i, TYPE_LONG) i += 1 else: assert False @@ -1015,7 +1026,7 @@ checkingkey = entries[index - VALID_OFFSET].key if direct_compare and checkingkey == key: return index - VALID_OFFSET # found the entry - if d.keyeq is not None and entries.hash(index - VALID_OFFSET) == hash: + if d.keyeq is not None and entries.entry_hash(d, index - VALID_OFFSET) == hash: # correct hash, maybe the key is e.g. a different pointer to # an equal object found = d.keyeq(checkingkey, key) @@ -1056,7 +1067,7 @@ checkingkey = entries[index - VALID_OFFSET].key if direct_compare and checkingkey == key: return index - VALID_OFFSET # found the entry - if d.keyeq is not None and entries.hash(index - VALID_OFFSET) == hash: + if d.keyeq is not None and entries.entry_hash(d, index - VALID_OFFSET) == hash: # correct hash, maybe the key is e.g. a different pointer to # an equal object found = d.keyeq(checkingkey, key) @@ -1305,14 +1316,14 @@ def ll_dict_update(dic1, dic2): if dic1 == dic2: return - ll_ensure_indexes(dic2) # needed for entries.hash() below + ll_ensure_indexes(dic2) # needed for entries.entry_hash() below ll_prepare_dict_update(dic1, dic2.num_live_items) i = 0 while i < dic2.num_ever_used_items: entries = dic2.entries if entries.valid(i): entry = entries[i] - hash = entries.hash(i) + hash = entries.entry_hash(dic2, i) key = entry.key value = entry.value index = dic1.lookup_function(dic1, key, hash, FLAG_STORE) @@ -1413,7 +1424,7 @@ r = lltype.malloc(ELEM.TO) r.item0 = recast(ELEM.TO.item0, entry.key) r.item1 = recast(ELEM.TO.item1, entry.value) - _ll_dict_del(dic, dic.entries.hash(i), i) + _ll_dict_del(dic, dic.entries.entry_hash(dic, i), i) return r def ll_dict_pop(dic, key): diff --git a/rpython/rtyper/rbuiltin.py b/rpython/rtyper/rbuiltin.py --- a/rpython/rtyper/rbuiltin.py +++ b/rpython/rtyper/rbuiltin.py @@ -717,9 +717,9 @@ @typer_for(OrderedDict) @typer_for(objectmodel.r_dict) @typer_for(objectmodel.r_ordereddict) -def rtype_dict_constructor(hop, i_force_non_null=None): - # 'i_force_non_null' is ignored here; if it has any effect, it - # has already been applied to 'hop.r_result' +def rtype_dict_constructor(hop, i_force_non_null=None, i_simple_hash_eq=None): + # 'i_force_non_null' and 'i_simple_hash_eq' are ignored here; if they have any + # effect, it has already been applied to 'hop.r_result' hop.exception_cannot_occur() r_dict = hop.r_result cDICT = hop.inputconst(lltype.Void, r_dict.DICT) diff --git a/rpython/rtyper/rdict.py b/rpython/rtyper/rdict.py --- a/rpython/rtyper/rdict.py +++ b/rpython/rtyper/rdict.py @@ -15,6 +15,7 @@ s_key = dictkey.s_value s_value = dictvalue.s_value force_non_null = self.dictdef.force_non_null + simple_hash_eq = self.dictdef.simple_hash_eq if dictkey.custom_eq_hash: custom_eq_hash = lambda: (rtyper.getrepr(dictkey.s_rdict_eqfn), rtyper.getrepr(dictkey.s_rdict_hashfn)) @@ -22,7 +23,7 @@ custom_eq_hash = None return self.get_dict_repr()(rtyper, lambda: rtyper.getrepr(s_key), lambda: rtyper.getrepr(s_value), dictkey, dictvalue, - custom_eq_hash, force_non_null) + custom_eq_hash, force_non_null, simple_hash_eq) def rtyper_makekey(self): self.dictdef.dictkey .dont_change_any_more = True @@ -89,7 +90,7 @@ resulttype=ENTRIES) # call the correct variant_*() method method = getattr(self, 'variant_' + self.variant) - return method(hop, ENTRIES, v_entries, v_index) + return method(hop, ENTRIES, v_entries, v_dict, v_index) def get_tuple_result(self, hop, items_v): # this allocates the tuple for the result, directly in the function @@ -109,7 +110,7 @@ hop.genop('setfield', [v_result, c_item, v_item]) return v_result - def variant_keys(self, hop, ENTRIES, v_entries, v_index): + def variant_keys(self, hop, ENTRIES, v_entries, v_dict, v_index): KEY = ENTRIES.TO.OF.key c_key = hop.inputconst(lltype.Void, 'key') v_key = hop.genop('getinteriorfield', [v_entries, v_index, c_key], @@ -118,30 +119,30 @@ variant_reversed = variant_keys - def variant_values(self, hop, ENTRIES, v_entries, v_index): + def variant_values(self, hop, ENTRIES, v_entries, v_dict, v_index): VALUE = ENTRIES.TO.OF.value c_value = hop.inputconst(lltype.Void, 'value') v_value = hop.genop('getinteriorfield', [v_entries,v_index,c_value], resulttype=VALUE) return self.r_dict.recast_value(hop.llops, v_value) - def variant_items(self, hop, ENTRIES, v_entries, v_index): - v_key = self.variant_keys(hop, ENTRIES, v_entries, v_index) - v_value = self.variant_values(hop, ENTRIES, v_entries, v_index) + def variant_items(self, hop, ENTRIES, v_entries, v_dict, v_index): + v_key = self.variant_keys(hop, ENTRIES, v_entries, v_dict, v_index) + v_value = self.variant_values(hop, ENTRIES, v_entries, v_dict, v_index) return self.get_tuple_result(hop, (v_key, v_value)) - def variant_hashes(self, hop, ENTRIES, v_entries, v_index): + def variant_hashes(self, hop, ENTRIES, v_entries, v_dict, v_index): # there is not really a variant 'hashes', but this method is # convenient for the following variants - return hop.gendirectcall(ENTRIES.TO.hash, v_entries, v_index) + return hop.gendirectcall(ENTRIES.TO.entry_hash, v_entries, v_dict, v_index) - def variant_keys_with_hash(self, hop, ENTRIES, v_entries, v_index): - v_key = self.variant_keys(hop, ENTRIES, v_entries, v_index) - v_hash = self.variant_hashes(hop, ENTRIES, v_entries, v_index) + def variant_keys_with_hash(self, hop, ENTRIES, v_entries, v_dict, v_index): + v_key = self.variant_keys(hop, ENTRIES, v_entries, v_dict, v_index) + v_hash = self.variant_hashes(hop, ENTRIES, v_entries, v_dict, v_index) return self.get_tuple_result(hop, (v_key, v_hash)) - def variant_items_with_hash(self, hop, ENTRIES, v_entries, v_index): - v_key = self.variant_keys(hop, ENTRIES, v_entries, v_index) - v_value = self.variant_values(hop, ENTRIES, v_entries, v_index) - v_hash = self.variant_hashes(hop, ENTRIES, v_entries, v_index) + def variant_items_with_hash(self, hop, ENTRIES, v_entries, v_dict, v_index): + v_key = self.variant_keys(hop, ENTRIES, v_entries, v_dict, v_index) + v_value = self.variant_values(hop, ENTRIES, v_entries, v_dict, v_index) + v_hash = self.variant_hashes(hop, ENTRIES, v_entries, v_dict, v_index) return self.get_tuple_result(hop, (v_key, v_value, v_hash)) diff --git a/rpython/rtyper/test/test_rdict.py b/rpython/rtyper/test/test_rdict.py --- a/rpython/rtyper/test/test_rdict.py +++ b/rpython/rtyper/test/test_rdict.py @@ -538,6 +538,25 @@ r_dict = rtyper.getrepr(s) assert not hasattr(r_dict.lowleveltype.TO.entries.TO.OF, "f_hash") + def test_r_dict_can_be_fast(self): + def myeq(n, m): + return n == m + def myhash(n): + return ~n + def f(): + d = self.new_r_dict(myeq, myhash, simple_hash_eq=True) + d[5] = 7 + d[12] = 19 + return d + + t = TranslationContext() + s = t.buildannotator().build_types(f, []) + rtyper = t.buildrtyper() + rtyper.specialize() + + r_dict = rtyper.getrepr(s) + assert not hasattr(r_dict.lowleveltype.TO.entries.TO.OF, "f_hash") + def test_tuple_dict(self): def f(i): d = self.newdict() @@ -1000,8 +1019,8 @@ return {} @staticmethod - def new_r_dict(myeq, myhash): - return r_dict(myeq, myhash) + def new_r_dict(myeq, myhash, force_non_null=False, simple_hash_eq=False): + return r_dict(myeq, myhash, force_non_null=force_non_null, simple_hash_eq=simple_hash_eq) def test_two_dicts_with_different_value_types(self): def func(i): diff --git a/rpython/rtyper/test/test_rordereddict.py b/rpython/rtyper/test/test_rordereddict.py --- a/rpython/rtyper/test/test_rordereddict.py +++ b/rpython/rtyper/test/test_rordereddict.py @@ -386,8 +386,10 @@ return OrderedDict() @staticmethod - def new_r_dict(myeq, myhash): - return objectmodel.r_ordereddict(myeq, myhash) + def new_r_dict(myeq, myhash, force_non_null=False, simple_hash_eq=False): + return objectmodel.r_ordereddict( + myeq, myhash, force_non_null=force_non_null, + simple_hash_eq=simple_hash_eq) def test_two_dicts_with_different_value_types(self): def func(i): _______________________________________________ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit