Steven D'Aprano wrote:
> On Mon, 22 Mar 2010 22:05:40 -0700, Paul Rubin wrote:
>
>> Antoine Pitrou <solip...@pitrou.net> writes:
>>> "Orders of magnitude worse", in any case, sounds very exaggerated.
>>
>> The worst case can lose orders of magnitude if a lot of values hash to
>> the same bucket.
>
> Well, perhaps one order of magnitude.
>
>>>> for i in xrange(100):
> ...     n = 32*i+1
> ...     assert hash(2**n) == hash(2)
> ...
>>>> d1 = dict.fromkeys(xrange(100))
>>>> d2 = dict.fromkeys([2**(32*i+1) for i in xrange(100)])
>>>>
>>>> from timeit import Timer
>>>> setup = "from __main__ import d1, d2"
>>>> t1 = Timer("for k in d1.keys(): x = d1[k]", setup)
>>>> t2 = Timer("for k in d2.keys(): x = d2[k]", setup)
>>>>
>>>> min(t1.repeat(number=1000, repeat=5))
> 0.026707887649536133
>>>> min(t2.repeat(number=1000, repeat=5))
> 0.33103203773498535
But the ratio grows with the number of collisions:

$ python extrapolate.py
10
0.00120401382446
0.00753307342529
ratio: 6.25663366337
100
0.00542402267456
0.316139936447
ratio: 58.2851428571
1000
0.00553417205811
3.36690688133
ratio: 608.384930209

$ cat extrapolate.py
from timeit import Timer

class Item(object):
    def __init__(self, value, hash=None):
        self.value = value
        self.hash = value if hash is None else hash
    def __eq__(self, other):
        return self.value == other.value
    def __hash__(self):
        return self.hash

setup = "from __main__ import d"
bench = "for k in d: x = d[k]"

for n, number in (10, 100), (100, 100), (1000, 10):
    print n
    d1 = dict.fromkeys(Item(i) for i in xrange(n))
    d2 = dict.fromkeys(Item(i, 0) for i in xrange(n))
    ab = []
    for d in d1, d2:
        t = Timer(bench, setup)
        ab.append(min(t.repeat(number=number, repeat=3)))
        print ab[-1]
    print "ratio:", ab[1]/ab[0]
    print

See also http://xkcd.com/605/

Peter
--
http://mail.python.org/mailman/listinfo/python-list