https://github.com/karl3/rep

i _almost_ implemented dict. it works barebones but uses an
exponential growth ratio of 256.
i've almost reduced the exponential growth ratio to the smallest
needed power of 2, there's an arithmetic bug right now

i'll paste it

        if place != self._sentinel:
            collision = self._key(place)
            if collision != keyhash:
                assert idx ==
int.from_bytes(collision[:self._hashbytes], 'big') >> self._hashshift
#& self._hashmask
                spread = 0
                while True:
                    spread += 1
                    hashbits = self._hashbits + spread
                    expansion = 1 << spread
                    #hashmask = capacity - 1
                    hashbytes = (hashbits+7) >> 3
                    hashshift = (hashbytes << 3) - hashbits
                    idx = int.from_bytes(keyhash[:hashbytes], 'big')
>> hashshift #& hashmask
                    if idx != int.from_bytes(collision[:hashbytes],
'big') >> hashshift: #& hashmask:
                        break
                capacity = self._capacity * expansion
                assert 1 << hashbits == capacity
                expansionmask = spread - 1
                def content_generator():
                    for superidx, item in
enumerate(tqdm.tqdm(self.array, desc='growing hashtable',
leave=False)):
                        if item == self._sentinel:
                            yield from [self._sentinel] * expansion
                        else:
                            keyhash = self._key(item)
        if place != self._sentinel:
            collision = self._key(place)
            if collision != keyhash:
                assert idx ==
int.from_bytes(collision[:self._hashbytes], 'big') >> self._hashshift
#& self._hashmask
                spread = 0
                while True:
                    spread += 1
                    hashbits = self._hashbits + spread
                    expansion = 1 << spread
                    #hashmask = capacity - 1
                    hashbytes = (hashbits+7) >> 3
                    hashshift = (hashbytes << 3) - hashbits
                    idx = int.from_bytes(keyhash[:hashbytes], 'big')
>> hashshift #& hashmask
                    if idx != int.from_bytes(collision[:hashbytes],
'big') >> hashshift: #& hashmask:
                        break
                capacity = self._capacity * expansion
                assert 1 << hashbits == capacity
                expansionmask = spread - 1
                def content_generator():
                    for superidx, item in
enumerate(tqdm.tqdm(self.array, desc='growing hashtable',
leave=False)):
                        if item == self._sentinel:
                            yield from [self._sentinel] * expansion
                        else:
                            keyhash = self._key(item)
                            wholeidx =
int.from_bytes(keyhash[:hashbytes], 'big') #>> hashshift #& hashmask
                            assert superidx == wholeidx >> (hashbytes
* 8 - self._hashbits)
                            subidx = (wholeidx >> hashshift) & expansionmask
                            assert superidx * expansion + subidx ==
wholeidx >> hashshift
                            yield from [self._sentinel] * subidx
                            yield item
                            yield from [self._sentinel] * (expansion -
subidx - 1)
                self.array[:] =
IterableWithLength(content_generator(), self._capacity * expansion)
                        yield from [self._sentinel] * subidx
                            yield item
                            yield from [self._sentinel] * (expansion -
subidx - 1)
                self.array[:] =
IterableWithLength(content_generator(), self._capacity * expansion)

The arithmetic mistake is in this section:
                            wholeidx =
int.from_bytes(keyhash[:hashbytes], 'big') # misleading comment
removed
                            assert superidx == wholeidx >> (hashbytes
* 8 - self._hashbits)
                            subidx = (wholeidx >> hashshift) & expansionmask
                            assert superidx * expansion + subidx ==
wholeidx >> hashshift
The second assertion is raising. One of my bitwise arithmetics is wrong.

After reducing the exponential growth ratio I was going to implement
.update() for fast bulk updates of many items.

It's so useful to have basic data structures!

Reply via email to