On Sun, Mar 15, 2020 at 12:28 PM Alex Hall <alex.moj...@gmail.com> wrote:
> How about http://www.grantjenks.com/docs/sortedcontainers/sorteddict.html > ? > For my part, I didn't look for that, as I was having fun playing around with the idea. But yes, that looks like it would fit the bill. And at a glance, they were smarter than me :-) -- no need to keep the underlying dict in sorted order, just find the key in the list, then use the regular O(1) dict access. But since I'm having fun, enclosed is an (incomplete) implementation of a a SortedMap. This one keeps a list of keys and values in sorted order, then can access things in O(logN). Insert is O(N), as it's using a regular old list internally. But that is all in C, and not too bad in practice. With the OP's example, it's slower than the basic dict method for creating, but much faster for finding items. (not formally timed) But back the python_ideas topic: It might be good to have a set of real, tree-based data structures -- they are the best way to go for some use cases, and are non-trivial to implement well. (and yes, starting out at a third party package, maybe one that's already out there, would be the way to go) -CHB -- Christopher Barker, PhD Python Language Consulting - Teaching - Scientific Software Development - Desktop GUI and Web Development - wxPython, numpy, scipy, Cython
import bisect from random import randint N1 = 100_000 # N1 > N2 > N3 N2 = N1 // 10 N3 = N1 // 100 class SortedMap: """ a mapping that keeps the data in sorted order, for faster retreval of "nth" items note: not complete, but it has the core methods """ def __init__(self): """ for now, can only initialize an empty one """ # keys and values kept as in-sync lists # can't put them in tuples in s alist, as the # values may not be comparible for use in bisect self._keys = [] self._values = [] def items(self): return zip(self.keys, self.items) def keys(self): return iter(self._keys) def values(self): return iter(self._values) def __getitem__(self, key): """ get an item """ keys = self._keys idx = bisect.bisect_left(keys, key) if idx != len(keys) and keys[idx] == key: return self._values[idx] else: raise KeyError(repr(key)) def __setitem__(self, key, value): """ add an item, in sorted order in the keys """ # find the insertion point idx = bisect.bisect(self._keys, key) # put it in both lists: self._keys.insert(idx, key) self._values.insert(idx, value) def nth_smallest_key(self, n): """ return the nth smallest key """ return self._keys[n] def nth_smallest_value(self, n): return self._values[n] def nth_smallest_item(self, n): return (self._keys[n], self._values[n]) def test_sorted_map_preserve_items(): sd = SortedMap() sd[1] = "mary" sd[3] = "fred" sd[2] = "bob" print(sd._keys) print(sd._values) # make sure it acts like a regular dict() assert sd[1] == "mary" assert sd[3] == "fred" assert sd[2] == "bob" def test_sorted_dict_preserve_sort_order(): sd = SortedMap() sd[1] = "mary" sd[3] = "fred" sd[2] = "bob" # make sure the keys are in order assert list(sd.keys()) == sorted(sd.keys()) def test_nth_smallest_key(): sd = SortedMap() for i in range(20): sd[i] = i + 2 assert sd.nth_smallest_key(2) == 2 assert sd.nth_smallest_key(6) == 6 def test_nth_smallest_item(): sd = SortedMap() for i in range(20): sd[i] = i + 2 assert sd.nth_smallest_item(2) == (2, 4) assert sd.nth_smallest_item(6) == (6, 8) def test_nth_smallest_value(): sd = SortedMap() for i in range(20): sd[i] = i + 2 assert sd.nth_smallest_value(2) == 4 assert sd.nth_smallest_value(6) == 8 def nth_smallest_key(n, m): return sorted(m.keys())[n] def main(): dist = lambda: randint(0, 2147483647) my_map = SortedMap() # fills a map with N1 random mappings of type (int, int) print("populating map with random data") for i in range(0, N1): my_map[dist()] = dist() # prints out the N3th smallest key and its value print("Getting an nth item") target_key = my_map.nth_smallest_key(N3) print("({}: {})".format(target_key, my_map[target_key])) # writes a new random mapping to the map # then prints out the N3th smallest key and its value if that key # has changed # 100000 times print("Testing adding one new item at a time") for i in range(N3): my_map[dist()] = dist() test_key = my_map.nth_smallest_key(N3) if target_key != test_key: target_key = test_key print(i, target_key, test_key) print("({}: {})".format(target_key, my_map[target_key])) # print an indicator every N3 iterations for comparison if i % N3 == 0: print("iteration: {}".format(i)) if __name__ == "__main__": main()
_______________________________________________ Python-ideas mailing list -- python-ideas@python.org To unsubscribe send an email to python-ideas-le...@python.org https://mail.python.org/mailman3/lists/python-ideas.python.org/ Message archived at https://mail.python.org/archives/list/python-ideas@python.org/message/7BZJOSELPYSOPSYTER5NORB253TRHE6D/ Code of Conduct: http://python.org/psf/codeofconduct/