It occurred to me that quadtree compression would probably work really well for screenshots and possibly even sequences of screenshots. So I hacked together something one night to do quadtree compression of small arrays of values in Python.
#!/usr/bin/python """Quadtree image compression library. Currently implements only quadtree image analysis, in a very crude form. Still to do: - speed it up. Current time is 20 times too slow, even on a constant image of 4096 items: 38% in qtreenode, 30% in halves, 19% in qtreenodeindex, 8.0% in sum, and 4.4% in quarters. All five of these have to stop. - replace self.known_nodes[n] with some data structure that looks like a list, but uses a hash for .index() so it's fast - maybe use Numeric so slices don't involve copying - some kind of string representation of the data structure. Maybe pickle+zlib to start with? (Initial results not promising; on image 'abcdefghabijefkl', zlib gets 24 bytes, pickle(qtree) gets 216 bytes, zlib(pickle(qtree)) gets 131 bytes, zlib(pickle([])) gets 14 bytes. But maybe real images will have more redundancy.) - decompression - some program that reads and compresses, say, an xwd file; compare compression ratio to png's - video codec (multiple encoded images sharing nodes) - screenshot movie capture - reimplement in C for some particular pixel type for speed and space efficiency - implement video codec display with Xlib or SDL (SDL_BlitSurface, yay) """ import math, zlib, pickle, time def ok(a, b): assert a == b, (a, b) def sum(things): rv = things[0] for ii in things[1:]: rv += ii return rv def halves(pixels): nrows = int(math.sqrt(len(pixels)/2)) ncols = nrows * 2 assert len(pixels) == nrows * ncols return [ sum([pixels[ncols*ii:ncols*ii + ncols/2] for ii in range(nrows)]), sum([pixels[ncols*ii + ncols/2:ncols*(ii+1)] for ii in range(nrows)]), ] def quarters(pixels): midpoint = len(pixels)/2 return halves(pixels[:midpoint]) + halves(pixels[midpoint:]) def test_quarters(): ok(quarters([0, 1, 2, 3]), [[0], [1], [2], [3]]) ok(quarters([3, 2, 1, 0]), [[3], [2], [1], [0]]) ok(quarters([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, ]), [[0, 1, 4, 5], [2, 3, 6, 7], [8, 9, 12, 13], [10, 11, 14, 15]]) ok(quarters(range(64)), [[0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27], [4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31], [32, 33, 34, 35, 40, 41, 42, 43, 48, 49, 50, 51, 56, 57, 58, 59], [36, 37, 38, 39, 44, 45, 46, 47, 52, 53, 54, 55, 60, 61, 62, 63]]) def test_indexable_list(): pass class qtreer: def __init__(self): self.known_nodes = {} def qtreenode(self, pixels): length = len(pixels) if length == 1: return pixels[0] return tuple([self.qtreenodeindex(self.qtreenode(x), length/4) for x in quarters(pixels)]) def qtreenodeindex(self, node, length): if not self.known_nodes.has_key(length): self.known_nodes[length] = [] kn = self.known_nodes[length] try: return kn.index(node) except ValueError: kn.append(node) return len(kn) - 1 def qtree(self, pixels): self.known_nodes = {} top = self.qtreenode(pixels) topid = self.qtreenodeindex(top, len(pixels)) assert topid == 0, topid keys = self.known_nodes.keys() keys.sort() return [list(self.known_nodes[key]) for key in keys] def qtree(img): return qtreer().qtree(img) def test(): test_quarters() test_indexable_list() ok(qtree([0, 1, 2, 3]), [ [0, 1, 2, 3], [(0, 1, 2, 3)] ]) ok(qtree(['a', 'b', 'c', 'c']), [ ['a', 'b', 'c'], [(0, 1, 2, 2)] ]) ok(qtree('abcd'), qtree(['a', 'b', 'c', 'd'])) ok(qtree('abcd' + 'efgh' + 'abij' + 'efkl'), [ ['a', 'b', 'e', 'f', 'c', 'd', 'g', 'h', 'i', 'j', 'k', 'l'], [(0, 1, 2, 3), (4, 5, 6, 7), (8, 9, 10, 11)], [(0, 1, 0, 2)]]) test_speed() def test_speed(): start = time.time() tree = qtree([3] * 4096) # encoding in C is perhaps 100x faster # to do 20 megapixels per second in C, then, requires doing 200 # kilopixels in Python # so 4 kilopixels should be 20 ms duration = time.time() - start ok(tree, [[3]] + [[(0, 0, 0, 0)]] * 6) assert duration < 0.020, duration # mystr = 'abcdefghabijefkl' # dump = pickle.dumps(qtree(mystr)) # print len(zlib.compress(mystr)), len(dump), repr(dump), len(zlib.compress(dump)), len(zlib.compress(pickle.dumps([]))) test()