It occurred to me that quadtree compression would probably work really
well for screenshots and possibly even sequences of screenshots.  So I
hacked together something one night to do quadtree compression of
small arrays of values in Python.

#!/usr/bin/python
"""Quadtree image compression library.

Currently implements only quadtree image analysis, in a very crude
form.  Still to do:

- speed it up.  Current time is 20 times too slow, even on a constant
  image of 4096 items: 38% in qtreenode, 30% in halves, 19% in
  qtreenodeindex, 8.0% in sum, and 4.4% in quarters.  All five of
  these have to stop.

- replace self.known_nodes[n] with some data structure that looks like
  a list, but uses a hash for .index() so it's fast

- maybe use Numeric so slices don't involve copying

- some kind of string representation of the data structure.  Maybe
  pickle+zlib to start with?  (Initial results not promising; on image
  'abcdefghabijefkl', zlib gets 24 bytes, pickle(qtree) gets 216
  bytes, zlib(pickle(qtree)) gets 131 bytes, zlib(pickle([])) gets 14
  bytes.  But maybe real images will have more redundancy.)

- decompression

- some program that reads and compresses, say, an xwd file; compare
  compression ratio to png's

- video codec (multiple encoded images sharing nodes)

- screenshot movie capture

- reimplement in C for some particular pixel type for speed and space
  efficiency

- implement video codec display with Xlib or SDL (SDL_BlitSurface, yay)

"""


import math, zlib, pickle, time

def ok(a, b):
    assert a == b, (a, b)

def sum(things):
    rv = things[0]
    for ii in things[1:]: rv += ii
    return rv

def halves(pixels):
    nrows = int(math.sqrt(len(pixels)/2))
    ncols = nrows * 2
    assert len(pixels) == nrows * ncols
    return [
        sum([pixels[ncols*ii:ncols*ii + ncols/2] for ii in range(nrows)]),
        sum([pixels[ncols*ii + ncols/2:ncols*(ii+1)] for ii in range(nrows)]),
        ]

def quarters(pixels):
    midpoint = len(pixels)/2
    return halves(pixels[:midpoint]) + halves(pixels[midpoint:])

def test_quarters():
    ok(quarters([0, 1, 2, 3]), [[0], [1], [2], [3]])
    ok(quarters([3, 2, 1, 0]), [[3], [2], [1], [0]])
    ok(quarters([
        0, 1, 2, 3,
        4, 5, 6, 7,
        8, 9, 10, 11,
        12, 13, 14, 15,
        ]), [[0, 1, 4, 5], [2, 3, 6, 7], [8, 9, 12, 13], [10, 11, 14, 15]])
    ok(quarters(range(64)),
       [[0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27],
        [4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31],
        [32, 33, 34, 35, 40, 41, 42, 43, 48, 49, 50, 51, 56, 57, 58, 59],
        [36, 37, 38, 39, 44, 45, 46, 47, 52, 53, 54, 55, 60, 61, 62, 63]])

def test_indexable_list():
    pass

class qtreer:
    def __init__(self): self.known_nodes = {}
    def qtreenode(self, pixels):
        length = len(pixels)
        if length == 1: return pixels[0]
        return tuple([self.qtreenodeindex(self.qtreenode(x), length/4)
                      for x in quarters(pixels)])
    def qtreenodeindex(self, node, length):
        if not self.known_nodes.has_key(length): self.known_nodes[length] = []
        kn = self.known_nodes[length]
        try: return kn.index(node)
        except ValueError:
            kn.append(node)
            return len(kn) - 1
    def qtree(self, pixels):
        self.known_nodes = {}
        top = self.qtreenode(pixels)
        topid = self.qtreenodeindex(top, len(pixels))
        assert topid == 0, topid
        keys = self.known_nodes.keys()
        keys.sort()
        return [list(self.known_nodes[key]) for key in keys]

def qtree(img):
    return qtreer().qtree(img)

def test():
    test_quarters()
    test_indexable_list()
    ok(qtree([0, 1, 2, 3]), [
        [0, 1, 2, 3], [(0, 1, 2, 3)]
        ])
    ok(qtree(['a', 'b', 'c', 'c']), [
        ['a', 'b', 'c'], [(0, 1, 2, 2)]
        ])
    ok(qtree('abcd'), qtree(['a', 'b', 'c', 'd']))
    ok(qtree('abcd' +
             'efgh' +
             'abij' +
             'efkl'), [
        ['a', 'b', 'e', 'f', 'c', 'd', 'g', 'h', 'i', 'j', 'k', 'l'],
        [(0, 1, 2, 3), (4, 5, 6, 7), (8, 9, 10, 11)],
        [(0, 1, 0, 2)]])
    test_speed()

def test_speed():
    start = time.time()
    tree = qtree([3] * 4096)
    # encoding in C is perhaps 100x faster
    # to do 20 megapixels per second in C, then, requires doing 200
    # kilopixels in Python
    # so 4 kilopixels should be 20 ms
    duration = time.time() - start
    ok(tree, [[3]] + [[(0, 0, 0, 0)]] * 6)
    assert duration < 0.020, duration

#     mystr = 'abcdefghabijefkl'
#     dump = pickle.dumps(qtree(mystr))
#     print len(zlib.compress(mystr)), len(dump), repr(dump), 
len(zlib.compress(dump)), len(zlib.compress(pickle.dumps([])))

test()


Reply via email to