i realized random writes may be as simple as applying the same
algorithm to the left and right edges of each unwritten region. this
means implementing region bounds. i got as far as below before
overwhelming dyskinesia.

https://github.com/xloem/flat_tree

append_indices.py

# indexes a balanced tree of past indices
# every write, publish a .copy() paired with data, then pass the
locator to .append() to index it

class append_indices(list):

    def __init__(self, degree = 2, initial_indices = []):
        super().__init__(*initial_indices)
        self.degree = degree
        self.leaf_count, self.size = 0, 0
        for leaf_count, size, value in self:
            self.leaf_count += leaf_count
            self.size += size

    def append(self, added_size, last_publish):
        self.leaf_count += 1
        self.size += added_size

        new_leaf_count = self.leaf_count
        new_size = self.size

        for idx, (branch_leaf_count, branch_size, branch_id) in enumerate(self):
            if branch_leaf_count * self.degree <= new_leaf_count:
                break
            new_leaf_count -= branch_leaf_count
            new_size -= branch_size
        else:
            idx = len(self)

        self[idx:] = [(new_leaf_count, new_size, last_publish)]

mix_indices.py

class append_indices(list):
    def __init__(self, degree = 2, initial_indices = []):
        super().__init__(*initial_indices)
        self.degree = degree
        self.degree = degree
        self.leaf_count, self.size = 0, 0
        for leaf_count, size, value in self:
            self.leaf_count += leaf_count
            self.size += size
        self.splice_queue = []

    def _insert(self, last_publish, *ordered_splices):
        # a reasonable next step is to provide for truncation appends,
where a tail of the data is replaced with new data

        # currently only performs 1 append
        assert len(ordered_splices) == 1

        for spliced_out_start, spliced_out_stop, spliced_in_size in
ordered_splices:
            #assert spliced_out_start == self.size # truncation
appends are drafted but untested
            assert spliced_out_stop == self.size

            self.leaf_count += 1
            self.size += spliced_in_size

            new_leaf_count = self.leaf_count
            new_offset = 0

            for idx, (branch_leaf_count, branch_offset, branch_size,
branch_id) in enumerate(self):
                if branch_leaf_count * self.degree <= new_leaf_count:
                    break
                new_leaf_count -= branch_leaf_count
                new_offset += branch_size
            else:
                idx = len(self)

            self[idx:] = ((new_leaf_count, new_offset,
spliced_out_start + spliced_in_size - new_offset, last_publish),)

    def append(self, last_publish, added_size):
        self._insert(last_publish, (self.size, self.size, added_size))

    def snap(self, data):
        return (self.copy(), data)

test.py

from mix_indices import append_indices

index = append_indices(degree=3)
stored_indices = {}

import random
data = [random.randbytes(random.randint(1,8)) for x in range(24)]

for chunk in data:
    id = index.leaf_count
    stored_indices[id] = (index.copy(), chunk)
    index.append(id, len(chunk))

def iterate(index, start_offset, end_offset):
    subendoffset = 0
    for subleafcount, suboffset, subsize, subid in index:
        subindex, subdata = stored_indices[subid]
        subendoffset += subsize
        if subendoffset > start_offset:
            data = b''.join(iterate(subindex, start_offset, end_offset))
            yield data
            if subendoffset > end_offset:
                return
            yield subdata
            start_offset = subendoffset

data = b''.join(data)
print(data)
cmp = b''.join(iterate(index, 0, index.size))
print(cmp)

assert data == cmp

print(index)

Reply via email to