I realized random writes may be as simple as applying the same algorithm to the left and right edges of each unwritten region. This means implementing region bounds. I got as far as below before overwhelming dyskinesia set in.
# https://github.com/xloem/flat_tree

# --- append_indices.py ---
# Indexes a balanced tree of past indices.
# Every write, publish a .copy() paired with the data, then pass the locator
# to .append() to index it.

class append_indices(list):
    """Balanced index over an append-only stream.

    Each entry is a ``(leaf_count, size, locator)`` tuple describing a branch
    that summarizes ``leaf_count`` consecutive leaves totalling ``size`` bytes.
    On every append, any run of trailing branches that has become "too small"
    relative to ``degree`` is merged into the new tail entry, keeping the
    list O(log n) long.
    """

    def __init__(self, degree=2, initial_indices=()):
        # BUG FIX: the original called super().__init__(*initial_indices);
        # list() accepts a single iterable, so that raised TypeError for any
        # non-empty initial_indices. Also avoid a mutable default argument.
        super().__init__(initial_indices)
        self.degree = degree
        self.leaf_count, self.size = 0, 0
        for leaf_count, size, value in self:
            self.leaf_count += leaf_count
            self.size += size

    def append(self, added_size, last_publish):
        """Record one more leaf of ``added_size`` bytes located by ``last_publish``."""
        self.leaf_count += 1
        self.size += added_size
        new_leaf_count = self.leaf_count
        new_size = self.size
        # Walk branches left to right. A branch is kept when it holds at
        # least 1/degree of the leaves from itself rightward; everything
        # after the first kept prefix is merged into the new tail entry.
        for idx, (branch_leaf_count, branch_size, branch_id) in enumerate(self):
            if branch_leaf_count * self.degree <= new_leaf_count:
                break
            new_leaf_count -= branch_leaf_count
            new_size -= branch_size
        else:
            idx = len(self)
        self[idx:] = [(new_leaf_count, new_size, last_publish)]


# --- mix_indices.py ---

class append_indices(list):  # noqa: F811 -- supersedes the draft above, as in the original files
    """Balanced index whose entries also carry the branch's start offset.

    Each entry is a ``(leaf_count, offset, size, locator)`` tuple, where
    ``offset`` is the byte position at which the branch's data begins.
    """

    def __init__(self, degree=2, initial_indices=()):
        # BUG FIXES vs original: list() takes one iterable (not *unpacked);
        # self.degree was assigned twice; and entries of this variant are
        # 4-tuples (leaf_count, offset, size, value), not 3-tuples, so the
        # original reconstruction loop raised ValueError on unpacking.
        super().__init__(initial_indices)
        self.degree = degree
        self.leaf_count, self.size = 0, 0
        for leaf_count, offset, size, value in self:
            self.leaf_count += leaf_count
            self.size += size
        self.splice_queue = []

    def _insert(self, last_publish, *ordered_splices):
        # A reasonable next step is to provide for truncation appends, where
        # a tail of the data is replaced with new data.
        # Currently only performs 1 append.
        assert len(ordered_splices) == 1
        for spliced_out_start, spliced_out_stop, spliced_in_size in ordered_splices:
            #assert spliced_out_start == self.size # truncation appends are drafted but untested
            assert spliced_out_stop == self.size
            self.leaf_count += 1
            self.size += spliced_in_size
            new_leaf_count = self.leaf_count
            new_offset = 0
            # Same merge rule as the draft above, but tracking the byte
            # offset at which the surviving prefix ends.
            for idx, (branch_leaf_count, branch_offset, branch_size, branch_id) in enumerate(self):
                if branch_leaf_count * self.degree <= new_leaf_count:
                    break
                new_leaf_count -= branch_leaf_count
                new_offset += branch_size
            else:
                idx = len(self)
            self[idx:] = ((new_leaf_count, new_offset,
                           spliced_out_start + spliced_in_size - new_offset,
                           last_publish),)

    def append(self, last_publish, added_size):
        """Record one appended leaf of ``added_size`` bytes located by ``last_publish``."""
        self._insert(last_publish, (self.size, self.size, added_size))

    def snap(self, data):
        """Return an ``(index-copy, data)`` pair suitable for publishing."""
        return (self.copy(), data)


# --- test.py ---
# Round-trip test: append random chunks, then reconstruct the full stream by
# recursing through the snapshots published alongside each chunk.
# (Originally `from mix_indices import append_indices`; here the class defined
# just above is used directly.)

import random

index = append_indices(degree=3)
stored_indices = {}

data = [random.randbytes(random.randint(1, 8)) for x in range(24)]
for chunk in data:
    # renamed from `id`, which shadowed the builtin
    leaf = index.leaf_count
    stored_indices[leaf] = (index.copy(), chunk)
    index.append(leaf, len(chunk))

def iterate(index, start_offset, end_offset):
    """Yield byte chunks covering [start_offset, end_offset) of the stream.

    For each branch whose span passes start_offset, first recurse into the
    snapshot published just before that branch's last leaf (recovering the
    bytes between start_offset and the leaf), then yield the leaf's own data.
    """
    subendoffset = 0
    for subleafcount, suboffset, subsize, subid in index:
        subindex, subdata = stored_indices[subid]
        subendoffset += subsize
        if subendoffset > start_offset:
            data = b''.join(iterate(subindex, start_offset, end_offset))
            yield data
            if subendoffset > end_offset:
                return
            yield subdata
            start_offset = subendoffset

data = b''.join(data)
print(data)
cmp = b''.join(iterate(index, 0, index.size))
print(cmp)
assert data == cmp
print(index)