> BTW, I'm pretty sure I have figured out the method for storing minimal > required binary tree as an array, where adding each page adds exactly > one upper node. The node added is often not the immediate parent, but is > always the one needed for covering all nodes. > > I just have to write it up in an understandable way and then you all can > look at it and tell if it is something well-known from Knuth or Date ;)
Find sample code attached: not optimal at all, meant as proof-of-concept. just run the file to see how it works, some comments in the beginning. currently it interleaves leaf and internal nodes, but it may be better for some uses (like seqscanning leaf nodes when searching for clustered pos) to separate them, for example having 1st 4k for lef nodes and 2nd for internal nodes. also, I think that having a fan-out factor above 2 (making it more like b-tree instead of binary tree) would be more efficient due to CPU caches, but it takes some more work to figure it out. Of course unless someone recognizes the algorithm and can point to textbook example ;) --------------- Hannu
#!/usr/bin/python """ Toy/pilot implementation of minimal binary tree stored in array initial array contains just leaf 0 with leaf 1 a node covering 0-1 is added with leaf 2 a node covering 0-3 is added (node for 2-3 is not yet needed) with leaf 3 a node covering 2-3 is added ... so with each additional leaf just one binary tree node is added the added node is the one needed to cover all previous nodes and added node it is often _not_ the one just above added node. """ def bin(n): i = 0x80000000 ret = [] while i: ret.append(int(n&i>0)) i >>= 1 return ''.join(map(str,ret)) def bitlen(n): i = 0 while n: i += 1 n >>= 1 return i def masklen(n): i = 0 while n: i += 1 if (n & 1): break n >>= 1 return i def int_with_bit_at(n): return 1 << (n -1) def int_with_bits_to(n): i = 0 while 1: if not n: break n -= 1 i = (i << 1) | 1 return i def range(pos): n = (pos+1)/2 mask = int_with_bits_to(masklen(n)) min = int(n & (0xffffffff ^ mask)) max = n | mask return min, max def child_nodes(pos): if not (pos % 2): raise 'pos at leaf level - no child nodes' n = (pos+1)/2 if n % 2: # child is leaf c1 = n ^ 1 c2 = n return 'leaf', c1*2, c2*2 else: ml = masklen(n) mask = int_with_bits_to(ml) bit = int_with_bit_at(ml-1) c1 = (n | mask) ^ mask | bit c2 = c1 ^ (bit << 1) return 'node', c1*2-1, c2*2-1 def parent(pos): n = (pos+1)/2 if pos % 2: # node ml = masklen(n) return 2*((n | int_with_bits_to(ml+1))^ int_with_bits_to(ml)) - 1 else: # leaf return 2*((n | 1) ^ 1)+1 def get_root_node(treearray): return int_with_bit_at(bitlen(len(treearray)/2)) * 2 - 1 def nodes_in_tree(treearray): return (len(treearray)+1) / 2 def node_info(treearray, pos): value = treearray[pos] if (pos % 2) == 0: type = 'leaf' points_to = pos/2 else: type = 'node' points_to = range(pos) return type, points_to, value def print_tree(treearray): print " pos node_range value" fmt = { 'leaf': '%3d %3d %5d', 'node': '%3d %10s %5d' } for pos in xrange(len(treearray)): type,points_to,value = node_info(treearray, pos) print fmt[type] % (pos, points_to, value) def get_node_value(treearray, pos): if pos >= len(treearray): # outside of current array if pos % 2: # node, try child nodes type, c1_pos, c2_pos = child_nodes(pos) return max(get_node_value(treearray, c1_pos),get_node_value(treearray, c2_pos)) else: # leaf return None return treearray[pos] def update_leaf_nr(treearray, n, value): while n >= nodes_in_tree(treearray): treearray.extend([0,0]) # append one (node,leaf) pair update_leaf_nr(treearray, nodes_in_tree(treearray)-1, 0) pos = n*2 treearray[pos] = value root_node = get_root_node(treearray) while 1: parent_pos = parent(pos) if (parent_pos >= len(treearray)): # out of current tree pos = parent_pos continue # there may be upper parents in tree type, c1_pos, c2_pos = child_nodes(parent_pos) if c1_pos == pos: maxvalue = max(value, get_node_value(treearray, c2_pos)) elif c2_pos == pos: maxvalue = max(value, get_node_value(treearray, c1_pos)) else: raise "no %d in child_nodes(parent(%d))" % (pos,pos) if treearray[parent_pos] == maxvalue: # already right value break treearray[parent_pos] = maxvalue if parent_pos == root_node: # reached root of tree break pos = parent_pos def get_max_node(treearray): value, pos = _get_max_node(treearray,get_root_node(treearray)) node_nr = pos/2 return node_nr, value def _get_max_node(treearray, pos=None): if pos % 2: # node, try child nodes type, c1_pos, c2_pos = child_nodes(pos) return max(_get_max_node(treearray, c1_pos),_get_max_node(treearray, c2_pos)) else: # leaf, return value if pos >= len(treearray): value = None else: value = treearray[pos] return value, pos if __name__ == '__main__': ta = [3] # initial 1-node tree with leaf 0 value 1 print_tree(ta) print '\nupdate_leaf_nr 1 to value 1' update_leaf_nr(ta,1,1) print_tree(ta) print '\nupdate_leaf_nr 1 to value 7' update_leaf_nr(ta,1,7) print_tree(ta) print '\nupdate_leaf_nr 2 to value 5' update_leaf_nr(ta,2,5) print_tree(ta) print 'get max node --> nr=%d, value=%d' % get_max_node(ta) print '\nupdate_leaf_nr 1 to value 1' update_leaf_nr(ta,1,3) print_tree(ta)
-- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers