> BTW, I'm pretty sure I have figured out the method for storing minimal
> required binary tree as an array, where adding each page adds exactly
> one upper node. The node added is often not the immediate parent, but is
> always the one needed for covering all nodes.
>
> I just have to write it up in an understandable way and then you all can
> look at it and tell if it is something well-known from Knuth or Date ;)
Find sample code attached:
not optimal at all, meant as proof-of-concept.
just run the file to see how it works, some comments in the beginning.
currently it interleaves leaf and internal nodes, but it may be better
for some uses (like seqscanning leaf nodes when searching for clustered
pos) to separate them, for example having 1st 4k for lef nodes and 2nd
for internal nodes.
also, I think that having a fan-out factor above 2 (making it more like
b-tree instead of binary tree) would be more efficient due to CPU
caches, but it takes some more work to figure it out.
Of course unless someone recognizes the algorithm and can point to
textbook example ;)
---------------
Hannu
#!/usr/bin/python
"""
Toy/pilot implementation of minimal binary tree stored in array
initial array contains just leaf 0
with leaf 1 a node covering 0-1 is added
with leaf 2 a node covering 0-3 is added (node for 2-3 is not yet needed)
with leaf 3 a node covering 2-3 is added
...
so with each additional leaf just one binary tree node is added
the added node is the one needed to cover all previous nodes and added node
it is often _not_ the one just above added node.
"""
def bin(n):
i = 0x80000000
ret = []
while i:
ret.append(int(n&i>0))
i >>= 1
return ''.join(map(str,ret))
def bitlen(n):
i = 0
while n:
i += 1
n >>= 1
return i
def masklen(n):
i = 0
while n:
i += 1
if (n & 1): break
n >>= 1
return i
def int_with_bit_at(n):
return 1 << (n -1)
def int_with_bits_to(n):
i = 0
while 1:
if not n: break
n -= 1
i = (i << 1) | 1
return i
def range(pos):
n = (pos+1)/2
mask = int_with_bits_to(masklen(n))
min = int(n & (0xffffffff ^ mask))
max = n | mask
return min, max
def child_nodes(pos):
if not (pos % 2):
raise 'pos at leaf level - no child nodes'
n = (pos+1)/2
if n % 2: # child is leaf
c1 = n ^ 1
c2 = n
return 'leaf', c1*2, c2*2
else:
ml = masklen(n)
mask = int_with_bits_to(ml)
bit = int_with_bit_at(ml-1)
c1 = (n | mask) ^ mask | bit
c2 = c1 ^ (bit << 1)
return 'node', c1*2-1, c2*2-1
def parent(pos):
n = (pos+1)/2
if pos % 2: # node
ml = masklen(n)
return 2*((n | int_with_bits_to(ml+1))^ int_with_bits_to(ml)) - 1
else: # leaf
return 2*((n | 1) ^ 1)+1
def get_root_node(treearray):
return int_with_bit_at(bitlen(len(treearray)/2)) * 2 - 1
def nodes_in_tree(treearray):
return (len(treearray)+1) / 2
def node_info(treearray, pos):
value = treearray[pos]
if (pos % 2) == 0:
type = 'leaf'
points_to = pos/2
else:
type = 'node'
points_to = range(pos)
return type, points_to, value
def print_tree(treearray):
print " pos node_range value"
fmt = {
'leaf': '%3d %3d %5d',
'node': '%3d %10s %5d'
}
for pos in xrange(len(treearray)):
type,points_to,value = node_info(treearray, pos)
print fmt[type] % (pos, points_to, value)
def get_node_value(treearray, pos):
if pos >= len(treearray): # outside of current array
if pos % 2: # node, try child nodes
type, c1_pos, c2_pos = child_nodes(pos)
return max(get_node_value(treearray, c1_pos),get_node_value(treearray, c2_pos))
else: # leaf
return None
return treearray[pos]
def update_leaf_nr(treearray, n, value):
while n >= nodes_in_tree(treearray):
treearray.extend([0,0]) # append one (node,leaf) pair
update_leaf_nr(treearray, nodes_in_tree(treearray)-1, 0)
pos = n*2
treearray[pos] = value
root_node = get_root_node(treearray)
while 1:
parent_pos = parent(pos)
if (parent_pos >= len(treearray)): # out of current tree
pos = parent_pos
continue # there may be upper parents in tree
type, c1_pos, c2_pos = child_nodes(parent_pos)
if c1_pos == pos:
maxvalue = max(value, get_node_value(treearray, c2_pos))
elif c2_pos == pos:
maxvalue = max(value, get_node_value(treearray, c1_pos))
else:
raise "no %d in child_nodes(parent(%d))" % (pos,pos)
if treearray[parent_pos] == maxvalue: # already right value
break
treearray[parent_pos] = maxvalue
if parent_pos == root_node: # reached root of tree
break
pos = parent_pos
def get_max_node(treearray):
value, pos = _get_max_node(treearray,get_root_node(treearray))
node_nr = pos/2
return node_nr, value
def _get_max_node(treearray, pos=None):
if pos % 2: # node, try child nodes
type, c1_pos, c2_pos = child_nodes(pos)
return max(_get_max_node(treearray, c1_pos),_get_max_node(treearray, c2_pos))
else: # leaf, return value
if pos >= len(treearray):
value = None
else:
value = treearray[pos]
return value, pos
if __name__ == '__main__':
ta = [3] # initial 1-node tree with leaf 0 value 1
print_tree(ta)
print '\nupdate_leaf_nr 1 to value 1'
update_leaf_nr(ta,1,1)
print_tree(ta)
print '\nupdate_leaf_nr 1 to value 7'
update_leaf_nr(ta,1,7)
print_tree(ta)
print '\nupdate_leaf_nr 2 to value 5'
update_leaf_nr(ta,2,5)
print_tree(ta)
print 'get max node --> nr=%d, value=%d' % get_max_node(ta)
print '\nupdate_leaf_nr 1 to value 1'
update_leaf_nr(ta,1,3)
print_tree(ta)
--
Sent via pgsql-hackers mailing list ([email protected])
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers