> BTW, I'm pretty sure I have figured out the method for storing minimal
> required binary tree as an array, where adding each page adds exactly
> one upper node. The node added is often not the immediate parent, but is
> always the one needed for covering all nodes.
> 
> I just have to write it up in an understandable way and then you all can
> look at it and tell if it is something well-known from Knuth or Date ;)

Find sample code attached:

not optimal at all, meant as proof-of-concept.

just run the file to see how it works, some comments in the beginning.

currently it interleaves leaf and internal nodes, but it may be better
for some uses (like seqscanning leaf nodes when searching for clustered
pos) to separate them, for example having 1st 4k for lef nodes and 2nd
for internal nodes.

also, I think that having a fan-out factor above 2 (making it more like
b-tree instead of binary tree) would be more efficient due to CPU
caches, but it takes some more work to figure it out. 

Of course unless someone recognizes the algorithm and can point to
textbook example ;) 

---------------
Hannu

#!/usr/bin/python

"""
Toy/pilot implementation of minimal binary tree stored in array
initial array contains just leaf 0
with leaf 1 a node covering 0-1 is added
with leaf 2 a node covering 0-3 is added (node for 2-3 is not yet needed)
with leaf 3 a node covering 2-3 is added
...

so with each additional leaf just one binary tree node is added
the added node is the one needed to cover all previous nodes and added node
it is often _not_ the one just above added node.

"""



def bin(n):
    i = 0x80000000
    ret = []
    while i:
        ret.append(int(n&i>0))
        i >>= 1
    return ''.join(map(str,ret))

def bitlen(n):
    i = 0
    while n:
        i += 1
        n >>= 1
    return i

def masklen(n):
    i = 0
    while n:
        i += 1
        if (n & 1): break
        n >>= 1
    return i

def int_with_bit_at(n):
    return 1 << (n -1)

def int_with_bits_to(n):
    i = 0
    while 1:
        if not n: break
        n -= 1
        i = (i << 1) | 1
    return i

def range(pos):
    n = (pos+1)/2
    mask = int_with_bits_to(masklen(n))
    min = int(n & (0xffffffff ^ mask))
    max = n | mask
    return min, max

def child_nodes(pos):
    if not (pos % 2):
        raise 'pos at leaf level - no child nodes'
    n = (pos+1)/2
    if n % 2: # child is leaf
        c1 = n ^ 1
        c2 = n
        return 'leaf', c1*2, c2*2
    else:
        ml = masklen(n)
        mask = int_with_bits_to(ml)
        bit = int_with_bit_at(ml-1)
        c1 = (n | mask) ^ mask | bit
        c2 = c1 ^ (bit << 1)
        return 'node', c1*2-1, c2*2-1

def parent(pos):
    n = (pos+1)/2
    if pos % 2: # node
        ml = masklen(n)
        return 2*((n | int_with_bits_to(ml+1))^ int_with_bits_to(ml)) - 1
    else: # leaf
        return 2*((n | 1) ^ 1)+1

def get_root_node(treearray):
    return int_with_bit_at(bitlen(len(treearray)/2)) * 2 - 1

def nodes_in_tree(treearray):
    return (len(treearray)+1) / 2

def node_info(treearray, pos):
    value = treearray[pos]
    if (pos % 2) == 0:
        type = 'leaf'
        points_to = pos/2
    else:
        type = 'node'
        points_to = range(pos)
    return type, points_to, value

def print_tree(treearray):
    print " pos   node_range  value"
    fmt = {
        'leaf': '%3d      %3d     %5d',
        'node': '%3d  %10s  %5d'
    }
    for pos in xrange(len(treearray)):
        type,points_to,value = node_info(treearray, pos)
        print fmt[type] % (pos, points_to, value)

def get_node_value(treearray, pos):
    if pos >= len(treearray): # outside of current array
        if pos % 2: # node, try child nodes
            type, c1_pos, c2_pos = child_nodes(pos)
            return max(get_node_value(treearray, c1_pos),get_node_value(treearray, c2_pos))
        else: # leaf
            return None
    return treearray[pos]

def update_leaf_nr(treearray, n, value): 
    while n >= nodes_in_tree(treearray):
        treearray.extend([0,0]) # append one (node,leaf) pair
        update_leaf_nr(treearray, nodes_in_tree(treearray)-1, 0)
    pos = n*2
    treearray[pos] = value
    root_node = get_root_node(treearray)
    while 1:
        parent_pos = parent(pos)
        if (parent_pos >= len(treearray)): # out of current tree
            pos = parent_pos
            continue                       # there may be upper parents in tree
        type, c1_pos, c2_pos = child_nodes(parent_pos)
        if c1_pos == pos:
            maxvalue = max(value, get_node_value(treearray, c2_pos))
        elif c2_pos == pos:
            maxvalue = max(value, get_node_value(treearray, c1_pos))
        else:
            raise "no %d in child_nodes(parent(%d))" % (pos,pos)
        if treearray[parent_pos] == maxvalue: # already right value
            break
        treearray[parent_pos] = maxvalue
        if parent_pos == root_node: # reached root of tree
            break
        pos = parent_pos

def get_max_node(treearray):
    value, pos = _get_max_node(treearray,get_root_node(treearray))
    node_nr = pos/2
    return node_nr, value

def _get_max_node(treearray, pos=None):
    if pos % 2: # node, try child nodes
        type, c1_pos, c2_pos = child_nodes(pos)
        return max(_get_max_node(treearray, c1_pos),_get_max_node(treearray, c2_pos))
    else:       # leaf, return value
        if pos >= len(treearray):
            value = None
        else:
            value = treearray[pos]
        return value, pos

if __name__ == '__main__':
    ta = [3] # initial 1-node tree with leaf 0 value 1
    print_tree(ta)
    print '\nupdate_leaf_nr 1 to value 1'
    update_leaf_nr(ta,1,1)
    print_tree(ta)
    print '\nupdate_leaf_nr 1 to value 7'
    update_leaf_nr(ta,1,7)
    print_tree(ta)
    print '\nupdate_leaf_nr 2 to value 5'
    update_leaf_nr(ta,2,5)
    print_tree(ta)
    print 'get max node --> nr=%d, value=%d' % get_max_node(ta)
    print '\nupdate_leaf_nr 1 to value 1'
    update_leaf_nr(ta,1,3)
    print_tree(ta)

-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to