On Fri, 20 May 2011 05:01:41 +0200 Ariane van der Steldt <ari...@stack.nl> wrote:
> On Thu, May 19, 2011 at 08:28:09PM +0300, Michael Pounov wrote: > > Add AVL tree implementation and merge few RB tree related macros. > > > > If you have comments or any claims, please send me feedback > > and I will fix them. > > > > >>>>>>>>>>>>>>>> Inline patch:: > > --- /usr/src/sys/sys/tree.h Mon Mar 2 11:42:55 2009 > > +++ tree.h Thu May 19 20:16:36 2011 > > @@ -730,9 +730,367 @@ > > (x) != NULL; \ > > (x) = name##_RB_NEXT(x)) > > > > +#define RB_FOREACH_FROM(x, name, y) > > \ > > This macro modifies parameter y, which I do not expect from using it. > Please keep y its original value. Also, I want to be able to use an > rvalue for y. > > > + for ((x) = (y); \ > > + ((x) != NULL) && ((y) = name##_RB_NEXT(x), (x) != NULL); \ > > + (x) = (y)) > > + > > +#define RB_FOREACH_SAFE(x, name, head, y) \ > > (Again, don't modify the value of y.) > > > + for ((x) = RB_MIN(name, head); \ > > + ((x) != NULL) && ((y) = name##_RB_NEXT(x), (x) != NULL); \ > > + (x) = (y)) > > + > > Is there a specific reason your wrote the above 2 macros? The pattern > where the for loop is written out is quite common and well understood by > developers. > > > #define RB_FOREACH_REVERSE(x, name, head) \ > > for ((x) = RB_MAX(name, head); \ > > (x) != NULL; \ > > (x) = name##_RB_PREV(x)) > > + > > +#define RB_FOREACH_REVERSE_FROM(x, name, y) > > \ > > + for ((x) = (y); \ > > + ((x) != NULL) && ((y) = name##_RB_PREV(x), (x) != NULL); \ > > + (x) = (y)) > > + > > +#define RB_FOREACH_REVERSE_SAFE(x, name, head, y) \ > > + for ((x) = RB_MAX(name, head); \ > > + ((x) != NULL) && ((y) = name##_RB_PREV(x), (x) != NULL); \ > > + (x) = (y)) > > + > > +/* Macros that define a avl tree */ > > +#define AVL_MAX_HEIGHT 42 > > No. Trees do not have a max height. On a modern amd64, I can > create code which will be able to fit more than 2 ** 42 items in the > tree (provided I had the physmem in my machine). > > > +#define AVL_HEAD(name, type) > > \ > > +struct name { > > \ > > + struct type *avlh_root; /* root of the tree */ \ > > + int avlh_height; \ > > + long avlh_count; \ > This tree implementation use height for ancestors temporary array because we have not a pointer to the parent. You may use all memory when set new height 2**<height> = all stored elements in tree! :) > size_t, not long. K&R guarantee it'll fit for every future version C. > > Why is there a count? For most trees in the system, we don't care how > large they are. And we don't care for the height either. > > > +} > > + > > +#define AVL_INITIALIZER(root) > > \ > > + { NULL, 0, 0 } > > + > > +#define AVL_INIT(root, height) do { > > \ > > + (root)->avlh_root = NULL; \ > > + (root)->avlh_height = !height ? AVL_MAX_HEIGHT : height; \ > > height? This is not the current height of the tree... Don't like it; if > the items in the tree are constrained in some way, other code will have > dealt with it. > > > + (root)->avlh_count = 0; \ > > +} while (0) > > + > > +#define AVL_ENTRY(type) > > \ > > +struct { \ > > + struct type *avle_left; /* left element */ \ > > + struct type *avle_right; /* right element */ \ > > + int avle_height; /* node color */ \ > > Comment copy-pasted from RB tree. > > > +} > > + > > +#define AVL_LEFT(elm, field) (elm)->field.avle_left > > +#define AVL_RIGHT(elm, field) (elm)->field.avle_right > > +#define AVL_HEIGHT(elm, field) (elm)->field.avle_height > > +#define AVL_ROOT(head) (head)->avlh_root > > +#define AVL_EMPTY(head) (AVL_ROOT(head) == NULL) > > +#define AVL_ROOT_HEIGHT(head) (head)->avlh_height > > How about this instead: (AVL_EMPTY(head) ? 0 : AVL_HEIGHT(AVL_ROOT(head))) > > > +#define AVL_ROOT_COUNT(head) (head)->avlh_count > > + > > +/* Generates prototypes and inline functions */ > > +#define AVL_PROTOTYPE(name, type, field, cmp) > > \ > > + AVL_PROTOTYPE_INTERNAL(name, type, field, cmp,) > > +#define AVL_PROTOTYPE_STATIC(name, type, field, cmp) > > \ > > + AVL_PROTOTYPE_INTERNAL(name, type, field, cmp, > > __attribute__((__unused__)) static) > > +#define AVL_PROTOTYPE_INTERNAL(name, type, field, cmp, attr) > > \ > > +attr struct type *name##_AVL_REMOVE(struct name *, struct type *); \ > > +attr struct type *name##_AVL_INSERT(struct name *, struct type *); \ > > +attr struct type *name##_AVL_FIND(struct name *, struct type *); \ > > +attr struct type *name##_AVL_NFIND(struct name *, struct type *); \ > > +attr struct type *name##_AVL_NEXT(struct name *, struct type *); \ > > +attr struct type *name##_AVL_PREV(struct name *, struct type *); \ > > +attr struct type *name##_AVL_MIN(struct name *); \ > > +attr struct type *name##_AVL_MAX(struct name *); > > + > > +#define AVL_ROTATE_LEFT(parent, elm, type, field) do { > > \ > > + struct type *_rl = AVL_LEFT(elm, field); > > \ > > + struct type *_rr = AVL_RIGHT(elm, field); > > \ > > + int _l = !_rl ? 0 : AVL_HEIGHT(_rl, field); > > \ > > + > > \ > > + if (_rr && AVL_HEIGHT(_rr, field) >= _l) { > > \ > > + AVL_RIGHT(*parent, field) = _rl; > > \ > > + AVL_HEIGHT(*parent, field) = _l + 1; > > \ > > + AVL_LEFT(elm, field) = (*parent); > > \ > > + AVL_HEIGHT(elm, field) = _l + 2; > > \ > > + (*parent) = (elm); > > \ > > + } else { > > \ > > + AVL_RIGHT(*parent, field) = AVL_LEFT(_rl, field); > > \ > > + AVL_HEIGHT(*parent, field) = _l; > > \ > > + AVL_LEFT(elm, field) = AVL_RIGHT(_rl, field); > > \ > > + AVL_HEIGHT(elm, field) = _l; > > \ > > + AVL_LEFT(_rl, field) = (*parent); > > \ > > + AVL_RIGHT(_rl, field) = (elm); > > \ > > + AVL_HEIGHT(_rl, field) = _l + 1; > > \ > > + (*parent) = _rl; > > \ > > + } > > \ > > +} while (0) > > + > > +#define AVL_ROTATE_RIGHT(parent, elm, type, field) do { > > \ > > + struct type *_ll = AVL_LEFT(elm, field); > > \ > > + struct type *_lr = AVL_RIGHT(elm, field); > > \ > > + int _r = !_lr ? 0 : AVL_HEIGHT(_lr, field); > > \ > > + > > \ > > + if (_ll && AVL_HEIGHT(_ll, field) >= _r) { > > \ > > + AVL_LEFT(*(parent), field) = _lr; > > \ > > + AVL_HEIGHT(*(parent), field) = _r + 1; > > \ > > + AVL_RIGHT(elm, field) = *parent; > > \ > > + AVL_HEIGHT(elm, field) = _r + 2; > > \ > > + *(parent) = (elm); > > \ > > + } else { > > \ > > + AVL_RIGHT(elm, field) = AVL_LEFT(_lr, field); > > \ > > + AVL_HEIGHT(elm, field) = _r; > > \ > > + AVL_LEFT(*parent, field) = AVL_RIGHT(_lr, field); > > \ > > + AVL_HEIGHT(*parent, field) = _r; > > \ > > + AVL_LEFT(_lr, field) = (elm); > > \ > > + AVL_RIGHT(_lr, field) = (*parent); > > \ > > + AVL_HEIGHT(_lr, field) = _r + 1; > > \ > > + (*parent) = _lr; > > \ > > + } > > \ > > +} while (0) > > + > > +#define AVL_REBALANCE(_anc, type, field, count) while (count-- > 0) { > > \ > > + int _lh, _rh; > > \ > > + struct type *_el = NULL; > > \ > > + > > \ > > + _lh = !AVL_LEFT(*_anc[count], field) ? 0 : > > \ > > + AVL_HEIGHT(AVL_LEFT(*_anc[count], field), field); > > \ > > + _rh = !AVL_RIGHT(*_anc[count], field) ? 0 : > > \ > > + AVL_HEIGHT(AVL_RIGHT(*_anc[count], field), field); > > \ > > + > > \ > > + if (_rh - _lh < -1) { > > \ > > + _el = AVL_LEFT(*_anc[count], field); > > \ > > + AVL_ROTATE_RIGHT(_anc[count], _el, type, field); > > \ > > + } else if (_rh - _lh > 1) { > > \ > > + _el = AVL_RIGHT(*_anc[count], field); > > \ > > + AVL_ROTATE_LEFT(_anc[count], _el, type, field); > > \ > > + } else if (AVL_HEIGHT(*_anc[count], field) != ((_rh > _lh) ? _rh : _lh) > > + 1) \ > > + AVL_HEIGHT(*_anc[count], field) = ((_rh > _lh) ? _rh : _lh) + > > 1; \ > > + else > > \ > > + break; > > \ > > +} > > + > > +/* Main avl operation. > > + * Moves node close to the key of elm to top > > + */ > > +#define AVL_GENERATE(name, type, field, cmp) > > \ > > + AVL_GENERATE_INTERNAL(name, type, field, cmp,) > > +#define AVL_GENERATE_STATIC(name, type, field, cmp) > > \ > > + AVL_GENERATE_INTERNAL(name, type, field, cmp, > > __attribute__((__unused__)) static) > > +#define AVL_GENERATE_INTERNAL(name, type, field, cmp, attr) > > \ > > +attr struct type * > > \ > > +name##_AVL_MIN(struct name *head) > > \ > > +{ > > \ > > + struct type *t = AVL_ROOT(head); > > \ > > + > > \ > > + while (t && AVL_LEFT(t, field)) > > \ > > + t = AVL_LEFT(t, field); > > \ > > + return (t); > > \ > > +} > > \ > > + > > \ > > +attr struct type * > > \ > > +name##_AVL_MAX(struct name *head) > > \ > > +{ > > \ > > + struct type *t = AVL_ROOT(head); > > \ > > + > > \ > > + while (t && AVL_RIGHT(t, field)) > > \ > > + t = AVL_RIGHT(t, field); > > \ > > + return (t); > > \ > > +} > > \ > > + > > \ > > +/* Finds the node with the same key as elm */ > > \ > > +attr struct type * > > \ > > +name##_AVL_FIND(struct name *head, struct type *elm) > > \ > > +{ > > \ > > + struct type *t = AVL_ROOT(head); > > \ > > + int comp; > > \ > > + while (t) { > > \ > > + comp = cmp(t, elm); > > \ > > + if (!comp) > > \ > > + return (t); > > \ > > + else if (comp < 0) > > \ > > + t = AVL_LEFT(t, field); > > \ > > + else > > \ > > + t = AVL_RIGHT(t, field); > > \ > > + } > > \ > > + return (NULL); > > \ > > +} > > \ > > + > > \ > > +/* Finds the first node less than or equal to the search key */ > > \ > > +attr struct type * > > \ > > +name##_AVL_NFIND(struct name *head, struct type *elm) > > \ > > +{ > > \ > > + struct type *t = AVL_ROOT(head); > > \ > > + struct type *res = NULL; > > \ > > + int comp; > > \ > > + while (t) { > > \ > > + comp = cmp(t, elm); > > \ > > + if (!comp) > > \ > > + return (t); > > \ > > + else if (comp < 0) { > > \ > > + res = t; > > \ > > + t = AVL_LEFT(t, field); > > \ > > + } else > > \ > > + t = AVL_RIGHT(t, field); > > \ > > + } > > \ > > + return (res); > > \ > > +} > > \ > > + > > \ > > +/* ARGSUSED */ > > \ > > +attr struct type * > > \ > > +name##_AVL_NEXT(struct name *head, struct type *elm) > > \ > > +{ > > \ > > + struct type *t = AVL_ROOT(head); > > \ > > + struct type *res = NULL; > > \ > > + int comp; > > \ > > + while (t) { > > \ > > + comp = cmp(t, elm); > > \ > > + if (comp < 0) { > > \ > > + res = t; > > \ > > + t = AVL_LEFT(t, field); > > \ > > + } else > > \ > > + t = AVL_RIGHT(t, field); > > \ > > + } > > \ > > + return (res); > > \ > > +} > > \ > > + > > \ > > +/* ARGSUSED */ > > \ > > +attr struct type * > > \ > > +name##_AVL_PREV(struct name *head, struct type *elm) > > \ > > +{ > > \ > > + struct type *t = AVL_ROOT(head); > > \ > > + struct type *res = NULL; > > \ > > + int comp; > > \ > > + while (t) { > > \ > > + comp = cmp(t, elm); > > \ > > + if (comp > 0) { > > \ > > + res = t; > > \ > > + t = AVL_RIGHT(t, field); > > \ > > + } else > > \ > > + t = AVL_LEFT(t, field); > > \ > > + } > > \ > > + return (res); > > \ > > +} > > \ > > + > > \ > > +/* Inserts a node into the AVL tree */ > > \ > > +attr struct type * > > \ > > +name##_AVL_INSERT(struct name *head, struct type *elm) > > \ > > +{ > > \ > > + struct type *temp, **pt; > > \ > > + struct type ***ancestors; > > \ > > + register int i; > > \ > > + int comp; > > \ > > + > > \ > > + ancestors = (struct type ***) alloca(AVL_ROOT_HEIGHT(head) * > > sizeof(void*)); \ > > That is nasty, alloca in a macro. If I had a stack overflow, I'd never > found out it was the macro doing that. > > Also, I doubt you need it, since you can use the parent pointer to > traverse up the tree. Kernel has very little stack space and no room to > grow more. In this implementation we have not pointer to parent element. We need every root node for tree to store for rebalance it. > ancestors will overflow: > - either because the tree temporarily (prior to rebalancing) may be > deeper than max_height, > - or because the tree becomes too high (you never limit the number of > items I can put in the tree, thought your implementation requires > that). > You remedied this by limiting the for loop below to AVL_ROOT_HEIGHT > items, but this means you will drop subtrees when the tree approaches > its max size. Can't overflow!!! height in root node is guaranteed room for 2**<height> this is enough and clear limit of tree > > + if (!ancestors) > > \ > > + return (struct type *) -1; > > \ > > Bad return value. This breaks the symmetry between the other trees, > which will only return NULL or a colliding item. You make us test for 3 > values. > > > + else > > \ > > + memset(ancestors, 0, AVL_ROOT_HEIGHT(head) * sizeof(void*)); > > \ > > + for (i = 0, pt = &AVL_ROOT(head); i < AVL_ROOT_HEIGHT(head) && *pt; > > i++) { \ > > Instead of i < AVL_ROOT_HEIGHT(head) why not only check on *pt != NULL? > The latter is a common pattern, while the former looks suspect to me: > the AVL tree is not guaranteed to be exactly AVL_ROOT_HEIGHT(head) high > everywhere. This is probably why the *pt is added to the test. > > > + temp = *pt; > > \ > > + ancestors[i] = pt; > > \ > > + comp = (cmp)(temp, elm); > > \ > > + if (!comp) > > \ > > + return temp; > > \ > > + else if (comp < 0) > > \ > > + pt = &AVL_LEFT(temp, field); > > \ > > + else > > \ > > + pt = &AVL_RIGHT(temp, field); > > \ > > + } > > \ > > + *pt = elm; > > \ > > + > > \ > > + AVL_LEFT(elm, field) = AVL_RIGHT(elm, field) = (struct type *) NULL; > > \ > > + AVL_HEIGHT(elm, field) = 1; > > \ > > + > > \ > > + AVL_ROOT_COUNT(head)++; > > \ > > + AVL_REBALANCE(ancestors, type, field, i); > > \ > > + return (NULL); > > \ > > +} > > \ > > + > > \ > > +attr struct type * > > \ > > +name##_AVL_REMOVE(struct name *head, struct type *elm) > > \ > > +{ > > \ > > + struct type *temp, *old, **dp, **pt; > > \ > > + struct type ***ancestors; > > \ > > + register int i; > > \ > > + int comp, delcnt; > > \ > > + > > \ > > + ancestors = (struct type ***) alloca(AVL_ROOT_HEIGHT(head) * > > sizeof(void*)); \ > > + if (!ancestors) > > \ > > + return (NULL); > > \ > > + else > > \ > > + memset(ancestors, 0, AVL_ROOT_HEIGHT(head) * sizeof(void*)); > > \ > > + for (i = 0, pt = &AVL_ROOT(head); i < AVL_ROOT_HEIGHT(head); i++) { > > \ > > + if (!*pt) > > \ > > + return (NULL); > > \ > > + else > > \ > > + temp = *pt; > > \ > > + ancestors[i] = pt; > > \ > > + comp = (cmp)(temp, elm); > > \ > > + if (!comp) > > \ > > + break; > > \ > > + else if (comp < 0) > > \ > > + pt = &AVL_LEFT(temp, field); > > \ > > + else > > \ > > + pt = &AVL_RIGHT(temp, field); > > \ > > + } > > \ > > + old = temp; > > \ > > + > > \ > > + if (!AVL_LEFT(temp, field)) { > > \ > > + *pt = AVL_RIGHT(temp, field); > > \ > > + i--; > > \ > > + } else { > > \ > > + delcnt = i; > > \ > > + dp = pt; > > \ > > + for (pt = &AVL_LEFT(temp, field); i < AVL_ROOT_HEIGHT(head) && > > \ > > + AVL_RIGHT(temp, field); i++) { > > \ > > + temp = *pt; > > \ > > + ancestors[i] = pt; > > \ > > + pt = &AVL_RIGHT(temp, field); > > \ > > + } > > \ > > + *pt = AVL_LEFT(temp, field); > > \ > > + > > \ > > + AVL_LEFT(temp, field) = AVL_LEFT(old, field); > > \ > > + AVL_RIGHT(temp, field) = AVL_RIGHT(old, field); > > \ > > + AVL_HEIGHT(temp, field) = AVL_HEIGHT(old, field); > > \ > > + *dp = temp; > > \ > > + > > \ > > + ancestors[delcnt] = &AVL_LEFT(temp, field); > > \ > > + } > > \ > > + > > \ > > + AVL_ROOT_COUNT(head)--; > > \ > > + AVL_REBALANCE(ancestors, type, field, i); > > \ > > + return (old); > > \ > > +} > > + > > +#define AVL_INSERT(name, x, y) name##_AVL_INSERT(x, y) > > +#define AVL_REMOVE(name, x, y) name##_AVL_REMOVE(x, y) > > +#define AVL_FIND(name, x, y) name##_AVL_FIND(x, y) > > +#define AVL_NFIND(name, x, y) name##_AVL_NFIND(x, y) > > +#define AVL_NEXT(name, x, y) name##_AVL_NEXT(x, y) > > +#define AVL_PREV(name, x, y) name##_AVL_PREV(x, y) > > +#define AVL_MIN(name, x) name##_AVL_MIN(x) > > +#define AVL_MAX(name, x) name##_AVL_MAX(x) > > + > > +#define AVL_FOREACH(x, name, head) > > \ > > + for ((x) = name##_AVL_MIN(head); > > \ > > + (x) != NULL; > > \ > > + (x) = name##_AVL_NEXT(head, x)) > > + > > +#define AVL_FOREACH_REVERSE(x, name, head) > > \ > > + for ((x) = name##_AVL_MAX(head); > > \ > > + (x) != NULL; > > \ > > + (x) = name##_AVL_PREV(head, x)) > > + > > +#define AVL_FOREACH_SAFE(x, name, head, y) > > \ > > + for ((x) = name##_AVL_MIN(head); > > \ > > + (x) && ((y) = name##_AVL_NEXT(head, x), (x)); > > \ > > + (x) = (y)) > > + > > +#define AVL_FOREACH_REVERSE_SAFE(x, name, head, y) > > \ > > + for ((x) = name##_AVL_MAX(head); > > \ > > + (x) && ((y) = name##_AVL_PREV(head, x), (x)); > > \ > > + (x) = (y)) > > + > > > > #endif /* _SYS_TREE_H_ */ > > > > > I don't like the array, use parent-pointer traversal instead (like > RBtree does). > > I haven't checked the internals of your algorithm and would have to > dig up the AVL algorithm stuff to refresh myself on how it works. But I > thought it was possible to store only the balance between the left and > right node and rebalance based on that? That way, you can use an int > instead of a size_t (or long, as it is currently in your code) and lose > the artificial tree-size limit. > -- > Ariane -- M.Punov --------------------- AITNET - Sofia/Bulgaria - Software & Network Solutions (+359) 888 73 73 58;(+359) 2 402 4000