On Fri, 20 May 2011 05:01:41 +0200 Ariane van der Steldt <[email protected]> wrote:
> On Thu, May 19, 2011 at 08:28:09PM +0300, Michael Pounov wrote:
> > Add AVL tree implementation and merge few RB tree related macros.
> >
> > If you have comments or any claims, please send me feedback
> > and I will fix them.
> >
> > >>>>>>>>>>>>>>>> Inline patch::
> > --- /usr/src/sys/sys/tree.h Mon Mar 2 11:42:55 2009
> > +++ tree.h Thu May 19 20:16:36 2011
> > @@ -730,9 +730,367 @@
> > (x) != NULL; \
> > (x) = name##_RB_NEXT(x))
> >
> > +#define RB_FOREACH_FROM(x, name, y)
> > \
>
> This macro modifies parameter y, which I do not expect from using it.
> Please keep y its original value. Also, I want to be able to use an
> rvalue for y.
>
> > + for ((x) = (y); \
> > + ((x) != NULL) && ((y) = name##_RB_NEXT(x), (x) != NULL); \
> > + (x) = (y))
> > +
> > +#define RB_FOREACH_SAFE(x, name, head, y) \
>
> (Again, don't modify the value of y.)
>
> > + for ((x) = RB_MIN(name, head); \
> > + ((x) != NULL) && ((y) = name##_RB_NEXT(x), (x) != NULL); \
> > + (x) = (y))
> > +
>
> Is there a specific reason your wrote the above 2 macros? The pattern
> where the for loop is written out is quite common and well understood by
> developers.
>
> > #define RB_FOREACH_REVERSE(x, name, head) \
> > for ((x) = RB_MAX(name, head); \
> > (x) != NULL; \
> > (x) = name##_RB_PREV(x))
> > +
> > +#define RB_FOREACH_REVERSE_FROM(x, name, y)
> > \
> > + for ((x) = (y); \
> > + ((x) != NULL) && ((y) = name##_RB_PREV(x), (x) != NULL); \
> > + (x) = (y))
> > +
> > +#define RB_FOREACH_REVERSE_SAFE(x, name, head, y) \
> > + for ((x) = RB_MAX(name, head); \
> > + ((x) != NULL) && ((y) = name##_RB_PREV(x), (x) != NULL); \
> > + (x) = (y))
> > +
> > +/* Macros that define a avl tree */
> > +#define AVL_MAX_HEIGHT 42
>
> No. Trees do not have a max height. On a modern amd64, I can
> create code which will be able to fit more than 2 ** 42 items in the
> tree (provided I had the physmem in my machine).
>
> > +#define AVL_HEAD(name, type)
> > \
> > +struct name {
> > \
> > + struct type *avlh_root; /* root of the tree */ \
> > + int avlh_height; \
> > + long avlh_count; \
>
This tree implementation use height for ancestors temporary array
because we have not a pointer to the parent.
You may use all memory when set new height 2**<height> = all stored elements in
tree!
:)
> size_t, not long. K&R guarantee it'll fit for every future version C.
>
> Why is there a count? For most trees in the system, we don't care how
> large they are. And we don't care for the height either.
>
> > +}
> > +
> > +#define AVL_INITIALIZER(root)
> > \
> > + { NULL, 0, 0 }
> > +
> > +#define AVL_INIT(root, height) do {
> > \
> > + (root)->avlh_root = NULL; \
> > + (root)->avlh_height = !height ? AVL_MAX_HEIGHT : height; \
>
> height? This is not the current height of the tree... Don't like it; if
> the items in the tree are constrained in some way, other code will have
> dealt with it.
>
> > + (root)->avlh_count = 0; \
> > +} while (0)
> > +
> > +#define AVL_ENTRY(type)
> > \
> > +struct { \
> > + struct type *avle_left; /* left element */ \
> > + struct type *avle_right; /* right element */ \
> > + int avle_height; /* node color */ \
>
> Comment copy-pasted from RB tree.
>
> > +}
> > +
> > +#define AVL_LEFT(elm, field) (elm)->field.avle_left
> > +#define AVL_RIGHT(elm, field) (elm)->field.avle_right
> > +#define AVL_HEIGHT(elm, field) (elm)->field.avle_height
> > +#define AVL_ROOT(head) (head)->avlh_root
> > +#define AVL_EMPTY(head) (AVL_ROOT(head) == NULL)
> > +#define AVL_ROOT_HEIGHT(head) (head)->avlh_height
>
> How about this instead: (AVL_EMPTY(head) ? 0 : AVL_HEIGHT(AVL_ROOT(head)))
>
> > +#define AVL_ROOT_COUNT(head) (head)->avlh_count
> > +
> > +/* Generates prototypes and inline functions */
> > +#define AVL_PROTOTYPE(name, type, field, cmp)
> > \
> > + AVL_PROTOTYPE_INTERNAL(name, type, field, cmp,)
> > +#define AVL_PROTOTYPE_STATIC(name, type, field, cmp)
> > \
> > + AVL_PROTOTYPE_INTERNAL(name, type, field, cmp,
> > __attribute__((__unused__)) static)
> > +#define AVL_PROTOTYPE_INTERNAL(name, type, field, cmp, attr)
> > \
> > +attr struct type *name##_AVL_REMOVE(struct name *, struct type *); \
> > +attr struct type *name##_AVL_INSERT(struct name *, struct type *); \
> > +attr struct type *name##_AVL_FIND(struct name *, struct type *); \
> > +attr struct type *name##_AVL_NFIND(struct name *, struct type *); \
> > +attr struct type *name##_AVL_NEXT(struct name *, struct type *); \
> > +attr struct type *name##_AVL_PREV(struct name *, struct type *); \
> > +attr struct type *name##_AVL_MIN(struct name *); \
> > +attr struct type *name##_AVL_MAX(struct name *);
> > +
> > +#define AVL_ROTATE_LEFT(parent, elm, type, field) do {
> > \
> > + struct type *_rl = AVL_LEFT(elm, field);
> > \
> > + struct type *_rr = AVL_RIGHT(elm, field);
> > \
> > + int _l = !_rl ? 0 : AVL_HEIGHT(_rl, field);
> > \
> > +
> > \
> > + if (_rr && AVL_HEIGHT(_rr, field) >= _l) {
> > \
> > + AVL_RIGHT(*parent, field) = _rl;
> > \
> > + AVL_HEIGHT(*parent, field) = _l + 1;
> > \
> > + AVL_LEFT(elm, field) = (*parent);
> > \
> > + AVL_HEIGHT(elm, field) = _l + 2;
> > \
> > + (*parent) = (elm);
> > \
> > + } else {
> > \
> > + AVL_RIGHT(*parent, field) = AVL_LEFT(_rl, field);
> > \
> > + AVL_HEIGHT(*parent, field) = _l;
> > \
> > + AVL_LEFT(elm, field) = AVL_RIGHT(_rl, field);
> > \
> > + AVL_HEIGHT(elm, field) = _l;
> > \
> > + AVL_LEFT(_rl, field) = (*parent);
> > \
> > + AVL_RIGHT(_rl, field) = (elm);
> > \
> > + AVL_HEIGHT(_rl, field) = _l + 1;
> > \
> > + (*parent) = _rl;
> > \
> > + }
> > \
> > +} while (0)
> > +
> > +#define AVL_ROTATE_RIGHT(parent, elm, type, field) do {
> > \
> > + struct type *_ll = AVL_LEFT(elm, field);
> > \
> > + struct type *_lr = AVL_RIGHT(elm, field);
> > \
> > + int _r = !_lr ? 0 : AVL_HEIGHT(_lr, field);
> > \
> > +
> > \
> > + if (_ll && AVL_HEIGHT(_ll, field) >= _r) {
> > \
> > + AVL_LEFT(*(parent), field) = _lr;
> > \
> > + AVL_HEIGHT(*(parent), field) = _r + 1;
> > \
> > + AVL_RIGHT(elm, field) = *parent;
> > \
> > + AVL_HEIGHT(elm, field) = _r + 2;
> > \
> > + *(parent) = (elm);
> > \
> > + } else {
> > \
> > + AVL_RIGHT(elm, field) = AVL_LEFT(_lr, field);
> > \
> > + AVL_HEIGHT(elm, field) = _r;
> > \
> > + AVL_LEFT(*parent, field) = AVL_RIGHT(_lr, field);
> > \
> > + AVL_HEIGHT(*parent, field) = _r;
> > \
> > + AVL_LEFT(_lr, field) = (elm);
> > \
> > + AVL_RIGHT(_lr, field) = (*parent);
> > \
> > + AVL_HEIGHT(_lr, field) = _r + 1;
> > \
> > + (*parent) = _lr;
> > \
> > + }
> > \
> > +} while (0)
> > +
> > +#define AVL_REBALANCE(_anc, type, field, count) while (count-- > 0) {
> > \
> > + int _lh, _rh;
> > \
> > + struct type *_el = NULL;
> > \
> > +
> > \
> > + _lh = !AVL_LEFT(*_anc[count], field) ? 0 :
> > \
> > + AVL_HEIGHT(AVL_LEFT(*_anc[count], field), field);
> > \
> > + _rh = !AVL_RIGHT(*_anc[count], field) ? 0 :
> > \
> > + AVL_HEIGHT(AVL_RIGHT(*_anc[count], field), field);
> > \
> > +
> > \
> > + if (_rh - _lh < -1) {
> > \
> > + _el = AVL_LEFT(*_anc[count], field);
> > \
> > + AVL_ROTATE_RIGHT(_anc[count], _el, type, field);
> > \
> > + } else if (_rh - _lh > 1) {
> > \
> > + _el = AVL_RIGHT(*_anc[count], field);
> > \
> > + AVL_ROTATE_LEFT(_anc[count], _el, type, field);
> > \
> > + } else if (AVL_HEIGHT(*_anc[count], field) != ((_rh > _lh) ? _rh : _lh)
> > + 1) \
> > + AVL_HEIGHT(*_anc[count], field) = ((_rh > _lh) ? _rh : _lh) +
> > 1; \
> > + else
> > \
> > + break;
> > \
> > +}
> > +
> > +/* Main avl operation.
> > + * Moves node close to the key of elm to top
> > + */
> > +#define AVL_GENERATE(name, type, field, cmp)
> > \
> > + AVL_GENERATE_INTERNAL(name, type, field, cmp,)
> > +#define AVL_GENERATE_STATIC(name, type, field, cmp)
> > \
> > + AVL_GENERATE_INTERNAL(name, type, field, cmp,
> > __attribute__((__unused__)) static)
> > +#define AVL_GENERATE_INTERNAL(name, type, field, cmp, attr)
> > \
> > +attr struct type *
> > \
> > +name##_AVL_MIN(struct name *head)
> > \
> > +{
> > \
> > + struct type *t = AVL_ROOT(head);
> > \
> > +
> > \
> > + while (t && AVL_LEFT(t, field))
> > \
> > + t = AVL_LEFT(t, field);
> > \
> > + return (t);
> > \
> > +}
> > \
> > +
> > \
> > +attr struct type *
> > \
> > +name##_AVL_MAX(struct name *head)
> > \
> > +{
> > \
> > + struct type *t = AVL_ROOT(head);
> > \
> > +
> > \
> > + while (t && AVL_RIGHT(t, field))
> > \
> > + t = AVL_RIGHT(t, field);
> > \
> > + return (t);
> > \
> > +}
> > \
> > +
> > \
> > +/* Finds the node with the same key as elm */
> > \
> > +attr struct type *
> > \
> > +name##_AVL_FIND(struct name *head, struct type *elm)
> > \
> > +{
> > \
> > + struct type *t = AVL_ROOT(head);
> > \
> > + int comp;
> > \
> > + while (t) {
> > \
> > + comp = cmp(t, elm);
> > \
> > + if (!comp)
> > \
> > + return (t);
> > \
> > + else if (comp < 0)
> > \
> > + t = AVL_LEFT(t, field);
> > \
> > + else
> > \
> > + t = AVL_RIGHT(t, field);
> > \
> > + }
> > \
> > + return (NULL);
> > \
> > +}
> > \
> > +
> > \
> > +/* Finds the first node less than or equal to the search key */
> > \
> > +attr struct type *
> > \
> > +name##_AVL_NFIND(struct name *head, struct type *elm)
> > \
> > +{
> > \
> > + struct type *t = AVL_ROOT(head);
> > \
> > + struct type *res = NULL;
> > \
> > + int comp;
> > \
> > + while (t) {
> > \
> > + comp = cmp(t, elm);
> > \
> > + if (!comp)
> > \
> > + return (t);
> > \
> > + else if (comp < 0) {
> > \
> > + res = t;
> > \
> > + t = AVL_LEFT(t, field);
> > \
> > + } else
> > \
> > + t = AVL_RIGHT(t, field);
> > \
> > + }
> > \
> > + return (res);
> > \
> > +}
> > \
> > +
> > \
> > +/* ARGSUSED */
> > \
> > +attr struct type *
> > \
> > +name##_AVL_NEXT(struct name *head, struct type *elm)
> > \
> > +{
> > \
> > + struct type *t = AVL_ROOT(head);
> > \
> > + struct type *res = NULL;
> > \
> > + int comp;
> > \
> > + while (t) {
> > \
> > + comp = cmp(t, elm);
> > \
> > + if (comp < 0) {
> > \
> > + res = t;
> > \
> > + t = AVL_LEFT(t, field);
> > \
> > + } else
> > \
> > + t = AVL_RIGHT(t, field);
> > \
> > + }
> > \
> > + return (res);
> > \
> > +}
> > \
> > +
> > \
> > +/* ARGSUSED */
> > \
> > +attr struct type *
> > \
> > +name##_AVL_PREV(struct name *head, struct type *elm)
> > \
> > +{
> > \
> > + struct type *t = AVL_ROOT(head);
> > \
> > + struct type *res = NULL;
> > \
> > + int comp;
> > \
> > + while (t) {
> > \
> > + comp = cmp(t, elm);
> > \
> > + if (comp > 0) {
> > \
> > + res = t;
> > \
> > + t = AVL_RIGHT(t, field);
> > \
> > + } else
> > \
> > + t = AVL_LEFT(t, field);
> > \
> > + }
> > \
> > + return (res);
> > \
> > +}
> > \
> > +
> > \
> > +/* Inserts a node into the AVL tree */
> > \
> > +attr struct type *
> > \
> > +name##_AVL_INSERT(struct name *head, struct type *elm)
> > \
> > +{
> > \
> > + struct type *temp, **pt;
> > \
> > + struct type ***ancestors;
> > \
> > + register int i;
> > \
> > + int comp;
> > \
> > +
> > \
> > + ancestors = (struct type ***) alloca(AVL_ROOT_HEIGHT(head) *
> > sizeof(void*)); \
>
> That is nasty, alloca in a macro. If I had a stack overflow, I'd never
> found out it was the macro doing that.
>
> Also, I doubt you need it, since you can use the parent pointer to
> traverse up the tree. Kernel has very little stack space and no room to
> grow more.
In this implementation we have not pointer to parent element.
We need every root node for tree to store for rebalance it.
> ancestors will overflow:
> - either because the tree temporarily (prior to rebalancing) may be
> deeper than max_height,
> - or because the tree becomes too high (you never limit the number of
> items I can put in the tree, thought your implementation requires
> that).
> You remedied this by limiting the for loop below to AVL_ROOT_HEIGHT
> items, but this means you will drop subtrees when the tree approaches
> its max size.
Can't overflow!!!
height in root node is guaranteed room for 2**<height> this is enough and clear
limit of tree
> > + if (!ancestors)
> > \
> > + return (struct type *) -1;
> > \
>
> Bad return value. This breaks the symmetry between the other trees,
> which will only return NULL or a colliding item. You make us test for 3
> values.
>
> > + else
> > \
> > + memset(ancestors, 0, AVL_ROOT_HEIGHT(head) * sizeof(void*));
> > \
> > + for (i = 0, pt = &AVL_ROOT(head); i < AVL_ROOT_HEIGHT(head) && *pt;
> > i++) { \
>
> Instead of i < AVL_ROOT_HEIGHT(head) why not only check on *pt != NULL?
> The latter is a common pattern, while the former looks suspect to me:
> the AVL tree is not guaranteed to be exactly AVL_ROOT_HEIGHT(head) high
> everywhere. This is probably why the *pt is added to the test.
>
> > + temp = *pt;
> > \
> > + ancestors[i] = pt;
> > \
> > + comp = (cmp)(temp, elm);
> > \
> > + if (!comp)
> > \
> > + return temp;
> > \
> > + else if (comp < 0)
> > \
> > + pt = &AVL_LEFT(temp, field);
> > \
> > + else
> > \
> > + pt = &AVL_RIGHT(temp, field);
> > \
> > + }
> > \
> > + *pt = elm;
> > \
> > +
> > \
> > + AVL_LEFT(elm, field) = AVL_RIGHT(elm, field) = (struct type *) NULL;
> > \
> > + AVL_HEIGHT(elm, field) = 1;
> > \
> > +
> > \
> > + AVL_ROOT_COUNT(head)++;
> > \
> > + AVL_REBALANCE(ancestors, type, field, i);
> > \
> > + return (NULL);
> > \
> > +}
> > \
> > +
> > \
> > +attr struct type *
> > \
> > +name##_AVL_REMOVE(struct name *head, struct type *elm)
> > \
> > +{
> > \
> > + struct type *temp, *old, **dp, **pt;
> > \
> > + struct type ***ancestors;
> > \
> > + register int i;
> > \
> > + int comp, delcnt;
> > \
> > +
> > \
> > + ancestors = (struct type ***) alloca(AVL_ROOT_HEIGHT(head) *
> > sizeof(void*)); \
> > + if (!ancestors)
> > \
> > + return (NULL);
> > \
> > + else
> > \
> > + memset(ancestors, 0, AVL_ROOT_HEIGHT(head) * sizeof(void*));
> > \
> > + for (i = 0, pt = &AVL_ROOT(head); i < AVL_ROOT_HEIGHT(head); i++) {
> > \
> > + if (!*pt)
> > \
> > + return (NULL);
> > \
> > + else
> > \
> > + temp = *pt;
> > \
> > + ancestors[i] = pt;
> > \
> > + comp = (cmp)(temp, elm);
> > \
> > + if (!comp)
> > \
> > + break;
> > \
> > + else if (comp < 0)
> > \
> > + pt = &AVL_LEFT(temp, field);
> > \
> > + else
> > \
> > + pt = &AVL_RIGHT(temp, field);
> > \
> > + }
> > \
> > + old = temp;
> > \
> > +
> > \
> > + if (!AVL_LEFT(temp, field)) {
> > \
> > + *pt = AVL_RIGHT(temp, field);
> > \
> > + i--;
> > \
> > + } else {
> > \
> > + delcnt = i;
> > \
> > + dp = pt;
> > \
> > + for (pt = &AVL_LEFT(temp, field); i < AVL_ROOT_HEIGHT(head) &&
> > \
> > + AVL_RIGHT(temp, field); i++) {
> > \
> > + temp = *pt;
> > \
> > + ancestors[i] = pt;
> > \
> > + pt = &AVL_RIGHT(temp, field);
> > \
> > + }
> > \
> > + *pt = AVL_LEFT(temp, field);
> > \
> > +
> > \
> > + AVL_LEFT(temp, field) = AVL_LEFT(old, field);
> > \
> > + AVL_RIGHT(temp, field) = AVL_RIGHT(old, field);
> > \
> > + AVL_HEIGHT(temp, field) = AVL_HEIGHT(old, field);
> > \
> > + *dp = temp;
> > \
> > +
> > \
> > + ancestors[delcnt] = &AVL_LEFT(temp, field);
> > \
> > + }
> > \
> > +
> > \
> > + AVL_ROOT_COUNT(head)--;
> > \
> > + AVL_REBALANCE(ancestors, type, field, i);
> > \
> > + return (old);
> > \
> > +}
> > +
> > +#define AVL_INSERT(name, x, y) name##_AVL_INSERT(x, y)
> > +#define AVL_REMOVE(name, x, y) name##_AVL_REMOVE(x, y)
> > +#define AVL_FIND(name, x, y) name##_AVL_FIND(x, y)
> > +#define AVL_NFIND(name, x, y) name##_AVL_NFIND(x, y)
> > +#define AVL_NEXT(name, x, y) name##_AVL_NEXT(x, y)
> > +#define AVL_PREV(name, x, y) name##_AVL_PREV(x, y)
> > +#define AVL_MIN(name, x) name##_AVL_MIN(x)
> > +#define AVL_MAX(name, x) name##_AVL_MAX(x)
> > +
> > +#define AVL_FOREACH(x, name, head)
> > \
> > + for ((x) = name##_AVL_MIN(head);
> > \
> > + (x) != NULL;
> > \
> > + (x) = name##_AVL_NEXT(head, x))
> > +
> > +#define AVL_FOREACH_REVERSE(x, name, head)
> > \
> > + for ((x) = name##_AVL_MAX(head);
> > \
> > + (x) != NULL;
> > \
> > + (x) = name##_AVL_PREV(head, x))
> > +
> > +#define AVL_FOREACH_SAFE(x, name, head, y)
> > \
> > + for ((x) = name##_AVL_MIN(head);
> > \
> > + (x) && ((y) = name##_AVL_NEXT(head, x), (x));
> > \
> > + (x) = (y))
> > +
> > +#define AVL_FOREACH_REVERSE_SAFE(x, name, head, y)
> > \
> > + for ((x) = name##_AVL_MAX(head);
> > \
> > + (x) && ((y) = name##_AVL_PREV(head, x), (x));
> > \
> > + (x) = (y))
> > +
> >
> > #endif /* _SYS_TREE_H_ */
> >
>
>
> I don't like the array, use parent-pointer traversal instead (like
> RBtree does).
>
> I haven't checked the internals of your algorithm and would have to
> dig up the AVL algorithm stuff to refresh myself on how it works. But I
> thought it was possible to store only the balance between the left and
> right node and rebalance based on that? That way, you can use an int
> instead of a size_t (or long, as it is currently in your code) and lose
> the artificial tree-size limit.
> --
> Ariane
--
M.Punov
---------------------
AITNET - Sofia/Bulgaria -
Software & Network Solutions
(+359) 888 73 73 58;(+359) 2 402 4000
