The following diff replaces the existing hash table with a red black tree, to the acclaim of network hackers worldwide.
This has already received some testing, but needs some more shakedown in order to make sure that nobody gets hosed by this. Folks with more esoteric (read: vether and/or gif) setups are especially encouraged to test, and may receive cookies. - Bert Index: if_bridge.h =================================================================== RCS file: /cvs/src/sys/net/if_bridge.h,v retrieving revision 1.32 diff -u -p -r1.32 if_bridge.h --- if_bridge.h 28 Oct 2010 13:49:54 -0000 1.32 +++ if_bridge.h 29 Oct 2010 19:45:38 -0000 @@ -397,7 +397,7 @@ struct bridge_iflist { * Bridge route node */ struct bridge_rtnode { - LIST_ENTRY(bridge_rtnode) brt_next; /* next in list */ + RB_ENTRY(bridge_rtnode) brt_entry; /* entry in rb tree */ struct ifnet *brt_if; /* destination ifs */ u_int8_t brt_flags; /* address flags */ u_int8_t brt_age; /* age counter */ @@ -423,7 +423,7 @@ struct bridge_softc { struct timeout sc_brtimeout; /* timeout state */ struct bstp_state *sc_stp; /* stp state */ LIST_HEAD(, bridge_iflist) sc_iflist; /* interface list */ - LIST_HEAD(, bridge_rtnode) sc_rts[BRIDGE_RTABLE_SIZE]; /* hash table */ + RB_HEAD(brt_tree, bridge_rtnode) sc_rts; /* route tree */ LIST_HEAD(, bridge_iflist) sc_spanlist; /* span ports */ }; Index: if_bridge.c =================================================================== RCS file: /cvs/src/sys/net/if_bridge.c,v retrieving revision 1.186 diff -u -p -r1.186 if_bridge.c --- if_bridge.c 28 Oct 2010 19:00:57 -0000 1.186 +++ if_bridge.c 29 Oct 2010 19:45:38 -0000 @@ -45,6 +45,7 @@ #include <sys/ioctl.h> #include <sys/errno.h> #include <sys/kernel.h> +#include <sys/tree.h> #include <machine/cpu.h> #include <net/if.h> @@ -135,7 +136,7 @@ int bridge_rtfind(struct bridge_softc *, void bridge_rtage(struct bridge_softc *); void bridge_rttrim(struct bridge_softc *); int bridge_rtdaddr(struct bridge_softc *, struct ether_addr *); -int bridge_rtflush(struct bridge_softc *, int); +void bridge_rtflush(struct bridge_softc *, int); struct ifnet * bridge_rtupdate(struct bridge_softc *, struct ether_addr *, struct ifnet *ifp, int, u_int8_t); struct ifnet * bridge_rtlookup(struct bridge_softc *, @@ -180,6 +181,16 @@ LIST_HEAD(, bridge_softc) bridge_list; struct if_clone bridge_cloner = IF_CLONE_INITIALIZER("bridge", bridge_clone_create, bridge_clone_destroy); +static __inline int brt_cmp(struct bridge_rtnode *, struct bridge_rtnode *); +RB_PROTOTYPE(brt_tree, bridge_rtnode, brt_entry, brt_cmp); +RB_GENERATE(brt_tree, bridge_rtnode, brt_entry, brt_cmp); + +static __inline int +brt_cmp(struct bridge_rtnode *a, struct bridge_rtnode *b) +{ + return (memcmp(&a->brt_addr, &b->brt_addr, sizeof(a->brt_addr))); +} + /* ARGSUSED */ void bridgeattach(int n) @@ -194,7 +205,7 @@ bridge_clone_create(struct if_clone *ifc { struct bridge_softc *sc; struct ifnet *ifp; - int i, s; + int s; sc = malloc(sizeof(*sc), M_DEVBUF, M_NOWAIT|M_ZERO); if (!sc) @@ -211,8 +222,7 @@ bridge_clone_create(struct if_clone *ifc timeout_set(&sc->sc_brtimeout, bridge_timer, sc); LIST_INIT(&sc->sc_iflist); LIST_INIT(&sc->sc_spanlist); - for (i = 0; i < BRIDGE_RTABLE_SIZE; i++) - LIST_INIT(&sc->sc_rts[i]); + RB_INIT(&sc->sc_rts); sc->sc_hashkey = arc4random(); ifp = &sc->sc_if; snprintf(ifp->if_xname, sizeof ifp->if_xname, "%s%d", ifc->ifc_name, @@ -560,7 +570,7 @@ bridge_ioctl(struct ifnet *ifp, u_long c if ((error = suser(curproc, 0)) != 0) break; - error = bridge_rtflush(sc, req->ifbr_ifsflags); + bridge_rtflush(sc, req->ifbr_ifsflags); break; case SIOCBRDGSADDR: if ((error = suser(curproc, 0)) != 0) @@ -1003,7 +1013,7 @@ bridge_output(struct ifnet *ifp, struct struct ifnet *dst_if; struct ether_addr *dst; struct bridge_softc *sc; - int s, error, len; + int s, error; #ifdef IPSEC struct m_tag *mtag; #endif /* IPSEC */ @@ -1109,33 +1119,12 @@ bridge_output(struct ifnet *ifp, struct used = 1; mc = m; } else { - struct mbuf *m1, *m2, *mx; - - m1 = m_copym2(m, 0, ETHER_HDR_LEN, - M_DONTWAIT); - if (m1 == NULL) { - sc->sc_if.if_oerrors++; - continue; - } - m2 = m_copym2(m, ETHER_HDR_LEN, + mc = m_copym2(m, 0, M_COPYALL, M_DONTWAIT); - if (m2 == NULL) { - m_freem(m1); + if (mc == NULL) { sc->sc_if.if_oerrors++; continue; } - - for (mx = m1; mx->m_next != NULL; mx = mx->m_next) - /*EMPTY*/; - mx->m_next = m2; - - if (m1->m_flags & M_PKTHDR) { - len = 0; - for (mx = m1; mx != NULL; mx = mx->m_next) - len += mx->m_len; - m1->m_pkthdr.len = len; - } - mc = m1; } error = bridge_ifenqueue(sc, dst_if, mc); @@ -1762,151 +1751,46 @@ struct ifnet * bridge_rtupdate(struct bridge_softc *sc, struct ether_addr *ea, struct ifnet *ifp, int setflags, u_int8_t flags) { - struct bridge_rtnode *p, *q; - u_int32_t h; - int dir; - - h = bridge_hash(sc, ea); - p = LIST_FIRST(&sc->sc_rts[h]); - if (p == LIST_END(&sc->sc_rts[h])) { + struct bridge_rtnode *brt, tmp; + + bcopy(ea, &tmp.brt_addr, sizeof(*ea)); + + if ((brt = RB_FIND(brt_tree, &sc->sc_rts, &tmp)) == NULL) { + if (sc->sc_brtcnt >= sc->sc_brtmax) - goto done; - p = malloc(sizeof(*p), M_DEVBUF, M_NOWAIT); - if (p == NULL) - goto done; + goto fail; + if ((brt = malloc(sizeof(*brt), M_DEVBUF, M_NOWAIT)) == NULL) + goto fail; - bcopy(ea, &p->brt_addr, sizeof(p->brt_addr)); - p->brt_if = ifp; - p->brt_age = 1; + bcopy(ea, &brt->brt_addr, sizeof(brt->brt_addr)); + brt->brt_if = ifp; + brt->brt_age = 1; if (setflags) - p->brt_flags = flags; + brt->brt_flags = flags; else - p->brt_flags = IFBAF_DYNAMIC; + brt->brt_flags = IFBAF_DYNAMIC; - LIST_INSERT_HEAD(&sc->sc_rts[h], p, brt_next); + RB_INSERT(brt_tree, &sc->sc_rts, brt); sc->sc_brtcnt++; - goto want; } - do { - q = p; - p = LIST_NEXT(p, brt_next); - - dir = memcmp(ea, &q->brt_addr, sizeof(q->brt_addr)); - if (dir == 0) { - if (setflags) { - q->brt_if = ifp; - q->brt_flags = flags; - } else if (!(q->brt_flags & IFBAF_STATIC)) - q->brt_if = ifp; - - if (q->brt_if == ifp) - q->brt_age = 1; - ifp = q->brt_if; - goto want; - } - - if (dir > 0) { - if (sc->sc_brtcnt >= sc->sc_brtmax) - goto done; - p = malloc(sizeof(*p), M_DEVBUF, M_NOWAIT); - if (p == NULL) - goto done; - - bcopy(ea, &p->brt_addr, sizeof(p->brt_addr)); - p->brt_if = ifp; - p->brt_age = 1; - - if (setflags) - p->brt_flags = flags; - else - p->brt_flags = IFBAF_DYNAMIC; - - LIST_INSERT_BEFORE(q, p, brt_next); - sc->sc_brtcnt++; - goto want; - } - - if (p == LIST_END(&sc->sc_rts[h])) { - if (sc->sc_brtcnt >= sc->sc_brtmax) - goto done; - p = malloc(sizeof(*p), M_DEVBUF, M_NOWAIT); - if (p == NULL) - goto done; - - bcopy(ea, &p->brt_addr, sizeof(p->brt_addr)); - p->brt_if = ifp; - p->brt_age = 1; - - if (setflags) - p->brt_flags = flags; - else - p->brt_flags = IFBAF_DYNAMIC; - LIST_INSERT_AFTER(q, p, brt_next); - sc->sc_brtcnt++; - goto want; - } - } while (p != LIST_END(&sc->sc_rts[h])); - -done: - ifp = NULL; -want: - return (ifp); + return (brt->brt_if); +fail: + return (NULL); } struct ifnet * bridge_rtlookup(struct bridge_softc *sc, struct ether_addr *ea) { - struct bridge_rtnode *p; - u_int32_t h; - int dir; - - h = bridge_hash(sc, ea); - LIST_FOREACH(p, &sc->sc_rts[h], brt_next) { - dir = memcmp(ea, &p->brt_addr, sizeof(p->brt_addr)); - if (dir == 0) - return (p->brt_if); - if (dir > 0) - goto fail; - } -fail: - return (NULL); -} + struct bridge_rtnode *brt, tmp; -/* - * The following hash function is adapted from 'Hash Functions' by Bob Jenkins - * ("Algorithm Alley", Dr. Dobbs Journal, September 1997). - * "You may use this code any way you wish, private, educational, or - * commercial. It's free." - */ -#define mix(a,b,c) \ - do { \ - a -= b; a -= c; a ^= (c >> 13); \ - b -= c; b -= a; b ^= (a << 8); \ - c -= a; c -= b; c ^= (b >> 13); \ - a -= b; a -= c; a ^= (c >> 12); \ - b -= c; b -= a; b ^= (a << 16); \ - c -= a; c -= b; c ^= (b >> 5); \ - a -= b; a -= c; a ^= (c >> 3); \ - b -= c; b -= a; b ^= (a << 10); \ - c -= a; c -= b; c ^= (b >> 15); \ - } while (0) - -u_int32_t -bridge_hash(struct bridge_softc *sc, struct ether_addr *addr) -{ - u_int32_t a = 0x9e3779b9, b = 0x9e3779b9, c = sc->sc_hashkey; - - b += addr->ether_addr_octet[5] << 8; - b += addr->ether_addr_octet[4]; - a += addr->ether_addr_octet[3] << 24; - a += addr->ether_addr_octet[2] << 16; - a += addr->ether_addr_octet[1] << 8; - a += addr->ether_addr_octet[0]; + bcopy(ea, &tmp.brt_addr, sizeof(*ea)); + + if ((brt = RB_FIND(brt_tree, &sc->sc_rts, &tmp))) + return (brt->brt_if); - mix(a, b, c); - return (c & BRIDGE_RTABLE_MASK); + return (NULL); } /* @@ -1917,7 +1801,6 @@ void bridge_rttrim(struct bridge_softc *sc) { struct bridge_rtnode *n, *p; - int i; /* * Make sure we have to trim the address table @@ -1933,18 +1816,14 @@ bridge_rttrim(struct bridge_softc *sc) if (sc->sc_brtcnt <= sc->sc_brtmax) return; - for (i = 0; i < BRIDGE_RTABLE_SIZE; i++) { - n = LIST_FIRST(&sc->sc_rts[i]); - while (n != LIST_END(&sc->sc_rts[i])) { - p = LIST_NEXT(n, brt_next); - if ((n->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) { - LIST_REMOVE(n, brt_next); - sc->sc_brtcnt--; - free(n, M_DEVBUF); - if (sc->sc_brtcnt <= sc->sc_brtmax) - return; - } - n = p; + for (n = RB_MIN(brt_tree, &sc->sc_rts); n != NULL; n = p) { + p = RB_NEXT(brt_tree, &sc->sc_rts, n); + if ((n->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) { + RB_REMOVE(brt_tree, &sc->sc_rts, n); + sc->sc_brtcnt--; + free(n, M_DEVBUF); + if (sc->sc_brtcnt <= sc->sc_brtmax) + return; } } } @@ -1967,26 +1846,19 @@ void bridge_rtage(struct bridge_softc *sc) { struct bridge_rtnode *n, *p; - int i; - for (i = 0; i < BRIDGE_RTABLE_SIZE; i++) { - n = LIST_FIRST(&sc->sc_rts[i]); - while (n != LIST_END(&sc->sc_rts[i])) { - if ((n->brt_flags & IFBAF_TYPEMASK) == IFBAF_STATIC) { - n->brt_age = !n->brt_age; - if (n->brt_age) - n->brt_age = 0; - n = LIST_NEXT(n, brt_next); - } else if (n->brt_age) { + for (n = RB_MIN(brt_tree, &sc->sc_rts); n != NULL; n = p) { + p = RB_NEXT(brt_tree, &sc->sc_rts, n); + if ((n->brt_flags & IFBAF_TYPEMASK) == IFBAF_STATIC) { + n->brt_age = !n->brt_age; + if (n->brt_age) n->brt_age = 0; - n = LIST_NEXT(n, brt_next); - } else { - p = LIST_NEXT(n, brt_next); - LIST_REMOVE(n, brt_next); - sc->sc_brtcnt--; - free(n, M_DEVBUF); - n = p; - } + } else if (n->brt_age) { + n->brt_age = 0; + } else { + RB_REMOVE(brt_tree, &sc->sc_rts, n); + sc->sc_brtcnt--; + free(n, M_DEVBUF); } } @@ -1999,7 +1871,6 @@ bridge_rtagenode(struct ifnet *ifp, int { struct bridge_softc *sc = (struct bridge_softc *)ifp->if_bridge; struct bridge_rtnode *n; - int i; if (sc == NULL) return; @@ -2011,14 +1882,12 @@ bridge_rtagenode(struct ifnet *ifp, int if (age == 0) bridge_rtdelete(sc, ifp, 1); else { - for (i = 0; i < BRIDGE_RTABLE_SIZE; i++) { - LIST_FOREACH(n, &sc->sc_rts[i], brt_next) { - /* Cap the expiry time to 'age' */ - if (n->brt_if == ifp && - n->brt_age > time_uptime + age && - (n->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) - n->brt_age = time_uptime + age; - } + RB_FOREACH(n, brt_tree, &sc->sc_rts) { + /* Cap the expiry time to 'age' */ + if (n->brt_if == ifp && + n->brt_age > time_uptime + age && + (n->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) + n->brt_age = time_uptime + age; } } } @@ -2028,28 +1897,20 @@ bridge_rtagenode(struct ifnet *ifp, int /* * Remove all dynamic addresses from the cache */ -int +void bridge_rtflush(struct bridge_softc *sc, int full) { - int i; struct bridge_rtnode *p, *n; - for (i = 0; i < BRIDGE_RTABLE_SIZE; i++) { - n = LIST_FIRST(&sc->sc_rts[i]); - while (n != LIST_END(&sc->sc_rts[i])) { - if (full || - (n->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) { - p = LIST_NEXT(n, brt_next); - LIST_REMOVE(n, brt_next); - sc->sc_brtcnt--; - free(n, M_DEVBUF); - n = p; - } else - n = LIST_NEXT(n, brt_next); + for (n = RB_MIN(brt_tree, &sc->sc_rts); n != NULL; n = p) { + p = RB_NEXT(brt_tree, &sc->sc_rts, n); + + if (full || (n->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) { + RB_REMOVE(brt_tree, &sc->sc_rts, n); + sc->sc_brtcnt--; + free(n, M_DEVBUF); } } - - return (0); } /* @@ -2058,54 +1919,44 @@ bridge_rtflush(struct bridge_softc *sc, int bridge_rtdaddr(struct bridge_softc *sc, struct ether_addr *ea) { - int h; - struct bridge_rtnode *p; + struct bridge_rtnode *brt, tmp; - h = bridge_hash(sc, ea); - LIST_FOREACH(p, &sc->sc_rts[h], brt_next) { - if (bcmp(ea, &p->brt_addr, sizeof(p->brt_addr)) == 0) { - LIST_REMOVE(p, brt_next); - sc->sc_brtcnt--; - free(p, M_DEVBUF); - return (0); - } - } + bcopy(ea, &tmp.brt_addr, sizeof(*ea)); + + if ((brt = RB_FIND(brt_tree, &sc->sc_rts, &tmp)) == NULL) + return (ENOENT); - return (ENOENT); + RB_REMOVE(brt_tree, &sc->sc_rts, brt); + sc->sc_brtcnt--; + free(brt, M_DEVBUF); + + return (0); } + /* * Delete routes to a specific interface member. */ void bridge_rtdelete(struct bridge_softc *sc, struct ifnet *ifp, int dynonly) { - int i; struct bridge_rtnode *n, *p; /* - * Loop through all of the hash buckets and traverse each - * chain looking for routes to this interface. + * traverse tree looking for routes to this interface. */ - for (i = 0; i < BRIDGE_RTABLE_SIZE; i++) { - n = LIST_FIRST(&sc->sc_rts[i]); - while (n != LIST_END(&sc->sc_rts[i])) { - if (n->brt_if != ifp) { - /* Not ours */ - n = LIST_NEXT(n, brt_next); - continue; - } - if (dynonly && - (n->brt_flags & IFBAF_TYPEMASK) != IFBAF_DYNAMIC) { - /* only deleting dynamics */ - n = LIST_NEXT(n, brt_next); - continue; - } - p = LIST_NEXT(n, brt_next); - LIST_REMOVE(n, brt_next); - sc->sc_brtcnt--; - free(n, M_DEVBUF); - n = p; - } + for (n = RB_MIN(brt_tree, &sc->sc_rts); n != NULL; n = p) { + p = RB_NEXT(brt_tree, &sc->sc_rts, n); + + if (n->brt_if != ifp) + continue; + + if (dynonly && + (n->brt_flags & IFBAF_TYPEMASK) != IFBAF_DYNAMIC) + continue; + + RB_REMOVE(brt_tree, &sc->sc_rts, n); + sc->sc_brtcnt--; + free(n, M_DEVBUF); } } @@ -2115,35 +1966,33 @@ bridge_rtdelete(struct bridge_softc *sc, int bridge_rtfind(struct bridge_softc *sc, struct ifbaconf *baconf) { - int i, error = 0, onlycnt = 0; - u_int32_t cnt = 0; - struct bridge_rtnode *n; struct ifbareq bareq; + struct bridge_rtnode *brt; + int error = 0, onlycnt = 0; + u_int32_t cnt = 0; if (baconf->ifbac_len == 0) onlycnt = 1; - for (i = 0, cnt = 0; i < BRIDGE_RTABLE_SIZE; i++) { - LIST_FOREACH(n, &sc->sc_rts[i], brt_next) { - if (!onlycnt) { - if (baconf->ifbac_len < sizeof(struct ifbareq)) - goto done; - bcopy(sc->sc_if.if_xname, bareq.ifba_name, - sizeof(bareq.ifba_name)); - bcopy(n->brt_if->if_xname, bareq.ifba_ifsname, - sizeof(bareq.ifba_ifsname)); - bcopy(&n->brt_addr, &bareq.ifba_dst, - sizeof(bareq.ifba_dst)); - bareq.ifba_age = n->brt_age; - bareq.ifba_flags = n->brt_flags; - error = copyout((caddr_t)&bareq, - (caddr_t)(baconf->ifbac_req + cnt), sizeof(bareq)); - if (error) - goto done; - baconf->ifbac_len -= sizeof(struct ifbareq); - } - cnt++; + RB_FOREACH(brt, brt_tree, &sc->sc_rts) { + if (!onlycnt) { + if (baconf->ifbac_len < sizeof(struct ifbareq)) + goto done; + bcopy(sc->sc_if.if_xname, bareq.ifba_name, + sizeof(bareq.ifba_name)); + bcopy(brt->brt_if->if_xname, bareq.ifba_ifsname, + sizeof(bareq.ifba_ifsname)); + bcopy(&brt->brt_addr, &bareq.ifba_dst, + sizeof(bareq.ifba_dst)); + bareq.ifba_age = brt->brt_age; + bareq.ifba_flags = brt->brt_flags; + error = copyout((caddr_t)&bareq, + (caddr_t)(baconf->ifbac_req + cnt), sizeof(bareq)); + if (error) + goto done; + baconf->ifbac_len -= sizeof(struct ifbareq); } + cnt++; } done: baconf->ifbac_len = cnt * sizeof(struct ifbareq);