I finished the patch dealing with both RTF_UP and RTP_DOWN. I've tested it as best I could. I'm not currently using ECMP, but I don't think anything I've touched changes how it works.
Forgive me if I'm not going about things the right way. This is my first attempt at hacking on the kernel. dmo --- /usr/src/sys/net/radix_mpath.c Thu Dec 10 17:29:34 2009 +++ radix_mpath.c Thu Dec 10 19:00:16 2009 @@ -68,7 +68,7 @@ rn_mpath_capable(struct radix_node_head *rnh) } struct radix_node * -rn_mpath_next(struct radix_node *rn, int all) +rn_mpath_next(struct radix_node *rn, int kind) { struct radix_node *next; struct rtentry *rt = (struct rtentry *)rn; @@ -76,11 +76,17 @@ rn_mpath_next(struct radix_node *rn, int all) if (!rn->rn_dupedkey) return NULL; next = rn->rn_dupedkey; - if (rn->rn_mask == next->rn_mask && (all || - rt->rt_priority == ((struct rtentry *)next)->rt_priority)) - return next; - else - return NULL; + if (rn->rn_mask == next->rn_mask) { + if (kind == 0 && (rt->rt_priority == + (((struct rtentry *)next)->rt_priority))) + return next; + if (kind == 1) + return next; + if (kind == 2 && ((rt->rt_priority & RTP_MASK) == + ((((struct rtentry *)next)->rt_priority) & RTP_MASK))) + return next; + } + return NULL; } struct radix_node * @@ -109,6 +115,33 @@ rn_mpath_prio(struct radix_node *rn, u_int8_t prio) return (prev); } +struct radix_node * +rn_mpath_prio_lookup(struct radix_node *rn, u_int8_t prio) +{ + struct radix_node *prev = rn; + struct rtentry *rt; + + if (prio == RTP_ANY) + return rn; + prio &= RTP_MASK; + + while (rn) { + /* different netmask -> different route */ + if (rn->rn_mask != prev->rn_mask) + return NULL; + + rt = (struct rtentry *)rn; + if ((rt->rt_priority & RTP_MASK) == prio) + return rn; + if ((rt->rt_priority & RTP_MASK) > prio) + /* list is sorted return */ + return NULL; + prev = rn; + rn = rn->rn_dupedkey; + } + return NULL; +} + void rn_mpath_reprio(struct radix_node *rn, int newprio) { @@ -282,6 +315,7 @@ rt_mpath_conflict(struct radix_node_head *rnh, struct struct rtentry *rt1; char *p, *q, *eq; int same, l, skip; + u_int8_t prio; rn = (struct radix_node *)rt; rn1 = rnh->rnh_lookup(rt_key(rt), netmask, rnh); @@ 
-348,10 +382,17 @@ rt_mpath_conflict(struct radix_node_head *rnh, struct } maskmatched: - if (!mpathok && rt1->rt_priority == rt->rt_priority) + /* consider route even if RTP_DOWN */ + prio = rt->rt_priority & RTP_MASK; + + rn1 = rn_mpath_prio_lookup((struct radix_node *)rt1, prio); + if (!rn1) + goto different; + + rt1 = (struct rtentry *)rn1; + if (!mpathok && ((rt1->rt_priority & RTP_MASK) == prio)) return EEXIST; - rn1 = rn_mpath_prio_lookup((struct radix_node *)rt1, rt->rt_priority); /* key/mask were the same. compare gateway for all multipaths */ do { rt1 = (struct rtentry *)rn1; @@ -366,12 +407,12 @@ rt_mpath_conflict(struct radix_node_head *rnh, struct continue; /* check the route priority */ - if (rt1->rt_priority != rt->rt_priority) + if ((rt1->rt_priority & RTP_MASK) != prio) continue; /* all key/mask/gateway are the same. conflicting entry. */ return EEXIST; - } while ((rn1 = rn_mpath_next(rn1, 0)) != NULL); + } while ((rn1 = rn_mpath_next(rn1, 2)) != NULL); different: return 0; --- /usr/src/sys/net/radix_mpath.h Mon Nov 24 04:53:53 2008 +++ radix_mpath.h Thu Dec 10 19:00:16 2009 @@ -47,6 +47,7 @@ struct sockaddr; int rn_mpath_capable(struct radix_node_head *); struct radix_node *rn_mpath_next(struct radix_node *, int); struct radix_node *rn_mpath_prio(struct radix_node *, u_int8_t); +struct radix_node *rn_mpath_prio_lookup(struct radix_node *, u_int8_t); void rn_mpath_reprio(struct radix_node *, int); int rn_mpath_count(struct radix_node *); struct rtentry *rt_mpath_matchgate(struct rtentry *, struct sockaddr *, --- /usr/src/sys/net/route.c Tue Dec 1 00:36:58 2009 +++ route.c Thu Dec 10 19:00:16 2009 @@ -753,8 +753,9 @@ rtrequest1(int req, struct rt_addrinfo *info, u_int8_t struct rtentry **ret_nrt, u_int tableid) { int s = splsoftnet(); int error = 0; + int mpaths = 0; struct rtentry *rt, *crt; - struct radix_node *rn; + struct radix_node *rn, *justone; struct radix_node_head *rnh; struct ifaddr *ifa; struct sockaddr *ndst; @@ -786,6 +787,9 @@ 
rtrequest1(int req, struct rt_addrinfo *info, u_int8_t rn = (struct radix_node *)rt; if (!rt) senderr(ESRCH); + + /* to find correct route to update RTF_MPATH */ + prio = rt->rt_priority & RTP_MASK; } #endif if ((rn = rnh->rnh_deladdr(info->rti_info[RTAX_DST], @@ -814,7 +818,8 @@ rtrequest1(int req, struct rt_addrinfo *info, u_int8_t if (rn_mpath_capable(rnh)) { if ((rn = rnh->rnh_lookup(info->rti_info[RTAX_DST], info->rti_info[RTAX_NETMASK], rnh)) != NULL && - rn_mpath_next(rn, 0) == NULL) + (rn = rn_mpath_prio_lookup(rn, prio)) != NULL && + rn_mpath_next(rn, 2) == NULL) ((struct rtentry *)rn)->rt_flags &= ~RTF_MPATH; } #endif @@ -893,6 +898,19 @@ makeroute: pool_put(&rtentry_pool, rt); senderr(EEXIST); } + + /* find out if we need to add RTF_MPATH after adding */ + justone = NULL; + if ((rn_mpath_capable(rnh)) && + (rn = rnh->rnh_lookup(info->rti_info[RTAX_DST], + info->rti_info[RTAX_NETMASK], rnh)) != NULL && + (rn = rn_mpath_prio_lookup(rn, prio & RTP_MASK)) != NULL) { + mpaths = 1; + if (rn_mpath_next(rn, 2) == NULL) + justone = rn; + else + mpaths = 2; + } #endif if (info->rti_info[RTAX_LABEL] != NULL) { @@ -972,14 +990,16 @@ makeroute: } #ifndef SMALL_KERNEL - if (rn_mpath_capable(rnh) && - (rn = rnh->rnh_lookup(info->rti_info[RTAX_DST], - info->rti_info[RTAX_NETMASK], rnh)) != NULL && - (rn = rn_mpath_prio(rn, prio)) != NULL) { - if (rn_mpath_next(rn, 0) == NULL) - ((struct rtentry *)rn)->rt_flags &= ~RTF_MPATH; - else - ((struct rtentry *)rn)->rt_flags |= RTF_MPATH; + if (rn_mpath_capable(rnh)) { + rt = (struct rtentry *)rn; + if (mpaths == 0) + rt->rt_flags &= ~RTF_MPATH; + else + rt->rt_flags |= RTF_MPATH; + + if (mpaths == 1) + ((struct rtentry *)justone)->rt_flags |= + RTF_MPATH; } #endif --- /usr/src/sys/net/rtsock.c Tue Dec 1 00:36:58 2009 +++ rtsock.c Thu Dec 10 19:21:52 2009 @@ -641,6 +641,19 @@ report: } } + /* new gateway, possible link state change */ + if ((LINK_STATE_IS_UP(ifa->ifa_ifp->if_link_state) || + ifa->ifa_ifp->if_link_state == 
LINK_STATE_UNKNOWN) && + ifa->ifa_ifp->if_flags & IFF_UP) { + rt->rt_flags |= RTF_UP; + rt->rt_priority &= RTP_MASK; + } + else { + rt->rt_flags &= ~RTF_UP; + rtm->rtm_flags &= RTF_UP; + rt->rt_priority |= RTP_DOWN; + } + /* XXX Hack to allow some flags to be toggled */ if (rtm->rtm_fmask & RTF_FMASK) rt->rt_flags = (rt->rt_flags &