On 10/02/15(Tue) 03:04, Claudio Jeker wrote: > There is no need to not allow the same network to be configured more than > once. Instead just rely on the multipath and priority handling of the > routing table to select the right route. > Additionally this removes cloned routes (arp/ndp cache) when the interface > goes down or when any of the multipath cloning routes is changed. > > With this it is possible to run 2 dhclients on wired and wireless with a > bridged network. Active TCP sessions still fail when the cable is > unplugged. To fix this, more is needed. > > This changes a fundamental part of the network stack and therefore broad > testing is needed to find all the hidden dragons.
Here's a version of the diff rebased on top of the recent changes. Index: net/if_var.h =================================================================== RCS file: /cvs/src/sys/net/if_var.h,v retrieving revision 1.20 diff -u -p -r1.20 if_var.h --- net/if_var.h 9 Feb 2015 03:09:57 -0000 1.20 +++ net/if_var.h 12 Feb 2015 11:08:40 -0000 @@ -392,6 +392,7 @@ do { \ /* default interface priorities */ #define IF_WIRED_DEFAULT_PRIORITY 0 #define IF_WIRELESS_DEFAULT_PRIORITY 4 +#define IF_CARP_DEFAULT_PRIORITY 15 extern struct ifnet_head ifnet; extern struct ifnet *lo0ifp; Index: net/route.c =================================================================== RCS file: /cvs/src/sys/net/route.c,v retrieving revision 1.206 diff -u -p -r1.206 route.c --- net/route.c 11 Feb 2015 23:34:43 -0000 1.206 +++ net/route.c 12 Feb 2015 11:08:40 -0000 @@ -554,6 +554,16 @@ rtdeletemsg(struct rtentry *rt, u_int ta return (error); } +static inline int +rtequal(struct rtentry *a, struct rtentry *b) +{ + if (memcmp(rt_key(a), rt_key(b), rt_key(a)->sa_len) == 0 && + memcmp(rt_mask(a), rt_mask(b), rt_mask(a)->sa_len) == 0) + return 1; + else + return 0; +} + int rtflushclone1(struct radix_node *rn, void *arg, u_int id) { @@ -561,7 +571,8 @@ rtflushclone1(struct radix_node *rn, voi rt = (struct rtentry *)rn; parent = (struct rtentry *)arg; - if ((rt->rt_flags & RTF_CLONED) != 0 && rt->rt_parent == parent) + if ((rt->rt_flags & RTF_CLONED) != 0 && (rt->rt_parent == parent || + rtequal(rt->rt_parent, parent))) rtdeletemsg(rt, id); return 0; } @@ -1106,16 +1117,20 @@ rt_ifa_add(struct ifaddr *ifa, int flags { struct rtentry *rt, *nrt = NULL; struct sockaddr_rtlabel sa_rl; + struct sockaddr_dl sa_dl = { sizeof(sa_dl), AF_LINK }; struct rt_addrinfo info; u_short rtableid = ifa->ifa_ifp->if_rdomain; - u_int8_t prio = RTP_CONNECTED; + u_int8_t prio = ifa->ifa_ifp->if_priority + RTP_STATIC; int error; + sa_dl.sdl_type = ifa->ifa_ifp->if_type; + sa_dl.sdl_index = ifa->ifa_ifp->if_index; + memset(&info, 0, 
sizeof(info)); info.rti_ifa = ifa; - info.rti_flags = flags; + info.rti_flags = flags | RTF_MPATH; info.rti_info[RTAX_DST] = dst; - info.rti_info[RTAX_GATEWAY] = ifa->ifa_addr; + info.rti_info[RTAX_GATEWAY] = (struct sockaddr *)&sa_dl; info.rti_info[RTAX_LABEL] = rtlabel_id2sa(ifa->ifa_ifp->if_rtlabelid, &sa_rl); @@ -1161,8 +1176,9 @@ rt_ifa_del(struct ifaddr *ifa, int flags struct sockaddr *deldst; struct rt_addrinfo info; struct sockaddr_rtlabel sa_rl; + struct sockaddr_dl sa_dl = { sizeof(sa_dl), AF_LINK }; u_short rtableid = ifa->ifa_ifp->if_rdomain; - u_int8_t prio = RTP_CONNECTED; + u_int8_t prio = ifa->ifa_ifp->if_priority + RTP_STATIC; int error; if ((flags & RTF_HOST) == 0 && ifa->ifa_netmask) { @@ -1187,10 +1203,14 @@ rt_ifa_del(struct ifaddr *ifa, int flags } } + sa_dl.sdl_type = ifa->ifa_ifp->if_type; + sa_dl.sdl_index = ifa->ifa_ifp->if_index; + memset(&info, 0, sizeof(info)); info.rti_ifa = ifa; info.rti_flags = flags; info.rti_info[RTAX_DST] = dst; + info.rti_info[RTAX_GATEWAY] = (struct sockaddr *)&sa_dl; info.rti_info[RTAX_LABEL] = rtlabel_id2sa(ifa->ifa_ifp->if_rtlabelid, &sa_rl); @@ -1692,6 +1715,15 @@ rt_if_linkstate_change(struct radix_node } } else { if (rt->rt_flags & RTF_UP) { + /* + * Remove cloned routes (mainly arp) to + * down interfaces so we have a chance to + * clone a new route from a better source. 
+ */ + if (rt->rt_flags & RTF_CLONED) { + rtdeletemsg(rt, id); + return (0); + } /* take route down */ rt->rt_flags &= ~RTF_UP; rn_mpath_reprio(rn, rt->rt_priority | RTP_DOWN); Index: netinet/if_ether.c =================================================================== RCS file: /cvs/src/sys/netinet/if_ether.c,v retrieving revision 1.146 diff -u -p -r1.146 if_ether.c --- netinet/if_ether.c 11 Feb 2015 23:34:43 -0000 1.146 +++ netinet/if_ether.c 12 Feb 2015 11:10:11 -0000 @@ -111,8 +111,6 @@ void db_print_llinfo(caddr_t); int db_show_radix_node(struct radix_node *, void *, u_int); #endif -static const struct sockaddr_dl null_sdl = { sizeof(null_sdl), AF_LINK }; - /* * Timeout routine. Age arp_tab entries periodically. */ @@ -181,14 +179,6 @@ arp_rtrequest(int req, struct rtentry *r if (rt->rt_flags & RTF_CLONING || ((rt->rt_flags & (RTF_LLINFO | RTF_LOCAL)) && !la)) { /* - * Case 1: This route should come from a route to iface. - */ - rt_setgate(rt, (struct sockaddr *)&null_sdl, - ifp->if_rdomain); - gate = rt->rt_gateway; - SDL(gate)->sdl_type = ifp->if_type; - SDL(gate)->sdl_index = ifp->if_index; - /* * Give this route an expiration time, even though * it's a "permanent" route, so that routes cloned * from it do not need their expiration time set. 
@@ -251,10 +241,6 @@ arp_rtrequest(int req, struct rtentry *r } if (ifa) { rt->rt_expire = 0; - SDL(gate)->sdl_alen = ETHER_ADDR_LEN; - memcpy(LLADDR(SDL(gate)), - ((struct arpcom *)ifp)->ac_enaddr, ETHER_ADDR_LEN); - /* * XXX Since lo0 is in the default rdomain we * should not (ab)use it for any route related Index: netinet/in.c =================================================================== RCS file: /cvs/src/sys/netinet/in.c,v retrieving revision 1.115 diff -u -p -r1.115 in.c --- netinet/in.c 12 Jan 2015 13:51:45 -0000 1.115 +++ netinet/in.c 12 Feb 2015 11:08:40 -0000 @@ -93,8 +93,6 @@ int in_lifaddr_ioctl(struct socket *, u_ struct ifnet *); void in_purgeaddr(struct ifaddr *); -int in_addprefix(struct in_ifaddr *); -int in_scrubprefix(struct in_ifaddr *); int in_addhost(struct in_ifaddr *, struct sockaddr_in *); int in_scrubhost(struct in_ifaddr *, struct sockaddr_in *); int in_insert_prefix(struct in_ifaddr *); @@ -590,7 +588,8 @@ in_ifscrub(struct ifnet *ifp, struct in_ if (ISSET(ifp->if_flags, IFF_POINTOPOINT)) in_scrubhost(ia, &ia->ia_dstaddr); else if (!ISSET(ifp->if_flags, IFF_LOOPBACK)) - in_scrubprefix(ia); + if (ia->ia_flags & IFA_ROUTE) + in_remove_prefix(ia); } /* @@ -669,7 +668,7 @@ in_ifinit(struct ifnet *ifp, struct in_i goto out; error = in_addhost(ia, &ia->ia_dstaddr); } else if (!ISSET(ifp->if_flags, IFF_LOOPBACK)) { - error = in_addprefix(ia); + error = in_insert_prefix(ia); } /* @@ -759,125 +758,6 @@ in_remove_prefix(struct in_ifaddr *ia) ifa->ifa_broadaddr); ia->ia_flags &= ~IFA_ROUTE; -} - -/* - * add a route to prefix ("connected route" in cisco terminology). - * does nothing if there's some interface address with the same prefix already. 
- */ -int -in_addprefix(struct in_ifaddr *ia0) -{ - struct ifnet *ifp; - struct ifaddr *ifa; - struct in_ifaddr *ia; - struct in_addr prefix, mask, p, m; - - prefix = ia0->ia_addr.sin_addr; - mask = ia0->ia_sockmask.sin_addr; - prefix.s_addr &= mask.s_addr; - - TAILQ_FOREACH(ifp, &ifnet, if_list) { - if (ifp->if_flags & (IFF_LOOPBACK|IFF_POINTOPOINT)) - continue; - - if (ifp->if_rdomain != ia0->ia_ifp->if_rdomain) - continue; - - TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) { - if (ifa->ifa_addr->sa_family != AF_INET) - continue; - - ia = ifatoia(ifa); - - if ((ia->ia_flags & IFA_ROUTE) == 0) - continue; - - p = ia->ia_addr.sin_addr; - m = ia->ia_sockmask.sin_addr; - p.s_addr &= m.s_addr; - - if (prefix.s_addr != p.s_addr || - mask.s_addr != m.s_addr) - continue; - -#if NCARP > 0 - /* move to a real interface instead of carp interface */ - if (ia->ia_ifp->if_type == IFT_CARP && - ia0->ia_ifp->if_type != IFT_CARP) { - in_remove_prefix(ia); - break; - } -#endif - /* - * If we got a matching prefix route inserted by other - * interface address, we don't need to bother - */ - return (0); - } - } - - /* - * noone seem to have prefix route. insert it. - */ - return in_insert_prefix(ia0); -} - -/* - * remove a route to prefix ("connected route" in cisco terminology). - * re-installs the route by using another interface address, if there's one - * with the same prefix (otherwise we lose the route mistakenly). 
- */ -int -in_scrubprefix(struct in_ifaddr *ia0) -{ - struct ifnet *ifp; - struct ifaddr *ifa; - struct in_ifaddr *ia; - struct in_addr prefix, mask, p, m; - - if ((ia0->ia_flags & IFA_ROUTE) == 0) - return 0; - - prefix = ia0->ia_addr.sin_addr; - mask = ia0->ia_sockmask.sin_addr; - prefix.s_addr &= mask.s_addr; - - TAILQ_FOREACH(ifp, &ifnet, if_list) { - if (ifp->if_flags & (IFF_LOOPBACK|IFF_POINTOPOINT)) - continue; - - if (ifp->if_rdomain != ia0->ia_ifp->if_rdomain) - continue; - - TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) { - if (ifa->ifa_addr->sa_family != AF_INET) - continue; - - ia = ifatoia(ifa); - - if ((ia->ia_flags & IFA_ROUTE) != 0) - continue; - - p = ia->ia_addr.sin_addr; - m = ia->ia_sockmask.sin_addr; - p.s_addr &= m.s_addr; - - if (prefix.s_addr != p.s_addr || - mask.s_addr != m.s_addr) - continue; - - /* Move IFA_ROUTE to the matching prefix route. */ - in_remove_prefix(ia0); - return (in_insert_prefix(ia)); - } - } - - /* - * noone seem to have prefix route. remove it. 
- */ - in_remove_prefix(ia0); - return 0; } /* Index: netinet/ip_carp.c =================================================================== RCS file: /cvs/src/sys/netinet/ip_carp.c,v retrieving revision 1.246 diff -u -p -r1.246 ip_carp.c --- netinet/ip_carp.c 11 Feb 2015 04:29:29 -0000 1.246 +++ netinet/ip_carp.c 12 Feb 2015 11:15:40 -0000 @@ -751,6 +751,7 @@ carp_clone_create(ifc, unit) ifp->if_addrlen = ETHER_ADDR_LEN; ifp->if_hdrlen = ETHER_HDR_LEN; ifp->if_mtu = ETHERMTU; + ifp->if_priority = IF_CARP_DEFAULT_PRIORITY; IFQ_SET_MAXLEN(&ifp->if_snd, IFQ_MAXLEN); IFQ_SET_READY(&ifp->if_snd); if_attach(ifp); Index: netinet6/nd6.c =================================================================== RCS file: /cvs/src/sys/netinet6/nd6.c,v retrieving revision 1.131 diff -u -p -r1.131 nd6.c --- netinet6/nd6.c 11 Feb 2015 23:34:43 -0000 1.131 +++ netinet6/nd6.c 12 Feb 2015 11:10:40 -0000 @@ -957,7 +957,6 @@ nd6_rtrequest(int req, struct rtentry *r { struct sockaddr *gate = rt->rt_gateway; struct llinfo_nd6 *ln = (struct llinfo_nd6 *)rt->rt_llinfo; - static struct sockaddr_dl null_sdl = {sizeof(null_sdl), AF_LINK}; struct ifnet *ifp = rt->rt_ifp; struct ifaddr *ifa; struct nd_defrouter *dr; @@ -1016,17 +1015,6 @@ nd6_rtrequest(int req, struct rtentry *r */ if ((rt->rt_flags & RTF_CLONING) || ((rt->rt_flags & (RTF_LLINFO | RTF_LOCAL)) && !ln)) { - /* - * Case 1: This route should come from a route to - * interface (RTF_CLONING case) or the route should be - * treated as on-link but is currently not - * (RTF_LLINFO && !ln case). 
- */ - rt_setgate(rt, (struct sockaddr *)&null_sdl, - ifp->if_rdomain); - gate = rt->rt_gateway; - SDL(gate)->sdl_type = ifp->if_type; - SDL(gate)->sdl_index = ifp->if_index; if (ln) nd6_llinfo_settimer(ln, 0); if ((rt->rt_flags & RTF_CLONING) != 0) @@ -1062,7 +1050,7 @@ nd6_rtrequest(int req, struct rtentry *r /* FALLTHROUGH */ case RTM_RESOLVE: if (gate->sa_family != AF_LINK || - gate->sa_len < sizeof(null_sdl)) { + gate->sa_len < sizeof(struct sockaddr_dl)) { log(LOG_DEBUG, "%s: bad gateway value: %s\n", __func__, ifp->if_xname); break; @@ -1144,14 +1132,9 @@ nd6_rtrequest(int req, struct rtentry *r ifa = &in6ifa_ifpwithaddr(ifp, &satosin6(rt_key(rt))->sin6_addr)->ia_ifa; if (ifa) { - caddr_t macp = nd6_ifptomac(ifp); nd6_llinfo_settimer(ln, -1); ln->ln_state = ND6_LLINFO_REACHABLE; ln->ln_byhint = 0; - if (macp) { - memcpy(LLADDR(SDL(gate)), macp, ifp->if_addrlen); - SDL(gate)->sdl_alen = ifp->if_addrlen; - } /* * XXX Since lo0 is in the default rdomain we