On Wed, Mar 18, 2015 at 05:46:34AM +0100, Claudio Jeker wrote: > On Tue, Mar 17, 2015 at 05:35:21PM +0100, Martin Pieuchot wrote: > > On 12/02/15(Thu) 12:35, Martin Pieuchot wrote: > > > On 10/02/15(Tue) 03:04, Claudio Jeker wrote: > > > > There is no need to not allow the same network to be configured more > > > > than > > > > once. Instead just rely on the multipath and priority handling of the > > > > routing table to select the right route. > > > > Additionally this removes cloned routes (arp/ndp cache) when the > > > > interface > > > > goes down or when any of the multipath cloning routes is changed. > > > > > > > > With this it is possible to run 2 dhclients on wired and wireless with a > > > > bridged network. Active TCP sessions still fail when the cable is > > > > unplugged. To fix this more is needed. > > > > > > > > This changes a fundamental part of the network stack and therefore broad > > > > testing is needed to find all the hidden dragons. > > > > > > Here's a version of the diff rebased on top of the recent changes. > > > > I think it's time to get this in, then as a second step put the > > dhclient(8) bits. > > > > Claudio you have my ok. > > It is broken for IPv6 and I could not find the proper fix yet. I think I > know why it goes wrong but the nd6 code is a nightmare. > > I will send out a new diff once I have IPv6 fixed. >
Unsurprisingly IPv6 needs to be special and is not using rt_ifa_add or rt_ifa_del in all cases. There are three special cases that do the same dance but use ifa->ifa_addr as the gateway, and because of this the resulting interface routes are not caught by the nd6 code (RTF_LLINFO is missing). When the routes are then cloned, nd6 is not invoked and everything points back to the host. Oops. The following updated diff seems to fix this but I only minimally tested the IPv6 part. People using IPv6 may want to give this a spin. IMO the net/if_var.h and netinet/ip_carp.c changes could be committed before the rest since there should be no noticeable change in how carp works. -- :wq Claudio Index: net/if_var.h =================================================================== RCS file: /cvs/src/sys/net/if_var.h,v retrieving revision 1.24 diff -u -p -r1.24 if_var.h --- net/if_var.h 7 Apr 2015 10:46:20 -0000 1.24 +++ net/if_var.h 12 Apr 2015 11:47:03 -0000 @@ -389,6 +389,7 @@ do { \ /* default interface priorities */ #define IF_WIRED_DEFAULT_PRIORITY 0 #define IF_WIRELESS_DEFAULT_PRIORITY 4 +#define IF_CARP_DEFAULT_PRIORITY 15 /* * Network stack input queues. 
Index: net/route.c =================================================================== RCS file: /cvs/src/sys/net/route.c,v retrieving revision 1.208 diff -u -p -r1.208 route.c --- net/route.c 26 Mar 2015 11:02:44 -0000 1.208 +++ net/route.c 4 Apr 2015 08:31:34 -0000 @@ -554,6 +554,16 @@ rtdeletemsg(struct rtentry *rt, u_int ta return (error); } +static inline int +rtequal(struct rtentry *a, struct rtentry *b) +{ + if (memcmp(rt_key(a), rt_key(b), rt_key(a)->sa_len) == 0 && + memcmp(rt_mask(a), rt_mask(b), rt_mask(a)->sa_len) == 0) + return 1; + else + return 0; +} + int rtflushclone1(struct radix_node *rn, void *arg, u_int id) { @@ -561,7 +571,8 @@ rtflushclone1(struct radix_node *rn, voi rt = (struct rtentry *)rn; parent = (struct rtentry *)arg; - if ((rt->rt_flags & RTF_CLONED) != 0 && rt->rt_parent == parent) + if ((rt->rt_flags & RTF_CLONED) != 0 && (rt->rt_parent == parent || + rtequal(rt->rt_parent, parent))) rtdeletemsg(rt, id); return 0; } @@ -1106,16 +1117,20 @@ rt_ifa_add(struct ifaddr *ifa, int flags { struct rtentry *rt, *nrt = NULL; struct sockaddr_rtlabel sa_rl; + struct sockaddr_dl sa_dl = { sizeof(sa_dl), AF_LINK }; struct rt_addrinfo info; u_short rtableid = ifa->ifa_ifp->if_rdomain; - u_int8_t prio = RTP_CONNECTED; + u_int8_t prio = ifa->ifa_ifp->if_priority + RTP_STATIC; int error; + sa_dl.sdl_type = ifa->ifa_ifp->if_type; + sa_dl.sdl_index = ifa->ifa_ifp->if_index; + memset(&info, 0, sizeof(info)); info.rti_ifa = ifa; - info.rti_flags = flags; + info.rti_flags = flags | RTF_MPATH; info.rti_info[RTAX_DST] = dst; - info.rti_info[RTAX_GATEWAY] = ifa->ifa_addr; + info.rti_info[RTAX_GATEWAY] = (struct sockaddr *)&sa_dl; info.rti_info[RTAX_LABEL] = rtlabel_id2sa(ifa->ifa_ifp->if_rtlabelid, &sa_rl); @@ -1170,8 +1185,9 @@ rt_ifa_del(struct ifaddr *ifa, int flags struct sockaddr *deldst; struct rt_addrinfo info; struct sockaddr_rtlabel sa_rl; + struct sockaddr_dl sa_dl = { sizeof(sa_dl), AF_LINK }; u_short rtableid = ifa->ifa_ifp->if_rdomain; - u_int8_t 
prio = RTP_CONNECTED; + u_int8_t prio = ifa->ifa_ifp->if_priority + RTP_STATIC; int error; #ifdef MPLS @@ -1202,10 +1218,14 @@ rt_ifa_del(struct ifaddr *ifa, int flags } } + sa_dl.sdl_type = ifa->ifa_ifp->if_type; + sa_dl.sdl_index = ifa->ifa_ifp->if_index; + memset(&info, 0, sizeof(info)); info.rti_ifa = ifa; info.rti_flags = flags; info.rti_info[RTAX_DST] = dst; + info.rti_info[RTAX_GATEWAY] = (struct sockaddr *)&sa_dl; info.rti_info[RTAX_LABEL] = rtlabel_id2sa(ifa->ifa_ifp->if_rtlabelid, &sa_rl); @@ -1710,6 +1730,15 @@ rt_if_linkstate_change(struct radix_node } } else { if (rt->rt_flags & RTF_UP) { + /* + * Remove cloned routes (mainly arp) to + * down interfaces so we have a chance to + * clone a new route from a better source. + */ + if (rt->rt_flags & RTF_CLONED) { + rtdeletemsg(rt, id); + return (0); + } /* take route down */ rt->rt_flags &= ~RTF_UP; rn_mpath_reprio(rn, rt->rt_priority | RTP_DOWN); Index: netinet/if_ether.c =================================================================== RCS file: /cvs/src/sys/netinet/if_ether.c,v retrieving revision 1.150 diff -u -p -r1.150 if_ether.c --- netinet/if_ether.c 10 Apr 2015 13:58:20 -0000 1.150 +++ netinet/if_ether.c 12 Apr 2015 11:47:03 -0000 @@ -121,8 +121,6 @@ void db_print_llinfo(caddr_t); int db_show_radix_node(struct radix_node *, void *, u_int); #endif -static const struct sockaddr_dl null_sdl = { sizeof(null_sdl), AF_LINK }; - /* * Timeout routine. Age arp_tab entries periodically. */ @@ -190,14 +188,6 @@ arp_rtrequest(int req, struct rtentry *r if (rt->rt_flags & RTF_CLONING || ((rt->rt_flags & (RTF_LLINFO | RTF_LOCAL)) && !la)) { /* - * Case 1: This route should come from a route to iface. 
- */ - rt_setgate(rt, (struct sockaddr *)&null_sdl, - ifp->if_rdomain); - gate = rt->rt_gateway; - SDL(gate)->sdl_type = ifp->if_type; - SDL(gate)->sdl_index = ifp->if_index; - /* * Give this route an expiration time, even though * it's a "permanent" route, so that routes cloned * from it do not need their expiration time set. @@ -261,10 +251,6 @@ arp_rtrequest(int req, struct rtentry *r } if (ifa) { rt->rt_expire = 0; - SDL(gate)->sdl_alen = ETHER_ADDR_LEN; - memcpy(LLADDR(SDL(gate)), - ((struct arpcom *)ifp)->ac_enaddr, ETHER_ADDR_LEN); - /* * XXX Since lo0 is in the default rdomain we * should not (ab)use it for any route related Index: netinet/in.c =================================================================== RCS file: /cvs/src/sys/netinet/in.c,v retrieving revision 1.115 diff -u -p -r1.115 in.c --- netinet/in.c 12 Jan 2015 13:51:45 -0000 1.115 +++ netinet/in.c 10 Feb 2015 01:50:22 -0000 @@ -93,8 +93,6 @@ int in_lifaddr_ioctl(struct socket *, u_ struct ifnet *); void in_purgeaddr(struct ifaddr *); -int in_addprefix(struct in_ifaddr *); -int in_scrubprefix(struct in_ifaddr *); int in_addhost(struct in_ifaddr *, struct sockaddr_in *); int in_scrubhost(struct in_ifaddr *, struct sockaddr_in *); int in_insert_prefix(struct in_ifaddr *); @@ -590,7 +588,8 @@ in_ifscrub(struct ifnet *ifp, struct in_ if (ISSET(ifp->if_flags, IFF_POINTOPOINT)) in_scrubhost(ia, &ia->ia_dstaddr); else if (!ISSET(ifp->if_flags, IFF_LOOPBACK)) - in_scrubprefix(ia); + if (ia->ia_flags & IFA_ROUTE) + in_remove_prefix(ia); } /* @@ -669,7 +668,7 @@ in_ifinit(struct ifnet *ifp, struct in_i goto out; error = in_addhost(ia, &ia->ia_dstaddr); } else if (!ISSET(ifp->if_flags, IFF_LOOPBACK)) { - error = in_addprefix(ia); + error = in_insert_prefix(ia); } /* @@ -759,125 +758,6 @@ in_remove_prefix(struct in_ifaddr *ia) ifa->ifa_broadaddr); ia->ia_flags &= ~IFA_ROUTE; -} - -/* - * add a route to prefix ("connected route" in cisco terminology). 
- * does nothing if there's some interface address with the same prefix already. - */ -int -in_addprefix(struct in_ifaddr *ia0) -{ - struct ifnet *ifp; - struct ifaddr *ifa; - struct in_ifaddr *ia; - struct in_addr prefix, mask, p, m; - - prefix = ia0->ia_addr.sin_addr; - mask = ia0->ia_sockmask.sin_addr; - prefix.s_addr &= mask.s_addr; - - TAILQ_FOREACH(ifp, &ifnet, if_list) { - if (ifp->if_flags & (IFF_LOOPBACK|IFF_POINTOPOINT)) - continue; - - if (ifp->if_rdomain != ia0->ia_ifp->if_rdomain) - continue; - - TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) { - if (ifa->ifa_addr->sa_family != AF_INET) - continue; - - ia = ifatoia(ifa); - - if ((ia->ia_flags & IFA_ROUTE) == 0) - continue; - - p = ia->ia_addr.sin_addr; - m = ia->ia_sockmask.sin_addr; - p.s_addr &= m.s_addr; - - if (prefix.s_addr != p.s_addr || - mask.s_addr != m.s_addr) - continue; - -#if NCARP > 0 - /* move to a real interface instead of carp interface */ - if (ia->ia_ifp->if_type == IFT_CARP && - ia0->ia_ifp->if_type != IFT_CARP) { - in_remove_prefix(ia); - break; - } -#endif - /* - * If we got a matching prefix route inserted by other - * interface address, we don't need to bother - */ - return (0); - } - } - - /* - * noone seem to have prefix route. insert it. - */ - return in_insert_prefix(ia0); -} - -/* - * remove a route to prefix ("connected route" in cisco terminology). - * re-installs the route by using another interface address, if there's one - * with the same prefix (otherwise we lose the route mistakenly). 
- */ -int -in_scrubprefix(struct in_ifaddr *ia0) -{ - struct ifnet *ifp; - struct ifaddr *ifa; - struct in_ifaddr *ia; - struct in_addr prefix, mask, p, m; - - if ((ia0->ia_flags & IFA_ROUTE) == 0) - return 0; - - prefix = ia0->ia_addr.sin_addr; - mask = ia0->ia_sockmask.sin_addr; - prefix.s_addr &= mask.s_addr; - - TAILQ_FOREACH(ifp, &ifnet, if_list) { - if (ifp->if_flags & (IFF_LOOPBACK|IFF_POINTOPOINT)) - continue; - - if (ifp->if_rdomain != ia0->ia_ifp->if_rdomain) - continue; - - TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) { - if (ifa->ifa_addr->sa_family != AF_INET) - continue; - - ia = ifatoia(ifa); - - if ((ia->ia_flags & IFA_ROUTE) != 0) - continue; - - p = ia->ia_addr.sin_addr; - m = ia->ia_sockmask.sin_addr; - p.s_addr &= m.s_addr; - - if (prefix.s_addr != p.s_addr || - mask.s_addr != m.s_addr) - continue; - - /* Move IFA_ROUTE to the matching prefix route. */ - in_remove_prefix(ia0); - return (in_insert_prefix(ia)); - } - } - - /* - * noone seem to have prefix route. remove it. - */ - in_remove_prefix(ia0); - return 0; } /* Index: netinet/ip_carp.c =================================================================== RCS file: /cvs/src/sys/netinet/ip_carp.c,v retrieving revision 1.251 diff -u -p -r1.251 ip_carp.c --- netinet/ip_carp.c 15 Apr 2015 15:16:17 -0000 1.251 +++ netinet/ip_carp.c 15 Apr 2015 20:45:40 -0000 @@ -751,6 +751,7 @@ carp_clone_create(ifc, unit) ether_ifattach(ifp); ifp->if_type = IFT_CARP; ifp->if_output = carp_output; + ifp->if_priority = IF_CARP_DEFAULT_PRIORITY; /* Hook carp_addr_updated to cope with address and route changes. 
*/ sc->ah_cookie = hook_establish(sc->sc_if.if_addrhooks, 0, Index: netinet6/in6.c =================================================================== RCS file: /cvs/src/sys/netinet6/in6.c,v retrieving revision 1.154 diff -u -p -r1.154 in6.c --- netinet6/in6.c 14 Mar 2015 03:38:52 -0000 1.154 +++ netinet6/in6.c 18 Mar 2015 18:03:16 -0000 @@ -78,6 +78,7 @@ #include <sys/syslog.h> #include <net/if.h> +#include <net/if_dl.h> #include <net/if_types.h> #include <net/route.h> @@ -827,6 +828,10 @@ in6_update_ifa(struct ifnet *ifp, struct /* join solicited multicast addr for new host id */ struct sockaddr_in6 llsol; + struct sockaddr_dl sa_dl = { sizeof(sa_dl), AF_LINK }; + + sa_dl.sdl_type = ifp->if_type; + sa_dl.sdl_index = ifp->if_index; bzero(&llsol, sizeof(llsol)); llsol.sin6_family = AF_INET6; @@ -887,7 +892,7 @@ in6_update_ifa(struct ifnet *ifp, struct bzero(&info, sizeof(info)); info.rti_info[RTAX_DST] = sin6tosa(&mltaddr); - info.rti_info[RTAX_GATEWAY] = sin6tosa(&ia6->ia_addr); + info.rti_info[RTAX_GATEWAY] = (struct sockaddr *)&sa_dl; info.rti_info[RTAX_NETMASK] = sin6tosa(&mltmask); info.rti_info[RTAX_IFA] = sin6tosa(&ia6->ia_addr); /* XXX: we need RTF_CLONING to fake nd6_rtrequest */ @@ -956,7 +961,7 @@ in6_update_ifa(struct ifnet *ifp, struct bzero(&info, sizeof(info)); info.rti_info[RTAX_DST] = sin6tosa(&mltaddr); - info.rti_info[RTAX_GATEWAY] = sin6tosa(&ia6->ia_addr); + info.rti_info[RTAX_GATEWAY] = (struct sockaddr *)&sa_dl; info.rti_info[RTAX_NETMASK] = sin6tosa(&mltmask); info.rti_info[RTAX_IFA] = sin6tosa(&ia6->ia_addr); info.rti_flags = RTF_UP | RTF_CLONING; Index: netinet6/nd6.c =================================================================== RCS file: /cvs/src/sys/netinet6/nd6.c,v retrieving revision 1.133 diff -u -p -r1.133 nd6.c --- netinet6/nd6.c 25 Mar 2015 17:39:33 -0000 1.133 +++ netinet6/nd6.c 4 Apr 2015 08:31:34 -0000 @@ -667,6 +667,7 @@ nd6_lookup(struct in6_addr *addr6, int c } if (!rt) { if (create && ifp) { + struct sockaddr_dl sa_dl 
= { sizeof(sa_dl), AF_LINK }; struct rt_addrinfo info; int e; @@ -682,6 +683,9 @@ nd6_lookup(struct in6_addr *addr6, int c if (ifa == NULL) return (NULL); + sa_dl.sdl_type = ifp->if_type; + sa_dl.sdl_index = ifp->if_index; + /* * Create a new route. RTF_LLINFO is necessary * to create a Neighbor Cache entry for the @@ -691,7 +695,7 @@ nd6_lookup(struct in6_addr *addr6, int c bzero(&info, sizeof(info)); info.rti_flags = RTF_UP | RTF_HOST | RTF_LLINFO; info.rti_info[RTAX_DST] = sin6tosa(&sin6); - info.rti_info[RTAX_GATEWAY] = ifa->ifa_addr; + info.rti_info[RTAX_GATEWAY] = (struct sockaddr *)&sa_dl; if ((e = rtrequest1(RTM_ADD, &info, RTP_CONNECTED, &rt, rtableid)) != 0) { #if 0 @@ -956,7 +960,6 @@ nd6_rtrequest(int req, struct rtentry *r { struct sockaddr *gate = rt->rt_gateway; struct llinfo_nd6 *ln = (struct llinfo_nd6 *)rt->rt_llinfo; - static struct sockaddr_dl null_sdl = {sizeof(null_sdl), AF_LINK}; struct ifnet *ifp = rt->rt_ifp; struct ifaddr *ifa; struct nd_defrouter *dr; @@ -1015,17 +1018,6 @@ nd6_rtrequest(int req, struct rtentry *r */ if ((rt->rt_flags & RTF_CLONING) || ((rt->rt_flags & (RTF_LLINFO | RTF_LOCAL)) && !ln)) { - /* - * Case 1: This route should come from a route to - * interface (RTF_CLONING case) or the route should be - * treated as on-link but is currently not - * (RTF_LLINFO && !ln case). 
- */ - rt_setgate(rt, (struct sockaddr *)&null_sdl, - ifp->if_rdomain); - gate = rt->rt_gateway; - SDL(gate)->sdl_type = ifp->if_type; - SDL(gate)->sdl_index = ifp->if_index; if (ln) nd6_llinfo_settimer(ln, 0); if ((rt->rt_flags & RTF_CLONING) != 0) @@ -1061,7 +1053,7 @@ nd6_rtrequest(int req, struct rtentry *r /* FALLTHROUGH */ case RTM_RESOLVE: if (gate->sa_family != AF_LINK || - gate->sa_len < sizeof(null_sdl)) { + gate->sa_len < sizeof(struct sockaddr_dl)) { log(LOG_DEBUG, "%s: bad gateway value: %s\n", __func__, ifp->if_xname); break; @@ -1143,14 +1135,9 @@ nd6_rtrequest(int req, struct rtentry *r ifa = &in6ifa_ifpwithaddr(ifp, &satosin6(rt_key(rt))->sin6_addr)->ia_ifa; if (ifa) { - caddr_t macp = nd6_ifptomac(ifp); nd6_llinfo_settimer(ln, -1); ln->ln_state = ND6_LLINFO_REACHABLE; ln->ln_byhint = 0; - if (macp) { - memcpy(LLADDR(SDL(gate)), macp, ifp->if_addrlen); - SDL(gate)->sdl_alen = ifp->if_addrlen; - } /* * XXX Since lo0 is in the default rdomain we Index: netinet6/nd6_rtr.c =================================================================== RCS file: /cvs/src/sys/netinet6/nd6_rtr.c,v retrieving revision 1.101 diff -u -p -r1.101 nd6_rtr.c --- netinet6/nd6_rtr.c 25 Mar 2015 17:39:33 -0000 1.101 +++ netinet6/nd6_rtr.c 4 Apr 2015 08:31:34 -0000 @@ -45,6 +45,7 @@ #include <sys/queue.h> #include <net/if.h> +#include <net/if_dl.h> #include <net/if_var.h> #include <net/if_types.h> #include <net/route.h> @@ -1650,6 +1651,7 @@ nd6_prefix_onlink(struct nd_prefix *pr) struct ifaddr *ifa; struct ifnet *ifp = pr->ndpr_ifp; struct sockaddr_in6 mask6; + struct sockaddr_dl sa_dl = { sizeof(sa_dl), AF_LINK }; struct nd_prefix *opr; u_long rtflags; int error = 0; @@ -1722,6 +1724,10 @@ nd6_prefix_onlink(struct nd_prefix *pr) bzero(&mask6, sizeof(mask6)); mask6.sin6_len = sizeof(mask6); mask6.sin6_addr = pr->ndpr_mask; + + sa_dl.sdl_type = ifp->if_type; + sa_dl.sdl_index = ifp->if_index; + /* rtrequest1() will probably set RTF_UP, but we're not sure. 
*/ rtflags = RTF_UP; if (nd6_need_cache(ifp)) @@ -1732,7 +1738,7 @@ nd6_prefix_onlink(struct nd_prefix *pr) bzero(&info, sizeof(info)); info.rti_flags = rtflags; info.rti_info[RTAX_DST] = sin6tosa(&pr->ndpr_prefix); - info.rti_info[RTAX_GATEWAY] = ifa->ifa_addr; + info.rti_info[RTAX_GATEWAY] = (struct sockaddr *)&sa_dl; info.rti_info[RTAX_NETMASK] = sin6tosa(&mask6); error = rtrequest1(RTM_ADD, &info, RTP_CONNECTED, &rt, ifp->if_rdomain);