On Wed, Mar 18, 2015 at 05:46:34AM +0100, Claudio Jeker wrote:
> On Tue, Mar 17, 2015 at 05:35:21PM +0100, Martin Pieuchot wrote:
> > On 12/02/15(Thu) 12:35, Martin Pieuchot wrote:
> > > On 10/02/15(Tue) 03:04, Claudio Jeker wrote:
> > > > There is no need to prevent the same network from being configured
> > > > more than once. Instead just rely on the multipath and priority
> > > > handling of the routing table to select the right route.
> > > > Additionally this removes cloned routes (arp/ndp cache) when the
> > > > interface goes down or when any of the multipath cloning routes is
> > > > changed.
> > > > 
> > > > With this it is possible to run 2 dhclients on wired and wireless with a
> > > > bridged network. Active TCP sessions still fail when the cable is
> > > > unplugged. To fix this more is needed.
> > > > 
> > > > This changes a fundamental part of the network stack and therefore
> > > > broad testing is needed to find all the hidden dragons.
> > > 
> > > Here's a version of the diff rebased on top of the recent changes.
> > 
> > I think it's time to get this in, then as a second step put in the
> > dhclient(8) bits.
> > 
> > Claudio you have my ok.
> 
> It is broken for IPv6 and I could not find the proper fix yet. I think I
> know why it goes wrong but the nd6 code is a nightmare.
> 
> I will send out a new diff once I have IPv6 fixed.
>  

Unsurprisingly IPv6 needs to be special and is not using rt_ifa_add or
rt_ifa_del in all cases. There are three special cases that do the same
dance but use ifa->ifa_addr as the gateway and because of this the
resulting interface routes are not caught by the nd6 code (RTF_LLINFO is
missing). When the routes are then cloned nd6 is not invoked and
everything points back to the host. Oops.
The following updated diff seems to fix this but I only minimally tested
the IPv6 part. People using IPv6 may want to give this a spin.

IMO the net/if_var.h and netinet/ip_carp.c changes could be committed
before the rest since there should be no noticeable change in how carp
works.
-- 
:wq Claudio

Index: net/if_var.h
===================================================================
RCS file: /cvs/src/sys/net/if_var.h,v
retrieving revision 1.24
diff -u -p -r1.24 if_var.h
--- net/if_var.h        7 Apr 2015 10:46:20 -0000       1.24
+++ net/if_var.h        12 Apr 2015 11:47:03 -0000
@@ -389,6 +389,7 @@ do {                                                        
                \
 /* default interface priorities */
 #define IF_WIRED_DEFAULT_PRIORITY      0
 #define IF_WIRELESS_DEFAULT_PRIORITY   4
+#define IF_CARP_DEFAULT_PRIORITY       15
 
 /*
  * Network stack input queues.
Index: net/route.c
===================================================================
RCS file: /cvs/src/sys/net/route.c,v
retrieving revision 1.208
diff -u -p -r1.208 route.c
--- net/route.c 26 Mar 2015 11:02:44 -0000      1.208
+++ net/route.c 4 Apr 2015 08:31:34 -0000
@@ -554,6 +554,16 @@ rtdeletemsg(struct rtentry *rt, u_int ta
        return (error);
 }
 
+static inline int
+rtequal(struct rtentry *a, struct rtentry *b)
+{
+       if (memcmp(rt_key(a), rt_key(b), rt_key(a)->sa_len) == 0 &&
+           memcmp(rt_mask(a), rt_mask(b), rt_mask(a)->sa_len) == 0)
+               return 1;
+       else
+               return 0;
+}
+
 int
 rtflushclone1(struct radix_node *rn, void *arg, u_int id)
 {
@@ -561,7 +571,8 @@ rtflushclone1(struct radix_node *rn, voi
 
        rt = (struct rtentry *)rn;
        parent = (struct rtentry *)arg;
-       if ((rt->rt_flags & RTF_CLONED) != 0 && rt->rt_parent == parent)
+       if ((rt->rt_flags & RTF_CLONED) != 0 && (rt->rt_parent == parent ||
+           rtequal(rt->rt_parent, parent)))
                rtdeletemsg(rt, id);
        return 0;
 }
@@ -1106,16 +1117,20 @@ rt_ifa_add(struct ifaddr *ifa, int flags
 {
        struct rtentry          *rt, *nrt = NULL;
        struct sockaddr_rtlabel  sa_rl;
+       struct sockaddr_dl       sa_dl = { sizeof(sa_dl), AF_LINK };
        struct rt_addrinfo       info;
        u_short                  rtableid = ifa->ifa_ifp->if_rdomain;
-       u_int8_t                 prio = RTP_CONNECTED;
+       u_int8_t                 prio = ifa->ifa_ifp->if_priority + RTP_STATIC;
        int                      error;
 
+       sa_dl.sdl_type = ifa->ifa_ifp->if_type;
+       sa_dl.sdl_index = ifa->ifa_ifp->if_index;
+
        memset(&info, 0, sizeof(info));
        info.rti_ifa = ifa;
-       info.rti_flags = flags;
+       info.rti_flags = flags | RTF_MPATH;
        info.rti_info[RTAX_DST] = dst;
-       info.rti_info[RTAX_GATEWAY] = ifa->ifa_addr;
+       info.rti_info[RTAX_GATEWAY] = (struct sockaddr *)&sa_dl;
        info.rti_info[RTAX_LABEL] =
            rtlabel_id2sa(ifa->ifa_ifp->if_rtlabelid, &sa_rl);
 
@@ -1170,8 +1185,9 @@ rt_ifa_del(struct ifaddr *ifa, int flags
        struct sockaddr         *deldst;
        struct rt_addrinfo       info;
        struct sockaddr_rtlabel  sa_rl;
+       struct sockaddr_dl       sa_dl = { sizeof(sa_dl), AF_LINK };
        u_short                  rtableid = ifa->ifa_ifp->if_rdomain;
-       u_int8_t                 prio = RTP_CONNECTED;
+       u_int8_t                 prio = ifa->ifa_ifp->if_priority + RTP_STATIC;
        int                      error;
 
 #ifdef MPLS
@@ -1202,10 +1218,14 @@ rt_ifa_del(struct ifaddr *ifa, int flags
                }
        }
 
+       sa_dl.sdl_type = ifa->ifa_ifp->if_type;
+       sa_dl.sdl_index = ifa->ifa_ifp->if_index;
+
        memset(&info, 0, sizeof(info));
        info.rti_ifa = ifa;
        info.rti_flags = flags;
        info.rti_info[RTAX_DST] = dst;
+       info.rti_info[RTAX_GATEWAY] = (struct sockaddr *)&sa_dl;
        info.rti_info[RTAX_LABEL] =
            rtlabel_id2sa(ifa->ifa_ifp->if_rtlabelid, &sa_rl);
 
@@ -1710,6 +1730,15 @@ rt_if_linkstate_change(struct radix_node
                        }
                } else {
                        if (rt->rt_flags & RTF_UP) {
+                               /*
+                                * Remove cloned routes (mainly arp) to
+                                * down interfaces so we have a chance to
+                                * clone a new route from a better source.
+                                */
+                               if (rt->rt_flags & RTF_CLONED) {
+                                       rtdeletemsg(rt, id);
+                                       return (0);
+                               }
                                /* take route down */
                                rt->rt_flags &= ~RTF_UP;
                                rn_mpath_reprio(rn, rt->rt_priority | RTP_DOWN);
Index: netinet/if_ether.c
===================================================================
RCS file: /cvs/src/sys/netinet/if_ether.c,v
retrieving revision 1.150
diff -u -p -r1.150 if_ether.c
--- netinet/if_ether.c  10 Apr 2015 13:58:20 -0000      1.150
+++ netinet/if_ether.c  12 Apr 2015 11:47:03 -0000
@@ -121,8 +121,6 @@ void        db_print_llinfo(caddr_t);
 int    db_show_radix_node(struct radix_node *, void *, u_int);
 #endif
 
-static const struct sockaddr_dl null_sdl = { sizeof(null_sdl), AF_LINK };
-
 /*
  * Timeout routine.  Age arp_tab entries periodically.
  */
@@ -190,14 +188,6 @@ arp_rtrequest(int req, struct rtentry *r
                if (rt->rt_flags & RTF_CLONING ||
                    ((rt->rt_flags & (RTF_LLINFO | RTF_LOCAL)) && !la)) {
                        /*
-                        * Case 1: This route should come from a route to iface.
-                        */
-                       rt_setgate(rt, (struct sockaddr *)&null_sdl,
-                           ifp->if_rdomain);
-                       gate = rt->rt_gateway;
-                       SDL(gate)->sdl_type = ifp->if_type;
-                       SDL(gate)->sdl_index = ifp->if_index;
-                       /*
                         * Give this route an expiration time, even though
                         * it's a "permanent" route, so that routes cloned
                         * from it do not need their expiration time set.
@@ -261,10 +251,6 @@ arp_rtrequest(int req, struct rtentry *r
                }
                if (ifa) {
                        rt->rt_expire = 0;
-                       SDL(gate)->sdl_alen = ETHER_ADDR_LEN;
-                       memcpy(LLADDR(SDL(gate)),
-                           ((struct arpcom *)ifp)->ac_enaddr, ETHER_ADDR_LEN);
-
                        /*
                         * XXX Since lo0 is in the default rdomain we
                         * should not (ab)use it for any route related
Index: netinet/in.c
===================================================================
RCS file: /cvs/src/sys/netinet/in.c,v
retrieving revision 1.115
diff -u -p -r1.115 in.c
--- netinet/in.c        12 Jan 2015 13:51:45 -0000      1.115
+++ netinet/in.c        10 Feb 2015 01:50:22 -0000
@@ -93,8 +93,6 @@ int in_lifaddr_ioctl(struct socket *, u_
        struct ifnet *);
 
 void in_purgeaddr(struct ifaddr *);
-int in_addprefix(struct in_ifaddr *);
-int in_scrubprefix(struct in_ifaddr *);
 int in_addhost(struct in_ifaddr *, struct sockaddr_in *);
 int in_scrubhost(struct in_ifaddr *, struct sockaddr_in *);
 int in_insert_prefix(struct in_ifaddr *);
@@ -590,7 +588,8 @@ in_ifscrub(struct ifnet *ifp, struct in_
        if (ISSET(ifp->if_flags, IFF_POINTOPOINT))
                in_scrubhost(ia, &ia->ia_dstaddr);
        else if (!ISSET(ifp->if_flags, IFF_LOOPBACK))
-               in_scrubprefix(ia);
+               if (ia->ia_flags & IFA_ROUTE)
+                       in_remove_prefix(ia);
 }
 
 /*
@@ -669,7 +668,7 @@ in_ifinit(struct ifnet *ifp, struct in_i
                        goto out;
                error = in_addhost(ia, &ia->ia_dstaddr);
        } else if (!ISSET(ifp->if_flags, IFF_LOOPBACK)) {
-               error = in_addprefix(ia);
+               error = in_insert_prefix(ia);
        }
 
        /*
@@ -759,125 +758,6 @@ in_remove_prefix(struct in_ifaddr *ia)
                    ifa->ifa_broadaddr);
 
        ia->ia_flags &= ~IFA_ROUTE;
-}
-
-/*
- * add a route to prefix ("connected route" in cisco terminology).
- * does nothing if there's some interface address with the same prefix already.
- */
-int
-in_addprefix(struct in_ifaddr *ia0)
-{
-       struct ifnet *ifp;
-       struct ifaddr *ifa;
-       struct in_ifaddr *ia;
-       struct in_addr prefix, mask, p, m;
-
-       prefix = ia0->ia_addr.sin_addr;
-       mask = ia0->ia_sockmask.sin_addr;
-       prefix.s_addr &= mask.s_addr;
-
-       TAILQ_FOREACH(ifp, &ifnet, if_list) {
-               if (ifp->if_flags & (IFF_LOOPBACK|IFF_POINTOPOINT))
-                       continue;
-
-               if (ifp->if_rdomain != ia0->ia_ifp->if_rdomain)
-                       continue;
-
-               TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
-                       if (ifa->ifa_addr->sa_family != AF_INET)
-                               continue;
-
-                       ia = ifatoia(ifa);
-
-                       if ((ia->ia_flags & IFA_ROUTE) == 0)
-                               continue;
-
-                       p = ia->ia_addr.sin_addr;
-                       m = ia->ia_sockmask.sin_addr;
-                       p.s_addr &= m.s_addr;
-
-                       if (prefix.s_addr != p.s_addr ||
-                           mask.s_addr != m.s_addr)
-                               continue;
-
-#if NCARP > 0
-                       /* move to a real interface instead of carp interface */
-                       if (ia->ia_ifp->if_type == IFT_CARP &&
-                           ia0->ia_ifp->if_type != IFT_CARP) {
-                               in_remove_prefix(ia);
-                               break;
-                       }
-#endif
-                       /*
-                        * If we got a matching prefix route inserted by other
-                        * interface address, we don't need to bother
-                        */
-                       return (0);
-               }
-       }
-
-       /*
-        * noone seem to have prefix route.  insert it.
-        */
-       return in_insert_prefix(ia0);
-}
-
-/*
- * remove a route to prefix ("connected route" in cisco terminology).
- * re-installs the route by using another interface address, if there's one
- * with the same prefix (otherwise we lose the route mistakenly).
- */
-int
-in_scrubprefix(struct in_ifaddr *ia0)
-{
-       struct ifnet *ifp;
-       struct ifaddr *ifa;
-       struct in_ifaddr *ia;
-       struct in_addr prefix, mask, p, m;
-
-       if ((ia0->ia_flags & IFA_ROUTE) == 0)
-               return 0;
-
-       prefix = ia0->ia_addr.sin_addr;
-       mask = ia0->ia_sockmask.sin_addr;
-       prefix.s_addr &= mask.s_addr;
-
-       TAILQ_FOREACH(ifp, &ifnet, if_list) {
-               if (ifp->if_flags & (IFF_LOOPBACK|IFF_POINTOPOINT))
-                       continue;
-
-               if (ifp->if_rdomain != ia0->ia_ifp->if_rdomain)
-                       continue;
-
-               TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
-                       if (ifa->ifa_addr->sa_family != AF_INET)
-                               continue;
-
-                       ia = ifatoia(ifa);
-
-                       if ((ia->ia_flags & IFA_ROUTE) != 0)
-                               continue;
-
-                       p = ia->ia_addr.sin_addr;
-                       m = ia->ia_sockmask.sin_addr;
-                       p.s_addr &= m.s_addr;
-
-                       if (prefix.s_addr != p.s_addr ||
-                           mask.s_addr != m.s_addr)
-                               continue;
-
-                       /* Move IFA_ROUTE to the matching prefix route. */
-                       in_remove_prefix(ia0);
-                       return (in_insert_prefix(ia));
-               }
-       }
-
-       /*
-        * noone seem to have prefix route.  remove it.
-        */
-       in_remove_prefix(ia0);
-       return 0;
 }
 
 /*
Index: netinet/ip_carp.c
===================================================================
RCS file: /cvs/src/sys/netinet/ip_carp.c,v
retrieving revision 1.251
diff -u -p -r1.251 ip_carp.c
--- netinet/ip_carp.c   15 Apr 2015 15:16:17 -0000      1.251
+++ netinet/ip_carp.c   15 Apr 2015 20:45:40 -0000
@@ -751,6 +751,7 @@ carp_clone_create(ifc, unit)
        ether_ifattach(ifp);
        ifp->if_type = IFT_CARP;
        ifp->if_output = carp_output;
+       ifp->if_priority = IF_CARP_DEFAULT_PRIORITY;
 
        /* Hook carp_addr_updated to cope with address and route changes. */
        sc->ah_cookie = hook_establish(sc->sc_if.if_addrhooks, 0,
Index: netinet6/in6.c
===================================================================
RCS file: /cvs/src/sys/netinet6/in6.c,v
retrieving revision 1.154
diff -u -p -r1.154 in6.c
--- netinet6/in6.c      14 Mar 2015 03:38:52 -0000      1.154
+++ netinet6/in6.c      18 Mar 2015 18:03:16 -0000
@@ -78,6 +78,7 @@
 #include <sys/syslog.h>
 
 #include <net/if.h>
+#include <net/if_dl.h>
 #include <net/if_types.h>
 #include <net/route.h>
 
@@ -827,6 +828,10 @@ in6_update_ifa(struct ifnet *ifp, struct
 
                /* join solicited multicast addr for new host id */
                struct sockaddr_in6 llsol;
+               struct sockaddr_dl sa_dl = { sizeof(sa_dl), AF_LINK };
+
+               sa_dl.sdl_type = ifp->if_type;
+               sa_dl.sdl_index = ifp->if_index;
 
                bzero(&llsol, sizeof(llsol));
                llsol.sin6_family = AF_INET6;
@@ -887,7 +892,7 @@ in6_update_ifa(struct ifnet *ifp, struct
 
                        bzero(&info, sizeof(info));
                        info.rti_info[RTAX_DST] = sin6tosa(&mltaddr);
-                       info.rti_info[RTAX_GATEWAY] = sin6tosa(&ia6->ia_addr);
+                       info.rti_info[RTAX_GATEWAY] = (struct sockaddr *)&sa_dl;
                        info.rti_info[RTAX_NETMASK] = sin6tosa(&mltmask);
                        info.rti_info[RTAX_IFA] = sin6tosa(&ia6->ia_addr);
                        /* XXX: we need RTF_CLONING to fake nd6_rtrequest */
@@ -956,7 +961,7 @@ in6_update_ifa(struct ifnet *ifp, struct
 
                        bzero(&info, sizeof(info));
                        info.rti_info[RTAX_DST] = sin6tosa(&mltaddr);
-                       info.rti_info[RTAX_GATEWAY] = sin6tosa(&ia6->ia_addr);
+                       info.rti_info[RTAX_GATEWAY] = (struct sockaddr *)&sa_dl;
                        info.rti_info[RTAX_NETMASK] = sin6tosa(&mltmask);
                        info.rti_info[RTAX_IFA] = sin6tosa(&ia6->ia_addr);
                        info.rti_flags = RTF_UP | RTF_CLONING;
Index: netinet6/nd6.c
===================================================================
RCS file: /cvs/src/sys/netinet6/nd6.c,v
retrieving revision 1.133
diff -u -p -r1.133 nd6.c
--- netinet6/nd6.c      25 Mar 2015 17:39:33 -0000      1.133
+++ netinet6/nd6.c      4 Apr 2015 08:31:34 -0000
@@ -667,6 +667,7 @@ nd6_lookup(struct in6_addr *addr6, int c
        }
        if (!rt) {
                if (create && ifp) {
+                       struct sockaddr_dl sa_dl = { sizeof(sa_dl), AF_LINK };
                        struct rt_addrinfo info;
                        int e;
 
@@ -682,6 +683,9 @@ nd6_lookup(struct in6_addr *addr6, int c
                        if (ifa == NULL)
                                return (NULL);
 
+                       sa_dl.sdl_type = ifp->if_type;
+                       sa_dl.sdl_index = ifp->if_index;
+
                        /*
                         * Create a new route.  RTF_LLINFO is necessary
                         * to create a Neighbor Cache entry for the
@@ -691,7 +695,7 @@ nd6_lookup(struct in6_addr *addr6, int c
                        bzero(&info, sizeof(info));
                        info.rti_flags = RTF_UP | RTF_HOST | RTF_LLINFO;
                        info.rti_info[RTAX_DST] = sin6tosa(&sin6);
-                       info.rti_info[RTAX_GATEWAY] = ifa->ifa_addr;
+                       info.rti_info[RTAX_GATEWAY] = (struct sockaddr *)&sa_dl;
                        if ((e = rtrequest1(RTM_ADD, &info, RTP_CONNECTED,
                            &rt, rtableid)) != 0) {
 #if 0
@@ -956,7 +960,6 @@ nd6_rtrequest(int req, struct rtentry *r
 {
        struct sockaddr *gate = rt->rt_gateway;
        struct llinfo_nd6 *ln = (struct llinfo_nd6 *)rt->rt_llinfo;
-       static struct sockaddr_dl null_sdl = {sizeof(null_sdl), AF_LINK};
        struct ifnet *ifp = rt->rt_ifp;
        struct ifaddr *ifa;
        struct nd_defrouter *dr;
@@ -1015,17 +1018,6 @@ nd6_rtrequest(int req, struct rtentry *r
                 */
                if ((rt->rt_flags & RTF_CLONING) ||
                    ((rt->rt_flags & (RTF_LLINFO | RTF_LOCAL)) && !ln)) {
-                       /*
-                        * Case 1: This route should come from a route to
-                        * interface (RTF_CLONING case) or the route should be
-                        * treated as on-link but is currently not
-                        * (RTF_LLINFO && !ln case).
-                        */
-                       rt_setgate(rt, (struct sockaddr *)&null_sdl,
-                           ifp->if_rdomain);
-                       gate = rt->rt_gateway;
-                       SDL(gate)->sdl_type = ifp->if_type;
-                       SDL(gate)->sdl_index = ifp->if_index;
                        if (ln)
                                nd6_llinfo_settimer(ln, 0);
                        if ((rt->rt_flags & RTF_CLONING) != 0)
@@ -1061,7 +1053,7 @@ nd6_rtrequest(int req, struct rtentry *r
                /* FALLTHROUGH */
        case RTM_RESOLVE:
                if (gate->sa_family != AF_LINK ||
-                   gate->sa_len < sizeof(null_sdl)) {
+                   gate->sa_len < sizeof(struct sockaddr_dl)) {
                        log(LOG_DEBUG, "%s: bad gateway value: %s\n",
                            __func__, ifp->if_xname);
                        break;
@@ -1143,14 +1135,9 @@ nd6_rtrequest(int req, struct rtentry *r
                ifa = &in6ifa_ifpwithaddr(ifp,
                    &satosin6(rt_key(rt))->sin6_addr)->ia_ifa;
                if (ifa) {
-                       caddr_t macp = nd6_ifptomac(ifp);
                        nd6_llinfo_settimer(ln, -1);
                        ln->ln_state = ND6_LLINFO_REACHABLE;
                        ln->ln_byhint = 0;
-                       if (macp) {
-                               memcpy(LLADDR(SDL(gate)), macp, ifp->if_addrlen);
-                               SDL(gate)->sdl_alen = ifp->if_addrlen;
-                       }
 
                        /*
                         * XXX Since lo0 is in the default rdomain we
Index: netinet6/nd6_rtr.c
===================================================================
RCS file: /cvs/src/sys/netinet6/nd6_rtr.c,v
retrieving revision 1.101
diff -u -p -r1.101 nd6_rtr.c
--- netinet6/nd6_rtr.c  25 Mar 2015 17:39:33 -0000      1.101
+++ netinet6/nd6_rtr.c  4 Apr 2015 08:31:34 -0000
@@ -45,6 +45,7 @@
 #include <sys/queue.h>
 
 #include <net/if.h>
+#include <net/if_dl.h>
 #include <net/if_var.h>
 #include <net/if_types.h>
 #include <net/route.h>
@@ -1650,6 +1651,7 @@ nd6_prefix_onlink(struct nd_prefix *pr)
        struct ifaddr *ifa;
        struct ifnet *ifp = pr->ndpr_ifp;
        struct sockaddr_in6 mask6;
+       struct sockaddr_dl sa_dl = { sizeof(sa_dl), AF_LINK };
        struct nd_prefix *opr;
        u_long rtflags;
        int error = 0;
@@ -1722,6 +1724,10 @@ nd6_prefix_onlink(struct nd_prefix *pr)
        bzero(&mask6, sizeof(mask6));
        mask6.sin6_len = sizeof(mask6);
        mask6.sin6_addr = pr->ndpr_mask;
+
+       sa_dl.sdl_type = ifp->if_type;
+       sa_dl.sdl_index = ifp->if_index;
+
        /* rtrequest1() will probably set RTF_UP, but we're not sure. */
        rtflags = RTF_UP;
        if (nd6_need_cache(ifp))
@@ -1732,7 +1738,7 @@ nd6_prefix_onlink(struct nd_prefix *pr)
        bzero(&info, sizeof(info));
        info.rti_flags = rtflags;
        info.rti_info[RTAX_DST] = sin6tosa(&pr->ndpr_prefix);
-       info.rti_info[RTAX_GATEWAY] = ifa->ifa_addr;
+       info.rti_info[RTAX_GATEWAY] = (struct sockaddr *)&sa_dl;
        info.rti_info[RTAX_NETMASK] = sin6tosa(&mask6);
 
        error = rtrequest1(RTM_ADD, &info, RTP_CONNECTED, &rt, ifp->if_rdomain);

Reply via email to