On 08.03.2013 01:42, John-Mark Gurney wrote:
Andre Oppermann wrote this message on Thu, Mar 07, 2013 at 08:39 +0100:
Adding interface address is handled via atomically deleting old prefix and
adding interface one.

This brings up a long standing sore point of our routing code
which this patch makes more pronounced.  When an interface link
state is down I don't want the route to it to persist but to
become inactive so another path can be chosen.  This the very
point of running a routing daemon.  So on the link-down event
the installed interface routes should be removed from the routing
table.  The configured addresses though should persist and the
interface routes re-installed on a link-up event.  What's your
opinion on it?

Other than these points I think your code is fine and can go
into the tree.

The issue that I see with this is that if you bump your cable, all
your connections will be dropped, because as soon as they try to send
something, they'll get a no route to host, and this will break the
TCP connection...  If we keep the routes when the link goes down,
the packet will be queued or dropped (depending upon ethernet driver),
but the TCP connection will not break...
Yes. Older one using if_start with OS queue should queue traffic, while if_transmit ones probably drop it.

So this behavior should be configurable depending on OS role.

Patch attached. Other issues like carp, IPv6 or similar can arise, so this definitely deserves wider discussion.



Index: sys/net/if.c
===================================================================
--- sys/net/if.c        (revision 247623)
+++ sys/net/if.c        (working copy)
@@ -112,6 +112,12 @@ SYSCTL_INT(_net_link, OID_AUTO, log_link_state_cha
        &log_link_state_change, 0,
        "log interface link state change events");
 
+static VNET_DEFINE(int, remove_if_routes) = 0;
+#define        V_remove_if_routes      VNET(remove_if_routes)
+SYSCTL_VNET_INT(_net_link, OID_AUTO, remove_iface_routes_on_change, CTLFLAG_RW,
+    &VNET_NAME(remove_if_routes), 0,
+    "Remove iface routes on link state change");
+
 /* Interface description */
 static unsigned int ifdescr_maxlen = 1024;
 SYSCTL_UINT(_net, OID_AUTO, ifdescr_maxlen, CTLFLAG_RW,
@@ -161,10 +167,10 @@ static int        ifconf(u_long, caddr_t);
 static void    if_freemulti(struct ifmultiaddr *);
 static void    if_init(void *);
 static void    if_grow(void);
-static void    if_route(struct ifnet *, int flag, int fam);
+static void    if_route(struct ifnet *, int fam);
 static int     if_setflag(struct ifnet *, int, int, int *, int);
 static int     if_transmit(struct ifnet *ifp, struct mbuf *m);
-static void    if_unroute(struct ifnet *, int flag, int fam);
+static void    if_unroute(struct ifnet *, int fam);
 static void    link_rtrequest(int, struct rtentry *, struct rt_addrinfo *);
 static int     if_rtdel(struct radix_node *, void *);
 static int     ifhwioctl(u_long, struct ifnet *, caddr_t, struct thread *);
@@ -1834,22 +1841,13 @@ link_rtrequest(int cmd, struct rtentry *rt, struct
  * the transition.
  */
 static void
-if_unroute(struct ifnet *ifp, int flag, int fam)
+if_unroute(struct ifnet *ifp, int fam)
 {
        struct ifaddr *ifa;
 
-       KASSERT(flag == IFF_UP, ("if_unroute: flag != IFF_UP"));
-
-       ifp->if_flags &= ~flag;
-       getmicrotime(&ifp->if_lastchange);
        TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link)
                if (fam == PF_UNSPEC || (fam == ifa->ifa_addr->sa_family))
                        pfctlinput(PRC_IFDOWN, ifa->ifa_addr);
-       ifp->if_qflush(ifp);
-
-       if (ifp->if_carp)
-               (*carp_linkstate_p)(ifp);
-       rt_ifmsg(ifp);
 }
 
 /*
@@ -1857,23 +1855,13 @@ static void
  * the transition.
  */
 static void
-if_route(struct ifnet *ifp, int flag, int fam)
+if_route(struct ifnet *ifp, int fam)
 {
        struct ifaddr *ifa;
 
-       KASSERT(flag == IFF_UP, ("if_route: flag != IFF_UP"));
-
-       ifp->if_flags |= flag;
-       getmicrotime(&ifp->if_lastchange);
        TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link)
                if (fam == PF_UNSPEC || (fam == ifa->ifa_addr->sa_family))
                        pfctlinput(PRC_IFUP, ifa->ifa_addr);
-       if (ifp->if_carp)
-               (*carp_linkstate_p)(ifp);
-       rt_ifmsg(ifp);
-#ifdef INET6
-       in6_if_up(ifp);
-#endif
 }
 
 void   (*vlan_link_state_p)(struct ifnet *);   /* XXX: private from if_vlan */
@@ -1909,8 +1897,19 @@ do_link_state_change(void *arg, int pending)
        int link_state = ifp->if_link_state;
        CURVNET_SET(ifp->if_vnet);
 
+       /* Remove routes if link goes down */
+       if (V_remove_if_routes != 0 && link_state == LINK_STATE_DOWN &&
+           (ifp->if_flags & IFF_UP))
+                       if_unroute(ifp, PF_UNSPEC);
+
        /* Notify that the link state has changed. */
        rt_ifmsg(ifp);
+
+       /* Announce routes IFF Oper & Admin state is UP */
+       if (V_remove_if_routes != 0 && link_state == LINK_STATE_UP &&
+           (ifp->if_flags & IFF_UP))
+                       if_route(ifp, PF_UNSPEC);
+
        if (ifp->if_vlantrunk != NULL)
                (*vlan_link_state_p)(ifp);
 
@@ -1945,7 +1944,16 @@ void
 if_down(struct ifnet *ifp)
 {
 
-       if_unroute(ifp, IFF_UP, AF_UNSPEC);
+       ifp->if_flags &= ~IFF_UP;
+       getmicrotime(&ifp->if_lastchange);
+
+       if_unroute(ifp, AF_UNSPEC);
+
+       ifp->if_qflush(ifp);
+
+       if (ifp->if_carp)
+               (*carp_linkstate_p)(ifp);
+       rt_ifmsg(ifp);
 }
 
 /*
@@ -1956,7 +1964,18 @@ void
 if_up(struct ifnet *ifp)
 {
 
-       if_route(ifp, IFF_UP, AF_UNSPEC);
+       ifp->if_flags |= IFF_UP;
+       getmicrotime(&ifp->if_lastchange);
+
+       if (V_remove_if_routes == 0 || ifp->if_link_state == LINK_STATE_UP)
+               if_route(ifp, AF_UNSPEC);
+
+       if (ifp->if_carp)
+               (*carp_linkstate_p)(ifp);
+       rt_ifmsg(ifp);
+#ifdef INET6
+       in6_if_up(ifp);
+#endif
 }
 
 /*
_______________________________________________
[email protected] mailing list
http://lists.freebsd.org/mailman/listinfo/freebsd-net
To unsubscribe, send any mail to "[email protected]"

Reply via email to