From: Roopa Prabhu <ro...@cumulusnetworks.com>

Add support for both the RTNH_F_DEAD and RTNH_F_LINKDOWN flags.
This resembles the ipv4 fib code. I also picked up fib_rebalance from
ipv4 and enabled weight support for nexthops, since the
infrastructure is already there.

Signed-off-by: Roopa Prabhu <ro...@cumulusnetworks.com>
---
I want to get this in before net-next closes, as promised.
I have tested it for the dead/linkdown flags. The multipath selection
and hash calculation in the face of dead routes need some more
work. I am short on cycles this week and wanted to get some
early feedback, hence sending this out as an RFC. I will continue with
more testing. Robert, I am using your hash algorithm, but it needs some
more work with dead routes. If you already have any thoughts on this, I
will take them. Thanks!


 net/mpls/af_mpls.c  | 228 +++++++++++++++++++++++++++++++++++++++++++++-------
 net/mpls/internal.h |   4 +
 2 files changed, 202 insertions(+), 30 deletions(-)

diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c
index c70d750..7db9678 100644
--- a/net/mpls/af_mpls.c
+++ b/net/mpls/af_mpls.c
@@ -27,6 +27,8 @@
  */
 #define MAX_MP_SELECT_LABELS 4
 
+u32 mpls_multipath_secret __read_mostly;
+
 static int zero = 0;
 static int label_limit = (1 << 20) - 1;
 
@@ -96,22 +98,52 @@ bool mpls_pkt_too_big(const struct sk_buff *skb, unsigned 
int mtu)
 }
 EXPORT_SYMBOL_GPL(mpls_pkt_too_big);
 
-static struct mpls_nh *mpls_select_multipath(struct mpls_route *rt,
-                                            struct sk_buff *skb, bool bos)
+static void mpls_multipath_rebalance(struct mpls_route *rt)
+{
+       int total;
+       int w;
+
+       if (rt->rt_nhn < 2)
+               return;
+
+       total = 0;
+       for_nexthops(rt) {
+               if ((nh->nh_flags & RTNH_F_DEAD) ||
+                   (nh->nh_flags & RTNH_F_LINKDOWN))
+                       continue;
+
+               total += nh->nh_weight;
+       } endfor_nexthops(rt);
+
+       w = 0;
+       change_nexthops(rt) {
+               int upper_bound;
+
+               if ((nh->nh_flags & RTNH_F_DEAD) ||
+                   (nh->nh_flags & RTNH_F_LINKDOWN)) {
+                       upper_bound = -1;
+               } else {
+                       w += nh->nh_weight;
+                       upper_bound = DIV_ROUND_CLOSEST_ULL((u64)w << 31,
+                                                           total) - 1;
+               }
+
+               atomic_set(&nh->nh_upper_bound, upper_bound);
+       } endfor_nexthops(rt);
+
+       net_get_random_once(&mpls_multipath_secret,
+                           sizeof(mpls_multipath_secret));
+}
+
+static u32 mpls_multipath_hash(struct mpls_route *rt,
+                              struct sk_buff *skb, bool bos)
 {
        struct mpls_entry_decoded dec;
        struct mpls_shim_hdr *hdr;
        bool eli_seen = false;
        int label_index;
-       int nh_index = 0;
        u32 hash = 0;
 
-       /* No need to look further into packet if there's only
-        * one path
-        */
-       if (rt->rt_nhn == 1)
-               goto out;
-
        for (label_index = 0; label_index < MAX_MP_SELECT_LABELS && !bos;
             label_index++) {
                if (!pskb_may_pull(skb, sizeof(*hdr) * label_index))
@@ -165,9 +197,29 @@ static struct mpls_nh *mpls_select_multipath(struct 
mpls_route *rt,
                }
        }
 
-       nh_index = hash % rt->rt_nhn;
+       return hash;
+}
+
+static struct mpls_nh *mpls_select_multipath(struct mpls_route *rt,
+                                            struct sk_buff *skb, bool bos)
+{
+       u32 hash = 0;
+
+       /* No need to look further into packet if there's only
+        * one path
+        */
+       if (rt->rt_nhn == 1)
+               goto out;
+
+       hash = mpls_multipath_hash(rt, skb, bos);
+       for_nexthops(rt) {
+               if (hash > atomic_read(&nh->nh_upper_bound))
+                       continue;
+               return nh;
+       } endfor_nexthops(rt);
+
 out:
-       return &rt->rt_nh[nh_index];
+       return &rt->rt_nh[0];
 }
 
 static bool mpls_egress(struct mpls_route *rt, struct sk_buff *skb,
@@ -577,7 +629,7 @@ errout:
 }
 
 static int mpls_nh_build(struct net *net, struct mpls_route *rt,
-                        struct mpls_nh *nh, int oif,
+                        struct mpls_nh *nh, int oif, int hops,
                         struct nlattr *via, struct nlattr *newdst)
 {
        int err = -ENOMEM;
@@ -597,6 +649,7 @@ static int mpls_nh_build(struct net *net, struct mpls_route 
*rt,
        if (err)
                goto errout;
 
+       nh->nh_weight = hops + 1;
        err = mpls_nh_assign_dev(net, rt, nh, oif);
        if (err)
                goto errout;
@@ -663,10 +716,9 @@ static int mpls_nh_build_multi(struct mpls_route_config 
*cfg,
                if (!rtnh_ok(rtnh, remaining))
                        goto errout;
 
-               /* neither weighted multipath nor any flags
-                * are supported
+               /* flags are not supported
                 */
-               if (rtnh->rtnh_hops || rtnh->rtnh_flags)
+               if (rtnh->rtnh_flags)
                        goto errout;
 
                attrlen = rtnh_attrlen(rtnh);
@@ -681,8 +733,8 @@ static int mpls_nh_build_multi(struct mpls_route_config 
*cfg,
                        goto errout;
 
                err = mpls_nh_build(cfg->rc_nlinfo.nl_net, rt, nh,
-                                   rtnh->rtnh_ifindex, nla_via,
-                                   nla_newdst);
+                                   rtnh->rtnh_ifindex, rtnh->rtnh_hops,
+                                   nla_via, nla_newdst);
                if (err)
                        goto errout;
 
@@ -875,34 +927,111 @@ free:
        return ERR_PTR(err);
 }
 
-static void mpls_ifdown(struct net_device *dev)
+static void mpls_ifdown(struct net_device *dev, int event)
 {
        struct mpls_route __rcu **platform_label;
        struct net *net = dev_net(dev);
-       struct mpls_dev *mdev;
        unsigned index;
+       int dead;
 
        platform_label = rtnl_dereference(net->mpls.platform_label);
        for (index = 0; index < net->mpls.platform_labels; index++) {
                struct mpls_route *rt = rtnl_dereference(platform_label[index]);
+               int changed = 0;
+
                if (!rt)
                        continue;
+               dead = 0;
                for_nexthops(rt) {
+                       if ((event == NETDEV_DOWN &&
+                            (nh->nh_flags & RTNH_F_DEAD)) ||
+                            (event == NETDEV_CHANGE &&
+                            (nh->nh_flags & RTNH_F_LINKDOWN))) {
+                               dead++;
+                               continue;
+                       }
+
                        if (rtnl_dereference(nh->nh_dev) != dev)
                                continue;
-                       nh->nh_dev = NULL;
+                       switch (event) {
+                       case NETDEV_DOWN:
+                       case NETDEV_UNREGISTER:
+                               nh->nh_flags |= RTNH_F_DEAD;
+                               /* fall through */
+                       case NETDEV_CHANGE:
+                               nh->nh_flags |= RTNH_F_LINKDOWN;
+                               changed = 1;
+                               break;
+                       }
+                       if (event == NETDEV_UNREGISTER) {
+                               nh->nh_dev = NULL;
+                               dead = rt->rt_nhn;
+                               changed = 1;
+                               break;
+                       }
+                       dead++;
                } endfor_nexthops(rt);
+
+               if (dead == rt->rt_nhn) {
+                       switch (event) {
+                       case NETDEV_DOWN:
+                       case NETDEV_UNREGISTER:
+                               rt->rt_flags |= RTNH_F_DEAD;
+                               /* fall through */
+                       case NETDEV_CHANGE:
+                               rt->rt_flags |= RTNH_F_LINKDOWN;
+                               changed = 1;
+                               break;
+                       }
+               }
+
+               if (changed)
+                       mpls_multipath_rebalance(rt);
        }
 
-       mdev = mpls_dev_get(dev);
-       if (!mdev)
-               return;
+       return;
+}
+
+static void mpls_ifup(struct net_device *dev, unsigned int nh_flags)
+{
+       struct mpls_route __rcu **platform_label;
+       struct net *net = dev_net(dev);
+       unsigned index;
+       int alive;
+
+       platform_label = rtnl_dereference(net->mpls.platform_label);
+       for (index = 0; index < net->mpls.platform_labels; index++) {
+               struct mpls_route *rt = rtnl_dereference(platform_label[index]);
+               int changed = 0;
+
+               if (!rt)
+                       continue;
+               alive = 0;
+               for_nexthops(rt) {
+                       struct net_device *nh_dev =
+                               rtnl_dereference(nh->nh_dev);
+
+                       if (!(nh->nh_flags & nh_flags)) {
+                               alive++;
+                               continue;
+                       }
+                       if (nh_dev != dev)
+                               continue;
+                       alive++;
+                       nh->nh_flags &= ~nh_flags;
+                       changed = 1;
+               } endfor_nexthops(rt);
 
-       mpls_dev_sysctl_unregister(mdev);
+               if (alive > 0) {
+                       rt->rt_flags &= ~nh_flags;
+                       changed = 1;
+               }
 
-       RCU_INIT_POINTER(dev->mpls_ptr, NULL);
+               if (changed)
+                       mpls_multipath_rebalance(rt);
+       }
 
-       kfree_rcu(mdev, rcu);
+       return;
 }
 
 static int mpls_dev_notify(struct notifier_block *this, unsigned long event,
@@ -910,9 +1039,9 @@ static int mpls_dev_notify(struct notifier_block *this, 
unsigned long event,
 {
        struct net_device *dev = netdev_notifier_info_to_dev(ptr);
        struct mpls_dev *mdev;
+       unsigned int flags;
 
-       switch(event) {
-       case NETDEV_REGISTER:
+       if (event == NETDEV_REGISTER) {
                /* For now just support ethernet devices */
                if ((dev->type == ARPHRD_ETHER) ||
                    (dev->type == ARPHRD_LOOPBACK)) {
@@ -920,10 +1049,39 @@ static int mpls_dev_notify(struct notifier_block *this, 
unsigned long event,
                        if (IS_ERR(mdev))
                                return notifier_from_errno(PTR_ERR(mdev));
                }
-               break;
+               return NOTIFY_OK;
+       }
 
+       mdev = mpls_dev_get(dev);
+       if (!mdev)
+               return NOTIFY_OK;
+
+       switch (event) {
+       case NETDEV_DOWN:
+               mpls_ifdown(dev, event);
+               break;
+       case NETDEV_UP:
+               flags = dev_get_flags(dev);
+               if (flags & (IFF_RUNNING | IFF_LOWER_UP))
+                       mpls_ifup(dev, RTNH_F_DEAD | RTNH_F_LINKDOWN);
+               else
+                       mpls_ifup(dev, RTNH_F_DEAD);
+               break;
+       case NETDEV_CHANGE:
+               flags = dev_get_flags(dev);
+               if (flags & (IFF_RUNNING | IFF_LOWER_UP))
+                       mpls_ifup(dev, RTNH_F_DEAD | RTNH_F_LINKDOWN);
+               else
+                       mpls_ifdown(dev, event);
+               break;
        case NETDEV_UNREGISTER:
-               mpls_ifdown(dev);
+               mpls_ifdown(dev, event);
+               mdev = mpls_dev_get(dev);
+               if (mdev) {
+                       mpls_dev_sysctl_unregister(mdev);
+                       RCU_INIT_POINTER(dev->mpls_ptr, NULL);
+                       kfree_rcu(mdev, rcu);
+               }
                break;
        case NETDEV_CHANGENAME:
                mdev = mpls_dev_get(dev);
@@ -1237,6 +1395,10 @@ static int mpls_dump_route(struct sk_buff *skb, u32 
portid, u32 seq, int event,
                dev = rtnl_dereference(nh->nh_dev);
                if (dev && nla_put_u32(skb, RTA_OIF, dev->ifindex))
                        goto nla_put_failure;
+               if (nh->nh_flags & RTNH_F_LINKDOWN)
+                       rtm->rtm_flags |= RTNH_F_LINKDOWN;
+               if (nh->nh_flags & RTNH_F_DEAD)
+                       rtm->rtm_flags |= RTNH_F_DEAD;
        } else {
                struct rtnexthop *rtnh;
                struct nlattr *mp;
@@ -1253,6 +1415,12 @@ static int mpls_dump_route(struct sk_buff *skb, u32 
portid, u32 seq, int event,
                        dev = rtnl_dereference(nh->nh_dev);
                        if (dev)
                                rtnh->rtnh_ifindex = dev->ifindex;
+                       if (nh->nh_flags & RTNH_F_LINKDOWN)
+                               rtnh->rtnh_flags |= RTNH_F_LINKDOWN;
+                       if (nh->nh_flags & RTNH_F_DEAD)
+                               rtnh->rtnh_flags |= RTNH_F_DEAD;
+
+                       rtnh->rtnh_hops = nh->nh_weight - 1;
                        if (nh->nh_labels && nla_put_labels(skb, RTA_NEWDST,
                                                            nh->nh_labels,
                                                            nh->nh_label))
diff --git a/net/mpls/internal.h b/net/mpls/internal.h
index bde52ce..7014032 100644
--- a/net/mpls/internal.h
+++ b/net/mpls/internal.h
@@ -41,6 +41,9 @@ enum mpls_payload_type {
 
 struct mpls_nh { /* next hop label forwarding entry */
        struct net_device __rcu *nh_dev;
+       unsigned int            nh_flags;
+       int                     nh_weight;
+       atomic_t                nh_upper_bound;
        u32                     nh_label[MAX_NEW_LABELS];
        u8                      nh_labels;
        u8                      nh_via_alen;
@@ -70,6 +73,7 @@ struct mpls_nh { /* next hop label forwarding entry */
  */
 struct mpls_route { /* next hop label forwarding entry */
        struct rcu_head         rt_rcu;
+       unsigned int            rt_flags;
        u8                      rt_protocol;
        u8                      rt_payload_type;
        u8                      rt_max_alen;
-- 
1.9.1

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to