On Tue, May 9, 2017 at 1:56 PM, Cong Wang <xiyou.wangc...@gmail.com> wrote:
> Wait... if we transfer dst->dev to loopback_dev because we don't
> want to block unregister path, then we might have a similar problem
> for rt->fi too, fib_info is still referenced by dst, so these nh_dev's still
> hold the dev references...
>

I finally come up with the attach patch... Do you mind to give it a try?
commit a431b4d969f617c5ef8711b6bf493199eecec759
Author: Cong Wang <xiyou.wangc...@gmail.com>
Date:   Tue May 9 14:35:10 2017 -0700

    ipv4: restore rt->fi for reference counting, try #2
    
    Signed-off-by: Cong Wang <xiyou.wangc...@gmail.com>

diff --git a/include/net/route.h b/include/net/route.h
index 2cc0e14..4335eb7 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -69,6 +69,7 @@ struct rtable {
 
        struct list_head        rt_uncached;
        struct uncached_list    *rt_uncached_list;
+       struct fib_info         *fi; /* for refcnt to shared metrics */
 };
 
 static inline bool rt_is_input_route(const struct rtable *rt)
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index da449dd..d77c453 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -1429,8 +1429,15 @@ int fib_sync_down_dev(struct net_device *dev, unsigned 
long event, bool force)
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
                        if (event == NETDEV_UNREGISTER &&
                            nexthop_nh->nh_dev == dev) {
+                               /* This should be fine, we are on unregister
+                                * path so synchronize_net() already waits for
+                                * existing readers. We have to release the
+                                * dev here because dst could still hold this
+                                * fib_info via rt->fi, we can't wait for GC.
+                                */
+                               RCU_INIT_POINTER(nexthop_nh->nh_dev, NULL);
+                               dev_put(dev);
                                dead = fi->fib_nhs;
-                               break;
                        }
 #endif
                } endfor_nexthops(fi)
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 655d9ee..f647310 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1387,6 +1387,11 @@ static void ipv4_dst_destroy(struct dst_entry *dst)
 {
        struct rtable *rt = (struct rtable *) dst;
 
+       if (rt->fi) {
+               fib_info_put(rt->fi);
+               rt->fi = NULL;
+       }
+
        if (!list_empty(&rt->rt_uncached)) {
                struct uncached_list *ul = rt->rt_uncached_list;
 
@@ -1424,6 +1429,16 @@ static bool rt_cache_valid(const struct rtable *rt)
                !rt_is_expired(rt);
 }
 
+static void rt_init_metrics(struct rtable *rt, struct fib_info *fi)
+{
+       if (fi->fib_metrics != (u32 *)dst_default_metrics) {
+               fib_info_hold(fi);
+               rt->fi = fi;
+       }
+
+       dst_init_metrics(&rt->dst, fi->fib_metrics, true);
+}
+
 static void rt_set_nexthop(struct rtable *rt, __be32 daddr,
                           const struct fib_result *res,
                           struct fib_nh_exception *fnhe,
@@ -1438,7 +1453,7 @@ static void rt_set_nexthop(struct rtable *rt, __be32 
daddr,
                        rt->rt_gateway = nh->nh_gw;
                        rt->rt_uses_gateway = 1;
                }
-               dst_init_metrics(&rt->dst, fi->fib_metrics, true);
+               rt_init_metrics(rt, fi);
 #ifdef CONFIG_IP_ROUTE_CLASSID
                rt->dst.tclassid = nh->nh_tclassid;
 #endif
@@ -1490,6 +1505,7 @@ struct rtable *rt_dst_alloc(struct net_device *dev,
                rt->rt_gateway = 0;
                rt->rt_uses_gateway = 0;
                rt->rt_table_id = 0;
+               rt->fi = NULL;
                INIT_LIST_HEAD(&rt->rt_uncached);
 
                rt->dst.output = ip_output;

Reply via email to