IPv6 FIB will only contain FIB entries with exception routes added to the FIB entry. Once this transformation is complete, FIB lookups will return a fib6_info with the lookup functions still returning a dst based rt6_info. The current code uses rt6_info for both paths and overloads the rt6_info variable usually called 'rt'.
This patch introduces a new 'f6i' variable name for the result of the FIB lookup and keeps 'rt' as the dst based return variable. 'f6i' becomes a fib6_info in a later patch which is why it is introduced as f6i now; avoids the additional churn in the later patch. In addition, remove RTF_CACHE and dst checks from fib6 add and delete since they can not happen now and will never happen after the data type flip. Signed-off-by: David Ahern <dsah...@gmail.com> --- include/net/ip6_route.h | 1 - net/ipv6/ip6_fib.c | 16 +----- net/ipv6/route.c | 137 ++++++++++++++++++++++++++---------------------- 3 files changed, 75 insertions(+), 79 deletions(-) diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 76d2c31e18b7..d722fa843f31 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -106,7 +106,6 @@ int ip6_ins_rt(struct net *net, struct rt6_info *rt); int ip6_del_rt(struct net *net, struct rt6_info *rt); void rt6_flush_exceptions(struct rt6_info *rt); -int rt6_remove_exception_rt(struct rt6_info *rt); void rt6_age_exceptions(struct rt6_info *rt, struct fib6_gc_args *gc_args, unsigned long now); diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 90df1589a6c2..e27a01aa64f4 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -1065,7 +1065,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt, static void fib6_start_gc(struct net *net, struct rt6_info *rt) { if (!timer_pending(&net->ipv6.ip6_fib_timer) && - (rt->rt6i_flags & (RTF_EXPIRES | RTF_CACHE))) + (rt->rt6i_flags & RTF_EXPIRES)) mod_timer(&net->ipv6.ip6_fib_timer, jiffies + net->ipv6.sysctl.ip6_rt_gc_interval); } @@ -1116,8 +1116,6 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, if (WARN_ON_ONCE(!atomic_read(&rt->dst.__refcnt))) return -EINVAL; - if (WARN_ON_ONCE(rt->rt6i_flags & RTF_CACHE)) - return -EINVAL; if (info->nlh) { if (!(info->nlh->nlmsg_flags & NLM_F_CREATE)) @@ -1641,8 +1639,6 @@ static void fib6_del_route(struct fib6_table *table, struct fib6_node *fn, RT6_TRACE("fib6_del_route\n"); - WARN_ON_ONCE(rt->rt6i_flags & RTF_CACHE); - /* Unlink it */ *rtp = rt->rt6_next; rt->rt6i_node = NULL; @@ -1711,21 +1707,11 @@ int fib6_del(struct rt6_info *rt, struct nl_info *info) struct rt6_info __rcu **rtp; struct rt6_info __rcu **rtp_next; -#if RT6_DEBUG >= 2 - if (rt->dst.obsolete > 0) { - WARN_ON(fn); - return -ENOENT; - } -#endif if (!fn || rt == net->ipv6.fib6_null_entry) return -ENOENT; WARN_ON(!(fn->fn_flags & RTN_RTINFO)); - /* remove cached dst from exception table */ - if (rt->rt6i_flags & RTF_CACHE) - return rt6_remove_exception_rt(rt); - /* * Walk the leaf entries looking for ourself */ diff --git a/net/ipv6/route.c b/net/ipv6/route.c index d14dbf5ff1db..8eda7d09729b 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1013,8 +1013,8 @@ static void rt6_set_from(struct rt6_info *rt, struct rt6_info *from) BUG_ON(from->from); rt->rt6i_flags &= ~RTF_EXPIRES; - dst_hold(&from->dst); - rt->from = from; + if (dst_hold_safe(&from->dst)) + rt->from = from; dst_init_metrics(&rt->dst, from->fib6_metrics->metrics, true); if (from->fib6_metrics != &dst_default_metrics) { rt->dst._metrics |= DST_METRICS_REFCOUNTED; @@ -1098,42 +1098,36 @@ static struct rt6_info *ip6_pol_route_lookup(struct net *net, const struct sk_buff *skb, int flags) { - struct rt6_info *rt, *rt_cache; + struct rt6_info *f6i; struct fib6_node *fn; + struct rt6_info *rt; rcu_read_lock(); fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr); restart: - rt = rcu_dereference(fn->leaf); - if (!rt) { - rt = net->ipv6.fib6_null_entry; + f6i = rcu_dereference(fn->leaf); + if (!f6i) { + f6i = net->ipv6.fib6_null_entry; } else { - rt = rt6_device_match(net, rt, &fl6->saddr, + f6i = rt6_device_match(net, f6i, &fl6->saddr, fl6->flowi6_oif, flags); - if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0) - rt = rt6_multipath_select(net, rt, fl6, fl6->flowi6_oif, - skb, flags); + if (f6i->rt6i_nsiblings && fl6->flowi6_oif == 0) + f6i = rt6_multipath_select(net, f6i, fl6, + fl6->flowi6_oif, skb, flags); } - if (rt == net->ipv6.fib6_null_entry) { + if (f6i == net->ipv6.fib6_null_entry) { fn = fib6_backtrack(fn, &fl6->saddr); if (fn) goto restart; } + /* Search through exception table */ - rt_cache = rt6_find_cached_rt(rt, &fl6->daddr, &fl6->saddr); - if (rt_cache) { - rt = rt_cache; + rt = rt6_find_cached_rt(f6i, &fl6->daddr, &fl6->saddr); + if (rt) { if (ip6_hold_safe(net, &rt, true)) dst_use_noref(&rt->dst, jiffies); - } else if (dst_hold_safe(&rt->dst)) { - struct rt6_info *nrt; - - nrt = ip6_create_rt_rcu(rt); - dst_release(&rt->dst); - rt = nrt; } else { - rt = net->ipv6.ip6_null_entry; - dst_hold(&rt->dst); + rt = ip6_create_rt_rcu(f6i); } rcu_read_unlock(); @@ -1216,9 +1210,6 @@ static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort, * Clone the route. */ - if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU)) - ort = ort->from; - rcu_read_lock(); dev = ip6_rt_get_dev_rcu(ort); rt = __ip6_dst_alloc(dev_net(dev), dev, 0); @@ -1444,11 +1435,6 @@ static int rt6_insert_exception(struct rt6_info *nrt, struct rt6_exception *rt6_ex; int err = 0; - /* ort can't be a cache or pcpu route */ - if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU)) - ort = ort->from; - WARN_ON_ONCE(ort->rt6i_flags & (RTF_CACHE | RTF_PCPU)); - spin_lock_bh(&rt6_exception_lock); if (ort->exception_bucket_flushed) { @@ -1587,7 +1573,7 @@ static struct rt6_info *rt6_find_cached_rt(struct rt6_info *rt, } /* Remove the passed in cached rt from the hash table that contains it */ -int rt6_remove_exception_rt(struct rt6_info *rt) +static int rt6_remove_exception_rt(struct rt6_info *rt) { struct rt6_exception_bucket *bucket; struct rt6_info *from = rt->from; @@ -1828,7 +1814,8 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, const struct sk_buff *skb, int flags) { struct fib6_node *fn, *saved_fn; - struct rt6_info *rt, *rt_cache; + struct rt6_info *f6i; + struct rt6_info *rt; int strict = 0; strict |= flags & RT6_LOOKUP_F_IFACE; @@ -1845,10 +1832,10 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, oif = 0; redo_rt6_select: - rt = rt6_select(net, fn, oif, strict); - if (rt->rt6i_nsiblings) - rt = rt6_multipath_select(net, rt, fl6, oif, skb, strict); - if (rt == net->ipv6.fib6_null_entry) { + f6i = rt6_select(net, fn, oif, strict); + if (f6i->rt6i_nsiblings) + f6i = rt6_multipath_select(net, f6i, fl6, oif, skb, strict); + if (f6i == net->ipv6.fib6_null_entry) { fn = fib6_backtrack(fn, &fl6->saddr); if (fn) goto redo_rt6_select; @@ -1860,18 +1847,17 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, } } - /*Search through exception table */ - rt_cache = rt6_find_cached_rt(rt, &fl6->daddr, &fl6->saddr); - if (rt_cache) - rt = rt_cache; - - if (rt == net->ipv6.fib6_null_entry) { + if (f6i == net->ipv6.fib6_null_entry) { rt = net->ipv6.ip6_null_entry; rcu_read_unlock(); dst_hold(&rt->dst); trace_fib6_table_lookup(net, rt, table, fl6); return rt; - } else if (rt->rt6i_flags & RTF_CACHE) { + } + + /*Search through exception table */ + rt = rt6_find_cached_rt(f6i, &fl6->daddr, &fl6->saddr); + if (rt) { if (ip6_hold_safe(net, &rt, true)) dst_use_noref(&rt->dst, jiffies); @@ -1879,7 +1865,7 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, trace_fib6_table_lookup(net, rt, table, fl6); return rt; } else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) && - !(rt->rt6i_flags & RTF_GATEWAY))) { + !(f6i->rt6i_flags & RTF_GATEWAY))) { /* Create a RTF_CACHE clone which will not be * owned by the fib6 tree. It is for the special case where * the daddr in the skb during the neighbor look-up is different @@ -1888,16 +1874,16 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, struct rt6_info *uncached_rt; - if (ip6_hold_safe(net, &rt, true)) { - dst_use_noref(&rt->dst, jiffies); + if (ip6_hold_safe(net, &f6i, true)) { + dst_use_noref(&f6i->dst, jiffies); } else { rcu_read_unlock(); - uncached_rt = rt; + uncached_rt = f6i; goto uncached_rt_out; } rcu_read_unlock(); - uncached_rt = ip6_rt_cache_alloc(rt, &fl6->daddr, NULL); + uncached_rt = ip6_rt_cache_alloc(f6i, &fl6->daddr, NULL); dst_release(&rt->dst); if (uncached_rt) { @@ -1920,18 +1906,18 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, struct rt6_info *pcpu_rt; - dst_use_noref(&rt->dst, jiffies); + dst_use_noref(&f6i->dst, jiffies); local_bh_disable(); - pcpu_rt = rt6_get_pcpu_route(rt); + pcpu_rt = rt6_get_pcpu_route(f6i); if (!pcpu_rt) { /* atomic_inc_not_zero() is needed when using rcu */ - if (atomic_inc_not_zero(&rt->rt6i_ref)) { + if (atomic_inc_not_zero(&f6i->rt6i_ref)) { /* No dst_hold() on rt is needed because grabbing * rt->rt6i_ref makes sure rt can't be released. */ - pcpu_rt = rt6_make_pcpu_route(net, rt); - rt6_release(rt); + pcpu_rt = rt6_make_pcpu_route(net, f6i); + rt6_release(f6i); } else { /* rt is already removed from tree */ pcpu_rt = net->ipv6.ip6_null_entry; @@ -2376,7 +2362,8 @@ static struct rt6_info *__ip6_route_redirect(struct net *net, int flags) { struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6; - struct rt6_info *rt, *rt_cache; + struct rt6_info *ret = NULL, *rt_cache; + struct rt6_info *rt; struct fib6_node *fn; /* Get the "current" route for this destination and @@ -2415,7 +2402,7 @@ static struct rt6_info *__ip6_route_redirect(struct net *net, if (rt_cache && ipv6_addr_equal(&rdfl->gateway, &rt_cache->rt6i_gateway)) { - rt = rt_cache; + ret = rt_cache; break; } continue; @@ -2426,7 +2413,7 @@ static struct rt6_info *__ip6_route_redirect(struct net *net, if (!rt) rt = net->ipv6.fib6_null_entry; else if (rt->rt6i_flags & RTF_REJECT) { - rt = net->ipv6.ip6_null_entry; + ret = net->ipv6.ip6_null_entry; goto out; } @@ -2437,12 +2424,15 @@ static struct rt6_info *__ip6_route_redirect(struct net *net, } out: - ip6_hold_safe(net, &rt, true); + if (ret) + dst_hold(&ret->dst); + else + ret = ip6_create_rt_rcu(rt); rcu_read_unlock(); - trace_fib6_table_lookup(net, rt, table, fl6); - return rt; + trace_fib6_table_lookup(net, ret, table, fl6); + return ret; }; static struct dst_entry *ip6_route_redirect(struct net *net, @@ -3133,6 +3123,22 @@ static int __ip6_del_rt_siblings(struct rt6_info *rt, struct fib6_config *cfg) return err; } +static int ip6_del_cached_rt(struct rt6_info *rt, struct fib6_config *cfg) +{ + int rc = -ESRCH; + + if (cfg->fc_ifindex && rt->dst.dev->ifindex != cfg->fc_ifindex) + goto out; + + if (cfg->fc_flags & RTF_GATEWAY && + !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway)) + goto out; + if (dst_hold_safe(&rt->dst)) + rc = rt6_remove_exception_rt(rt); +out: + return rc; +} + static int ip6_route_del(struct fib6_config *cfg, struct netlink_ext_ack *extack) { @@ -3157,11 +3163,16 @@ static int ip6_route_del(struct fib6_config *cfg, if (fn) { for_each_fib6_node_rt_rcu(fn) { if (cfg->fc_flags & RTF_CACHE) { + int rc; + rt_cache = rt6_find_cached_rt(rt, &cfg->fc_dst, &cfg->fc_src); - if (!rt_cache) - continue; - rt = rt_cache; + if (rt_cache) { + rc = ip6_del_cached_rt(rt_cache, cfg); + if (rc != -ESRCH) + return rc; + } + continue; } if (cfg->fc_ifindex && (!rt->fib6_nh.nh_dev || @@ -3278,7 +3289,7 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu NEIGH_UPDATE_F_ISROUTER)), NDISC_REDIRECT, &ndopts); - nrt = ip6_rt_cache_alloc(rt, &msg->dest, NULL); + nrt = ip6_rt_cache_alloc(rt->from, &msg->dest, NULL); if (!nrt) goto out; -- 2.11.0