From: Andi Kleen <a...@linux.intel.com> Add an optional fib_list that uses a simple list to store routes. This is suitable for single homed client system which typically have only a handful of routes.
The new file is a complete 1:1 replacement for fib_trie and implements the same API for other files. The code still uses RCU, so should have similar multi-core scalability, as long as the number of routes is small. It also has all the functionality of the fib_trie, so can be used with advanced routing (except for a few missing proc files) The code started as a copy of fib_trie, with all the trie code replaced with a simple list. There's some very similar code with trie that could be factored out in a common file. Worth about 8.5k of code. In addition it uses kmalloc instead of explicit slabs, which gives better memory efficiency for very small routing table (all data <4K, so sharing with other kmalloc users is possible) text data bss dec hex filename 943211 71810 25352 1040373 fdff5 net/built-in.o-fib-list 951716 72066 25352 1049134 10022e net/built-in.o-fib-trie XXX experimential, needs more testing, review Cc: robert.ols...@its.uu.se Signed-off-by: Andi Kleen <a...@linux.intel.com> --- net/ipv4/Kconfig | 18 +- net/ipv4/Makefile | 4 +- net/ipv4/fib_list.c | 985 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 1005 insertions(+), 2 deletions(-) create mode 100644 net/ipv4/fib_list.c diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index cdb4f57..578e332 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -55,9 +55,25 @@ config IP_PING ---help--- Enable ping sockets to enable suid-less ping. +config FIB_TRIE + bool "Use trie based routing table" + default y + ---help--- + This is an advanced routing table implementation that scales to a larger + number of routes. When in doubt say y. + +config FIB_LIST + bool + default y + depends on !FIB_TRIE + ---help--- + This is a very simple linear routing table implementation that is only + suitable for clients with a very small number of routes (<10). The only + advantage over FIB_TRIE is smaller code size. + config IP_FIB_TRIE_STATS bool "FIB TRIE statistics" - depends on IP_ADVANCED_ROUTER + depends on IP_ADVANCED_ROUTER && FIB_TRIE ---help--- Keep track of statistics on structure of FIB TRIE table. Useful for testing and measuring TRIE performance. diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index 9353beb..8c2a08d 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile @@ -10,9 +10,11 @@ obj-y := route.o inetpeer.o protocol.o \ tcp_minisocks.o tcp_cong.o \ datagram.o udp.o udplite.o \ arp.o icmp.o devinet.o af_inet.o \ - fib_frontend.o fib_semantics.o fib_trie.o \ + fib_frontend.o fib_semantics.o \ inet_fragment.o ip_tunnel_core.o gre_offload.o +obj-$(CONFIG_FIB_TRIE) += fib_trie.o +obj-$(CONFIG_FIB_LIST) += fib_list.o obj-$(CONFIG_NET_IP_TUNNEL) += ip_tunnel.o obj-$(CONFIG_IP_PING) += ping.o obj-$(CONFIG_SYSCTL) += sysctl_net_ipv4.o diff --git a/net/ipv4/fib_list.c b/net/ipv4/fib_list.c new file mode 100644 index 0000000..969e54d --- /dev/null +++ b/net/ipv4/fib_list.c @@ -0,0 +1,985 @@ +/* + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; version + * 2 of the License. + * + * This is based on the fib_trie code (which was based on fib_hash), + * but all the trie code removed and replaced with a simple + * list. In addition it uses kmalloc instead of slab directly + * to save some dynamic memory. + * + * This is for systems with very small routing tables, like + * your typical single-homed client. You shouldn't be using this for + * more than a handful of routes. The main motivation is smaller + * code size. Apart from being less scalable and some missing + * proc output the functionality should be the same as fib_trie. + * + * Some code is very similar with fib_trie and could be later + * factored out into a shared file. + * + * List code by Andi Kleen, original trie file was: + * + * Robert Olsson <robert.ols...@its.uu.se> Uppsala Universitet + * & Swedish University of Agricultural Sciences. + * + * Jens Laas <jens.l...@data.slu.se> Swedish University of + * Agricultural Sciences. + * + * Hans Liss <hans.l...@its.uu.se> Uppsala Universitet + * + * Code from fib_hash has been reused which includes the following header: + * + * INET An implementation of the TCP/IP protocol suite for the LINUX + * operating system. INET is implemented using the BSD Socket + * interface as the means of communication with the user level. + * + * IPv4 FIB: lookup engine and maintenance routines. + * + * + * Authors: Alexey Kuznetsov, <kuz...@ms2.inr.ac.ru> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Substantial contributions to this work comes from: + * + * David S. Miller, <da...@davemloft.net> + * Stephen Hemminger <shemmin...@osdl.org> + * Paul E. McKenney <paul...@us.ibm.com> + * Patrick McHardy <ka...@trash.net> + */ + +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/in.h> +#include <linux/inet.h> +#include <linux/inetdevice.h> +#include <linux/netdevice.h> +#include <linux/if_arp.h> +#include <linux/proc_fs.h> +#include <linux/rcupdate.h> +#include <linux/skbuff.h> +#include <linux/netlink.h> +#include <linux/init.h> +#include <linux/list.h> +#include <linux/slab.h> +#include <linux/export.h> +#include <net/net_namespace.h> +#include <net/ip.h> +#include <net/protocol.h> +#include <net/route.h> +#include <net/sock.h> +#include <net/ip_fib.h> +#include "fib_lookup.h" + +/* One entry with the same key/mask */ +struct entry { + struct hlist_node nd; + u32 key; + u32 mask; + int plen; + struct hlist_head list; + struct rcu_head rcu; +}; + +struct entry_info { + struct hlist_node hlist; + int plen; + u32 mask_plen; /* ntohl(inet_make_mask(plen)) */ + struct list_head falh; + struct rcu_head rcu; +}; + +/* The list is ordered by the prefix length. Larger prefixes come + * earlier (so the default routes should be always at the end) + * Protected by the RTNL for writing, reading is handled with standard + * list RCU. + */ +struct rlist { + struct hlist_head list; +}; + +static void __alias_free_mem(struct rcu_head *head) +{ + struct fib_alias *fa = container_of(head, struct fib_alias, rcu); + kfree(fa); +} + +static inline void alias_free_mem_rcu(struct fib_alias *fa) +{ + call_rcu(&fa->rcu, __alias_free_mem); +} + +static void __entry_free_rcu(struct rcu_head *head) +{ + struct entry *l = container_of(head, struct entry, rcu); + kfree(l); +} + +static inline void free_entry(struct entry *entry) +{ + call_rcu(&entry->rcu, __entry_free_rcu); +} + +static inline void free_entry_info(struct entry_info *entry) +{ + kfree_rcu(entry, rcu); +} + +static struct entry *entry_new(void) +{ + return kzalloc(sizeof(struct entry), GFP_KERNEL); +} + +static struct entry_info *entry_info_new(int plen) +{ + struct entry_info *ei = kzalloc(sizeof(struct entry_info), GFP_KERNEL); + if (ei) { + ei->plen = plen; + ei->mask_plen = ntohl(inet_make_mask(plen)); + INIT_LIST_HEAD(&ei->falh); + } + return ei; +} + +/* readside must use rcu_read_lock currently dump routines + via get_fa_head and dump */ + +static struct entry_info *find_entry_info(struct entry *e, int plen) +{ + struct hlist_head *head = &e->list; + struct entry_info *ei; + + hlist_for_each_entry_rcu(ei, head, hlist) + if (ei->plen == plen) + return ei; + + return NULL; +} + +static inline struct list_head *get_fa_head(struct entry *l, int plen) +{ + struct entry_info *li = find_entry_info(l, plen); + + if (!li) + return NULL; + + return &li->falh; +} + +static void insert_entry_info(struct hlist_head *head, struct entry_info *new) +{ + struct entry_info *li = NULL, *last = NULL; + + if (hlist_empty(head)) { + hlist_add_head_rcu(&new->hlist, head); + } else { + hlist_for_each_entry(li, head, hlist) { + if (new->plen > li->plen) + break; + + last = li; + } + if (last) + hlist_add_after_rcu(&last->hlist, &new->hlist); + else + hlist_add_before_rcu(&new->hlist, &li->hlist); + } +} + +/* rcu_read_lock needs to be hold by caller from readside */ +static struct entry * +fib_find_node(struct rlist *rl, u32 key) +{ + struct entry *entry; + + hlist_for_each_entry_rcu (entry, &rl->list, nd) { + if (key == entry->key) + return entry; + } + return NULL; +} + +/* only used from updater-side */ + +static struct list_head *fib_insert_node(struct rlist *rl, u32 key, int plen) +{ + struct entry *entry; + struct entry_info *ei; + struct hlist_node *prev = NULL; + struct list_head *fa_head; + + hlist_for_each_entry (entry, &rl->list, nd) { + if (entry->plen < plen) + break; + prev = &entry->nd; + } + entry = entry_new(); + if (!entry) + return NULL; + entry->key = key; + entry->mask = ntohl(inet_make_mask(plen)); + entry->plen = plen; + if (prev) + hlist_add_after_rcu(prev, &entry->nd); + else + hlist_add_head_rcu(&entry->nd, &rl->list); + ei = entry_info_new(plen); + if (!ei) + return NULL; + fa_head = &ei->falh; + insert_entry_info(&entry->list, ei); + return fa_head; +} + +int fib_table_insert(struct fib_table *tb, struct fib_config *cfg) +{ + struct rlist *rl = (struct rlist *) tb->tb_data; + struct fib_alias *fa, *new_fa; + struct list_head *fa_head = NULL; + struct fib_info *fi; + int plen = cfg->fc_dst_len; + u8 tos = cfg->fc_tos; + u32 key, mask; + int err; + struct entry *l; + bool found = false; + + if (plen > 32) + return -EINVAL; + + key = ntohl(cfg->fc_dst); + + pr_debug("Insert table=%u %08x/%d\n", tb->tb_id, key, plen); + + mask = ntohl(inet_make_mask(plen)); + + if (key & ~mask) + return -EINVAL; + + fi = fib_create_info(cfg); + if (IS_ERR(fi)) { + err = PTR_ERR(fi); + goto err; + } + + hlist_for_each_entry (l, &rl->list, nd) { + if (l->key == key) { + found = true; + break; + } + } + fa = NULL; + if (!found) + l = NULL; + + if (l) { + fa_head = get_fa_head(l, plen); + fa = fib_find_alias(fa_head, tos, fi->fib_priority); + } + + /* Now fa, if non-NULL, points to the first fib alias + * with the same keys [prefix,tos,priority], if such key already + * exists or to the node before which we will insert new one. + * + * If fa is NULL, we will need to allocate a new one and + * insert to the head of f. + * + * If f is NULL, no fib node matched the destination key + * and we need to allocate a new one of those as well. + */ + + if (fa && fa->fa_tos == tos && + fa->fa_info->fib_priority == fi->fib_priority) { + struct fib_alias *fa_first, *fa_match; + int iter = 0; + + err = -EEXIST; + if (cfg->fc_nlflags & NLM_F_EXCL) + goto out; + + /* We have 2 goals: + * 1. Find exact match for type, scope, fib_info to avoid + * duplicate routes + * 2. Find next 'fa' (or head), NLM_F_APPEND inserts before it + */ + fa_match = NULL; + fa_first = fa; + fa = list_entry(fa->fa_list.prev, struct fib_alias, fa_list); + list_for_each_entry_continue(fa, fa_head, fa_list) { + if (fa->fa_tos != tos) + break; + if (fa->fa_info->fib_priority != fi->fib_priority) + break; + if (fa->fa_type == cfg->fc_type && + fa->fa_info == fi) { + fa_match = fa; + break; + } + iter++; + } + if (cfg->fc_nlflags & NLM_F_REPLACE) { + struct fib_info *fi_drop; + u8 state; + + fa = fa_first; + if (fa_match) { + if (fa == fa_match) + err = 0; + goto out; + } + err = -ENOBUFS; + new_fa = kmalloc(sizeof(struct fib_alias), GFP_KERNEL); + if (new_fa == NULL) + goto out; + + fi_drop = fa->fa_info; + new_fa->fa_tos = fa->fa_tos; + new_fa->fa_info = fi; + new_fa->fa_type = cfg->fc_type; + state = fa->fa_state; + new_fa->fa_state = state & ~FA_S_ACCESSED; + + list_replace_rcu(&fa->fa_list, &new_fa->fa_list); + alias_free_mem_rcu(fa); + + fib_release_info(fi_drop); + if (state & FA_S_ACCESSED) + rt_cache_flush(cfg->fc_nlinfo.nl_net); + rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, + tb->tb_id, &cfg->fc_nlinfo, NLM_F_REPLACE); + + goto succeeded; + } + /* Error if we find a perfect match which + * uses the same scope, type, and nexthop + * information. + */ + if (fa_match) { + goto out; + } + + if (!(cfg->fc_nlflags & NLM_F_APPEND)) + fa = fa_first; + } + err = -ENOENT; + if (!(cfg->fc_nlflags & NLM_F_CREATE)) + goto out; + + err = -ENOBUFS; + new_fa = kmalloc(sizeof(struct fib_alias), GFP_KERNEL); + if (new_fa == NULL) + goto out; + + new_fa->fa_info = fi; + new_fa->fa_tos = tos; + new_fa->fa_type = cfg->fc_type; + new_fa->fa_state = 0; + + /* + * Insert new entry to the list. + */ + + if (!fa_head) { + fa_head = fib_insert_node(rl, key, plen); + if (unlikely(!fa_head)) { + err = -ENOMEM; + goto out_free_new_fa; + } + } + if (!plen) + tb->tb_num_default++; + + list_add_tail_rcu(&new_fa->fa_list, + (fa ? &fa->fa_list : fa_head)); + + rt_cache_flush(cfg->fc_nlinfo.nl_net); + rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, tb->tb_id, + &cfg->fc_nlinfo, 0); +succeeded: + return 0; + +out_free_new_fa: + kfree(new_fa); +out: + fib_release_info(fi); +err: + pr_debug("insert err %d\n", err); + return err; +} + +static int check_entry(struct fib_table *tb, struct entry *l, + u32 key, const struct flowi4 *flp, + struct fib_result *res, int fib_flags) +{ + struct entry_info *li; + struct hlist_head *hhead = &l->list; + + hlist_for_each_entry_rcu(li, hhead, hlist) { + struct fib_alias *fa; + + list_for_each_entry_rcu(fa, &li->falh, fa_list) { + struct fib_info *fi = fa->fa_info; + int nhsel, err; + + if (fa->fa_tos && fa->fa_tos != flp->flowi4_tos) + continue; + if (fi->fib_dead) + continue; + if (fa->fa_info->fib_scope < flp->flowi4_scope) + continue; + fib_alias_accessed(fa); + err = fib_props[fa->fa_type].error; + if (err) { + return err; + } + if (fi->fib_flags & RTNH_F_DEAD) + continue; + for (nhsel = 0; nhsel < fi->fib_nhs; nhsel++) { + const struct fib_nh *nh = &fi->fib_nh[nhsel]; + + if (nh->nh_flags & RTNH_F_DEAD) + continue; + if (flp->flowi4_oif && flp->flowi4_oif != nh->nh_oif) + continue; + + res->prefixlen = li->plen; + res->nh_sel = nhsel; + res->type = fa->fa_type; + res->scope = fa->fa_info->fib_scope; + res->fi = fi; + res->table = tb; + res->fa_head = &li->falh; + if (!(fib_flags & FIB_LOOKUP_NOREF)) + atomic_inc(&fi->fib_clntref); + return 0; + } + } + } + + return 1; +} + +int fib_table_lookup(struct fib_table *tb, const struct flowi4 *flp, + struct fib_result *res, int fib_flags) +{ + struct entry *entry; + struct rlist *rl = (struct rlist *)tb->tb_data; + u32 key = ntohl(flp->daddr); + int ret = 1; + + rcu_read_lock(); + hlist_for_each_entry_rcu (entry, &rl->list, nd) { + if ((key & entry->mask) == entry->key) { + ret = check_entry(tb, entry, key, flp, res, fib_flags); + if (ret <= 0) + break; + } + } + rcu_read_unlock(); + return ret; +} +EXPORT_SYMBOL_GPL(fib_table_lookup); + +static void entry_remove(struct entry *entry) +{ + hlist_del_rcu(&entry->nd); + free_entry(entry); +} + +/* + * Caller must hold RTNL. + */ +int fib_table_delete(struct fib_table *tb, struct fib_config *cfg) +{ + struct rlist *rl = (struct rlist *) tb->tb_data; + u32 key, mask; + int plen = cfg->fc_dst_len; + u8 tos = cfg->fc_tos; + struct fib_alias *fa, *fa_to_delete; + struct list_head *fa_head; + struct entry *l; + struct entry_info *li; + + if (plen > 32) + return -EINVAL; + + key = ntohl(cfg->fc_dst); + mask = ntohl(inet_make_mask(plen)); + + if (key & ~mask) + return -EINVAL; + + l = fib_find_node(rl, key); + + if (!l) + return -ESRCH; + + li = find_entry_info(l, plen); + + if (!li) + return -ESRCH; + + fa_head = &li->falh; + fa = fib_find_alias(fa_head, tos, 0); + + if (!fa) + return -ESRCH; + + pr_debug("Deleting %08x/%d tos=%d\n", key, plen, tos); + + fa_to_delete = NULL; + fa = list_entry(fa->fa_list.prev, struct fib_alias, fa_list); + list_for_each_entry_continue(fa, fa_head, fa_list) { + struct fib_info *fi = fa->fa_info; + + if (fa->fa_tos != tos) + break; + + if ((!cfg->fc_type || fa->fa_type == cfg->fc_type) && + (cfg->fc_scope == RT_SCOPE_NOWHERE || + fa->fa_info->fib_scope == cfg->fc_scope) && + (!cfg->fc_prefsrc || + fi->fib_prefsrc == cfg->fc_prefsrc) && + (!cfg->fc_protocol || + fi->fib_protocol == cfg->fc_protocol) && + fib_nh_match(cfg, fi) == 0) { + fa_to_delete = fa; + break; + } + } + + if (!fa_to_delete) + return -ESRCH; + + fa = fa_to_delete; + rtmsg_fib(RTM_DELROUTE, htonl(key), fa, plen, tb->tb_id, + &cfg->fc_nlinfo, 0); + + list_del_rcu(&fa->fa_list); + + if (!plen) + tb->tb_num_default--; + + if (list_empty(fa_head)) { + hlist_del_rcu(&li->hlist); + free_entry_info(li); + } + + if (hlist_empty(&l->list)) + entry_remove(l); + + if (fa->fa_state & FA_S_ACCESSED) + rt_cache_flush(cfg->fc_nlinfo.nl_net); + + fib_release_info(fa->fa_info); + alias_free_mem_rcu(fa); + return 0; +} + +static int flush_list(struct list_head *head) +{ + struct fib_alias *fa, *fa_node; + int found = 0; + + list_for_each_entry_safe(fa, fa_node, head, fa_list) { + struct fib_info *fi = fa->fa_info; + + if (fi && (fi->fib_flags & RTNH_F_DEAD)) { + list_del_rcu(&fa->fa_list); + fib_release_info(fa->fa_info); + alias_free_mem_rcu(fa); + found++; + } + } + return found; +} + +static int flush_entry(struct entry *e) +{ + int found = 0; + struct hlist_head *lih = &e->list; + struct hlist_node *tmp; + struct entry_info *li = NULL; + + hlist_for_each_entry_safe(li, tmp, lih, hlist) { + found += flush_list(&li->falh); + + if (list_empty(&li->falh)) { + hlist_del_rcu(&li->hlist); + free_entry_info(li); + } + } + return found; +} + +static struct entry *first_entry(struct rlist *rl) +{ + struct hlist_node *first; + + first = rcu_dereference(hlist_first_rcu(&rl->list)); + return first ? hlist_entry(first, struct entry, nd) : NULL; +} + +static struct entry *next_entry(struct entry *e) +{ + struct hlist_node *next = hlist_next_rcu(&e->nd); + + return next ? hlist_entry(next, struct entry, nd) : NULL; +} + +static struct entry *entry_index(struct rlist *rl, int index) +{ + struct entry *e; + + hlist_for_each_entry_rcu (e, &rl->list, nd) { + if (index-- <= 0) + break; + } + return e; +} + +/* + * Caller must hold RTNL. + */ +int fib_table_flush(struct fib_table *tb) +{ + struct rlist *rl = (struct rlist *) tb->tb_data; + struct entry *ei; + struct hlist_node *tmp; + int found = 0; + + hlist_for_each_entry_safe (ei, tmp, &rl->list, nd) { + found += flush_entry(ei); + if (hlist_empty(&ei->list)) + entry_remove(ei); + } + pr_debug("flush found=%d\n", found); + return found; +} + +void fib_free_table(struct fib_table *tb) +{ + kfree(tb); +} + +static int fn_list_dump_fa(u32 key, int plen, struct list_head *fah, + struct fib_table *tb, + struct sk_buff *skb, struct netlink_callback *cb) +{ + int i, s_i; + struct fib_alias *fa; + __be32 xkey = htonl(key); + + s_i = cb->args[5]; + i = 0; + + /* rcu_read_lock is hold by caller */ + + list_for_each_entry_rcu(fa, fah, fa_list) { + if (i < s_i) { + i++; + continue; + } + + if (fib_dump_info(skb, NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, + RTM_NEWROUTE, + tb->tb_id, + fa->fa_type, + xkey, + plen, + fa->fa_tos, + fa->fa_info, NLM_F_MULTI) < 0) { + cb->args[5] = i; + return -1; + } + i++; + } + cb->args[5] = i; + return skb->len; +} + +static int fn_dump_entry(struct entry *l, struct fib_table *tb, + struct sk_buff *skb, struct netlink_callback *cb) +{ + struct entry_info *li; + int i, s_i; + + s_i = cb->args[4]; + i = 0; + + /* rcu_read_lock is hold by caller */ + hlist_for_each_entry_rcu(li, &l->list, hlist) { + if (i < s_i) { + i++; + continue; + } + + if (i > s_i) + cb->args[5] = 0; + + if (list_empty(&li->falh)) + continue; + + if (fn_list_dump_fa(l->key, li->plen, &li->falh, tb, skb, cb) < 0) { + cb->args[4] = i; + return -1; + } + i++; + } + + cb->args[4] = i; + return skb->len; +} + +int fib_table_dump(struct fib_table *tb, struct sk_buff *skb, + struct netlink_callback *cb) +{ + struct entry *l; + struct rlist *rl = (struct rlist *) tb->tb_data; + u32 key = cb->args[2]; + int count = cb->args[3]; + + rcu_read_lock(); + if (count == 0) + l = first_entry(rl); + else { + /* Normally, continue from last key, but if that is missing + * fallback to using slow rescan + */ + l = fib_find_node(rl, key); + if (!l) + l = entry_index(rl, count); + } + + while (l) { + cb->args[2] = l->key; + if (fn_dump_entry(l, tb, skb, cb) < 0) { + cb->args[3] = count; + rcu_read_unlock(); + return -1; + } + + ++count; + l = next_entry(l); + memset(&cb->args[4], 0, + sizeof(cb->args) - 4*sizeof(cb->args[0])); + } + cb->args[3] = count; + rcu_read_unlock(); + + return skb->len; +} + +/* new name? */ +struct fib_table *fib_trie_table(u32 id) +{ + struct fib_table *tb; + + tb = kzalloc(sizeof(struct fib_table) + sizeof(struct rtable), + GFP_KERNEL); + if (tb == NULL) + return NULL; + + tb->tb_id = id; + tb->tb_default = -1; + tb->tb_num_default = 0; + + return tb; +} + +#ifdef CONFIG_PROC_FS + +struct fib_route_iter { + struct seq_net_private p; + struct rlist *rl; + loff_t pos; + u32 key; +}; + +static struct entry *fib_route_get_idx(struct fib_route_iter *iter, loff_t pos) +{ + struct entry *e = NULL; + + /* use cache location of last found key */ + if (iter->pos > 0 && pos >= iter->pos && (e = fib_find_node(iter->rl, iter->key)) != NULL) + pos -= iter->pos; + else { + iter->pos = 0; + e = first_entry(iter->rl); + } + + while (e && pos-- > 0) { + iter->pos++; + e = next_entry(e); + } + + if (e) + iter->key = pos; /* remember it */ + else + iter->pos = 0; /* forget it */ + + return e; +} + +static void *fib_route_seq_start(struct seq_file *seq, loff_t *pos) + __acquires(RCU) +{ + struct fib_route_iter *iter = seq->private; + struct fib_table *tb; + + rcu_read_lock(); + tb = fib_get_table(seq_file_net(seq), RT_TABLE_MAIN); + if (!tb) + return NULL; + + iter->rl = (struct rlist *) tb->tb_data; + if (*pos == 0) + return SEQ_START_TOKEN; + else + return fib_route_get_idx(iter, *pos - 1); +} + +static void *fib_route_seq_next(struct seq_file *seq, void *v, loff_t *pos) +{ + struct fib_route_iter *iter = seq->private; + struct entry *e = v; + + ++*pos; + if (v == SEQ_START_TOKEN) { + iter->pos = 0; + e = first_entry(iter->rl); + } else { + iter->pos++; + e = next_entry(e); + } + + if (e) + iter->key = e->key; + else + iter->pos = 0; + return e; +} + +static void fib_route_seq_stop(struct seq_file *seq, void *v) + __releases(RCU) +{ + rcu_read_unlock(); +} + +static unsigned int fib_flag_trans(int type, __be32 mask, const struct fib_info *fi) +{ + unsigned int flags = 0; + + if (type == RTN_UNREACHABLE || type == RTN_PROHIBIT) + flags = RTF_REJECT; + if (fi && fi->fib_nh->nh_gw) + flags |= RTF_GATEWAY; + if (mask == htonl(0xFFFFFFFF)) + flags |= RTF_HOST; + flags |= RTF_UP; + return flags; +} + +/* This outputs /proc/net/route. + * The format of the file is not supposed to be changed + * and needs to be same as fib_hash output to avoid breaking + * legacy utilities. + */ +static int fib_route_seq_show(struct seq_file *seq, void *v) +{ + struct entry *entry = v; + struct entry_info *ei; + + if (v == SEQ_START_TOKEN) { + seq_printf(seq, "%-127s\n", "Iface\tDestination\tGateway " + "\tFlags\tRefCnt\tUse\tMetric\tMask\t\tMTU" + "\tWindow\tIRTT"); + return 0; + } + + hlist_for_each_entry_rcu(ei, &entry->list, hlist) { + struct fib_alias *fa; + __be32 mask, prefix; + + mask = inet_make_mask(ei->plen); + prefix = htonl(entry->key); + + list_for_each_entry_rcu(fa, &ei->falh, fa_list) { + const struct fib_info *fi = fa->fa_info; + unsigned int flags = fib_flag_trans(fa->fa_type, mask, fi); + + if (fa->fa_type == RTN_BROADCAST + || fa->fa_type == RTN_MULTICAST) + continue; + + seq_setwidth(seq, 127); + + if (fi) + seq_printf(seq, + "%s\t%08X\t%08X\t%04X\t%d\t%u\t" + "%d\t%08X\t%d\t%u\t%u", + fi->fib_dev ? fi->fib_dev->name : "*", + prefix, + fi->fib_nh->nh_gw, flags, 0, 0, + fi->fib_priority, + mask, + (fi->fib_advmss ? + fi->fib_advmss + 40 : 0), + fi->fib_window, + fi->fib_rtt >> 3); + else + seq_printf(seq, + "*\t%08X\t%08X\t%04X\t%d\t%u\t" + "%d\t%08X\t%d\t%u\t%u", + prefix, 0, flags, 0, 0, 0, + mask, 0, 0, 0); + + seq_pad(seq, '\n'); + } + } + + return 0; +} + +static const struct seq_operations fib_route_seq_ops = { + .start = fib_route_seq_start, + .next = fib_route_seq_next, + .stop = fib_route_seq_stop, + .show = fib_route_seq_show, +}; + +static int fib_route_seq_open(struct inode *inode, struct file *file) +{ + return seq_open_net(inode, file, &fib_route_seq_ops, + sizeof(struct fib_route_iter)); +} + +static const struct file_operations fib_route_fops = { + .owner = THIS_MODULE, + .open = fib_route_seq_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release_net, +}; + +int __net_init fib_proc_init(struct net *net) +{ + if (!proc_create("route", S_IRUGO, net->proc_net, &fib_route_fops)) + return -ENOMEM; + + return 0; +} + +void __net_exit fib_proc_exit(struct net *net) +{ + remove_proc_entry("route", net->proc_net); +} + +#endif /* CONFIG_PROC_FS */ + +void __init fib_trie_init(void) {} -- 1.9.0 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/