Stephen Hemminger writes: > Convert fib_trie to use Read Copy Update. Since all updates to > FIB happen under RTNL mutex (semaphore),
> This version is different from earlier RCU patches because: > * only need hlist_add_before_rcu by recoding insert_leaf_info > * only use RCU for read path, update stuff already is RTNL protected > * need RCU assign for parent in update cases, but not when initializing > unlinked node. > * need synchronize_rcu in the replace case before deleting > alias info Below is some more cases pointed out by Paul McKenney. It goes on top your last RCU patch (plus 2:nd cleanup) for your convienance * rcu_read_lock() rearrangements to sync w. rcu_dereference * insert/relace alias now with list_replace_rcu (synchronize_rcu make operations so slow that RCU cannot be used with large number of entries) * Removal of bug_trap. IMO the RCU port should be a pretty good approximation now. One thing to digest if we for simpler and more maintainable code should have the updater side to use rcu versions this to make things consistent and possible to avoid fishy bugs. Cheers. --ro Signed-off-by: Robert Olsson <[EMAIL PROTECTED]> --- net/ipv4/fib_trie.c.3 2005-08-22 18:20:09.000000000 +0200 +++ net/ipv4/fib_trie.c 2005-08-23 17:49:34.000000000 +0200 @@ -169,6 +169,9 @@ static kmem_cache_t *fn_alias_kmem; static struct trie *trie_local = NULL, *trie_main = NULL; + +/* rcu_read_lock needs to be hold by caller from readside */ + static inline struct node *tnode_get_child(struct tnode *tn, int i) { BUG_ON(i >= 1 << tn->bits); @@ -827,13 +830,16 @@ return; t->size = 0; - t->trie = NULL; + rcu_assign_pointer(t->trie, NULL); t->revision = 0; #ifdef CONFIG_IP_FIB_TRIE_STATS memset(&t->stats, 0, sizeof(struct trie_use_stats)); #endif } +/* readside most use rcu_read_lock currently dump routines + via get_fa_head and dump */ + static struct leaf_info *find_leaf_info(struct hlist_head *head, int plen) { struct hlist_node *node; @@ -872,6 +878,8 @@ } } +/* rcu_read_lock needs to be hold by caller from readside */ + static struct leaf * fib_find_node(struct trie *t, u32 key) { @@ -903,17 +911,13 @@ static struct node *trie_rebalance(struct trie *t, struct tnode *tn) { - int i; int wasfull; t_key cindex, key; struct tnode *tp = NULL; key = tn->key; - i = 0; while (tn != NULL && NODE_PARENT(tn) != NULL) { - BUG_ON(i > 12); /* Why is this a bug? -ojn */ - i++; tp = NODE_PARENT(tn); cindex = tkey_extract_bits(key, tp->pos, tp->bits); @@ -933,6 +937,8 @@ return (struct node*) tn; } +/* only used from updater-side */ + static struct list_head * fib_insert_node(struct trie *t, int *err, u32 key, int plen) { @@ -1155,18 +1161,21 @@ struct fib_info *fi_drop; u8 state; - /* - * TODO: Make replace atomic - */ + err = -ENOBUFS; + new_fa = kmem_cache_alloc(fn_alias_kmem, SLAB_KERNEL); + if (new_fa == NULL) + goto out; fi_drop = fa->fa_info; - fa->fa_info = fi; - fa->fa_type = type; - fa->fa_scope = r->rtm_scope; + new_fa->fa_tos = fa->fa_tos; + new_fa->fa_info = fi; + new_fa->fa_type = type; + new_fa->fa_scope = r->rtm_scope; state = fa->fa_state; - fa->fa_state &= ~FA_S_ACCESSED; + new_fa->fa_state &= ~FA_S_ACCESSED; - synchronize_rcu(); + list_replace_rcu(&fa->fa_list, &new_fa->fa_list); + alias_free_mem_rcu(fa); fib_release_info(fi_drop); if (state & FA_S_ACCESSED) @@ -1234,6 +1243,8 @@ return err; } + +/* should be clalled with rcu_read_lock */ static inline int check_leaf(struct trie *t, struct leaf *l, t_key key, int *plen, const struct flowi *flp, struct fib_result *res, int *err) @@ -1456,6 +1467,7 @@ return ret; } +/* only called from updater side */ static int trie_leaf_remove(struct trie *t, t_key key) { t_key cindex; @@ -1548,6 +1560,7 @@ fa_to_delete = NULL; fa_head = fa->fa_list.prev; + list_for_each_entry(fa, fa_head, fa_list) { struct fib_info *fi = fa->fa_info; @@ -1629,6 +1642,8 @@ return found; } +/* rcu_read_lock needs to be hold by caller from readside */ + static struct leaf *nextleaf(struct trie *t, struct leaf *thisleaf) { struct node *c = (struct node *) thisleaf; @@ -1695,6 +1710,7 @@ t->revision++; + rcu_read_lock(); for (h = 0; (l = nextleaf(t, l)) != NULL; h++) { found += trie_flush_leaf(t, l); @@ -1702,6 +1718,7 @@ trie_leaf_remove(t, ll->key); ll = l; } + rcu_read_unlock(); if (ll && hlist_empty(&ll->list)) trie_leaf_remove(t, ll->key); @@ -1805,6 +1822,8 @@ s_i = cb->args[3]; i = 0; + /* rcu_read_lock is hold by caller */ + list_for_each_entry_rcu(fa, fah, fa_list) { if (i < s_i) { i++; @@ -2038,7 +2057,7 @@ static void trie_dump_seq(struct seq_file *seq, struct trie *t) { - struct node *n = rcu_dereference(t->trie); + struct node *n; int cindex = 0; int indent = 1; int pend = 0; @@ -2046,7 +2065,7 @@ struct tnode *tn; rcu_read_lock(); - + n = rcu_dereference(t->trie); seq_printf(seq, "------ trie_dump of t=%p ------\n", t); if (!n) { @@ -2146,7 +2165,7 @@ static struct trie_stat *trie_collect_stats(struct trie *t) { - struct node *n = rcu_dereference(t->trie); + struct node *n; struct trie_stat *s = trie_stat_new(); int cindex = 0; int pend = 0; @@ -2154,10 +2173,12 @@ if (!s) return NULL; - if (!n) - return s; rcu_read_lock(); + n = rcu_dereference(t->trie); + + if (!n) + return s; if (IS_TNODE(n)) { struct tnode *tn = (struct tnode *)n; - To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html