Use RCU to allow lockless access to the neighbour table.
This should speed up the send path because no atomic operations
will be needed to look up ARP entries, etc.


Signed-off-by: Stephen Hemminger <[EMAIL PROTECTED]>

---
 include/net/neighbour.h |    4 -
 net/core/neighbour.c    |  158 +++++++++++++++++++++++++-----------------------
 2 files changed, 87 insertions(+), 75 deletions(-)

--- net-2.6.19.orig/include/net/neighbour.h
+++ net-2.6.19/include/net/neighbour.h
@@ -108,6 +108,7 @@ struct neighbour
        struct sk_buff_head     arp_queue;
        struct timer_list       timer;
        struct neigh_ops        *ops;
+       struct rcu_head         rcu;
        u8                      primary_key[0];
 };
 
@@ -126,6 +127,7 @@ struct pneigh_entry
 {
        struct hlist_node       hlist;
        struct net_device       *dev;
+       struct rcu_head         rcu;
        u8                      key[0];
 };
 
@@ -157,7 +159,7 @@ struct neigh_table
        struct timer_list       proxy_timer;
        struct sk_buff_head     proxy_queue;
        atomic_t                entries;
-       rwlock_t                lock;
+       spinlock_t              lock;
        unsigned long           last_rand;
        kmem_cache_t            *kmem_cachep;
        struct neigh_statistics *stats;
--- net-2.6.19.orig/net/core/neighbour.c
+++ net-2.6.19/net/core/neighbour.c
@@ -67,9 +67,10 @@ static struct file_operations neigh_stat
 #endif
 
 /*
-   Neighbour hash table buckets are protected with rwlock tbl->lock.
+   Neighbour hash table buckets are protected with lock tbl->lock.
 
-   - All the scans/updates to hash buckets MUST be made under this lock.
+   - All the scans of hash buckets must be made with the RCU read lock held (nopreempt)
+   - Updates to hash buckets MUST be made under this lock.
    - NOTHING clever should be made under this lock: no callbacks
      to protocol backends, no attempts to send something to network.
      It will result in deadlocks, if backend/driver wants to use neighbour
@@ -117,6 +118,13 @@ unsigned long neigh_rand_reach_time(unsi
 }
 
 
+static void neigh_rcu_release(struct rcu_head *head)
+{
+       struct neighbour *neigh = container_of(head, struct neighbour, rcu);
+
+       neigh_release(neigh);
+}
+
 static int neigh_forced_gc(struct neigh_table *tbl)
 {
        int shrunk = 0;
@@ -124,7 +132,7 @@ static int neigh_forced_gc(struct neigh_
 
        NEIGH_CACHE_STAT_INC(tbl, forced_gc_runs);
 
-       write_lock_bh(&tbl->lock);
+       spin_lock_bh(&tbl->lock);
        for (i = 0; i <= tbl->hash_mask; i++) {
                struct neighbour *n;
                struct hlist_node *node, *tmp;
@@ -138,11 +146,11 @@ static int neigh_forced_gc(struct neigh_
                        write_lock(&n->lock);
                        if (atomic_read(&n->refcnt) == 1 &&
                            !(n->nud_state & NUD_PERMANENT)) {
-                               hlist_del(&n->hlist);
+                               hlist_del_rcu(&n->hlist);
                                n->dead = 1;
                                shrunk  = 1;
                                write_unlock(&n->lock);
-                               neigh_release(n);
+                               call_rcu(&n->rcu, neigh_rcu_release);
                                continue;
                        }
                        write_unlock(&n->lock);
@@ -151,7 +159,7 @@ static int neigh_forced_gc(struct neigh_
 
        tbl->last_flush = jiffies;
 
-       write_unlock_bh(&tbl->lock);
+       spin_unlock_bh(&tbl->lock);
 
        return shrunk;
 }
@@ -189,7 +197,7 @@ static void neigh_flush_dev(struct neigh
                        if (dev && n->dev != dev)
                                continue;
 
-                       hlist_del(&n->hlist);
+                       hlist_del_rcu(&n->hlist);
                        write_lock(&n->lock);
                        neigh_del_timer(n);
                        n->dead = 1;
@@ -220,17 +228,17 @@ static void neigh_flush_dev(struct neigh
 
 void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev)
 {
-       write_lock_bh(&tbl->lock);
+       spin_lock_bh(&tbl->lock);
        neigh_flush_dev(tbl, dev);
-       write_unlock_bh(&tbl->lock);
+       spin_unlock_bh(&tbl->lock);
 }
 
 int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
 {
-       write_lock_bh(&tbl->lock);
+       spin_lock_bh(&tbl->lock);
        neigh_flush_dev(tbl, dev);
        pneigh_ifdown(tbl, dev);
-       write_unlock_bh(&tbl->lock);
+       spin_unlock_bh(&tbl->lock);
 
        del_timer_sync(&tbl->proxy_timer);
        pneigh_queue_purge(&tbl->proxy_queue);
@@ -326,8 +334,8 @@ static void neigh_hash_grow(struct neigh
                        unsigned int hash_val = tbl->hash(n->primary_key, 
n->dev);
 
                        hash_val &= new_hash_mask;
-                       hlist_del(&n->hlist);
-                       hlist_add_head(&n->hlist, &new_hash[hash_val]);
+                       __hlist_del(&n->hlist);
+                       hlist_add_head_rcu(&n->hlist, &new_hash[hash_val]);
                }
        }
        tbl->hash_buckets = new_hash;
@@ -346,8 +354,8 @@ struct neighbour *neigh_lookup(struct ne
        
        NEIGH_CACHE_STAT_INC(tbl, lookups);
 
-       read_lock_bh(&tbl->lock);
-       hlist_for_each_entry(n, tmp, &tbl->hash_buckets[hash_val], hlist) {
+       rcu_read_lock();
+       hlist_for_each_entry_rcu(n, tmp, &tbl->hash_buckets[hash_val], hlist) {
                if (dev == n->dev && !memcmp(n->primary_key, pkey, key_len)) {
                        neigh_hold(n);
                        NEIGH_CACHE_STAT_INC(tbl, hits);
@@ -356,7 +364,7 @@ struct neighbour *neigh_lookup(struct ne
        }
        n = NULL;
 found:
-       read_unlock_bh(&tbl->lock);
+       rcu_read_unlock();
        return n;
 }
 
@@ -369,8 +377,8 @@ struct neighbour *neigh_lookup_nodev(str
 
        NEIGH_CACHE_STAT_INC(tbl, lookups);
 
-       read_lock_bh(&tbl->lock);
-       hlist_for_each_entry(n, tmp, &tbl->hash_buckets[hash_val], hlist) {
+       rcu_read_lock();
+       hlist_for_each_entry_rcu(n, tmp, &tbl->hash_buckets[hash_val], hlist) {
                if (!memcmp(n->primary_key, pkey, key_len)) {
                        neigh_hold(n);
                        NEIGH_CACHE_STAT_INC(tbl, hits);
@@ -379,7 +387,7 @@ struct neighbour *neigh_lookup_nodev(str
        }
        n = NULL;
 found:
-       read_unlock_bh(&tbl->lock);
+       rcu_read_unlock();
        return n;
 }
 
@@ -416,7 +424,7 @@ struct neighbour *neigh_create(struct ne
 
        n->confirmed = jiffies - (n->parms->base_reachable_time << 1);
 
-       write_lock_bh(&tbl->lock);
+       spin_lock_bh(&tbl->lock);
 
        if (atomic_read(&tbl->entries) > (tbl->hash_mask + 1))
                neigh_hash_grow(tbl, (tbl->hash_mask + 1) << 1);
@@ -436,21 +444,22 @@ struct neighbour *neigh_create(struct ne
                }
        }
 
-       hlist_add_head(&n->hlist, &tbl->hash_buckets[hash_val]);
        n->dead = 0;
        neigh_hold(n);
-       write_unlock_bh(&tbl->lock);
+       hlist_add_head_rcu(&n->hlist, &tbl->hash_buckets[hash_val]);
+       spin_unlock_bh(&tbl->lock);
        NEIGH_PRINTK2("neigh %p is created.\n", n);
        rc = n;
 out:
        return rc;
 out_tbl_unlock:
-       write_unlock_bh(&tbl->lock);
+       spin_unlock_bh(&tbl->lock);
 out_neigh_release:
        neigh_release(n);
        goto out;
 }
 
+/* Assumes rcu_read_lock is held */
 struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl, const void *pkey,
                                    struct net_device *dev, int creat)
 {
@@ -464,16 +473,14 @@ struct pneigh_entry * pneigh_lookup(stru
        hash_val ^= hash_val >> 4;
        hash_val &= PNEIGH_HASHMASK;
 
-       read_lock_bh(&tbl->lock);
-
-       hlist_for_each_entry(n, tmp, &tbl->phash_buckets[hash_val], hlist) {
+       hlist_for_each_entry_rcu(n, tmp, &tbl->phash_buckets[hash_val], hlist) {
                if (!memcmp(n->key, pkey, key_len) &&
                    (n->dev == dev || !n->dev)) {
-                       read_unlock_bh(&tbl->lock);
+                       rcu_read_unlock();
                        goto out;
                }
        }
-       read_unlock_bh(&tbl->lock);
+
        n = NULL;
        if (!creat)
                goto out;
@@ -495,13 +502,18 @@ struct pneigh_entry * pneigh_lookup(stru
                goto out;
        }
 
-       write_lock_bh(&tbl->lock);
-       hlist_add_head(&n->hlist, &tbl->phash_buckets[hash_val]);
-       write_unlock_bh(&tbl->lock);
+       spin_lock_bh(&tbl->lock);
+       hlist_add_head_rcu(&n->hlist, &tbl->phash_buckets[hash_val]);
+       spin_unlock_bh(&tbl->lock);
 out:
        return n;
 }
 
+static void pneigh_destroy(struct rcu_head *head)
+{
+       struct pneigh_entry *n = container_of(head, struct pneigh_entry, rcu);
+       kfree(n);
+}
 
 int pneigh_delete(struct neigh_table *tbl, const void *pkey,
                  struct net_device *dev)
@@ -516,20 +528,20 @@ int pneigh_delete(struct neigh_table *tb
        hash_val ^= hash_val >> 4;
        hash_val &= PNEIGH_HASHMASK;
 
-       write_lock_bh(&tbl->lock);
+       spin_lock_bh(&tbl->lock);
        hlist_for_each_entry(n, tmp, &tbl->phash_buckets[hash_val], hlist) {
                if (!memcmp(n->key, pkey, key_len) && n->dev == dev) {
-                       hlist_del(&n->hlist);
-                       write_unlock_bh(&tbl->lock);
+                       hlist_del_rcu(&n->hlist);
+                       spin_unlock_bh(&tbl->lock);
                        if (tbl->pdestructor)
                                tbl->pdestructor(n);
                        if (n->dev)
                                dev_put(n->dev);
-                       kfree(n);
+                       call_rcu(&n->rcu, pneigh_destroy);
                        return 0;
                }
        }
-       write_unlock_bh(&tbl->lock);
+       spin_unlock_bh(&tbl->lock);
        return -ENOENT;
 }
 
@@ -543,7 +555,7 @@ static int pneigh_ifdown(struct neigh_ta
 
                hlist_for_each_entry_safe(n, tmp, nxt, &tbl->phash_buckets[h], 
hlist) {
                        if (!dev || n->dev == dev) {
-                               hlist_del(&n->hlist);
+                               hlist_del_rcu(&n->hlist);
                                if (tbl->pdestructor)
                                        tbl->pdestructor(n);
                                if (n->dev)
@@ -644,7 +656,7 @@ static void neigh_periodic_timer(unsigne
 
        NEIGH_CACHE_STAT_INC(tbl, periodic_gc_runs);
 
-       write_lock(&tbl->lock);
+       spin_lock(&tbl->lock);
 
        /*
         *      periodically recompute ReachableTime from random function
@@ -676,7 +688,7 @@ static void neigh_periodic_timer(unsigne
                if (atomic_read(&n->refcnt) == 1 &&
                    (state == NUD_FAILED ||
                     time_after(now, n->used + n->parms->gc_staletime))) {
-                       hlist_del(&n->hlist);
+                       hlist_del_rcu(&n->hlist);
                        n->dead = 1;
                        write_unlock(&n->lock);
                        neigh_release(n);
@@ -697,7 +709,7 @@ static void neigh_periodic_timer(unsigne
 
        mod_timer(&tbl->gc_timer, now + expire);
 
-       write_unlock(&tbl->lock);
+       spin_unlock(&tbl->lock);
 }
 
 static __inline__ int neigh_max_probes(struct neighbour *n)
@@ -1285,10 +1297,10 @@ struct neigh_parms *neigh_parms_alloc(st
                        p->dev = dev;
                }
                p->sysctl_table = NULL;
-               write_lock_bh(&tbl->lock);
+               spin_lock_bh(&tbl->lock);
                p->next         = tbl->parms.next;
                tbl->parms.next = p;
-               write_unlock_bh(&tbl->lock);
+               spin_unlock_bh(&tbl->lock);
        }
        return p;
 }
@@ -1307,19 +1319,19 @@ void neigh_parms_release(struct neigh_ta
 
        if (!parms || parms == &tbl->parms)
                return;
-       write_lock_bh(&tbl->lock);
+       spin_lock_bh(&tbl->lock);
        for (p = &tbl->parms.next; *p; p = &(*p)->next) {
                if (*p == parms) {
                        *p = parms->next;
                        parms->dead = 1;
-                       write_unlock_bh(&tbl->lock);
+                       spin_unlock_bh(&tbl->lock);
                        if (parms->dev)
                                dev_put(parms->dev);
                        call_rcu(&parms->rcu_head, neigh_rcu_free_parms);
                        return;
                }
        }
-       write_unlock_bh(&tbl->lock);
+       spin_unlock_bh(&tbl->lock);
        NEIGH_PRINTK1("neigh_parms_release: not found\n");
 }
 
@@ -1365,7 +1377,7 @@ void neigh_table_init_no_netlink(struct 
 
        get_random_bytes(&tbl->hash_rnd, sizeof(tbl->hash_rnd));
 
-       rwlock_init(&tbl->lock);
+       spin_lock_init(&tbl->lock);
        init_timer(&tbl->gc_timer);
        tbl->gc_timer.data     = (unsigned long)tbl;
        tbl->gc_timer.function = neigh_periodic_timer;
@@ -1620,7 +1632,7 @@ static int neightbl_fill_info(struct sk_
 
        ndtmsg = nlmsg_data(nlh);
 
-       read_lock_bh(&tbl->lock);
+       spin_lock_bh(&tbl->lock);
        ndtmsg->ndtm_family = tbl->family;
        ndtmsg->ndtm_pad1   = 0;
        ndtmsg->ndtm_pad2   = 0;
@@ -1680,11 +1692,11 @@ static int neightbl_fill_info(struct sk_
        if (neightbl_fill_parms(skb, &tbl->parms) < 0)
                goto nla_put_failure;
 
-       read_unlock_bh(&tbl->lock);
+       rcu_read_unlock();
        return nlmsg_end(skb, nlh);
 
 nla_put_failure:
-       read_unlock_bh(&tbl->lock);
+       rcu_read_unlock();
        return nlmsg_cancel(skb, nlh);
 }
 
@@ -1703,7 +1715,7 @@ static int neightbl_fill_param_info(stru
 
        ndtmsg = nlmsg_data(nlh);
 
-       read_lock_bh(&tbl->lock);
+       rcu_read_lock();                /* this maybe unnecessary */
        ndtmsg->ndtm_family = tbl->family;
        ndtmsg->ndtm_pad1   = 0;
        ndtmsg->ndtm_pad2   = 0;
@@ -1712,10 +1724,10 @@ static int neightbl_fill_param_info(stru
            neightbl_fill_parms(skb, parms) < 0)
                goto errout;
 
-       read_unlock_bh(&tbl->lock);
+       rcu_read_unlock();
        return nlmsg_end(skb, nlh);
 errout:
-       read_unlock_bh(&tbl->lock);
+       rcu_read_unlock();
        return nlmsg_cancel(skb, nlh);
 }
  
@@ -1793,7 +1805,7 @@ int neightbl_set(struct sk_buff *skb, st
         * We acquire tbl->lock to be nice to the periodic timers and
         * make sure they always see a consistent set of values.
         */
-       write_lock_bh(&tbl->lock);
+       spin_lock_bh(&tbl->lock);
 
        if (tb[NDTA_PARMS]) {
                struct nlattr *tbp[NDTPA_MAX+1];
@@ -1874,7 +1886,7 @@ int neightbl_set(struct sk_buff *skb, st
        err = 0;
 
 errout_tbl_lock:
-       write_unlock_bh(&tbl->lock);
+       spin_unlock_bh(&tbl->lock);
 errout_locked:
        rcu_read_unlock();
 errout:
@@ -1890,7 +1902,7 @@ int neightbl_dump_info(struct sk_buff *s
 
        family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;
 
-       rcu_read_lock();
+       rcu_read_lock_bh();
        list_for_each_entry_rcu(tbl, &neigh_tables, list) {
                struct neigh_parms *p;
 
@@ -1986,20 +1998,20 @@ static int neigh_dump_table(struct neigh
                        continue;
                if (h > s_h)
                        s_idx = 0;
-               read_lock_bh(&tbl->lock);
+               rcu_read_lock();
                idx = 0;
-               hlist_for_each_entry(n, tmp, &tbl->hash_buckets[h], hlist) {
+               hlist_for_each_entry_rcu(n, tmp, &tbl->hash_buckets[h], hlist) {
                        if (idx >= s_idx &&
                            neigh_fill_info(skb, n, NETLINK_CB(cb->skb).pid,
                                            cb->nlh->nlmsg_seq,
                                            RTM_NEWNEIGH, NLM_F_MULTI) <= 0) {
-                               read_unlock_bh(&tbl->lock);
+                               rcu_read_unlock();
                                rc = -1;
                                goto out;
                        }
                        ++idx;
                }
-               read_unlock_bh(&tbl->lock);
+               rcu_read_unlock();
        }
        rc = skb->len;
 out:
@@ -2039,14 +2051,15 @@ void neigh_for_each(struct neigh_table *
 {
        int chain;
 
-       read_lock_bh(&tbl->lock);
+       rcu_read_lock();
        for (chain = 0; chain <= tbl->hash_mask; chain++) {
+               struct neighbour *n;
                struct hlist_node *p;
 
-               hlist_for_each(p, &tbl->hash_buckets[chain])
-                       cb(hlist_entry(p, struct neighbour, hlist), cookie);
+               hlist_for_each_entry_rcu(n, p, &tbl->hash_buckets[chain], hlist)
+                       cb(n, cookie);
        }
-       read_unlock_bh(&tbl->lock);
+       rcu_read_unlock();
 }
 EXPORT_SYMBOL(neigh_for_each);
 
@@ -2067,12 +2080,12 @@ void __neigh_for_each_release(struct nei
                        write_lock(&n->lock);
                        release = cb(n);
                        if (release) {
-                               hlist_del(&n->hlist);
+                               hlist_del_rcu(&n->hlist);
                                n->dead = 1;
                        }
                        write_unlock(&n->lock);
                        if (release)
-                               neigh_release(n);
+                               call_rcu(&n->rcu, neigh_rcu_release);
                }
        }
 }
@@ -2116,7 +2129,7 @@ found:
 
 static struct neighbour *next_neigh(struct hlist_node *node)
 {
-       if (node)
+       if (rcu_dereference(node))
                return hlist_entry(node, struct neighbour, hlist);
        else
                return NULL;
@@ -2191,7 +2204,7 @@ static struct pneigh_entry *pneigh_get_f
 
        state->flags |= NEIGH_SEQ_IS_PNEIGH;
        for (bucket = 0; bucket <= PNEIGH_HASHMASK; bucket++) {
-               pn = tbl->phash_buckets[bucket].first;
+               pn = rcu_dereference(tbl->phash_buckets[bucket].first);
                if (pn)
                        break;
        }
@@ -2208,12 +2221,12 @@ static struct pneigh_entry *pneigh_get_n
        struct neigh_table *tbl = state->tbl;
        struct hlist_node *tmp = &pn->hlist;
 
-       tmp = tmp->next;
+       tmp = rcu_dereference(tmp->next);
        if (tmp)
                goto found;
 
        while (++state->bucket < PNEIGH_HASHMASK) {
-               tmp = tbl->phash_buckets[state->bucket].first;
+               tmp = rcu_dereference(tbl->phash_buckets[state->bucket].first);
                if (tmp)
                        goto found;
        }
@@ -2261,7 +2274,7 @@ void *neigh_seq_start(struct seq_file *s
        state->bucket = 0;
        state->flags = (neigh_seq_flags & ~NEIGH_SEQ_IS_PNEIGH);
 
-       read_lock_bh(&tbl->lock);
+       rcu_read_lock();
 
        pos_minus_one = *pos - 1;
        return *pos ? neigh_get_idx_any(seq, &pos_minus_one) : SEQ_START_TOKEN;
@@ -2297,10 +2310,7 @@ EXPORT_SYMBOL(neigh_seq_next);
 
 void neigh_seq_stop(struct seq_file *seq, void *v)
 {
-       struct neigh_seq_state *state = seq->private;
-       struct neigh_table *tbl = state->tbl;
-
-       read_unlock_bh(&tbl->lock);
+       rcu_read_unlock();
 }
 EXPORT_SYMBOL(neigh_seq_stop);
 

--
Stephen Hemminger <[EMAIL PROTECTED]>


-
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to