Reading the hard header cache in the output path can be made
lockless by protecting the cache with a seqlock instead of an rwlock.

Signed-off-by: Stephen Hemminger <[EMAIL PROTECTED]>

---
 include/linux/netdevice.h |    3 ++-
 include/net/neighbour.h   |    2 ++
 net/core/neighbour.c      |   40 +++++++++++++++++++++++++++++++++++-----
 net/ipv4/ip_output.c      |   13 +++----------
 net/ipv6/ip6_output.c     |   13 +++----------
 5 files changed, 45 insertions(+), 26 deletions(-)

--- net-2.6.19.orig/include/linux/netdevice.h
+++ net-2.6.19/include/linux/netdevice.h
@@ -193,7 +193,7 @@ struct hh_cache
                                          */
        int             hh_len;         /* length of header */
        int             (*hh_output)(struct sk_buff *skb);
-       rwlock_t        hh_lock;
+       seqlock_t       hh_lock;
 
        /* cached hardware header; allow for machine alignment needs.        */
 #define HH_DATA_MOD    16
@@ -217,6 +217,7 @@ struct hh_cache
 #define LL_RESERVED_SPACE_EXTRA(dev,extra) \
        ((((dev)->hard_header_len+extra)&~(HH_DATA_MOD - 1)) + HH_DATA_MOD)
 
+
 /* These flag bits are private to the generic network queueing
  * layer, they may not be explicitly referenced by any other
  * code.
--- net-2.6.19.orig/net/core/neighbour.c
+++ net-2.6.19/net/core/neighbour.c
@@ -591,9 +591,11 @@ void neigh_destroy(struct neighbour *nei
        while ((hh = neigh->hh) != NULL) {
                neigh->hh = hh->hh_next;
                hh->hh_next = NULL;
-               write_lock_bh(&hh->hh_lock);
+
+               write_seqlock_bh(&hh->hh_lock);
                hh->hh_output = neigh_blackhole;
-               write_unlock_bh(&hh->hh_lock);
+               write_sequnlock_bh(&hh->hh_lock);
+
                if (atomic_dec_and_test(&hh->hh_refcnt))
                        kfree(hh);
        }
@@ -912,9 +914,9 @@ static void neigh_update_hhs(struct neig
 
        if (update) {
                for (hh = neigh->hh; hh; hh = hh->hh_next) {
-                       write_lock_bh(&hh->hh_lock);
+                       write_seqlock_bh(&hh->hh_lock);
                        update(hh, neigh->dev, neigh->ha);
-                       write_unlock_bh(&hh->hh_lock);
+                       write_sequnlock_bh(&hh->hh_lock);
                }
        }
 }
@@ -1105,7 +1107,7 @@ static void neigh_hh_init(struct neighbo
                        break;
 
        if (!hh && (hh = kzalloc(sizeof(*hh), GFP_ATOMIC)) != NULL) {
-               rwlock_init(&hh->hh_lock);
+               seqlock_init(&hh->hh_lock);
                hh->hh_type = protocol;
                atomic_set(&hh->hh_refcnt, 0);
                hh->hh_next = NULL;
@@ -1128,6 +1130,33 @@ static void neigh_hh_init(struct neighbo
        }
 }
 
+
+/*
+ * Prepend the cached hard header in @hh to @skb, then return hh->hh_output(skb).
+ * Lockless read under hh->hh_lock (a seqlock): redone if read_seqretry() says so.
+ */
+int neigh_hh_output(const struct hh_cache *hh, struct sk_buff *skb)
+{
+       int len, alen;
+       unsigned seq;
+       int (*output)(struct sk_buff *);
+
+       for(;;) {
+               seq = read_seqbegin(&hh->hh_lock);
+               len = hh->hh_len;
+               alen = HH_DATA_ALIGN(len);
+               output = hh->hh_output;
+               memcpy(skb->data - alen, hh->hh_data, alen);
+               skb_push(skb, len);
+
+               if (likely(!read_seqretry(&hh->hh_lock, seq)))
+                       return output(skb);
+
+               /* snapshot was not stable: undo the skb_push() above and try again */
+               __skb_pull(skb, len);
+       }
+}
+
 /* This function can be used in contexts, where only old dev_queue_xmit
    worked, f.e. if you want to override normal output path (eql, shaper),
    but resolution is not made yet.
@@ -2767,6 +2796,7 @@ EXPORT_SYMBOL(neigh_delete);
 EXPORT_SYMBOL(neigh_destroy);
 EXPORT_SYMBOL(neigh_dump_info);
 EXPORT_SYMBOL(neigh_event_ns);
+EXPORT_SYMBOL(neigh_hh_output);
 EXPORT_SYMBOL(neigh_ifdown);
 EXPORT_SYMBOL(neigh_lookup);
 EXPORT_SYMBOL(neigh_lookup_nodev);
--- net-2.6.19.orig/net/ipv4/ip_output.c
+++ net-2.6.19/net/ipv4/ip_output.c
@@ -182,16 +182,9 @@ static inline int ip_finish_output2(stru
                skb = skb2;
        }
 
-       if (hh) {
-               int hh_alen;
-
-               read_lock_bh(&hh->hh_lock);
-               hh_alen = HH_DATA_ALIGN(hh->hh_len);
-               memcpy(skb->data - hh_alen, hh->hh_data, hh_alen);
-               read_unlock_bh(&hh->hh_lock);
-               skb_push(skb, hh->hh_len);
-               return hh->hh_output(skb);
-       } else if (dst->neighbour)
+       if (hh)
+               return neigh_hh_output(hh, skb);
+       else if (dst->neighbour)
                return dst->neighbour->output(skb);
 
        if (net_ratelimit())
--- net-2.6.19.orig/net/ipv6/ip6_output.c
+++ net-2.6.19/net/ipv6/ip6_output.c
@@ -76,16 +76,9 @@ static inline int ip6_output_finish(stru
        struct dst_entry *dst = skb->dst;
        struct hh_cache *hh = dst->hh;
 
-       if (hh) {
-               int hh_alen;
-
-               read_lock_bh(&hh->hh_lock);
-               hh_alen = HH_DATA_ALIGN(hh->hh_len);
-               memcpy(skb->data - hh_alen, hh->hh_data, hh_alen);
-               read_unlock_bh(&hh->hh_lock);
-               skb_push(skb, hh->hh_len);
-               return hh->hh_output(skb);
-       } else if (dst->neighbour)
+       if (hh)
+               return neigh_hh_output(hh, skb);
+       else if (dst->neighbour)
                return dst->neighbour->output(skb);
 
        IP6_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES);
--- net-2.6.19.orig/include/net/neighbour.h
+++ net-2.6.19/include/net/neighbour.h
@@ -193,6 +193,8 @@ extern struct neighbour *   neigh_create(s
                                             struct net_device *dev);
 extern void                    neigh_destroy(struct neighbour *neigh);
 extern int                     __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb);
+extern int                     neigh_hh_output(const struct hh_cache *hh, struct sk_buff *skb);
+
 extern int                     neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new, 
                                              u32 flags);
 extern void                    neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev);

--
Stephen Hemminger <[EMAIL PROTECTED]>


-
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to