In __alloc_skb(), the use of skb_shinfo() which casts a u8 * to the shared info structure results in gcc being forced to do a reload of the pointer since it has no information on possible aliasing. Fix this by using a pointer to refer to skb_shared_info.
By initializing skb_shared_info sequentially, the write combining buffers can reduce the number of memory transactions to a single write. Reorder the initialization in __alloc_skb() to match the structure definition. There is also an alignment issue on 64 bit systems with skb_shared_info by converting nr_frags to a short everything packs up nicely. Also, pass the slab cache pointer according to the fclone flag instead of using two almost identical function calls. This raises bw_unix performance up to a peak of 707KB/s when combined with the spinlock patch. It should help other networking protocols, too. -ben diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 97f6580..d94a02e 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -134,7 +134,7 @@ struct skb_frag_struct { */ struct skb_shared_info { atomic_t dataref; - unsigned int nr_frags; + unsigned short nr_frags; unsigned short tso_size; unsigned short tso_segs; unsigned short ufo_size; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 83fee37..25c4728 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -135,17 +135,13 @@ void skb_under_panic(struct sk_buff *skb struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask, int fclone) { + struct skb_shared_info *shinfo; struct sk_buff *skb; u8 *data; /* Get the HEAD */ - if (fclone) - skb = kmem_cache_alloc(skbuff_fclone_cache, - gfp_mask & ~__GFP_DMA); - else - skb = kmem_cache_alloc(skbuff_head_cache, - gfp_mask & ~__GFP_DMA); - + skb = kmem_cache_alloc(fclone ? skbuff_fclone_cache : skbuff_head_cache, + gfp_mask & ~__GFP_DMA); if (!skb) goto out; @@ -162,6 +158,16 @@ struct sk_buff *__alloc_skb(unsigned int skb->data = data; skb->tail = data; skb->end = data + size; + /* make sure we initialize shinfo sequentially */ + shinfo = skb_shinfo(skb); + atomic_set(&shinfo->dataref, 1); + shinfo->nr_frags = 0; + shinfo->tso_size = 0; + shinfo->tso_segs = 0; + shinfo->ufo_size = 0; + shinfo->ip6_frag_id = 0; + shinfo->frag_list = NULL; + if (fclone) { struct sk_buff *child = skb + 1; atomic_t *fclone_ref = (atomic_t *) (child + 1); @@ -171,13 +177,6 @@ struct sk_buff *__alloc_skb(unsigned int child->fclone = SKB_FCLONE_UNAVAILABLE; } - atomic_set(&(skb_shinfo(skb)->dataref), 1); - skb_shinfo(skb)->nr_frags = 0; - skb_shinfo(skb)->tso_size = 0; - skb_shinfo(skb)->tso_segs = 0; - skb_shinfo(skb)->frag_list = NULL; - skb_shinfo(skb)->ufo_size = 0; - skb_shinfo(skb)->ip6_frag_id = 0; out: return skb; nodata: -- "You know, I've seen some crystals do some pretty trippy shit, man." Don't Email: <[EMAIL PROTECTED]>. - To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html