From: Shirley Ma mashi...@us.ibm.com
virtio_net receives packets from its pre-allocated vring buffers, then it
delivers these packets to upper layer protocols as skb buffs. So it's not
necessary to pre-allocate skb for each mergable buffer, then frees extra
skbs when buffers are merged into a large packet. This patch has deferred
skb allocation in receiving packets for both big packets and mergeable buffers
to reduce skb pre-allocations and skb frees. It frees unused buffers by calling
detach_unused_buf in vring, so recv skb queue is not needed.
Signed-off-by: Shirley Ma x...@us.ibm.com
Signed-off-by: Rusty Russell ru...@rustcorp.com.au
---
drivers/net/virtio_net.c | 427 +++
1 file changed, 248 insertions(+), 179 deletions(-)
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index c708ecc..72b3f21 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -56,8 +56,7 @@ struct virtnet_info
/* Host will merge rx buffers for big packets (shake it! shake it!) */
bool mergeable_rx_bufs;
- /* Receive send queues. */
- struct sk_buff_head recv;
+ /* Send queue. */
struct sk_buff_head send;
/* Work struct for refilling if we run low on memory. */
@@ -75,34 +74,44 @@ struct skb_vnet_hdr {
unsigned int num_sg;
};
+struct padded_vnet_hdr {
+ struct virtio_net_hdr hdr;
+ /*
+* virtio_net_hdr should be in a separated sg buffer because of a
+* QEMU bug, and data sg buffer shares same page with this header sg.
+* This padding makes next sg 16 byte aligned after virtio_net_hdr.
+*/
+ char padding[6];
+};
+
static inline struct skb_vnet_hdr *skb_vnet_hdr(struct sk_buff *skb)
{
return (struct skb_vnet_hdr *)skb->cb;
}
-static void give_a_page(struct virtnet_info *vi, struct page *page)
-{
- page->private = (unsigned long)vi->pages;
- vi->pages = page;
-}
-
-static void trim_pages(struct virtnet_info *vi, struct sk_buff *skb)
+/*
+ * private is used to chain pages for big packets, put the whole
+ * most recent used list in the beginning for reuse
+ */
+static void give_pages(struct virtnet_info *vi, struct page *page)
{
- unsigned int i;
+ struct page *end;
- for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
- give_a_page(vi, skb_shinfo(skb)->frags[i].page);
- skb_shinfo(skb)->nr_frags = 0;
- skb->data_len = 0;
+ /* Find end of list, sew whole thing into vi->pages. */
+ for (end = page; end->private; end = (struct page *)end->private);
+ end->private = (unsigned long)vi->pages;
+ vi->pages = page;
}
static struct page *get_a_page(struct virtnet_info *vi, gfp_t gfp_mask)
{
struct page *p = vi->pages;
- if (p)
+ if (p) {
vi->pages = (struct page *)p->private;
- else
+ /* clear private here, it is used to chain pages */
+ p->private = 0;
+ } else
p = alloc_page(gfp_mask);
return p;
}
@@ -118,99 +127,142 @@ static void skb_xmit_done(struct virtqueue *svq)
netif_wake_queue(vi->dev);
}
-static void receive_skb(struct net_device *dev, struct sk_buff *skb,
- unsigned len)
+static void set_skb_frag(struct sk_buff *skb, struct page *page,
+unsigned int offset, unsigned int *len)
{
- struct virtnet_info *vi = netdev_priv(dev);
- struct skb_vnet_hdr *hdr = skb_vnet_hdr(skb);
- int err;
- int i;
-
- if (unlikely(len < sizeof(struct virtio_net_hdr) + ETH_HLEN)) {
- pr_debug("%s: short packet %i\n", dev->name, len);
- dev->stats.rx_length_errors++;
- goto drop;
- }
+ int i = skb_shinfo(skb)->nr_frags;
+ skb_frag_t *f;
+
+ f = &skb_shinfo(skb)->frags[i];
+ f->size = min((unsigned)PAGE_SIZE - offset, *len);
+ f->page_offset = offset;
+ f->page = page;
+
+ skb->data_len += f->size;
+ skb->len += f->size;
+ skb_shinfo(skb)->nr_frags++;
+ *len -= f->size;
+}
- if (vi->mergeable_rx_bufs) {
- unsigned int copy;
- char *p = page_address(skb_shinfo(skb)->frags[0].page);
+static struct sk_buff *page_to_skb(struct virtnet_info *vi,
+ struct page *page, unsigned int len)
+{
+ struct sk_buff *skb;
+ struct skb_vnet_hdr *hdr;
+ unsigned int copy, hdr_len, offset;
+ char *p;
- if (len > PAGE_SIZE)
- len = PAGE_SIZE;
- len -= sizeof(struct virtio_net_hdr_mrg_rxbuf);
+ p = page_address(page);
- memcpy(&hdr->mhdr, p, sizeof(hdr->mhdr));
- p += sizeof(hdr->mhdr);
+ /* copy small packet so we can reuse these pages for small data */
+ skb = netdev_alloc_skb_ip_align(vi->dev, GOOD_COPY_LEN);
+ if (unlikely(!skb))
+