It allows a newly allocated skb to reuse the gref taken from the
pending_ring, which means xennet will grant the pages once and release
them only when freeing the device. It changes how netfront handles new
skbs so that it can reuse the allocated pages, similarly to what netback
already does on its TX path.
alloc_rx_buffers() will consume pages from the pending_ring to
allocate new skbs. When responses are handled we will move the grants
from the grant_rx to the pending_grants. The latter is a shadow ring
that keeps all grants belonging to inflight skbs. Finally, we chain
the ubuf_info of all skbs together before passing the packet up to the
network stack. We make use of SKBTX_DEV_ZEROCOPY to get notified
once the skb is freed, so that its pages can be reused. In the destructor
callback we then add the grant to the pending_ring.
The only catch with this approach is that when we orphan frags, there
will be a memcpy in skb_copy_ubufs() (if the number of frags is greater
than 0). Depending on the CPU and the number of queues, this leads to a
performance drop of 7-11%. For this reason, SKBTX_DEV_ZEROCOPY skbs will
only be used with persistent grants.
Signed-off-by: Joao Martins
---
drivers/net/xen-netfront.c | 212 ++---
1 file changed, 202 insertions(+), 10 deletions(-)
diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c
index ae0a13b..7067bbb 100644
--- a/drivers/net/xen-netfront.c
+++ b/drivers/net/xen-netfront.c
@@ -67,6 +67,7 @@ static const struct ethtool_ops xennet_ethtool_ops;
struct netfront_cb {
int pull_to;
+ u16 pending_idx; /* index of this skb's grant in queue->pending_grants */
};
#define NETFRONT_SKB_CB(skb) ((struct netfront_cb *)((skb)->cb))
@@ -87,9 +88,13 @@ struct netfront_cb {
/* IRQ name is queue name with "-tx" or "-rx" appended */
#define IRQ_NAME_SIZE (QUEUE_NAME_SIZE + 3)
+#define callback_param(queue, id) /* ubuf_info of pending grant @id */ \
+	((queue)->pending_grants[(id)].callback_struct)
+
struct grant {
grant_ref_t ref;
struct page *page;
+ struct ubuf_info callback_struct; /* reached through callback_param() */
};
struct netfront_stats {
@@ -146,6 +151,21 @@ struct netfront_queue {
struct sk_buff *rx_skbs[NET_RX_RING_SIZE];
grant_ref_t gref_rx_head;
struct grant grant_rx[NET_RX_RING_SIZE];
+
+ /* Store the grants inflight or freed.
+* Only used when persistent grants are enabled
+*/
+ struct grant pending_grants[NET_RX_RING_SIZE];
+ /* Ring containing the indexes of the free grants */
+ u16 pending_ring[NET_RX_RING_SIZE];
+ unsigned pending_cons;
+ unsigned pending_prod;
+ /* Used to represent how many grants are still inflight */
+ unsigned pending_event;
+
+ /* Serializes zerocopy callbacks racing to update pending_ring */
+ spinlock_t callback_lock;
+ atomic_t inflight_packets;
};
struct netfront_info {
@@ -296,6 +316,50 @@ static void release_grant(grant_ref_t ref,
gnttab_release_grant_reference(gref_head, ref);
}
+/* Look up the grant whose index sits in the pending_ring slot for @ri. */
+static struct grant *xennet_get_pending_gnt(struct netfront_queue *queue,
+					    unsigned ri)
+{
+	u16 gnt_id = queue->pending_ring[xennet_rxidx(ri)];
+
+	return &queue->pending_grants[gnt_id];
+}
+
+static void xennet_set_pending_gnt(struct netfront_queue *queue,
+				   grant_ref_t ref, struct sk_buff *skb)
+{
+	int slot = xennet_rxidx(queue->pending_event++); /* claim next slot */
+	struct grant *entry = &queue->pending_grants[slot];
+
+	entry->page = skb_frag_page(&skb_shinfo(skb)->frags[0]);
+	entry->ref = ref;	/* grant reference paired with that page */
+	NETFRONT_SKB_CB(skb)->pending_idx = entry->callback_struct.desc;
+}
+
+static bool pending_grant_available(struct netfront_queue *queue)
+{
+	return queue->pending_prod != queue->pending_cons; /* any unconsumed? */
+}
+
+/* Get an RX page: freshly allocated, or recycled when grants persist. */
+static struct page *xennet_alloc_page(struct netfront_queue *queue,
+				      struct netfront_cb *cb)
+{
+	struct grant *recycled;
+	struct page *pg;
+
+	if (!queue->info->feature_persistent)
+		return alloc_page(GFP_ATOMIC | __GFP_NOWARN);
+	if (unlikely(!pending_grant_available(queue)))
+		return NULL;	/* nothing left to recycle */
+
+	recycled = xennet_get_pending_gnt(queue, queue->pending_cons++);
+	pg = recycled->page;
+	recycled->page = NULL;	/* the slot gives up its page */
+	cb->pending_idx = recycled - queue->pending_grants;
+	return pg;
+}
+
static struct sk_buff *xennet_alloc_one_rx_buffer(struct netfront_queue *queue)
{
struct sk_buff *skb;
@@ -307,7 +371,7 @@ static struct sk_buff *xennet_alloc_one_rx_buffer(struct
netfront_queue *queue)
if (unlikely(!skb))
return NULL;
- page = alloc_page(GFP_ATOMIC | __GFP_NOWARN);
+ page = xennet_alloc_page(queue, NETFRONT_SKB_CB(skb));
if (!page) {
kfree_skb(skb);
return NULL;
@@ -317,6 +381,7 @@ static struct sk_buff *xennet_alloc_one_rx_buffer(struct
netfront_queue *queue)
/* Align ip he