This patch reorders the code to delay the virtio header write, which
improves cache access efficiency when the mrg_rxbuf feature is turned on.
It significantly reduces CPU pipeline stall cycles.


Signed-off-by: Zhihong Wang <zhihong.wang at intel.com>
---
 lib/librte_vhost/vhost_rxtx.c | 23 ++++++++++++++++-------
 1 file changed, 16 insertions(+), 7 deletions(-)

diff --git a/lib/librte_vhost/vhost_rxtx.c b/lib/librte_vhost/vhost_rxtx.c
index 60d63d3..15f7f9c 100644
--- a/lib/librte_vhost/vhost_rxtx.c
+++ b/lib/librte_vhost/vhost_rxtx.c
@@ -154,6 +154,7 @@ enqueue_packet(struct virtio_net *dev, struct vhost_virtqueue *vq,
        uint32_t mbuf_len = 0;
        uint32_t mbuf_len_left = 0;
        uint32_t copy_len = 0;
+       uint32_t copy_virtio_hdr = 0;
        uint32_t extra_buffers = 0;

        /* start with the first mbuf of the packet */
@@ -168,18 +169,17 @@ enqueue_packet(struct virtio_net *dev, struct vhost_virtqueue *vq,
        if (unlikely(!desc_host_write_addr))
                goto error;

-       /* handle virtio header */
+       /*
+        * handle virtio header, the actual write operation
+        * is delayed for cache optimization.
+        */
        virtio_hdr = (struct virtio_net_hdr_mrg_rxbuf *)
                (uintptr_t)desc_host_write_addr;
-       memset((void *)(uintptr_t)&(virtio_hdr->hdr),
-                       0, dev->vhost_hlen);
-       virtio_enqueue_offload(mbuf, &(virtio_hdr->hdr));
+       copy_virtio_hdr = 1;
        vhost_log_write(dev, desc->addr, dev->vhost_hlen);
        desc_write_offset = dev->vhost_hlen;
        desc_chain_len = desc_write_offset;
        desc_host_write_addr += desc_write_offset;
-       if (is_mrg_rxbuf)
-               virtio_hdr->num_buffers = 1;

        /* start copy from mbuf to desc */
        while (1) {
@@ -233,9 +233,18 @@ enqueue_packet(struct virtio_net *dev, struct vhost_virtqueue *vq,
                                goto rollback;
                }

-               /* copy mbuf data */
+               /* copy virtio header and mbuf data */
                copy_len = RTE_MIN(desc->len - desc_write_offset,
                                mbuf_len_left);
+               if (copy_virtio_hdr) {
+                       copy_virtio_hdr = 0;
+                       memset((void *)(uintptr_t)&(virtio_hdr->hdr),
+                                       0, dev->vhost_hlen);
+                       virtio_enqueue_offload(mbuf, &(virtio_hdr->hdr));
+                       if (is_mrg_rxbuf)
+                               virtio_hdr->num_buffers = extra_buffers + 1;
+               }
+
                rte_memcpy((void *)(uintptr_t)desc_host_write_addr,
                                rte_pktmbuf_mtod_offset(mbuf, void *,
                                        mbuf_len - mbuf_len_left),
-- 
2.7.4

Reply via email to