This patch reorders the code to delay the virtio header write, improving cache access efficiency when the mrg_rxbuf feature is turned on. It significantly reduces CPU pipeline stall cycles.
--- Changes in v3: 1. Remove unnecessary memset which causes frontend stall on SNB & IVB. 2. Rename variables to follow naming convention. Signed-off-by: Zhihong Wang <zhihong.wang at intel.com> --- lib/librte_vhost/vhost_rxtx.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/lib/librte_vhost/vhost_rxtx.c b/lib/librte_vhost/vhost_rxtx.c index c4abaf1..e3ba4e0 100644 --- a/lib/librte_vhost/vhost_rxtx.c +++ b/lib/librte_vhost/vhost_rxtx.c @@ -154,6 +154,7 @@ enqueue_packet(struct virtio_net *dev, struct vhost_virtqueue *vq, uint32_t mbuf_len = 0; uint32_t mbuf_avail = 0; uint32_t copy_len = 0; + uint32_t copy_virtio_hdr = 0; uint32_t extra_buffers = 0; /* start with the first mbuf of the packet */ @@ -168,15 +169,16 @@ enqueue_packet(struct virtio_net *dev, struct vhost_virtqueue *vq, if (unlikely(!desc_addr)) goto error; - /* handle virtio header */ + /* + * handle virtio header, the actual write operation + * is delayed for cache optimization. + */ virtio_hdr = (struct virtio_net_hdr_mrg_rxbuf *)(uintptr_t)desc_addr; - virtio_enqueue_offload(mbuf, &(virtio_hdr->hdr)); + copy_virtio_hdr = 1; vhost_log_write(dev, desc->addr, dev->vhost_hlen); desc_offset = dev->vhost_hlen; desc_chain_len = desc_offset; desc_addr += desc_offset; - if (is_mrg_rxbuf) - virtio_hdr->num_buffers = 1; /* start copy from mbuf to desc */ while (1) { @@ -228,8 +230,15 @@ enqueue_packet(struct virtio_net *dev, struct vhost_virtqueue *vq, goto rollback; } - /* copy mbuf data */ + /* copy virtio header and mbuf data */ copy_len = RTE_MIN(desc->len - desc_offset, mbuf_avail); + if (copy_virtio_hdr) { + copy_virtio_hdr = 0; + virtio_enqueue_offload(mbuf, &(virtio_hdr->hdr)); + if (is_mrg_rxbuf) + virtio_hdr->num_buffers = extra_buffers + 1; + } + rte_memcpy((void *)(uintptr_t)desc_addr, rte_pktmbuf_mtod_offset(mbuf, void *, mbuf_len - mbuf_avail), -- 2.7.4