[dpdk-dev] [PATCH v4 2/6] vhost: rewrite enqueue

2016-09-07 Thread Yuanhan Liu
Hmmm, yet another email didn't send out successfully. Resend.

BTW, please work out v5 on top of the latest next-virtio tree.

Thanks.

--yliu

On Mon, Sep 05, 2016 at 02:39:25PM +0800, Yuanhan Liu wrote:

On Mon, Aug 29, 2016 at 11:36:00PM -0400, Zhihong Wang wrote:
> This patch implements the vhost logic from scratch into a single function
> designed for high performance and better maintainability.
> 
> This is the baseline version of the new code, more optimization will be
> added in the following patches in this patch set.
> 
> ---
> Changes in v4:
> 
>  1. Refactor the code for clearer logic.
> 
>  2. Add PRINT_PACKET for debugging.
> 
> ---
> Changes in v3:
> 
>  1. Rewrite enqueue and delete the obsolete in the same patch.

Change log should go > 

> Signed-off-by: Zhihong Wang 
> ---

... here, after the SoB.

>  lib/librte_vhost/vhost_rxtx.c | 525 
> --
>  1 file changed, 145 insertions(+), 380 deletions(-)
> 
> diff --git a/lib/librte_vhost/vhost_rxtx.c b/lib/librte_vhost/vhost_rxtx.c
> index 5806f99..629e8ae 100644
> --- a/lib/librte_vhost/vhost_rxtx.c
> +++ b/lib/librte_vhost/vhost_rxtx.c
> @@ -91,7 +91,7 @@ is_valid_virt_queue_idx(uint32_t idx, int is_tx, uint32_t 
> qp_nb)
>   return (is_tx ^ (idx & 1)) == 0 && idx < qp_nb * VIRTIO_QNUM;
>  }
>  
> -static void
> +static inline void __attribute__((always_inline))
>  virtio_enqueue_offload(struct rte_mbuf *m_buf, struct virtio_net_hdr 
> *net_hdr)
>  {
>   if (m_buf->ol_flags & PKT_TX_L4_MASK) {
> @@ -112,6 +112,10 @@ virtio_enqueue_offload(struct rte_mbuf *m_buf, struct 
> virtio_net_hdr *net_hdr)
>   cksum));
>   break;
>   }
> + } else {
> + net_hdr->flags = 0;
> + net_hdr->csum_start = 0;
> + net_hdr->csum_offset = 0;
>   }
>  
>   if (m_buf->ol_flags & PKT_TX_TCP_SEG) {
> @@ -122,437 +126,198 @@ virtio_enqueue_offload(struct rte_mbuf *m_buf, struct 
> virtio_net_hdr *net_hdr)
>   net_hdr->gso_size = m_buf->tso_segsz;
>   net_hdr->hdr_len = m_buf->l2_len + m_buf->l3_len
>   + m_buf->l4_len;
> + } else {
> + net_hdr->gso_type = 0;
> + net_hdr->hdr_len = 0;
> + net_hdr->gso_size = 0;
>   }
>  }
>  
> -static inline void
> -copy_virtio_net_hdr(struct virtio_net *dev, uint64_t desc_addr,
> - struct virtio_net_hdr_mrg_rxbuf hdr)
> +static inline void __attribute__((always_inline))
> +update_used_ring(struct virtio_net *dev, struct vhost_virtqueue *vq,
> + uint32_t desc_chain_head, uint32_t desc_chain_len)
>  {
> - if (dev->vhost_hlen == sizeof(struct virtio_net_hdr_mrg_rxbuf))
> - *(struct virtio_net_hdr_mrg_rxbuf *)(uintptr_t)desc_addr = hdr;
> - else
> - *(struct virtio_net_hdr *)(uintptr_t)desc_addr = hdr.hdr;
> + uint32_t used_idx_round = vq->last_used_idx & (vq->size - 1);

I'd suggest to use "used_idx", instead of "used_idx_round".

> +
> + vq->used->ring[used_idx_round].id = desc_chain_head;
> + vq->used->ring[used_idx_round].len = desc_chain_len;
> + vhost_log_used_vring(dev, vq, offsetof(struct vring_used,
> + ring[used_idx_round]),
> + sizeof(vq->used->ring[used_idx_round]));
>  }
>  
> -static inline int __attribute__((always_inline))
> -copy_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
> -   struct rte_mbuf *m, uint16_t desc_idx)
> +static inline uint32_t __attribute__((always_inline))
> +enqueue_packet(struct virtio_net *dev, struct vhost_virtqueue *vq,
> + uint16_t avail_idx, struct rte_mbuf *mbuf,
> + uint32_t is_mrg_rxbuf)
>  {
> - uint32_t desc_avail, desc_offset;
> - uint32_t mbuf_avail, mbuf_offset;
> - uint32_t cpy_len;
> + struct virtio_net_hdr_mrg_rxbuf *virtio_hdr;
>   struct vring_desc *desc;
>   uint64_t desc_addr;
> - struct virtio_net_hdr_mrg_rxbuf virtio_hdr = {{0, 0, 0, 0, 0, 0}, 0};
> -
> - desc = >desc[desc_idx];
> + uint32_t desc_chain_head;
> + uint32_t desc_chain_len;
> + uint32_t desc_current;
> + uint32_t desc_offset;
> + uint32_t mbuf_len;
> + uint32_t mbuf_avail;
> + uint32_t copy_len;
> + uint32_t extra_buffers = 0;

I'd name it to "num_buffers", to keep consistent with the virito hdr
naming style.

> +
> + /* start with the first mbuf of the packet */
> + mbuf_len = rte_pktmbuf_data_len(mbuf);
> + mbuf_avail = mbuf_len;
> +
> + /* get the current desc */
> + desc_current = vq->avail->ring[(vq->last_used_idx) & (vq->size - 1)];
> + desc_chain_head = desc_current;
> + desc = >desc[desc_current];
>   desc_addr = gpa_to_vva(dev, desc->addr);
> - /*
> -  * Checking of 'desc_addr' placed outside of 'unlikely' macro to avoid
> -  * 

[dpdk-dev] [PATCH v4 2/6] vhost: rewrite enqueue

2016-09-07 Thread Wang, Zhihong


> -Original Message-
> From: Yuanhan Liu [mailto:yuanhan.liu at linux.intel.com]
> Sent: Wednesday, September 7, 2016 1:33 PM
> To: Wang, Zhihong 
> Cc: dev at dpdk.org; maxime.coquelin at redhat.com;
> thomas.monjalon at 6wind.com
> Subject: Re: [PATCH v4 2/6] vhost: rewrite enqueue
> 
> Hmmm, yet another email didn't send out successfully. Resend.
> 
> BTW, please work out v5 on top of the latest next-virtio tree.
> 
> Thanks.

Okay. Thanks.

> 
>   --yliu
> 
> On Mon, Sep 05, 2016 at 02:39:25PM +0800, Yuanhan Liu wrote:
> 
> On Mon, Aug 29, 2016 at 11:36:00PM -0400, Zhihong Wang wrote:
> > This patch implements the vhost logic from scratch into a single function
> > designed for high performance and better maintainability.
> >
> > This is the baseline version of the new code, more optimization will be
> > added in the following patches in this patch set.
> >
> > ---
> > Changes in v4:
> >
> >  1. Refactor the code for clearer logic.
> >
> >  2. Add PRINT_PACKET for debugging.
> >
> > ---
> > Changes in v3:
> >
> >  1. Rewrite enqueue and delete the obsolete in the same patch.
> 
> Change log should go >
> 
> > Signed-off-by: Zhihong Wang 
> > ---
> 
> ... here, after the SoB.
> 
> >  lib/librte_vhost/vhost_rxtx.c | 525 
> > -
> -
> >  1 file changed, 145 insertions(+), 380 deletions(-)
> >
> > diff --git a/lib/librte_vhost/vhost_rxtx.c b/lib/librte_vhost/vhost_rxtx.c
> > index 5806f99..629e8ae 100644
> > --- a/lib/librte_vhost/vhost_rxtx.c
> > +++ b/lib/librte_vhost/vhost_rxtx.c
> > @@ -91,7 +91,7 @@ is_valid_virt_queue_idx(uint32_t idx, int is_tx,
> uint32_t qp_nb)
> > return (is_tx ^ (idx & 1)) == 0 && idx < qp_nb * VIRTIO_QNUM;
> >  }
> >
> > -static void
> > +static inline void __attribute__((always_inline))
> >  virtio_enqueue_offload(struct rte_mbuf *m_buf, struct virtio_net_hdr
> *net_hdr)
> >  {
> > if (m_buf->ol_flags & PKT_TX_L4_MASK) {
> > @@ -112,6 +112,10 @@ virtio_enqueue_offload(struct rte_mbuf *m_buf,
> struct virtio_net_hdr *net_hdr)
> > cksum));
> > break;
> > }
> > +   } else {
> > +   net_hdr->flags = 0;
> > +   net_hdr->csum_start = 0;
> > +   net_hdr->csum_offset = 0;
> > }
> >
> > if (m_buf->ol_flags & PKT_TX_TCP_SEG) {
> > @@ -122,437 +126,198 @@ virtio_enqueue_offload(struct rte_mbuf
> *m_buf, struct virtio_net_hdr *net_hdr)
> > net_hdr->gso_size = m_buf->tso_segsz;
> > net_hdr->hdr_len = m_buf->l2_len + m_buf->l3_len
> > + m_buf->l4_len;
> > +   } else {
> > +   net_hdr->gso_type = 0;
> > +   net_hdr->hdr_len = 0;
> > +   net_hdr->gso_size = 0;
> > }
> >  }
> >
> > -static inline void
> > -copy_virtio_net_hdr(struct virtio_net *dev, uint64_t desc_addr,
> > -   struct virtio_net_hdr_mrg_rxbuf hdr)
> > +static inline void __attribute__((always_inline))
> > +update_used_ring(struct virtio_net *dev, struct vhost_virtqueue *vq,
> > +   uint32_t desc_chain_head, uint32_t desc_chain_len)
> >  {
> > -   if (dev->vhost_hlen == sizeof(struct virtio_net_hdr_mrg_rxbuf))
> > -   *(struct virtio_net_hdr_mrg_rxbuf *)(uintptr_t)desc_addr =
> hdr;
> > -   else
> > -   *(struct virtio_net_hdr *)(uintptr_t)desc_addr = hdr.hdr;
> > +   uint32_t used_idx_round = vq->last_used_idx & (vq->size - 1);
> 
> I'd suggest to use "used_idx", instead of "used_idx_round".
> 
> > +
> > +   vq->used->ring[used_idx_round].id = desc_chain_head;
> > +   vq->used->ring[used_idx_round].len = desc_chain_len;
> > +   vhost_log_used_vring(dev, vq, offsetof(struct vring_used,
> > +   ring[used_idx_round]),
> > +   sizeof(vq->used->ring[used_idx_round]));
> >  }
> >
> > -static inline int __attribute__((always_inline))
> > -copy_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
> > - struct rte_mbuf *m, uint16_t desc_idx)
> > +static inline uint32_t __attribute__((always_inline))
> > +enqueue_packet(struct virtio_net *dev, struct vhost_virtqueue *vq,
> > +   uint16_t avail_idx, struct rte_mbuf *mbuf,
> > +   uint32_t is_mrg_rxbuf)
> >  {
> > -   uint32_t desc_avail, desc_offset;
> > -   uint32_t mbuf_avail, mbuf_offset;
> > -   uint32_t cpy_len;
> > +   struct virtio_net_hdr_mrg_rxbuf *virtio_hdr;
> > struct vring_desc *desc;
> > uint64_t desc_addr;
> > -   struct virtio_net_hdr_mrg_rxbuf virtio_hdr = {{0, 0, 0, 0, 0, 0}, 0};
> > -
> > -   desc = >desc[desc_idx];
> > +   uint32_t desc_chain_head;
> > +   uint32_t desc_chain_len;
> > +   uint32_t desc_current;
> > +   uint32_t desc_offset;
> > +   uint32_t mbuf_len;
> > +   uint32_t mbuf_avail;
> > +   uint32_t copy_len;
> > +   uint32_t extra_buffers = 0;
> 
> I'd name it to "num_buffers", to keep consistent with the virito hdr
> naming style.
> 
> > +
> > +   /* start with the first 

[dpdk-dev] [PATCH v4 2/6] vhost: rewrite enqueue

2016-09-05 Thread Yuanhan Liu
On Mon, Aug 29, 2016 at 11:36:00PM -0400, Zhihong Wang wrote:
> This patch implements the vhost logic from scratch into a single function
> designed for high performance and better maintainability.
> 
> This is the baseline version of the new code, more optimization will be
> added in the following patches in this patch set.
> 
> ---
> Changes in v4:
> 
>  1. Refactor the code for clearer logic.
> 
>  2. Add PRINT_PACKET for debugging.
> 
> ---
> Changes in v3:
> 
>  1. Rewrite enqueue and delete the obsolete in the same patch.

Change log should go > 

> Signed-off-by: Zhihong Wang 
> ---

... here, after the SoB.

>  lib/librte_vhost/vhost_rxtx.c | 525 
> --
>  1 file changed, 145 insertions(+), 380 deletions(-)
> 
> diff --git a/lib/librte_vhost/vhost_rxtx.c b/lib/librte_vhost/vhost_rxtx.c
> index 5806f99..629e8ae 100644
> --- a/lib/librte_vhost/vhost_rxtx.c
> +++ b/lib/librte_vhost/vhost_rxtx.c
> @@ -91,7 +91,7 @@ is_valid_virt_queue_idx(uint32_t idx, int is_tx, uint32_t 
> qp_nb)
>   return (is_tx ^ (idx & 1)) == 0 && idx < qp_nb * VIRTIO_QNUM;
>  }
>  
> -static void
> +static inline void __attribute__((always_inline))
>  virtio_enqueue_offload(struct rte_mbuf *m_buf, struct virtio_net_hdr 
> *net_hdr)
>  {
>   if (m_buf->ol_flags & PKT_TX_L4_MASK) {
> @@ -112,6 +112,10 @@ virtio_enqueue_offload(struct rte_mbuf *m_buf, struct 
> virtio_net_hdr *net_hdr)
>   cksum));
>   break;
>   }
> + } else {
> + net_hdr->flags = 0;
> + net_hdr->csum_start = 0;
> + net_hdr->csum_offset = 0;
>   }
>  
>   if (m_buf->ol_flags & PKT_TX_TCP_SEG) {
> @@ -122,437 +126,198 @@ virtio_enqueue_offload(struct rte_mbuf *m_buf, struct 
> virtio_net_hdr *net_hdr)
>   net_hdr->gso_size = m_buf->tso_segsz;
>   net_hdr->hdr_len = m_buf->l2_len + m_buf->l3_len
>   + m_buf->l4_len;
> + } else {
> + net_hdr->gso_type = 0;
> + net_hdr->hdr_len = 0;
> + net_hdr->gso_size = 0;
>   }
>  }
>  
> -static inline void
> -copy_virtio_net_hdr(struct virtio_net *dev, uint64_t desc_addr,
> - struct virtio_net_hdr_mrg_rxbuf hdr)
> +static inline void __attribute__((always_inline))
> +update_used_ring(struct virtio_net *dev, struct vhost_virtqueue *vq,
> + uint32_t desc_chain_head, uint32_t desc_chain_len)
>  {
> - if (dev->vhost_hlen == sizeof(struct virtio_net_hdr_mrg_rxbuf))
> - *(struct virtio_net_hdr_mrg_rxbuf *)(uintptr_t)desc_addr = hdr;
> - else
> - *(struct virtio_net_hdr *)(uintptr_t)desc_addr = hdr.hdr;
> + uint32_t used_idx_round = vq->last_used_idx & (vq->size - 1);

I'd suggest to use "used_idx", instead of "used_idx_round".

> +
> + vq->used->ring[used_idx_round].id = desc_chain_head;
> + vq->used->ring[used_idx_round].len = desc_chain_len;
> + vhost_log_used_vring(dev, vq, offsetof(struct vring_used,
> + ring[used_idx_round]),
> + sizeof(vq->used->ring[used_idx_round]));
>  }
>  
> -static inline int __attribute__((always_inline))
> -copy_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
> -   struct rte_mbuf *m, uint16_t desc_idx)
> +static inline uint32_t __attribute__((always_inline))
> +enqueue_packet(struct virtio_net *dev, struct vhost_virtqueue *vq,
> + uint16_t avail_idx, struct rte_mbuf *mbuf,
> + uint32_t is_mrg_rxbuf)
>  {
> - uint32_t desc_avail, desc_offset;
> - uint32_t mbuf_avail, mbuf_offset;
> - uint32_t cpy_len;
> + struct virtio_net_hdr_mrg_rxbuf *virtio_hdr;
>   struct vring_desc *desc;
>   uint64_t desc_addr;
> - struct virtio_net_hdr_mrg_rxbuf virtio_hdr = {{0, 0, 0, 0, 0, 0}, 0};
> -
> - desc = >desc[desc_idx];
> + uint32_t desc_chain_head;
> + uint32_t desc_chain_len;
> + uint32_t desc_current;
> + uint32_t desc_offset;
> + uint32_t mbuf_len;
> + uint32_t mbuf_avail;
> + uint32_t copy_len;
> + uint32_t extra_buffers = 0;

I'd name it to "num_buffers", to keep consistent with the virito hdr
naming style.

> +
> + /* start with the first mbuf of the packet */
> + mbuf_len = rte_pktmbuf_data_len(mbuf);
> + mbuf_avail = mbuf_len;
> +
> + /* get the current desc */
> + desc_current = vq->avail->ring[(vq->last_used_idx) & (vq->size - 1)];
> + desc_chain_head = desc_current;
> + desc = >desc[desc_current];
>   desc_addr = gpa_to_vva(dev, desc->addr);
> - /*
> -  * Checking of 'desc_addr' placed outside of 'unlikely' macro to avoid
> -  * performance issue with some versions of gcc (4.8.4 and 5.3.0) which
> -  * otherwise stores offset on the stack instead of in a register.
> -  */
> - if (unlikely(desc->len < dev->vhost_hlen) || !desc_addr)
> -   

[dpdk-dev] [PATCH v4 2/6] vhost: rewrite enqueue

2016-08-30 Thread Zhihong Wang
This patch implements the vhost logic from scratch into a single function
designed for high performance and better maintainability.

This is the baseline version of the new code, more optimization will be
added in the following patches in this patch set.

---
Changes in v4:

 1. Refactor the code for clearer logic.

 2. Add PRINT_PACKET for debugging.

---
Changes in v3:

 1. Rewrite enqueue and delete the obsolete in the same patch.

Signed-off-by: Zhihong Wang 
---
 lib/librte_vhost/vhost_rxtx.c | 525 --
 1 file changed, 145 insertions(+), 380 deletions(-)

diff --git a/lib/librte_vhost/vhost_rxtx.c b/lib/librte_vhost/vhost_rxtx.c
index 5806f99..629e8ae 100644
--- a/lib/librte_vhost/vhost_rxtx.c
+++ b/lib/librte_vhost/vhost_rxtx.c
@@ -91,7 +91,7 @@ is_valid_virt_queue_idx(uint32_t idx, int is_tx, uint32_t 
qp_nb)
return (is_tx ^ (idx & 1)) == 0 && idx < qp_nb * VIRTIO_QNUM;
 }

-static void
+static inline void __attribute__((always_inline))
 virtio_enqueue_offload(struct rte_mbuf *m_buf, struct virtio_net_hdr *net_hdr)
 {
if (m_buf->ol_flags & PKT_TX_L4_MASK) {
@@ -112,6 +112,10 @@ virtio_enqueue_offload(struct rte_mbuf *m_buf, struct 
virtio_net_hdr *net_hdr)
cksum));
break;
}
+   } else {
+   net_hdr->flags = 0;
+   net_hdr->csum_start = 0;
+   net_hdr->csum_offset = 0;
}

if (m_buf->ol_flags & PKT_TX_TCP_SEG) {
@@ -122,437 +126,198 @@ virtio_enqueue_offload(struct rte_mbuf *m_buf, struct 
virtio_net_hdr *net_hdr)
net_hdr->gso_size = m_buf->tso_segsz;
net_hdr->hdr_len = m_buf->l2_len + m_buf->l3_len
+ m_buf->l4_len;
+   } else {
+   net_hdr->gso_type = 0;
+   net_hdr->hdr_len = 0;
+   net_hdr->gso_size = 0;
}
 }

-static inline void
-copy_virtio_net_hdr(struct virtio_net *dev, uint64_t desc_addr,
-   struct virtio_net_hdr_mrg_rxbuf hdr)
+static inline void __attribute__((always_inline))
+update_used_ring(struct virtio_net *dev, struct vhost_virtqueue *vq,
+   uint32_t desc_chain_head, uint32_t desc_chain_len)
 {
-   if (dev->vhost_hlen == sizeof(struct virtio_net_hdr_mrg_rxbuf))
-   *(struct virtio_net_hdr_mrg_rxbuf *)(uintptr_t)desc_addr = hdr;
-   else
-   *(struct virtio_net_hdr *)(uintptr_t)desc_addr = hdr.hdr;
+   uint32_t used_idx_round = vq->last_used_idx & (vq->size - 1);
+
+   vq->used->ring[used_idx_round].id = desc_chain_head;
+   vq->used->ring[used_idx_round].len = desc_chain_len;
+   vhost_log_used_vring(dev, vq, offsetof(struct vring_used,
+   ring[used_idx_round]),
+   sizeof(vq->used->ring[used_idx_round]));
 }

-static inline int __attribute__((always_inline))
-copy_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
- struct rte_mbuf *m, uint16_t desc_idx)
+static inline uint32_t __attribute__((always_inline))
+enqueue_packet(struct virtio_net *dev, struct vhost_virtqueue *vq,
+   uint16_t avail_idx, struct rte_mbuf *mbuf,
+   uint32_t is_mrg_rxbuf)
 {
-   uint32_t desc_avail, desc_offset;
-   uint32_t mbuf_avail, mbuf_offset;
-   uint32_t cpy_len;
+   struct virtio_net_hdr_mrg_rxbuf *virtio_hdr;
struct vring_desc *desc;
uint64_t desc_addr;
-   struct virtio_net_hdr_mrg_rxbuf virtio_hdr = {{0, 0, 0, 0, 0, 0}, 0};
-
-   desc = >desc[desc_idx];
+   uint32_t desc_chain_head;
+   uint32_t desc_chain_len;
+   uint32_t desc_current;
+   uint32_t desc_offset;
+   uint32_t mbuf_len;
+   uint32_t mbuf_avail;
+   uint32_t copy_len;
+   uint32_t extra_buffers = 0;
+
+   /* start with the first mbuf of the packet */
+   mbuf_len = rte_pktmbuf_data_len(mbuf);
+   mbuf_avail = mbuf_len;
+
+   /* get the current desc */
+   desc_current = vq->avail->ring[(vq->last_used_idx) & (vq->size - 1)];
+   desc_chain_head = desc_current;
+   desc = >desc[desc_current];
desc_addr = gpa_to_vva(dev, desc->addr);
-   /*
-* Checking of 'desc_addr' placed outside of 'unlikely' macro to avoid
-* performance issue with some versions of gcc (4.8.4 and 5.3.0) which
-* otherwise stores offset on the stack instead of in a register.
-*/
-   if (unlikely(desc->len < dev->vhost_hlen) || !desc_addr)
-   return -1;
+   if (unlikely(!desc_addr))
+   goto error;

-   rte_prefetch0((void *)(uintptr_t)desc_addr);
+   /* handle virtio header */
+   virtio_hdr = (struct virtio_net_hdr_mrg_rxbuf *)(uintptr_t)desc_addr;
+   virtio_enqueue_offload(mbuf, &(virtio_hdr->hdr));
+   if (is_mrg_rxbuf)
+   virtio_hdr->num_buffers = extra_buffers +