[dpdk-dev] [PATCH v3 1/5] vhost: rewrite enqueue

2016-08-25 Thread Yuanhan Liu
On Mon, Aug 22, 2016 at 11:35:47AM +0200, Maxime Coquelin wrote:
> >-virtio_enqueue_offload(m, _hdr.hdr);
> >-copy_virtio_net_hdr(dev, desc_addr, virtio_hdr);
> >+/* handle virtio header */
> >+virtio_hdr = (struct virtio_net_hdr_mrg_rxbuf *)(uintptr_t)desc_addr;
> >+virtio_enqueue_offload(mbuf, &(virtio_hdr->hdr));
> Parenthesis around virtio_hdr->hdr shouldn't be needed.
> > vhost_log_write(dev, desc->addr, dev->vhost_hlen);
> >-PRINT_PACKET(dev, (uintptr_t)desc_addr, dev->vhost_hlen, 0);
> Looks like you remove the PRINT_PACKET calls.
> Does it impact performance?

Yes, it does. But it's only enabled for debug mode. Besides that,
it's just a NOOP.

> In any case, it should be mentionned in the commit message.

Agreed. But for this case, we should not remove it: it breaks the
debug-ability.

--yliu


[dpdk-dev] [PATCH v3 1/5] vhost: rewrite enqueue

2016-08-23 Thread Wang, Zhihong
Hi Maxime,

Thanks very much for the detailed review.

> -Original Message-
> From: Maxime Coquelin [mailto:maxime.coquelin at redhat.com]
> Sent: Monday, August 22, 2016 5:36 PM
> To: Wang, Zhihong ; dev at dpdk.org
> Cc: yuanhan.liu at linux.intel.com
> Subject: Re: [PATCH v3 1/5] vhost: rewrite enqueue
> 
> 
> 
> On 08/19/2016 07:43 AM, Zhihong Wang wrote:
> > This patch implements the vhost logic from scratch into a single function
> > designed for high performance and better maintainability.
> >
> > ---
> > Changes in v3:
> >
> >  1. Rewrite enqueue and delete the obsolete in the same patch.
> >
> > Signed-off-by: Zhihong Wang 
> > ---
> >  lib/librte_vhost/vhost_rxtx.c | 537 
> > +-
> >  1 file changed, 160 insertions(+), 377 deletions(-)
> >
> > diff --git a/lib/librte_vhost/vhost_rxtx.c b/lib/librte_vhost/vhost_rxtx.c
> > index 08a73fd..b09a9c3 100644
> > --- a/lib/librte_vhost/vhost_rxtx.c
> > +++ b/lib/librte_vhost/vhost_rxtx.c
> > @@ -91,7 +91,7 @@ is_valid_virt_queue_idx(uint32_t idx, int is_tx, uint32_t
> qp_nb)
> > return (is_tx ^ (idx & 1)) == 0 && idx < qp_nb * VIRTIO_QNUM;
> >  }
> >
> > -static void
> > +static inline void __attribute__((always_inline))
> >  virtio_enqueue_offload(struct rte_mbuf *m_buf, struct virtio_net_hdr
> *net_hdr)
> >  {
> > if (m_buf->ol_flags & PKT_TX_L4_MASK) {
> > @@ -125,427 +125,210 @@ virtio_enqueue_offload(struct rte_mbuf *m_buf,
> struct virtio_net_hdr *net_hdr)
> > }
> >  }
> >
> > -static inline void
> > -copy_virtio_net_hdr(struct virtio_net *dev, uint64_t desc_addr,
> > -   struct virtio_net_hdr_mrg_rxbuf hdr)
> > +static inline uint32_t __attribute__((always_inline))
> > +loop_check(struct vhost_virtqueue *vq, uint16_t avail_idx, uint32_t 
> > pkt_left)
> Creating a function just for doing this doesn't make much sense.
> And the function name doesn't help.
> I think you should just remove this function.

Okay.

> 
> >  {
> > -   if (dev->vhost_hlen == sizeof(struct virtio_net_hdr_mrg_rxbuf))
> > -   *(struct virtio_net_hdr_mrg_rxbuf *)(uintptr_t)desc_addr = hdr;
> > -   else
> > -   *(struct virtio_net_hdr *)(uintptr_t)desc_addr = hdr.hdr;
> > +   if (pkt_left == 0 || avail_idx == vq->last_used_idx)
> > +   return 1;
> > +
> > +   return 0;
> >  }
> >
> > -static inline int __attribute__((always_inline))
> > -copy_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
> > - struct rte_mbuf *m, uint16_t desc_idx)
> > +static inline uint32_t __attribute__((always_inline))
> > +enqueue_packet(struct virtio_net *dev, struct vhost_virtqueue *vq,
> > +   uint16_t avail_idx, struct rte_mbuf *mbuf,
> > +   uint32_t is_mrg_rxbuf)
> >  {
> > -   uint32_t desc_avail, desc_offset;
> > -   uint32_t mbuf_avail, mbuf_offset;
> > -   uint32_t cpy_len;
> > +   struct virtio_net_hdr_mrg_rxbuf *virtio_hdr;
> > struct vring_desc *desc;
> > -   uint64_t desc_addr;
> > -   struct virtio_net_hdr_mrg_rxbuf virtio_hdr = {{0, 0, 0, 0, 0, 0}, 0};
> > -
> > -   desc = >desc[desc_idx];
> > +   uint64_t desc_addr = 0;
> > +   uint32_t desc_chain_head = 0;
> > +   uint32_t desc_chain_len = 0;
> > +   uint32_t desc_current = 0;
> > +   uint32_t desc_offset = 0;
> > +   uint32_t mbuf_len = 0;
> > +   uint32_t mbuf_avail = 0;
> > +   uint32_t copy_len = 0;
> > +   uint32_t extra_buffers = 0;
> > +   uint32_t used_idx_round = 0;
> Most of these variables don't need to be initialized.

Okay.

> 
> > +
> > +   /* start with the first mbuf of the packet */
> > +   mbuf_len = rte_pktmbuf_data_len(mbuf);
> > +   mbuf_avail = mbuf_len;
> > +
> > +   /* get the current desc */
> > +   desc_current = vq->avail->ring[(vq->last_used_idx) & (vq->size - 1)];
> > +   desc_chain_head = desc_current;
> > +   desc = >desc[desc_current];
> > desc_addr = gpa_to_vva(dev, desc->addr);
> > -   /*
> > -* Checking of 'desc_addr' placed outside of 'unlikely' macro to avoid
> > -* performance issue with some versions of gcc (4.8.4 and 5.3.0) which
> > -* otherwise stores offset on the stack instead of in a register.
> > -*/
> > -   if (unlikely(desc->len < dev->vhost_hlen) || !desc_addr)
> > -   return -1;
> > -
> > -   rte_prefetch0((void *)(uintptr_t)desc_addr);
> > +   if (unlikely(!desc_addr))
> > +   goto error;
> >
> > -   virtio_enqueue_offload(m, _hdr.hdr);
> > -   copy_virtio_net_hdr(dev, desc_addr, virtio_hdr);
> > +   /* handle virtio header */
> > +   virtio_hdr = (struct virtio_net_hdr_mrg_rxbuf *)(uintptr_t)desc_addr;
> > +   virtio_enqueue_offload(mbuf, &(virtio_hdr->hdr));
> Parenthesis around virtio_hdr->hdr shouldn't be needed.
> > vhost_log_write(dev, desc->addr, dev->vhost_hlen);
> > -   PRINT_PACKET(dev, (uintptr_t)desc_addr, dev->vhost_hlen, 0);
> Looks like you remove the PRINT_PACKET calls.
> Does it impact performance?
> In any case, it should be mentionned in the commit message.

Will add this.

> 
> > -
> >  

[dpdk-dev] [PATCH v3 1/5] vhost: rewrite enqueue

2016-08-22 Thread Maxime Coquelin


On 08/19/2016 07:43 AM, Zhihong Wang wrote:
> This patch implements the vhost logic from scratch into a single function
> designed for high performance and better maintainability.
>
> ---
> Changes in v3:
>
>  1. Rewrite enqueue and delete the obsolete in the same patch.
>
> Signed-off-by: Zhihong Wang 
> ---
>  lib/librte_vhost/vhost_rxtx.c | 537 
> +-
>  1 file changed, 160 insertions(+), 377 deletions(-)
>
> diff --git a/lib/librte_vhost/vhost_rxtx.c b/lib/librte_vhost/vhost_rxtx.c
> index 08a73fd..b09a9c3 100644
> --- a/lib/librte_vhost/vhost_rxtx.c
> +++ b/lib/librte_vhost/vhost_rxtx.c
> @@ -91,7 +91,7 @@ is_valid_virt_queue_idx(uint32_t idx, int is_tx, uint32_t 
> qp_nb)
>   return (is_tx ^ (idx & 1)) == 0 && idx < qp_nb * VIRTIO_QNUM;
>  }
>
> -static void
> +static inline void __attribute__((always_inline))
>  virtio_enqueue_offload(struct rte_mbuf *m_buf, struct virtio_net_hdr 
> *net_hdr)
>  {
>   if (m_buf->ol_flags & PKT_TX_L4_MASK) {
> @@ -125,427 +125,210 @@ virtio_enqueue_offload(struct rte_mbuf *m_buf, struct 
> virtio_net_hdr *net_hdr)
>   }
>  }
>
> -static inline void
> -copy_virtio_net_hdr(struct virtio_net *dev, uint64_t desc_addr,
> - struct virtio_net_hdr_mrg_rxbuf hdr)
> +static inline uint32_t __attribute__((always_inline))
> +loop_check(struct vhost_virtqueue *vq, uint16_t avail_idx, uint32_t pkt_left)
Creating a function just for doing this doesn't make much sense.
And the function name doesn't help.
I think you should just remove this function.

>  {
> - if (dev->vhost_hlen == sizeof(struct virtio_net_hdr_mrg_rxbuf))
> - *(struct virtio_net_hdr_mrg_rxbuf *)(uintptr_t)desc_addr = hdr;
> - else
> - *(struct virtio_net_hdr *)(uintptr_t)desc_addr = hdr.hdr;
> + if (pkt_left == 0 || avail_idx == vq->last_used_idx)
> + return 1;
> +
> + return 0;
>  }
>
> -static inline int __attribute__((always_inline))
> -copy_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
> -   struct rte_mbuf *m, uint16_t desc_idx)
> +static inline uint32_t __attribute__((always_inline))
> +enqueue_packet(struct virtio_net *dev, struct vhost_virtqueue *vq,
> + uint16_t avail_idx, struct rte_mbuf *mbuf,
> + uint32_t is_mrg_rxbuf)
>  {
> - uint32_t desc_avail, desc_offset;
> - uint32_t mbuf_avail, mbuf_offset;
> - uint32_t cpy_len;
> + struct virtio_net_hdr_mrg_rxbuf *virtio_hdr;
>   struct vring_desc *desc;
> - uint64_t desc_addr;
> - struct virtio_net_hdr_mrg_rxbuf virtio_hdr = {{0, 0, 0, 0, 0, 0}, 0};
> -
> - desc = >desc[desc_idx];
> + uint64_t desc_addr = 0;
> + uint32_t desc_chain_head = 0;
> + uint32_t desc_chain_len = 0;
> + uint32_t desc_current = 0;
> + uint32_t desc_offset = 0;
> + uint32_t mbuf_len = 0;
> + uint32_t mbuf_avail = 0;
> + uint32_t copy_len = 0;
> + uint32_t extra_buffers = 0;
> + uint32_t used_idx_round = 0;
Most of these variables don't need to be initialized.

> +
> + /* start with the first mbuf of the packet */
> + mbuf_len = rte_pktmbuf_data_len(mbuf);
> + mbuf_avail = mbuf_len;
> +
> + /* get the current desc */
> + desc_current = vq->avail->ring[(vq->last_used_idx) & (vq->size - 1)];
> + desc_chain_head = desc_current;
> + desc = >desc[desc_current];
>   desc_addr = gpa_to_vva(dev, desc->addr);
> - /*
> -  * Checking of 'desc_addr' placed outside of 'unlikely' macro to avoid
> -  * performance issue with some versions of gcc (4.8.4 and 5.3.0) which
> -  * otherwise stores offset on the stack instead of in a register.
> -  */
> - if (unlikely(desc->len < dev->vhost_hlen) || !desc_addr)
> - return -1;
> -
> - rte_prefetch0((void *)(uintptr_t)desc_addr);
> + if (unlikely(!desc_addr))
> + goto error;
>
> - virtio_enqueue_offload(m, _hdr.hdr);
> - copy_virtio_net_hdr(dev, desc_addr, virtio_hdr);
> + /* handle virtio header */
> + virtio_hdr = (struct virtio_net_hdr_mrg_rxbuf *)(uintptr_t)desc_addr;
> + virtio_enqueue_offload(mbuf, &(virtio_hdr->hdr));
Parenthesis around virtio_hdr->hdr shouldn't be needed.
>   vhost_log_write(dev, desc->addr, dev->vhost_hlen);
> - PRINT_PACKET(dev, (uintptr_t)desc_addr, dev->vhost_hlen, 0);
Looks like you remove the PRINT_PACKET calls.
Does it impact performance?
In any case, it should be mentionned in the commit message.

> -
>   desc_offset = dev->vhost_hlen;
> - desc_avail  = desc->len - dev->vhost_hlen;
> + desc_chain_len = desc_offset;
> + desc_addr += desc_offset;
> + if (is_mrg_rxbuf)
> + virtio_hdr->num_buffers = 1;
>
> - mbuf_avail  = rte_pktmbuf_data_len(m);
> - mbuf_offset = 0;
> - while (mbuf_avail != 0 || m->next != NULL) {
> - /* done with current mbuf, fetch next */
> - if (mbuf_avail == 0) {
> - 

[dpdk-dev] [PATCH v3 1/5] vhost: rewrite enqueue

2016-08-19 Thread Zhihong Wang
This patch implements the vhost logic from scratch into a single function
designed for high performance and better maintainability.

---
Changes in v3:

 1. Rewrite enqueue and delete the obsolete in the same patch.

Signed-off-by: Zhihong Wang 
---
 lib/librte_vhost/vhost_rxtx.c | 537 +-
 1 file changed, 160 insertions(+), 377 deletions(-)

diff --git a/lib/librte_vhost/vhost_rxtx.c b/lib/librte_vhost/vhost_rxtx.c
index 08a73fd..b09a9c3 100644
--- a/lib/librte_vhost/vhost_rxtx.c
+++ b/lib/librte_vhost/vhost_rxtx.c
@@ -91,7 +91,7 @@ is_valid_virt_queue_idx(uint32_t idx, int is_tx, uint32_t 
qp_nb)
return (is_tx ^ (idx & 1)) == 0 && idx < qp_nb * VIRTIO_QNUM;
 }

-static void
+static inline void __attribute__((always_inline))
 virtio_enqueue_offload(struct rte_mbuf *m_buf, struct virtio_net_hdr *net_hdr)
 {
if (m_buf->ol_flags & PKT_TX_L4_MASK) {
@@ -125,427 +125,210 @@ virtio_enqueue_offload(struct rte_mbuf *m_buf, struct 
virtio_net_hdr *net_hdr)
}
 }

-static inline void
-copy_virtio_net_hdr(struct virtio_net *dev, uint64_t desc_addr,
-   struct virtio_net_hdr_mrg_rxbuf hdr)
+static inline uint32_t __attribute__((always_inline))
+loop_check(struct vhost_virtqueue *vq, uint16_t avail_idx, uint32_t pkt_left)
 {
-   if (dev->vhost_hlen == sizeof(struct virtio_net_hdr_mrg_rxbuf))
-   *(struct virtio_net_hdr_mrg_rxbuf *)(uintptr_t)desc_addr = hdr;
-   else
-   *(struct virtio_net_hdr *)(uintptr_t)desc_addr = hdr.hdr;
+   if (pkt_left == 0 || avail_idx == vq->last_used_idx)
+   return 1;
+
+   return 0;
 }

-static inline int __attribute__((always_inline))
-copy_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
- struct rte_mbuf *m, uint16_t desc_idx)
+static inline uint32_t __attribute__((always_inline))
+enqueue_packet(struct virtio_net *dev, struct vhost_virtqueue *vq,
+   uint16_t avail_idx, struct rte_mbuf *mbuf,
+   uint32_t is_mrg_rxbuf)
 {
-   uint32_t desc_avail, desc_offset;
-   uint32_t mbuf_avail, mbuf_offset;
-   uint32_t cpy_len;
+   struct virtio_net_hdr_mrg_rxbuf *virtio_hdr;
struct vring_desc *desc;
-   uint64_t desc_addr;
-   struct virtio_net_hdr_mrg_rxbuf virtio_hdr = {{0, 0, 0, 0, 0, 0}, 0};
-
-   desc = >desc[desc_idx];
+   uint64_t desc_addr = 0;
+   uint32_t desc_chain_head = 0;
+   uint32_t desc_chain_len = 0;
+   uint32_t desc_current = 0;
+   uint32_t desc_offset = 0;
+   uint32_t mbuf_len = 0;
+   uint32_t mbuf_avail = 0;
+   uint32_t copy_len = 0;
+   uint32_t extra_buffers = 0;
+   uint32_t used_idx_round = 0;
+
+   /* start with the first mbuf of the packet */
+   mbuf_len = rte_pktmbuf_data_len(mbuf);
+   mbuf_avail = mbuf_len;
+
+   /* get the current desc */
+   desc_current = vq->avail->ring[(vq->last_used_idx) & (vq->size - 1)];
+   desc_chain_head = desc_current;
+   desc = >desc[desc_current];
desc_addr = gpa_to_vva(dev, desc->addr);
-   /*
-* Checking of 'desc_addr' placed outside of 'unlikely' macro to avoid
-* performance issue with some versions of gcc (4.8.4 and 5.3.0) which
-* otherwise stores offset on the stack instead of in a register.
-*/
-   if (unlikely(desc->len < dev->vhost_hlen) || !desc_addr)
-   return -1;
-
-   rte_prefetch0((void *)(uintptr_t)desc_addr);
+   if (unlikely(!desc_addr))
+   goto error;

-   virtio_enqueue_offload(m, _hdr.hdr);
-   copy_virtio_net_hdr(dev, desc_addr, virtio_hdr);
+   /* handle virtio header */
+   virtio_hdr = (struct virtio_net_hdr_mrg_rxbuf *)(uintptr_t)desc_addr;
+   virtio_enqueue_offload(mbuf, &(virtio_hdr->hdr));
vhost_log_write(dev, desc->addr, dev->vhost_hlen);
-   PRINT_PACKET(dev, (uintptr_t)desc_addr, dev->vhost_hlen, 0);
-
desc_offset = dev->vhost_hlen;
-   desc_avail  = desc->len - dev->vhost_hlen;
+   desc_chain_len = desc_offset;
+   desc_addr += desc_offset;
+   if (is_mrg_rxbuf)
+   virtio_hdr->num_buffers = 1;

-   mbuf_avail  = rte_pktmbuf_data_len(m);
-   mbuf_offset = 0;
-   while (mbuf_avail != 0 || m->next != NULL) {
-   /* done with current mbuf, fetch next */
-   if (mbuf_avail == 0) {
-   m = m->next;
-
-   mbuf_offset = 0;
-   mbuf_avail  = rte_pktmbuf_data_len(m);
+   /* start copy from mbuf to desc */
+   while (1) {
+   /* get the next mbuf if the current done */
+   if (!mbuf_avail) {
+   if (mbuf->next) {
+   mbuf = mbuf->next;
+   mbuf_len = rte_pktmbuf_data_len(mbuf);
+   mbuf_avail = mbuf_len;
+   } else
+