[dpdk-dev] [PATCH v5 2/6] vhost: rewrite enqueue

2016-09-15 Thread Maxime Coquelin
Hi,
On 09/14/2016 10:20 AM, Wang, Zhihong wrote:
>>> +   desc_current =
>>> +   vq->avail->ring[(vq->last_used_idx)
>> &
>>> +   (vq->size - 1)];
>>> +   desc_chain_head = desc_current;
>>> +   desc = >desc[desc_current];
>>> +   desc_addr = gpa_to_vva(dev, desc->addr);
>>> +   if (unlikely(!desc_addr))
>>> +   goto error;
>>>
>>> -   desc = >desc[desc->next];
>>> -   desc_addr = gpa_to_vva(dev, desc->addr);
>>> -   if (unlikely(!desc_addr))
>>> -   return -1;
>>> -
>>> -   desc_offset = 0;
>>> -   desc_avail  = desc->len;
>>> +   desc_chain_len = 0;
>>> +   desc_offset = 0;
>> As I commented on v3, there is code duplication between next flag, and
>> mrg buf cases:
>> desc_offset = 0;
>>
>> and:
>>
>> desc = >desc[desc_current];
>> desc_addr = gpa_to_vva(dev, desc->addr);
>> if (unlikely(!desc_addr))
>>  goto error;
>>
>
> Do you mean to add something like:
>
> static inline int __attribute__((always_inline))
> get_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
> uint32_t desc_idx, struct vring_desc **desc,
> uint64_t *desc_addr)
> {
> *desc = >desc[desc_idx];
> *desc_addr = gpa_to_vva(dev, (*desc)->addr);
> if (unlikely(!(*desc_addr)))
> return -1;
>
> return 0;
> }

I meant, move this code after the if/else.
You can do it in a function if it is done elsewhere in the file.



[dpdk-dev] [PATCH v5 2/6] vhost: rewrite enqueue

2016-09-14 Thread Wang, Zhihong


> -Original Message-
> From: Maxime Coquelin [mailto:maxime.coquelin at redhat.com]
> Sent: Tuesday, September 13, 2016 12:27 AM
> To: Wang, Zhihong ; dev at dpdk.org
> Cc: yuanhan.liu at linux.intel.com; thomas.monjalon at 6wind.com
> Subject: Re: [PATCH v5 2/6] vhost: rewrite enqueue
> 
> 
> 
> On 09/09/2016 05:39 AM, Zhihong Wang wrote:
> >
> > +static inline void __attribute__((always_inline))
> > +notify_guest(struct virtio_net *dev, struct vhost_virtqueue *vq)
> > +{
> > rte_smp_wmb();
> > -
> > -   *(volatile uint16_t *)>used->idx += count;
> > -   vq->last_used_idx += count;
> > -   vhost_log_used_vring(dev, vq,
> > -   offsetof(struct vring_used, idx),
> > -   sizeof(vq->used->idx));
> > -
> > -   /* flush used->idx update before we read avail->flags. */
> Please don't remove comments if not justified.
> Here the comment is important, as it explains why the barrier is needed.

Okay.

> > +   *(volatile uint16_t *)>used->idx = vq->last_used_idx;
> > +   vhost_log_used_vring(dev, vq, offsetof(struct vring_used, idx),
> > +   sizeof(vq->used->idx));
> > rte_mb();
> > -
> > -   /* Kick the guest if necessary. */
> > if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT)
> > && (vq->callfd >= 0))
> > eventfd_write(vq->callfd, (eventfd_t)1);
> > -   return count;
> >  }


[dpdk-dev] [PATCH v5 2/6] vhost: rewrite enqueue

2016-09-14 Thread Wang, Zhihong
> > +   desc_current =
> > +   vq->avail->ring[(vq->last_used_idx)
> &
> > +   (vq->size - 1)];
> > +   desc_chain_head = desc_current;
> > +   desc = >desc[desc_current];
> > +   desc_addr = gpa_to_vva(dev, desc->addr);
> > +   if (unlikely(!desc_addr))
> > +   goto error;
> >
> > -   desc = >desc[desc->next];
> > -   desc_addr = gpa_to_vva(dev, desc->addr);
> > -   if (unlikely(!desc_addr))
> > -   return -1;
> > -
> > -   desc_offset = 0;
> > -   desc_avail  = desc->len;
> > +   desc_chain_len = 0;
> > +   desc_offset = 0;
> As I commented on v3, there is code duplication between next flag, and
> mrg buf cases:
> desc_offset = 0;
> 
> and:
> 
> desc = >desc[desc_current];
> desc_addr = gpa_to_vva(dev, desc->addr);
> if (unlikely(!desc_addr))
>  goto error;
> 

Do you mean to add something like:

static inline int __attribute__((always_inline))
get_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
uint32_t desc_idx, struct vring_desc **desc,
uint64_t *desc_addr)
{
*desc = >desc[desc_idx];
*desc_addr = gpa_to_vva(dev, (*desc)->addr);
if (unlikely(!(*desc_addr)))
return -1;

return 0;
}


> Regards,
> Maxime


[dpdk-dev] [PATCH v5 2/6] vhost: rewrite enqueue

2016-09-12 Thread Maxime Coquelin


On 09/09/2016 05:39 AM, Zhihong Wang wrote:
>
> +static inline void __attribute__((always_inline))
> +notify_guest(struct virtio_net *dev, struct vhost_virtqueue *vq)
> +{
>   rte_smp_wmb();
> -
> - *(volatile uint16_t *)>used->idx += count;
> - vq->last_used_idx += count;
> - vhost_log_used_vring(dev, vq,
> - offsetof(struct vring_used, idx),
> - sizeof(vq->used->idx));
> -
> - /* flush used->idx update before we read avail->flags. */
Please don't remove comments if not justified.
Here the comment is important, as it explains why the barrier is needed.
> + *(volatile uint16_t *)>used->idx = vq->last_used_idx;
> + vhost_log_used_vring(dev, vq, offsetof(struct vring_used, idx),
> + sizeof(vq->used->idx));
>   rte_mb();
> -
> - /* Kick the guest if necessary. */
>   if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT)
>   && (vq->callfd >= 0))
>   eventfd_write(vq->callfd, (eventfd_t)1);
> - return count;
>  }


[dpdk-dev] [PATCH v5 2/6] vhost: rewrite enqueue

2016-09-12 Thread Maxime Coquelin
Hi,

On 09/09/2016 05:39 AM, Zhihong Wang wrote:
> This patch implements the vhost logic from scratch into a single function
> designed for high performance and better maintainability.
>
> This is the baseline version of the new code, more optimization will be
> added in the following patches in this patch set.
>
> Signed-off-by: Zhihong Wang 
> ---
> Changes in v5:
>
>  1. Rebase to the latest branch.
>
>  2. Rename variables to keep consistent in naming style.
>
>  3. Small changes like return value adjustment and vertical alignment.
>
> ---
> Changes in v4:
>
>  1. Refactor the code for clearer logic.
>
>  2. Add PRINT_PACKET for debugging.
>
> ---
> Changes in v3:
>
>  1. Rewrite enqueue and delete the obsolete in the same patch.
>
>  lib/librte_vhost/virtio_net.c | 514 
> --
>  1 file changed, 138 insertions(+), 376 deletions(-)
>
> diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c
> index 0d6e7d9..6f63968 100644
> --- a/lib/librte_vhost/virtio_net.c
> +++ b/lib/librte_vhost/virtio_net.c
> @@ -91,7 +91,7 @@ is_valid_virt_queue_idx(uint32_t idx, int is_tx, uint32_t 
> qp_nb)
>   return (is_tx ^ (idx & 1)) == 0 && idx < qp_nb * VIRTIO_QNUM;
>  }
>
> -static void
> +static inline void __attribute__((always_inline))
>  virtio_enqueue_offload(struct rte_mbuf *m_buf, struct virtio_net_hdr 
> *net_hdr)
>  {
>   if (m_buf->ol_flags & PKT_TX_L4_MASK) {
> @@ -112,6 +112,10 @@ virtio_enqueue_offload(struct rte_mbuf *m_buf, struct 
> virtio_net_hdr *net_hdr)
>   cksum));
>   break;
>   }
> + } else {
> + net_hdr->flags   = 0;
> + net_hdr->csum_start  = 0;
> + net_hdr->csum_offset = 0;
>   }
>
>   if (m_buf->ol_flags & PKT_TX_TCP_SEG) {
> @@ -122,439 +126,197 @@ virtio_enqueue_offload(struct rte_mbuf *m_buf, struct 
> virtio_net_hdr *net_hdr)
>   net_hdr->gso_size = m_buf->tso_segsz;
>   net_hdr->hdr_len = m_buf->l2_len + m_buf->l3_len
>   + m_buf->l4_len;
> + } else {
> + net_hdr->gso_type = 0;
> + net_hdr->hdr_len  = 0;
> + net_hdr->gso_size = 0;
>   }
>  }
>
> -static inline void
> -copy_virtio_net_hdr(struct virtio_net *dev, uint64_t desc_addr,
> - struct virtio_net_hdr_mrg_rxbuf hdr)
> +static inline void __attribute__((always_inline))
> +update_used_ring(struct virtio_net *dev, struct vhost_virtqueue *vq,
> + uint32_t desc_chain_head, uint32_t desc_chain_len)
>  {
> - if (dev->vhost_hlen == sizeof(struct virtio_net_hdr_mrg_rxbuf))
> - *(struct virtio_net_hdr_mrg_rxbuf *)(uintptr_t)desc_addr = hdr;
> - else
> - *(struct virtio_net_hdr *)(uintptr_t)desc_addr = hdr.hdr;
> + uint32_t used_idx = vq->last_used_idx & (vq->size - 1);
> +
> + vq->used->ring[used_idx].id = desc_chain_head;
> + vq->used->ring[used_idx].len = desc_chain_len;
> + vq->last_used_idx++;
> + vhost_log_used_vring(dev, vq, offsetof(struct vring_used,
> + ring[used_idx]),
> + sizeof(vq->used->ring[used_idx]));
>  }
>
>  static inline int __attribute__((always_inline))
> -copy_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
> -   struct rte_mbuf *m, uint16_t desc_idx)
> +enqueue_packet(struct virtio_net *dev, struct vhost_virtqueue *vq,
> + uint16_t avail_idx, struct rte_mbuf *mbuf,
> + uint32_t is_mrg_rxbuf)
>  {
> - uint32_t desc_avail, desc_offset;
> - uint32_t mbuf_avail, mbuf_offset;
> - uint32_t cpy_len;
> + struct virtio_net_hdr_mrg_rxbuf *virtio_hdr;
>   struct vring_desc *desc;
>   uint64_t desc_addr;
> - struct virtio_net_hdr_mrg_rxbuf virtio_hdr = {{0, 0, 0, 0, 0, 0}, 0};
> + uint32_t desc_chain_head;
> + uint32_t desc_chain_len;
> + uint32_t desc_current;
> + uint32_t desc_offset;
> + uint32_t mbuf_len;
> + uint32_t mbuf_avail;
> + uint32_t cpy_len;
> + uint32_t num_buffers = 0;
>
> - desc = >desc[desc_idx];
> + /* start with the first mbuf of the packet */
> + mbuf_len = rte_pktmbuf_data_len(mbuf);
> + mbuf_avail = mbuf_len;
> +
> + /* get the current desc */
> + desc_current = vq->avail->ring[(vq->last_used_idx) & (vq->size - 1)];
> + desc_chain_head = desc_current;
> + desc = >desc[desc_current];
>   desc_addr = gpa_to_vva(dev, desc->addr);
> - /*
> -  * Checking of 'desc_addr' placed outside of 'unlikely' macro to avoid
> -  * performance issue with some versions of gcc (4.8.4 and 5.3.0) which
> -  * otherwise stores offset on the stack instead of in a register.
> -  */
> - if (unlikely(desc->len < dev->vhost_hlen) || !desc_addr)
> - return -1;
> + if (unlikely(!desc_addr))
> + goto error;
>
> - 

[dpdk-dev] [PATCH v5 2/6] vhost: rewrite enqueue

2016-09-09 Thread Zhihong Wang
This patch implements the vhost logic from scratch into a single function
designed for high performance and better maintainability.

This is the baseline version of the new code, more optimization will be
added in the following patches in this patch set.

Signed-off-by: Zhihong Wang 
---
Changes in v5:

 1. Rebase to the latest branch.

 2. Rename variables to keep consistent in naming style.

 3. Small changes like return value adjustment and vertical alignment.

---
Changes in v4:

 1. Refactor the code for clearer logic.

 2. Add PRINT_PACKET for debugging.

---
Changes in v3:

 1. Rewrite enqueue and delete the obsolete in the same patch.

 lib/librte_vhost/virtio_net.c | 514 --
 1 file changed, 138 insertions(+), 376 deletions(-)

diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c
index 0d6e7d9..6f63968 100644
--- a/lib/librte_vhost/virtio_net.c
+++ b/lib/librte_vhost/virtio_net.c
@@ -91,7 +91,7 @@ is_valid_virt_queue_idx(uint32_t idx, int is_tx, uint32_t 
qp_nb)
return (is_tx ^ (idx & 1)) == 0 && idx < qp_nb * VIRTIO_QNUM;
 }

-static void
+static inline void __attribute__((always_inline))
 virtio_enqueue_offload(struct rte_mbuf *m_buf, struct virtio_net_hdr *net_hdr)
 {
if (m_buf->ol_flags & PKT_TX_L4_MASK) {
@@ -112,6 +112,10 @@ virtio_enqueue_offload(struct rte_mbuf *m_buf, struct 
virtio_net_hdr *net_hdr)
cksum));
break;
}
+   } else {
+   net_hdr->flags   = 0;
+   net_hdr->csum_start  = 0;
+   net_hdr->csum_offset = 0;
}

if (m_buf->ol_flags & PKT_TX_TCP_SEG) {
@@ -122,439 +126,197 @@ virtio_enqueue_offload(struct rte_mbuf *m_buf, struct 
virtio_net_hdr *net_hdr)
net_hdr->gso_size = m_buf->tso_segsz;
net_hdr->hdr_len = m_buf->l2_len + m_buf->l3_len
+ m_buf->l4_len;
+   } else {
+   net_hdr->gso_type = 0;
+   net_hdr->hdr_len  = 0;
+   net_hdr->gso_size = 0;
}
 }

-static inline void
-copy_virtio_net_hdr(struct virtio_net *dev, uint64_t desc_addr,
-   struct virtio_net_hdr_mrg_rxbuf hdr)
+static inline void __attribute__((always_inline))
+update_used_ring(struct virtio_net *dev, struct vhost_virtqueue *vq,
+   uint32_t desc_chain_head, uint32_t desc_chain_len)
 {
-   if (dev->vhost_hlen == sizeof(struct virtio_net_hdr_mrg_rxbuf))
-   *(struct virtio_net_hdr_mrg_rxbuf *)(uintptr_t)desc_addr = hdr;
-   else
-   *(struct virtio_net_hdr *)(uintptr_t)desc_addr = hdr.hdr;
+   uint32_t used_idx = vq->last_used_idx & (vq->size - 1);
+
+   vq->used->ring[used_idx].id = desc_chain_head;
+   vq->used->ring[used_idx].len = desc_chain_len;
+   vq->last_used_idx++;
+   vhost_log_used_vring(dev, vq, offsetof(struct vring_used,
+   ring[used_idx]),
+   sizeof(vq->used->ring[used_idx]));
 }

 static inline int __attribute__((always_inline))
-copy_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
- struct rte_mbuf *m, uint16_t desc_idx)
+enqueue_packet(struct virtio_net *dev, struct vhost_virtqueue *vq,
+   uint16_t avail_idx, struct rte_mbuf *mbuf,
+   uint32_t is_mrg_rxbuf)
 {
-   uint32_t desc_avail, desc_offset;
-   uint32_t mbuf_avail, mbuf_offset;
-   uint32_t cpy_len;
+   struct virtio_net_hdr_mrg_rxbuf *virtio_hdr;
struct vring_desc *desc;
uint64_t desc_addr;
-   struct virtio_net_hdr_mrg_rxbuf virtio_hdr = {{0, 0, 0, 0, 0, 0}, 0};
+   uint32_t desc_chain_head;
+   uint32_t desc_chain_len;
+   uint32_t desc_current;
+   uint32_t desc_offset;
+   uint32_t mbuf_len;
+   uint32_t mbuf_avail;
+   uint32_t cpy_len;
+   uint32_t num_buffers = 0;

-   desc = >desc[desc_idx];
+   /* start with the first mbuf of the packet */
+   mbuf_len = rte_pktmbuf_data_len(mbuf);
+   mbuf_avail = mbuf_len;
+
+   /* get the current desc */
+   desc_current = vq->avail->ring[(vq->last_used_idx) & (vq->size - 1)];
+   desc_chain_head = desc_current;
+   desc = >desc[desc_current];
desc_addr = gpa_to_vva(dev, desc->addr);
-   /*
-* Checking of 'desc_addr' placed outside of 'unlikely' macro to avoid
-* performance issue with some versions of gcc (4.8.4 and 5.3.0) which
-* otherwise stores offset on the stack instead of in a register.
-*/
-   if (unlikely(desc->len < dev->vhost_hlen) || !desc_addr)
-   return -1;
+   if (unlikely(!desc_addr))
+   goto error;

-   rte_prefetch0((void *)(uintptr_t)desc_addr);
+   /* handle virtio header */
+   virtio_hdr = (struct virtio_net_hdr_mrg_rxbuf *)(uintptr_t)desc_addr;
+