[dpdk-dev] [PATCH v5 5/6] vhost: batch update used ring

2016-09-18 Thread Yuanhan Liu
On Thu, Sep 15, 2016 at 06:38:06PM +0200, Maxime Coquelin wrote:
> >>>+static inline void __attribute__((always_inline))
> >>>+flush_used_ring(struct virtio_net *dev, struct vhost_virtqueue *vq,
> >>>+  uint32_t used_idx_start)
> >>>+{
> >>>+  if (used_idx_start + vq->shadow_used_idx < vq->size) {
> >>>+  rte_memcpy(&vq->used->ring[used_idx_start],
> >>>+  &vq->shadow_used_ring[0],
> >>>+  vq->shadow_used_idx *
> >>>+  sizeof(struct vring_used_elem));
> >>>+  vhost_log_used_vring(dev, vq,
> >>>+  offsetof(struct vring_used,
> >>>+  ring[used_idx_start]),
> >>>+  vq->shadow_used_idx *
> >>>+  sizeof(struct vring_used_elem));
> >>>+  } else {
> >>>+  uint32_t part_1 = vq->size - used_idx_start;
> >>>+  uint32_t part_2 = vq->shadow_used_idx - part_1;
> >>>+
> >>>+  rte_memcpy(&vq->used->ring[used_idx_start],
> >>>+  &vq->shadow_used_ring[0],
> >>>+  part_1 *
> >>>+  sizeof(struct vring_used_elem));
> >>>+  vhost_log_used_vring(dev, vq,
> >>>+  offsetof(struct vring_used,
> >>>+  ring[used_idx_start]),
> >>>+  part_1 *
> >>>+  sizeof(struct vring_used_elem));
> >>>+  rte_memcpy(&vq->used->ring[0],
> >>>+  &vq->shadow_used_ring[part_1],
> >>>+  part_2 *
> >>>+  sizeof(struct vring_used_elem));
> >>>+  vhost_log_used_vring(dev, vq,
> >>>+  offsetof(struct vring_used,
> >>>+  ring[0]),
> >>>+  part_2 *
> >>>+  sizeof(struct vring_used_elem));
> >>>+  }
> >>> }
> >>Is expanding the code done for performance purpose?
> >
> >Hi Maxime,
> >
> >Yes theoretically this has the least branch number.
> >And I think the logic is simpler this way.
> Ok, in that case, maybe you could create a function to
> do the rte_memcpy and the vhost_log_used on a given range.

Agreed, that will be better; it could avoid repeating similar code
block 3 times.

> I don't have a strong opinion on this, if Yuanhan is fine
> with current code, that's ok for me.

From what I know, that's kind of DPDK preferred way, to expand code
when necessary. For example, 9ec201f5d6e7 ("mbuf: provide bulk
allocation").

So I'm fine with it.

--yliu


[dpdk-dev] [PATCH v5 5/6] vhost: batch update used ring

2016-09-18 Thread Wang, Zhihong


> -----Original Message-----
> From: Yuanhan Liu [mailto:yuanhan.liu at linux.intel.com]
> Sent: Sunday, September 18, 2016 10:56 AM
> To: Maxime Coquelin 
> Cc: Wang, Zhihong ; dev at dpdk.org;
> thomas.monjalon at 6wind.com
> Subject: Re: [PATCH v5 5/6] vhost: batch update used ring
> 
> On Thu, Sep 15, 2016 at 06:38:06PM +0200, Maxime Coquelin wrote:
> > >>>+static inline void __attribute__((always_inline))
> > >>>+flush_used_ring(struct virtio_net *dev, struct vhost_virtqueue *vq,
> > >>>+uint32_t used_idx_start)
> > >>>+{
> > >>>+if (used_idx_start + vq->shadow_used_idx < vq->size) {
> > >>>+rte_memcpy(&vq->used->ring[used_idx_start],
> > >>>+&vq->shadow_used_ring[0],
> > >>>+vq->shadow_used_idx *
> > >>>+sizeof(struct vring_used_elem));
> > >>>+vhost_log_used_vring(dev, vq,
> > >>>+offsetof(struct vring_used,
> > >>>+ring[used_idx_start]),
> > >>>+vq->shadow_used_idx *
> > >>>+sizeof(struct vring_used_elem));
> > >>>+} else {
> > >>>+uint32_t part_1 = vq->size - used_idx_start;
> > >>>+uint32_t part_2 = vq->shadow_used_idx - part_1;
> > >>>+
> > >>>+rte_memcpy(&vq->used->ring[used_idx_start],
> > >>>+&vq->shadow_used_ring[0],
> > >>>+part_1 *
> > >>>+sizeof(struct vring_used_elem));
> > >>>+vhost_log_used_vring(dev, vq,
> > >>>+offsetof(struct vring_used,
> > >>>+ring[used_idx_start]),
> > >>>+part_1 *
> > >>>+sizeof(struct vring_used_elem));
> > >>>+rte_memcpy(&vq->used->ring[0],
> > >>>+&vq->shadow_used_ring[part_1],
> > >>>+part_2 *
> > >>>+sizeof(struct vring_used_elem));
> > >>>+vhost_log_used_vring(dev, vq,
> > >>>+offsetof(struct vring_used,
> > >>>+ring[0]),
> > >>>+part_2 *
> > >>>+sizeof(struct vring_used_elem));
> > >>>+}
> > >>> }
> > >>Is expanding the code done for performance purpose?
> > >
> > >Hi Maxime,
> > >
> > >Yes theoretically this has the least branch number.
> > >And I think the logic is simpler this way.
> > Ok, in that case, maybe you could create a function to
> > do the rte_memcpy and the vhost_log_used on a given range.
> 
> Agreed, that will be better; it could avoid repeating similar code
> block 3 times.

Okay. Thanks for the suggestion, Maxime and Yuanhan.

> 
> > I don't have a strong opinion on this, if Yuanhan is fine
> > with current code, that's ok for me.
> 
> From what I know, that's kind of DPDK preferred way, to expand code
> when necessary. For example, 9ec201f5d6e7 ("mbuf: provide bulk
> allocation").
> 
> So I'm fine with it.
> 
>   --yliu


[dpdk-dev] [PATCH v5 5/6] vhost: batch update used ring

2016-09-15 Thread Maxime Coquelin


On 09/14/2016 10:43 AM, Wang, Zhihong wrote:
>
>
>> -----Original Message-----
>> From: Maxime Coquelin [mailto:maxime.coquelin at redhat.com]
>> Sent: Monday, September 12, 2016 11:46 PM
>> To: Wang, Zhihong ; dev at dpdk.org
>> Cc: yuanhan.liu at linux.intel.com; thomas.monjalon at 6wind.com
>> Subject: Re: [PATCH v5 5/6] vhost: batch update used ring
>>
>>
>>
>> On 09/09/2016 05:39 AM, Zhihong Wang wrote:
>>> This patch enables batch update of the used ring for better efficiency.
>>>
>>> Signed-off-by: Zhihong Wang 
>>> ---
>>> Changes in v4:
>>>
>>>  1. Free shadow used ring in the right place.
>>>
>>>  2. Add failure check for shadow used ring malloc.
>>>
>>>  lib/librte_vhost/vhost.c  | 20 --
>>>  lib/librte_vhost/vhost.h  |  4 +++
>>>  lib/librte_vhost/vhost_user.c | 31 +
>>>  lib/librte_vhost/virtio_net.c | 64
>> +++
>>>  4 files changed, 101 insertions(+), 18 deletions(-)
>>>
>>> diff --git a/lib/librte_vhost/vhost.c b/lib/librte_vhost/vhost.c
>>> index 46095c3..cb31cdd 100644
>>> --- a/lib/librte_vhost/vhost.c
>>> +++ b/lib/librte_vhost/vhost.c
>>> @@ -119,10 +119,26 @@ cleanup_device(struct virtio_net *dev, int
>> destroy)
>>>  static void
>>>  free_device(struct virtio_net *dev)
>>>  {
>>> +   struct vhost_virtqueue *vq_0;
>>> +   struct vhost_virtqueue *vq_1;
>>> uint32_t i;
>>>
>>> -   for (i = 0; i < dev->virt_qp_nb; i++)
>>> -   rte_free(dev->virtqueue[i * VIRTIO_QNUM]);
>>> +   for (i = 0; i < dev->virt_qp_nb; i++) {
>>> +   vq_0 = dev->virtqueue[i * VIRTIO_QNUM];
>>> +   if (vq_0->shadow_used_ring) {
>>> +   rte_free(vq_0->shadow_used_ring);
>>> +   vq_0->shadow_used_ring = NULL;
>>> +   }
>>> +
>>> +   vq_1 = dev->virtqueue[i * VIRTIO_QNUM + 1];
>>> +   if (vq_1->shadow_used_ring) {
>>> +   rte_free(vq_1->shadow_used_ring);
>>> +   vq_1->shadow_used_ring = NULL;
>>> +   }
>>> +
>>> +   /* malloc together, free together */
>>> +   rte_free(vq_0);
>>> +   }
>>>
>>> rte_free(dev);
>>>  }
>>> diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
>>> index 9707dfc..381dc27 100644
>>> --- a/lib/librte_vhost/vhost.h
>>> +++ b/lib/librte_vhost/vhost.h
>>> @@ -85,6 +85,10 @@ struct vhost_virtqueue {
>>>
>>> /* Physical address of used ring, for logging */
>>> uint64_tlog_guest_addr;
>>> +
>>> +   /* Shadow used ring for performance */
>>> +   struct vring_used_elem  *shadow_used_ring;
>>> +   uint32_tshadow_used_idx;
>>>  } __rte_cache_aligned;
>>>
>>>  /* Old kernels have no such macro defined */
>>> diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.c
>>> index eee99e9..d7cf1ed 100644
>>> --- a/lib/librte_vhost/vhost_user.c
>>> +++ b/lib/librte_vhost/vhost_user.c
>>> @@ -193,7 +193,21 @@ static int
>>>  vhost_user_set_vring_num(struct virtio_net *dev,
>>>  struct vhost_vring_state *state)
>>>  {
>>> -   dev->virtqueue[state->index]->size = state->num;
>>> +   struct vhost_virtqueue *vq;
>>> +
>>> +   vq = dev->virtqueue[state->index];
>>> +   vq->size = state->num;
>>> +   if (!vq->shadow_used_ring) {
>>> +   vq->shadow_used_ring = rte_malloc(NULL,
>>> +   vq->size * sizeof(struct vring_used_elem),
>>> +   RTE_CACHE_LINE_SIZE);
>>> +   if (!vq->shadow_used_ring) {
>>> +   RTE_LOG(ERR, VHOST_CONFIG,
>>> +   "Failed to allocate memory"
>>> +   " for shadow used ring.\n");
>>> +   return -1;
>>> +   }
>>> +   }
>>>
>>> return 0;
>>>  }
>>> @@ -611,14 +625,21 @@ static int
>>>  vhost_user_get_vring_base(struct virtio_net *dev,
>>>   struct vhost_vring_state *state)
>>>  {
>>> +   struct vhost_virtqueue *vq;
>>> +
>>> /* We have to stop the queue (virtio) if it is running. */
>>> if (dev->flags & VIRTIO_DEV_RUNNING) {
>>> dev->flags &= ~VIRTIO_DEV_RUNNING;
>>> notify_ops->destroy_device(dev->vid);
>>> }
>>>
>>> +   vq = dev->virtqueue[state->index];
>>> /* Here we are safe to get the last used index */
>>> -   state->num = dev->virtqueue[state->index]->last_used_idx;
>>> +   state->num = vq->last_used_idx;
>>> +   if (vq->shadow_used_ring) {
>>> +   rte_free(vq->shadow_used_ring);
>>> +   vq->shadow_used_ring = NULL;
>>> +   }
>>>
>>> RTE_LOG(INFO, VHOST_CONFIG,
>>> "vring base idx:%d file:%d\n", state->index, state->num);
>>> @@ -627,10 +648,10 @@ vhost_user_get_vring_base(struct virtio_net
>> *dev,
>>>  * sent and only sent in vhost_vring_stop.
>>>  * TODO: cleanup the vring, it isn't usable since here.
>>>  */
>>> -   if (dev->virtqueue[state->index]->kickfd >= 0)
>>> -   close(dev->virtqueue[state->index]->kickfd);
>>> 

[dpdk-dev] [PATCH v5 5/6] vhost: batch update used ring

2016-09-14 Thread Wang, Zhihong


> -----Original Message-----
> From: Maxime Coquelin [mailto:maxime.coquelin at redhat.com]
> Sent: Monday, September 12, 2016 11:46 PM
> To: Wang, Zhihong ; dev at dpdk.org
> Cc: yuanhan.liu at linux.intel.com; thomas.monjalon at 6wind.com
> Subject: Re: [PATCH v5 5/6] vhost: batch update used ring
> 
> 
> 
> On 09/09/2016 05:39 AM, Zhihong Wang wrote:
> > This patch enables batch update of the used ring for better efficiency.
> >
> > Signed-off-by: Zhihong Wang 
> > ---
> > Changes in v4:
> >
> >  1. Free shadow used ring in the right place.
> >
> >  2. Add failure check for shadow used ring malloc.
> >
> >  lib/librte_vhost/vhost.c  | 20 --
> >  lib/librte_vhost/vhost.h  |  4 +++
> >  lib/librte_vhost/vhost_user.c | 31 +
> >  lib/librte_vhost/virtio_net.c | 64
> +++
> >  4 files changed, 101 insertions(+), 18 deletions(-)
> >
> > diff --git a/lib/librte_vhost/vhost.c b/lib/librte_vhost/vhost.c
> > index 46095c3..cb31cdd 100644
> > --- a/lib/librte_vhost/vhost.c
> > +++ b/lib/librte_vhost/vhost.c
> > @@ -119,10 +119,26 @@ cleanup_device(struct virtio_net *dev, int
> destroy)
> >  static void
> >  free_device(struct virtio_net *dev)
> >  {
> > +   struct vhost_virtqueue *vq_0;
> > +   struct vhost_virtqueue *vq_1;
> > uint32_t i;
> >
> > -   for (i = 0; i < dev->virt_qp_nb; i++)
> > -   rte_free(dev->virtqueue[i * VIRTIO_QNUM]);
> > +   for (i = 0; i < dev->virt_qp_nb; i++) {
> > +   vq_0 = dev->virtqueue[i * VIRTIO_QNUM];
> > +   if (vq_0->shadow_used_ring) {
> > +   rte_free(vq_0->shadow_used_ring);
> > +   vq_0->shadow_used_ring = NULL;
> > +   }
> > +
> > +   vq_1 = dev->virtqueue[i * VIRTIO_QNUM + 1];
> > +   if (vq_1->shadow_used_ring) {
> > +   rte_free(vq_1->shadow_used_ring);
> > +   vq_1->shadow_used_ring = NULL;
> > +   }
> > +
> > +   /* malloc together, free together */
> > +   rte_free(vq_0);
> > +   }
> >
> > rte_free(dev);
> >  }
> > diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
> > index 9707dfc..381dc27 100644
> > --- a/lib/librte_vhost/vhost.h
> > +++ b/lib/librte_vhost/vhost.h
> > @@ -85,6 +85,10 @@ struct vhost_virtqueue {
> >
> > /* Physical address of used ring, for logging */
> > uint64_tlog_guest_addr;
> > +
> > +   /* Shadow used ring for performance */
> > +   struct vring_used_elem  *shadow_used_ring;
> > +   uint32_tshadow_used_idx;
> >  } __rte_cache_aligned;
> >
> >  /* Old kernels have no such macro defined */
> > diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.c
> > index eee99e9..d7cf1ed 100644
> > --- a/lib/librte_vhost/vhost_user.c
> > +++ b/lib/librte_vhost/vhost_user.c
> > @@ -193,7 +193,21 @@ static int
> >  vhost_user_set_vring_num(struct virtio_net *dev,
> >  struct vhost_vring_state *state)
> >  {
> > -   dev->virtqueue[state->index]->size = state->num;
> > +   struct vhost_virtqueue *vq;
> > +
> > +   vq = dev->virtqueue[state->index];
> > +   vq->size = state->num;
> > +   if (!vq->shadow_used_ring) {
> > +   vq->shadow_used_ring = rte_malloc(NULL,
> > +   vq->size * sizeof(struct vring_used_elem),
> > +   RTE_CACHE_LINE_SIZE);
> > +   if (!vq->shadow_used_ring) {
> > +   RTE_LOG(ERR, VHOST_CONFIG,
> > +   "Failed to allocate memory"
> > +   " for shadow used ring.\n");
> > +   return -1;
> > +   }
> > +   }
> >
> > return 0;
> >  }
> > @@ -611,14 +625,21 @@ static int
> >  vhost_user_get_vring_base(struct virtio_net *dev,
> >   struct vhost_vring_state *state)
> >  {
> > +   struct vhost_virtqueue *vq;
> > +
> > /* We have to stop the queue (virtio) if it is running. */
> > if (dev->flags & VIRTIO_DEV_RUNNING) {
> > dev->flags &= ~VIRTIO_DEV_RUNNING;
> > notify_ops->destroy_device(dev->vid);
> > }
> >
> > +   vq = dev->virtqueue[state->index];
> > /* Here we are safe to get the last used index */
> > -   state->num = dev->virtqueue[state->index]->last_used_idx;
> > +   state->num = vq->last_used_idx;
> > +   if (vq->shadow_used_ring) {
> > +   rte_free(vq->shadow_used_ring);
> > +   vq->shadow_used_ring = NULL;
> > +   }
> >
> > RTE_LOG(INFO, VHOST_CONFIG,
> > "vring base idx:%d file:%d\n", state->index, state->num);
> > @@ -627,10 +648,10 @@ vhost_user_get_vring_base(struct virtio_net
> *dev,
> >  * sent and only sent in vhost_vring_stop.
> >  * TODO: cleanup the vring, it isn't usable since here.
> >  */
> > -   if (dev->virtqueue[state->index]->kickfd >= 0)
> > -   close(dev->virtqueue[state->index]->kickfd);
> > +   if (vq->kickfd >= 0)
> > +   

[dpdk-dev] [PATCH v5 5/6] vhost: batch update used ring

2016-09-12 Thread Maxime Coquelin


On 09/09/2016 05:39 AM, Zhihong Wang wrote:
> This patch enables batch update of the used ring for better efficiency.
>
> Signed-off-by: Zhihong Wang 
> ---
> Changes in v4:
>
>  1. Free shadow used ring in the right place.
>
>  2. Add failure check for shadow used ring malloc.
>
>  lib/librte_vhost/vhost.c  | 20 --
>  lib/librte_vhost/vhost.h  |  4 +++
>  lib/librte_vhost/vhost_user.c | 31 +
>  lib/librte_vhost/virtio_net.c | 64 
> +++
>  4 files changed, 101 insertions(+), 18 deletions(-)
>
> diff --git a/lib/librte_vhost/vhost.c b/lib/librte_vhost/vhost.c
> index 46095c3..cb31cdd 100644
> --- a/lib/librte_vhost/vhost.c
> +++ b/lib/librte_vhost/vhost.c
> @@ -119,10 +119,26 @@ cleanup_device(struct virtio_net *dev, int destroy)
>  static void
>  free_device(struct virtio_net *dev)
>  {
> + struct vhost_virtqueue *vq_0;
> + struct vhost_virtqueue *vq_1;
>   uint32_t i;
>
> - for (i = 0; i < dev->virt_qp_nb; i++)
> - rte_free(dev->virtqueue[i * VIRTIO_QNUM]);
> + for (i = 0; i < dev->virt_qp_nb; i++) {
> + vq_0 = dev->virtqueue[i * VIRTIO_QNUM];
> + if (vq_0->shadow_used_ring) {
> + rte_free(vq_0->shadow_used_ring);
> + vq_0->shadow_used_ring = NULL;
> + }
> +
> + vq_1 = dev->virtqueue[i * VIRTIO_QNUM + 1];
> + if (vq_1->shadow_used_ring) {
> + rte_free(vq_1->shadow_used_ring);
> + vq_1->shadow_used_ring = NULL;
> + }
> +
> + /* malloc together, free together */
> + rte_free(vq_0);
> + }
>
>   rte_free(dev);
>  }
> diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
> index 9707dfc..381dc27 100644
> --- a/lib/librte_vhost/vhost.h
> +++ b/lib/librte_vhost/vhost.h
> @@ -85,6 +85,10 @@ struct vhost_virtqueue {
>
>   /* Physical address of used ring, for logging */
>   uint64_tlog_guest_addr;
> +
> + /* Shadow used ring for performance */
> + struct vring_used_elem  *shadow_used_ring;
> + uint32_tshadow_used_idx;
>  } __rte_cache_aligned;
>
>  /* Old kernels have no such macro defined */
> diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.c
> index eee99e9..d7cf1ed 100644
> --- a/lib/librte_vhost/vhost_user.c
> +++ b/lib/librte_vhost/vhost_user.c
> @@ -193,7 +193,21 @@ static int
>  vhost_user_set_vring_num(struct virtio_net *dev,
>struct vhost_vring_state *state)
>  {
> - dev->virtqueue[state->index]->size = state->num;
> + struct vhost_virtqueue *vq;
> +
> + vq = dev->virtqueue[state->index];
> + vq->size = state->num;
> + if (!vq->shadow_used_ring) {
> + vq->shadow_used_ring = rte_malloc(NULL,
> + vq->size * sizeof(struct vring_used_elem),
> + RTE_CACHE_LINE_SIZE);
> + if (!vq->shadow_used_ring) {
> + RTE_LOG(ERR, VHOST_CONFIG,
> + "Failed to allocate memory"
> + " for shadow used ring.\n");
> + return -1;
> + }
> + }
>
>   return 0;
>  }
> @@ -611,14 +625,21 @@ static int
>  vhost_user_get_vring_base(struct virtio_net *dev,
> struct vhost_vring_state *state)
>  {
> + struct vhost_virtqueue *vq;
> +
>   /* We have to stop the queue (virtio) if it is running. */
>   if (dev->flags & VIRTIO_DEV_RUNNING) {
>   dev->flags &= ~VIRTIO_DEV_RUNNING;
>   notify_ops->destroy_device(dev->vid);
>   }
>
> + vq = dev->virtqueue[state->index];
>   /* Here we are safe to get the last used index */
> - state->num = dev->virtqueue[state->index]->last_used_idx;
> + state->num = vq->last_used_idx;
> + if (vq->shadow_used_ring) {
> + rte_free(vq->shadow_used_ring);
> + vq->shadow_used_ring = NULL;
> + }
>
>   RTE_LOG(INFO, VHOST_CONFIG,
>   "vring base idx:%d file:%d\n", state->index, state->num);
> @@ -627,10 +648,10 @@ vhost_user_get_vring_base(struct virtio_net *dev,
>* sent and only sent in vhost_vring_stop.
>* TODO: cleanup the vring, it isn't usable since here.
>*/
> - if (dev->virtqueue[state->index]->kickfd >= 0)
> - close(dev->virtqueue[state->index]->kickfd);
> + if (vq->kickfd >= 0)
> + close(vq->kickfd);
>
> - dev->virtqueue[state->index]->kickfd = VIRTIO_UNINITIALIZED_EVENTFD;
> + vq->kickfd = VIRTIO_UNINITIALIZED_EVENTFD;
>
>   return 0;
>  }
> diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c
> index b38f18f..e9f6353 100644
> --- a/lib/librte_vhost/virtio_net.c
> +++ b/lib/librte_vhost/virtio_net.c
> @@ -134,17 +134,52 @@ virtio_enqueue_offload(struct rte_mbuf *m_buf, struct 
> 

[dpdk-dev] [PATCH v5 5/6] vhost: batch update used ring

2016-09-09 Thread Zhihong Wang
This patch enables batch update of the used ring for better efficiency.

Signed-off-by: Zhihong Wang 
---
Changes in v4:

 1. Free shadow used ring in the right place.

 2. Add failure check for shadow used ring malloc.

 lib/librte_vhost/vhost.c  | 20 --
 lib/librte_vhost/vhost.h  |  4 +++
 lib/librte_vhost/vhost_user.c | 31 +
 lib/librte_vhost/virtio_net.c | 64 +++
 4 files changed, 101 insertions(+), 18 deletions(-)

diff --git a/lib/librte_vhost/vhost.c b/lib/librte_vhost/vhost.c
index 46095c3..cb31cdd 100644
--- a/lib/librte_vhost/vhost.c
+++ b/lib/librte_vhost/vhost.c
@@ -119,10 +119,26 @@ cleanup_device(struct virtio_net *dev, int destroy)
 static void
 free_device(struct virtio_net *dev)
 {
+   struct vhost_virtqueue *vq_0;
+   struct vhost_virtqueue *vq_1;
uint32_t i;

-   for (i = 0; i < dev->virt_qp_nb; i++)
-   rte_free(dev->virtqueue[i * VIRTIO_QNUM]);
+   for (i = 0; i < dev->virt_qp_nb; i++) {
+   vq_0 = dev->virtqueue[i * VIRTIO_QNUM];
+   if (vq_0->shadow_used_ring) {
+   rte_free(vq_0->shadow_used_ring);
+   vq_0->shadow_used_ring = NULL;
+   }
+
+   vq_1 = dev->virtqueue[i * VIRTIO_QNUM + 1];
+   if (vq_1->shadow_used_ring) {
+   rte_free(vq_1->shadow_used_ring);
+   vq_1->shadow_used_ring = NULL;
+   }
+
+   /* malloc together, free together */
+   rte_free(vq_0);
+   }

rte_free(dev);
 }
diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
index 9707dfc..381dc27 100644
--- a/lib/librte_vhost/vhost.h
+++ b/lib/librte_vhost/vhost.h
@@ -85,6 +85,10 @@ struct vhost_virtqueue {

/* Physical address of used ring, for logging */
uint64_t log_guest_addr;
+
+   /* Shadow used ring for performance */
+   struct vring_used_elem  *shadow_used_ring;
+   uint32_t shadow_used_idx;
 } __rte_cache_aligned;

 /* Old kernels have no such macro defined */
diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.c
index eee99e9..d7cf1ed 100644
--- a/lib/librte_vhost/vhost_user.c
+++ b/lib/librte_vhost/vhost_user.c
@@ -193,7 +193,21 @@ static int
 vhost_user_set_vring_num(struct virtio_net *dev,
 struct vhost_vring_state *state)
 {
-   dev->virtqueue[state->index]->size = state->num;
+   struct vhost_virtqueue *vq;
+
+   vq = dev->virtqueue[state->index];
+   vq->size = state->num;
+   if (!vq->shadow_used_ring) {
+   vq->shadow_used_ring = rte_malloc(NULL,
+   vq->size * sizeof(struct vring_used_elem),
+   RTE_CACHE_LINE_SIZE);
+   if (!vq->shadow_used_ring) {
+   RTE_LOG(ERR, VHOST_CONFIG,
+   "Failed to allocate memory"
+   " for shadow used ring.\n");
+   return -1;
+   }
+   }

return 0;
 }
@@ -611,14 +625,21 @@ static int
 vhost_user_get_vring_base(struct virtio_net *dev,
  struct vhost_vring_state *state)
 {
+   struct vhost_virtqueue *vq;
+
/* We have to stop the queue (virtio) if it is running. */
if (dev->flags & VIRTIO_DEV_RUNNING) {
dev->flags &= ~VIRTIO_DEV_RUNNING;
notify_ops->destroy_device(dev->vid);
}

+   vq = dev->virtqueue[state->index];
/* Here we are safe to get the last used index */
-   state->num = dev->virtqueue[state->index]->last_used_idx;
+   state->num = vq->last_used_idx;
+   if (vq->shadow_used_ring) {
+   rte_free(vq->shadow_used_ring);
+   vq->shadow_used_ring = NULL;
+   }

RTE_LOG(INFO, VHOST_CONFIG,
"vring base idx:%d file:%d\n", state->index, state->num);
@@ -627,10 +648,10 @@ vhost_user_get_vring_base(struct virtio_net *dev,
 * sent and only sent in vhost_vring_stop.
 * TODO: cleanup the vring, it isn't usable since here.
 */
-   if (dev->virtqueue[state->index]->kickfd >= 0)
-   close(dev->virtqueue[state->index]->kickfd);
+   if (vq->kickfd >= 0)
+   close(vq->kickfd);

-   dev->virtqueue[state->index]->kickfd = VIRTIO_UNINITIALIZED_EVENTFD;
+   vq->kickfd = VIRTIO_UNINITIALIZED_EVENTFD;

return 0;
 }
diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c
index b38f18f..e9f6353 100644
--- a/lib/librte_vhost/virtio_net.c
+++ b/lib/librte_vhost/virtio_net.c
@@ -134,17 +134,52 @@ virtio_enqueue_offload(struct rte_mbuf *m_buf, struct 
virtio_net_hdr *net_hdr)
 }

 static inline void __attribute__((always_inline))
-update_used_ring(struct virtio_net *dev, struct vhost_virtqueue *vq,
-