Re: [PATCH V2 3/3] vhost_net: basic polling support

2016-01-21 Thread Jason Wang


On 01/20/2016 10:35 PM, Michael S. Tsirkin wrote:
> On Tue, Dec 01, 2015 at 02:39:45PM +0800, Jason Wang wrote:
>> This patch tries to poll for newly added tx buffers or the socket
>> receive queue for a while at the end of tx/rx processing. The maximum
>> time spent on polling is specified through a new kind of vring ioctl.
>>
>> Signed-off-by: Jason Wang <jasowang@redhat.com>
>> ---
>>  drivers/vhost/net.c        | 72 +++++++++++++++++++++++++++++++++++---
>>  drivers/vhost/vhost.c      | 15 ++++++++++
>>  drivers/vhost/vhost.h      |  1 +
>>  include/uapi/linux/vhost.h | 11 ++++++
>>  4 files changed, 94 insertions(+), 5 deletions(-)
>>
>> diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
>> index 9eda69e..ce6da77 100644
>> --- a/drivers/vhost/net.c
>> +++ b/drivers/vhost/net.c
>> @@ -287,6 +287,41 @@ static void vhost_zerocopy_callback(struct ubuf_info *ubuf, bool success)
>>  rcu_read_unlock_bh();
>>  }
>>  
>> +static inline unsigned long busy_clock(void)
>> +{
>> +	return local_clock() >> 10;
>> +}
>> +
>> +static bool vhost_can_busy_poll(struct vhost_dev *dev,
>> +				unsigned long endtime)
>> +{
>> +	return likely(!need_resched()) &&
>> +	       likely(!time_after(busy_clock(), endtime)) &&
>> +	       likely(!signal_pending(current)) &&
>> +	       !vhost_has_work(dev) &&
>> +	       single_task_running();
>> +}
>> +
>> +static int vhost_net_tx_get_vq_desc(struct vhost_net *net,
>> +				    struct vhost_virtqueue *vq,
>> +				    struct iovec iov[], unsigned int iov_size,
>> +				    unsigned int *out_num, unsigned int *in_num)
>> +{
>> +	unsigned long uninitialized_var(endtime);
>> +
>> +	if (vq->busyloop_timeout) {
>> +		preempt_disable();
>> +		endtime = busy_clock() + vq->busyloop_timeout;
>> +		while (vhost_can_busy_poll(vq->dev, endtime) &&
>> +		       !vhost_vq_more_avail(vq->dev, vq))
>> +			cpu_relax();
>> +		preempt_enable();
>> +	}
> Isn't there a way to call all this after vhost_get_vq_desc?

We can.

> First, this will reduce the good path overhead as you
> won't have to play with timers and preemption.

For the good path, yes.

>
> Second, this will reduce the chance of a pagefault on avail ring read.

Yes.
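
Something like the following sketch, perhaps (illustrative only, not
tested; it assumes vhost_get_vq_desc() returns vq->num when the ring is
empty, as the current code does):

static int vhost_net_tx_get_vq_desc(struct vhost_net *net,
				    struct vhost_virtqueue *vq,
				    struct iovec iov[], unsigned int iov_size,
				    unsigned int *out_num, unsigned int *in_num)
{
	unsigned long endtime;
	int head = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov),
				     out_num, in_num, NULL, NULL);

	/* Good path: a buffer was already available (or an error was
	 * returned), so no time is spent on timers or preemption. */
	if (head != vq->num || !vq->busyloop_timeout)
		return head;

	/* Ring was empty: busy poll for a while, then retry once. */
	preempt_disable();
	endtime = busy_clock() + vq->busyloop_timeout;
	while (vhost_can_busy_poll(vq->dev, endtime) &&
	       !vhost_vq_more_avail(vq->dev, vq))
		cpu_relax();
	preempt_enable();

	return vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov),
				 out_num, in_num, NULL, NULL);
}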

>
>> +
>> +	return vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov),
>> +				 out_num, in_num, NULL, NULL);
>> +}
>> +
>>  /* Expects to be always run from workqueue - which acts as
>>   * read-size critical section for our kind of RCU. */
>>  static void handle_tx(struct vhost_net *net)
>> @@ -331,10 +366,9 @@ static void handle_tx(struct vhost_net *net)
>>% UIO_MAXIOV == nvq->done_idx))
>>  break;
>>  
>> -		head = vhost_get_vq_desc(vq, vq->iov,
>> -					 ARRAY_SIZE(vq->iov),
>> -					 &out, &in,
>> -					 NULL, NULL);
>> +		head = vhost_net_tx_get_vq_desc(net, vq, vq->iov,
>> +						ARRAY_SIZE(vq->iov),
>> +						&out, &in);
>>  		/* On error, stop handling until the next kick. */
>>  		if (unlikely(head < 0))
>>  			break;
>> @@ -435,6 +469,34 @@ static int peek_head_len(struct sock *sk)
>>  return len;
>>  }
>>  
>> +static int vhost_net_peek_head_len(struct vhost_net *net, struct sock *sk)
> Need a hint that it's rx related in the name.

Ok.

>
>> +{
>> +	struct vhost_net_virtqueue *nvq = &net->vqs[VHOST_NET_VQ_TX];
>> +	struct vhost_virtqueue *vq = &nvq->vq;
>> +	unsigned long uninitialized_var(endtime);
>> +
>> +	if (vq->busyloop_timeout) {
>> +		mutex_lock(&vq->mutex);
> This appears to be called under vq mutex in handle_rx.
> So how does this work then?

This is the tx mutex; it's an optimization: both the rx socket and the
tx ring are polled here, so there's no need for a tx notification
anymore. This can save lots of vmexits.

>
>
>> +		vhost_disable_notify(&net->dev, vq);
> This appears to be called after disable notify
> in handle_rx - so why disable here again?

It disables the tx notification, not the rx one.

>
>> +
>> +		preempt_disable();
>> +		endtime = busy_clock() + vq->busyloop_timeout;
>> +
>> +		while (vhost_can_busy_poll(&net->dev, endtime) &&
>> +		       skb_queue_empty(&sk->sk_receive_queue) &&
>> +		       !vhost_vq_more_avail(&net->dev, vq))
>> +			cpu_relax();
> This seems to mix in several items.
> RX queue is normally not empty. I don't think
> we need to poll for that.
> So IMHO we only need to poll for sk_receive_queue really.

Same as above: the tx virtqueue is being polled here as well.

>
>> +
>> +		preempt_enable();
>> +
>> +		if (vhost_enable_notify(&net->dev, vq))
>> +			vhost_poll_queue(&vq->poll);
> But 
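
Putting the three answers above together, here is the rx-side helper
with the tx/rx roles spelled out (comments added for clarity, and the
function renamed with an rx hint as requested above; the logic is
unchanged from the patch):

static int vhost_net_rx_peek_head_len(struct vhost_net *net, struct sock *sk)
{
	/* This runs from the rx path, but nvq/vq below are the TX queue. */
	struct vhost_net_virtqueue *nvq = &net->vqs[VHOST_NET_VQ_TX];
	struct vhost_virtqueue *vq = &nvq->vq;
	unsigned long endtime;

	if (vq->busyloop_timeout) {
		/* handle_rx() holds the rx vq mutex; this is the tx one. */
		mutex_lock(&vq->mutex);
		/* Suppress guest tx kicks: the loop below watches the tx
		 * ring directly, which is what saves the vmexits. */
		vhost_disable_notify(&net->dev, vq);

		preempt_disable();
		endtime = busy_clock() + vq->busyloop_timeout;
		/* Spin until an rx skb arrives, tx buffers show up, or we
		 * must yield (resched, signal, other vhost work). */
		while (vhost_can_busy_poll(&net->dev, endtime) &&
		       skb_queue_empty(&sk->sk_receive_queue) &&
		       !vhost_vq_more_avail(&net->dev, vq))
			cpu_relax();
		preempt_enable();

		/* Re-arm tx notification; if the guest queued tx buffers
		 * meanwhile, schedule tx processing explicitly. */
		if (vhost_enable_notify(&net->dev, vq))
			vhost_poll_queue(&vq->poll);
		mutex_unlock(&vq->mutex);
	}

	return peek_head_len(sk);
}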

Re: [PATCH V2 3/3] vhost_net: basic polling support

2016-01-20 Thread Yang Zhang

On 2016/1/21 13:13, Michael S. Tsirkin wrote:

On Thu, Jan 21, 2016 at 10:11:35AM +0800, Yang Zhang wrote:

On 2016/1/20 22:35, Michael S. Tsirkin wrote:

On Tue, Dec 01, 2015 at 02:39:45PM +0800, Jason Wang wrote:

This patch tries to poll for newly added tx buffers or the socket receive
queue for a while at the end of tx/rx processing. The maximum time
spent on polling is specified through a new kind of vring ioctl.

Signed-off-by: Jason Wang <jasowang@redhat.com>
---
  drivers/vhost/net.c        | 72 +++++++++++++++++++++++++++++++++++---
  drivers/vhost/vhost.c      | 15 ++++++++++
  drivers/vhost/vhost.h      |  1 +
  include/uapi/linux/vhost.h | 11 ++++++
  4 files changed, 94 insertions(+), 5 deletions(-)

diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index 9eda69e..ce6da77 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -287,6 +287,41 @@ static void vhost_zerocopy_callback(struct ubuf_info *ubuf, bool success)
rcu_read_unlock_bh();
  }

+static inline unsigned long busy_clock(void)
+{
+   return local_clock() >> 10;
+}
+
+static bool vhost_can_busy_poll(struct vhost_dev *dev,
+   unsigned long endtime)
+{
+   return likely(!need_resched()) &&
+  likely(!time_after(busy_clock(), endtime)) &&
+  likely(!signal_pending(current)) &&
+  !vhost_has_work(dev) &&
+  single_task_running();
+}
+
+static int vhost_net_tx_get_vq_desc(struct vhost_net *net,
+   struct vhost_virtqueue *vq,
+   struct iovec iov[], unsigned int iov_size,
+   unsigned int *out_num, unsigned int *in_num)
+{
+   unsigned long uninitialized_var(endtime);
+
+   if (vq->busyloop_timeout) {
+   preempt_disable();
+   endtime = busy_clock() + vq->busyloop_timeout;
+   while (vhost_can_busy_poll(vq->dev, endtime) &&
+  !vhost_vq_more_avail(vq->dev, vq))
+   cpu_relax();
+   preempt_enable();
+   }


Isn't there a way to call all this after vhost_get_vq_desc?
First, this will reduce the good path overhead as you
won't have to play with timers and preemption.

Second, this will reduce the chance of a pagefault on avail ring read.


+
+   return vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov),
+out_num, in_num, NULL, NULL);
+}
+
  /* Expects to be always run from workqueue - which acts as
   * read-size critical section for our kind of RCU. */
  static void handle_tx(struct vhost_net *net)
@@ -331,10 +366,9 @@ static void handle_tx(struct vhost_net *net)
  % UIO_MAXIOV == nvq->done_idx))
break;

-   head = vhost_get_vq_desc(vq, vq->iov,
-            ARRAY_SIZE(vq->iov),
-            &out, &in,
-            NULL, NULL);
+   head = vhost_net_tx_get_vq_desc(net, vq, vq->iov,
+           ARRAY_SIZE(vq->iov),
+           &out, &in);
/* On error, stop handling until the next kick. */
if (unlikely(head < 0))
break;
@@ -435,6 +469,34 @@ static int peek_head_len(struct sock *sk)
return len;
  }

+static int vhost_net_peek_head_len(struct vhost_net *net, struct sock *sk)


Need a hint that it's rx related in the name.


+{
+   struct vhost_net_virtqueue *nvq = &net->vqs[VHOST_NET_VQ_TX];
+   struct vhost_virtqueue *vq = &nvq->vq;
+   unsigned long uninitialized_var(endtime);
+
+   if (vq->busyloop_timeout) {
+   mutex_lock(&vq->mutex);


This appears to be called under vq mutex in handle_rx.
So how does this work then?



+   vhost_disable_notify(&net->dev, vq);


This appears to be called after disable notify
in handle_rx - so why disable here again?


+
+   preempt_disable();
+   endtime = busy_clock() + vq->busyloop_timeout;
+
+   while (vhost_can_busy_poll(&net->dev, endtime) &&
+          skb_queue_empty(&sk->sk_receive_queue) &&
+          !vhost_vq_more_avail(&net->dev, vq))
+   cpu_relax();


This seems to mix in several items.
RX queue is normally not empty. I don't think
we need to poll for that.


I have seen the RX queue easily become empty under some extreme conditions,
like lots of small packets. So maybe the check is useful here.


It's not useful *here*.
If you have an rx packet but no space in the ring,
this will exit immediately.


Indeed!



It might be useful elsewhere, but I doubt it -
if the rx ring is out of buffers, you are better off
backing out and giving the guest some breathing space.


--
best regards
yang



--
best regards
yang


Re: [PATCH V2 3/3] vhost_net: basic polling support

2016-01-20 Thread Michael S. Tsirkin
On Thu, Jan 21, 2016 at 10:11:35AM +0800, Yang Zhang wrote:
> On 2016/1/20 22:35, Michael S. Tsirkin wrote:
> >On Tue, Dec 01, 2015 at 02:39:45PM +0800, Jason Wang wrote:
> >>This patch tries to poll for newly added tx buffers or the socket receive
> >>queue for a while at the end of tx/rx processing. The maximum time
> >>spent on polling is specified through a new kind of vring ioctl.
> >>
> >>Signed-off-by: Jason Wang <jasowang@redhat.com>
> >>---
> >>  drivers/vhost/net.c        | 72 +++++++++++++++++++++++++++++++++++---
> >>  drivers/vhost/vhost.c      | 15 ++++++++++
> >>  drivers/vhost/vhost.h      |  1 +
> >>  include/uapi/linux/vhost.h | 11 ++++++
> >>  4 files changed, 94 insertions(+), 5 deletions(-)
> >>
> >>diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
> >>index 9eda69e..ce6da77 100644
> >>--- a/drivers/vhost/net.c
> >>+++ b/drivers/vhost/net.c
> >>@@ -287,6 +287,41 @@ static void vhost_zerocopy_callback(struct ubuf_info *ubuf, bool success)
> >>rcu_read_unlock_bh();
> >>  }
> >>
> >>+static inline unsigned long busy_clock(void)
> >>+{
> >>+   return local_clock() >> 10;
> >>+}
> >>+
> >>+static bool vhost_can_busy_poll(struct vhost_dev *dev,
> >>+   unsigned long endtime)
> >>+{
> >>+   return likely(!need_resched()) &&
> >>+  likely(!time_after(busy_clock(), endtime)) &&
> >>+  likely(!signal_pending(current)) &&
> >>+  !vhost_has_work(dev) &&
> >>+  single_task_running();
> >>+}
> >>+
> >>+static int vhost_net_tx_get_vq_desc(struct vhost_net *net,
> >>+   struct vhost_virtqueue *vq,
> >>+   struct iovec iov[], unsigned int iov_size,
> >>+   unsigned int *out_num, unsigned int *in_num)
> >>+{
> >>+   unsigned long uninitialized_var(endtime);
> >>+
> >>+   if (vq->busyloop_timeout) {
> >>+   preempt_disable();
> >>+   endtime = busy_clock() + vq->busyloop_timeout;
> >>+   while (vhost_can_busy_poll(vq->dev, endtime) &&
> >>+  !vhost_vq_more_avail(vq->dev, vq))
> >>+   cpu_relax();
> >>+   preempt_enable();
> >>+   }
> >
> >Isn't there a way to call all this after vhost_get_vq_desc?
> >First, this will reduce the good path overhead as you
> >won't have to play with timers and preemption.
> >
> >Second, this will reduce the chance of a pagefault on avail ring read.
> >
> >>+
> >>+   return vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov),
> >>+out_num, in_num, NULL, NULL);
> >>+}
> >>+
> >>  /* Expects to be always run from workqueue - which acts as
> >>   * read-size critical section for our kind of RCU. */
> >>  static void handle_tx(struct vhost_net *net)
> >>@@ -331,10 +366,9 @@ static void handle_tx(struct vhost_net *net)
> >>  % UIO_MAXIOV == nvq->done_idx))
> >>break;
> >>
> >>-   head = vhost_get_vq_desc(vq, vq->iov,
> >>-            ARRAY_SIZE(vq->iov),
> >>-            &out, &in,
> >>-            NULL, NULL);
> >>+   head = vhost_net_tx_get_vq_desc(net, vq, vq->iov,
> >>+           ARRAY_SIZE(vq->iov),
> >>+           &out, &in);
> >>/* On error, stop handling until the next kick. */
> >>if (unlikely(head < 0))
> >>break;
> >>@@ -435,6 +469,34 @@ static int peek_head_len(struct sock *sk)
> >>return len;
> >>  }
> >>
> >>+static int vhost_net_peek_head_len(struct vhost_net *net, struct sock *sk)
> >
> >Need a hint that it's rx related in the name.
> >
> >>+{
> >>+   struct vhost_net_virtqueue *nvq = &net->vqs[VHOST_NET_VQ_TX];
> >>+   struct vhost_virtqueue *vq = &nvq->vq;
> >>+   unsigned long uninitialized_var(endtime);
> >>+
> >>+   if (vq->busyloop_timeout) {
> >>+   mutex_lock(&vq->mutex);
> >
> >This appears to be called under vq mutex in handle_rx.
> >So how does this work then?
> >
> >
> >>+   vhost_disable_notify(&net->dev, vq);
> >
> >This appears to be called after disable notify
> >in handle_rx - so why disable here again?
> >
> >>+
> >>+   preempt_disable();
> >>+   endtime = busy_clock() + vq->busyloop_timeout;
> >>+
> >>+   while (vhost_can_busy_poll(&net->dev, endtime) &&
> >>+          skb_queue_empty(&sk->sk_receive_queue) &&
> >>+          !vhost_vq_more_avail(&net->dev, vq))
> >>+   cpu_relax();
> >
> >This seems to mix in several items.
> >RX queue is normally not empty. I don't think
> >we need to poll for that.
> 
> I have seen the RX queue easily become empty under some extreme conditions,
> like lots of small packets. So maybe the check is useful here.

It's not useful *here*.
If you have an rx packet but no space in the ring,
this will exit immediately.

It might be useful elsewhere, but I doubt it -
if the rx ring is out of buffers, you are better off
backing out and giving the guest some breathing space.

Re: [PATCH V2 3/3] vhost_net: basic polling support

2016-01-20 Thread Yang Zhang

On 2016/1/20 22:35, Michael S. Tsirkin wrote:

On Tue, Dec 01, 2015 at 02:39:45PM +0800, Jason Wang wrote:

This patch tries to poll for newly added tx buffers or the socket receive
queue for a while at the end of tx/rx processing. The maximum time
spent on polling is specified through a new kind of vring ioctl.

Signed-off-by: Jason Wang <jasowang@redhat.com>
---
  drivers/vhost/net.c        | 72 +++++++++++++++++++++++++++++++++++---
  drivers/vhost/vhost.c      | 15 ++++++++++
  drivers/vhost/vhost.h      |  1 +
  include/uapi/linux/vhost.h | 11 ++++++
  4 files changed, 94 insertions(+), 5 deletions(-)

diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index 9eda69e..ce6da77 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -287,6 +287,41 @@ static void vhost_zerocopy_callback(struct ubuf_info *ubuf, bool success)
rcu_read_unlock_bh();
  }

+static inline unsigned long busy_clock(void)
+{
+   return local_clock() >> 10;
+}
+
+static bool vhost_can_busy_poll(struct vhost_dev *dev,
+   unsigned long endtime)
+{
+   return likely(!need_resched()) &&
+  likely(!time_after(busy_clock(), endtime)) &&
+  likely(!signal_pending(current)) &&
+  !vhost_has_work(dev) &&
+  single_task_running();
+}
+
+static int vhost_net_tx_get_vq_desc(struct vhost_net *net,
+   struct vhost_virtqueue *vq,
+   struct iovec iov[], unsigned int iov_size,
+   unsigned int *out_num, unsigned int *in_num)
+{
+   unsigned long uninitialized_var(endtime);
+
+   if (vq->busyloop_timeout) {
+   preempt_disable();
+   endtime = busy_clock() + vq->busyloop_timeout;
+   while (vhost_can_busy_poll(vq->dev, endtime) &&
+  !vhost_vq_more_avail(vq->dev, vq))
+   cpu_relax();
+   preempt_enable();
+   }


Isn't there a way to call all this after vhost_get_vq_desc?
First, this will reduce the good path overhead as you
won't have to play with timers and preemption.

Second, this will reduce the chance of a pagefault on avail ring read.


+
+   return vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov),
+out_num, in_num, NULL, NULL);
+}
+
  /* Expects to be always run from workqueue - which acts as
   * read-size critical section for our kind of RCU. */
  static void handle_tx(struct vhost_net *net)
@@ -331,10 +366,9 @@ static void handle_tx(struct vhost_net *net)
  % UIO_MAXIOV == nvq->done_idx))
break;

-   head = vhost_get_vq_desc(vq, vq->iov,
-            ARRAY_SIZE(vq->iov),
-            &out, &in,
-            NULL, NULL);
+   head = vhost_net_tx_get_vq_desc(net, vq, vq->iov,
+           ARRAY_SIZE(vq->iov),
+           &out, &in);
/* On error, stop handling until the next kick. */
if (unlikely(head < 0))
break;
@@ -435,6 +469,34 @@ static int peek_head_len(struct sock *sk)
return len;
  }

+static int vhost_net_peek_head_len(struct vhost_net *net, struct sock *sk)


Need a hint that it's rx related in the name.


+{
+   struct vhost_net_virtqueue *nvq = &net->vqs[VHOST_NET_VQ_TX];
+   struct vhost_virtqueue *vq = &nvq->vq;
+   unsigned long uninitialized_var(endtime);
+
+   if (vq->busyloop_timeout) {
+   mutex_lock(&vq->mutex);


This appears to be called under vq mutex in handle_rx.
So how does this work then?



+   vhost_disable_notify(&net->dev, vq);


This appears to be called after disable notify
in handle_rx - so why disable here again?


+
+   preempt_disable();
+   endtime = busy_clock() + vq->busyloop_timeout;
+
+   while (vhost_can_busy_poll(&net->dev, endtime) &&
+          skb_queue_empty(&sk->sk_receive_queue) &&
+          !vhost_vq_more_avail(&net->dev, vq))
+   cpu_relax();


This seems to mix in several items.
RX queue is normally not empty. I don't think
we need to poll for that.


I have seen the RX queue easily become empty under some extreme
conditions, like lots of small packets. So maybe the check is useful here.


--
best regards
yang


[PATCH V2 3/3] vhost_net: basic polling support

2015-11-30 Thread Jason Wang
This patch tries to poll for newly added tx buffers or the socket receive
queue for a while at the end of tx/rx processing. The maximum time
spent on polling is specified through a new kind of vring ioctl.

Signed-off-by: Jason Wang <jasowang@redhat.com>
---
 drivers/vhost/net.c        | 72 +++++++++++++++++++++++++++++++++++---
 drivers/vhost/vhost.c      | 15 ++++++++++
 drivers/vhost/vhost.h      |  1 +
 include/uapi/linux/vhost.h | 11 ++++++
 4 files changed, 94 insertions(+), 5 deletions(-)

diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index 9eda69e..ce6da77 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -287,6 +287,41 @@ static void vhost_zerocopy_callback(struct ubuf_info *ubuf, bool success)
rcu_read_unlock_bh();
 }
 
+static inline unsigned long busy_clock(void)
+{
+   return local_clock() >> 10;
+}
+
+static bool vhost_can_busy_poll(struct vhost_dev *dev,
+   unsigned long endtime)
+{
+   return likely(!need_resched()) &&
+  likely(!time_after(busy_clock(), endtime)) &&
+  likely(!signal_pending(current)) &&
+  !vhost_has_work(dev) &&
+  single_task_running();
+}
+
+static int vhost_net_tx_get_vq_desc(struct vhost_net *net,
+   struct vhost_virtqueue *vq,
+   struct iovec iov[], unsigned int iov_size,
+   unsigned int *out_num, unsigned int *in_num)
+{
+   unsigned long uninitialized_var(endtime);
+
+   if (vq->busyloop_timeout) {
+   preempt_disable();
+   endtime = busy_clock() + vq->busyloop_timeout;
+   while (vhost_can_busy_poll(vq->dev, endtime) &&
+  !vhost_vq_more_avail(vq->dev, vq))
+   cpu_relax();
+   preempt_enable();
+   }
+
+   return vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov),
+out_num, in_num, NULL, NULL);
+}
+
 /* Expects to be always run from workqueue - which acts as
  * read-size critical section for our kind of RCU. */
 static void handle_tx(struct vhost_net *net)
@@ -331,10 +366,9 @@ static void handle_tx(struct vhost_net *net)
  % UIO_MAXIOV == nvq->done_idx))
break;
 
-   head = vhost_get_vq_desc(vq, vq->iov,
-            ARRAY_SIZE(vq->iov),
-            &out, &in,
-            NULL, NULL);
+   head = vhost_net_tx_get_vq_desc(net, vq, vq->iov,
+           ARRAY_SIZE(vq->iov),
+           &out, &in);
/* On error, stop handling until the next kick. */
if (unlikely(head < 0))
break;
@@ -435,6 +469,34 @@ static int peek_head_len(struct sock *sk)
return len;
 }
 
+static int vhost_net_peek_head_len(struct vhost_net *net, struct sock *sk)
+{
+   struct vhost_net_virtqueue *nvq = &net->vqs[VHOST_NET_VQ_TX];
+   struct vhost_virtqueue *vq = &nvq->vq;
+   unsigned long uninitialized_var(endtime);
+
+   if (vq->busyloop_timeout) {
+   mutex_lock(&vq->mutex);
+   vhost_disable_notify(&net->dev, vq);
+
+   preempt_disable();
+   endtime = busy_clock() + vq->busyloop_timeout;
+
+   while (vhost_can_busy_poll(&net->dev, endtime) &&
+          skb_queue_empty(&sk->sk_receive_queue) &&
+          !vhost_vq_more_avail(&net->dev, vq))
+   cpu_relax();
+
+   preempt_enable();
+
+   if (vhost_enable_notify(&net->dev, vq))
+   vhost_poll_queue(&vq->poll);
+   mutex_unlock(&vq->mutex);
+   }
+
+   return peek_head_len(sk);
+}
+
 /* This is a multi-buffer version of vhost_get_desc, that works if
  * vq has read descriptors only.
  * @vq - the relevant virtqueue
@@ -553,7 +615,7 @@ static void handle_rx(struct vhost_net *net)
vq->log : NULL;
mergeable = vhost_has_feature(vq, VIRTIO_NET_F_MRG_RXBUF);
 
-   while ((sock_len = peek_head_len(sock->sk))) {
+   while ((sock_len = vhost_net_peek_head_len(net, sock->sk))) {
sock_len += sock_hlen;
vhost_len = sock_len + vhost_hlen;
headcount = get_rx_bufs(vq, vq->heads, vhost_len,
diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index 4f45a03..b8ca873 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -285,6 +285,7 @@ static void vhost_vq_reset(struct vhost_dev *dev,
vq->memory = NULL;
vq->is_le = virtio_legacy_is_little_endian();
vhost_vq_reset_user_be(vq);
+   vq->busyloop_timeout = 0;
 }
 
 static int vhost_worker(void *data)
@@ -747,6 +748,7 @@ long vhost_vring_ioctl(struct vhost_dev *d, int ioctl, void __user *argp)
	struct vhost_vring_state s;

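For completeness, a sketch of how userspace could program the new
timeout (illustrative only: the remainder of the vhost.c and uapi hunks
is truncated in this archive, so the ioctl name and the use of struct
vhost_vring_state with the timeout in .num are assumptions based on the
description above; .num is in busy_clock() units, local_clock() >> 10,
i.e. roughly microseconds):

#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/vhost.h>

int main(void)
{
	struct vhost_vring_state s = {
		.index = 0,	/* virtqueue index */
		.num   = 50,	/* poll up to ~50us before giving up */
	};
	int fd = open("/dev/vhost-net", O_RDWR);

	if (fd < 0) {
		perror("open /dev/vhost-net");
		return 1;
	}
	/* A real caller would first do VHOST_SET_OWNER and the usual
	 * vring setup; shown here is only the new knob. */
	if (ioctl(fd, VHOST_SET_VRING_BUSYLOOP_TIMEOUT, &s) < 0)
		perror("VHOST_SET_VRING_BUSYLOOP_TIMEOUT");
	return 0;
}

Setting .num back to 0 disables busy polling for that virtqueue, which
matches the vhost_vq_reset() default above.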