Re: [PATCH V3 2/2] vhost: handle polling errors

2013-01-14 Thread Michael S. Tsirkin
On Mon, Jan 14, 2013 at 03:37:58PM +0800, Jason Wang wrote:
> On 01/14/2013 02:57 PM, Michael S. Tsirkin wrote:
> > On Mon, Jan 14, 2013 at 10:59:02AM +0800, Jason Wang wrote:
> >> On 01/13/2013 07:10 PM, Michael S. Tsirkin wrote:
> >>> On Mon, Jan 07, 2013 at 11:04:32PM +0800, Jason Wang wrote:
>  On 01/07/2013 10:55 PM, Michael S. Tsirkin wrote:
> > On Mon, Jan 07, 2013 at 12:38:17PM +0800, Jason Wang wrote:
> >> On 01/06/2013 09:22 PM, Michael S. Tsirkin wrote:
> >>> On Sun, Jan 06, 2013 at 03:18:38PM +0800, Jason Wang wrote:
>  Polling errors were ignored by vhost/vhost_net; this may lead to a
>  crash when trying to remove vhost from the waitqueue after polling
>  has failed. Solve this problem by:
> 
>  - check poll->wqh before trying to remove from the waitqueue
>  - report an error when poll() returns POLLERR in vhost_start_poll()
>  - report an error when vhost_start_poll() fails in
>    vhost_vring_ioctl()/vhost_net_set_backend(), which is used to
>    notify userspace of the failure.
>  - report an error in the data path in vhost_net when meeting
>    polling errors.
> 
>  After those changes, we can safely drop the tx polling state in 
>  vhost_net since
>  it was replaced by the checking of poll->wqh.
> 
>  Signed-off-by: Jason Wang 
>  ---
>   drivers/vhost/net.c   |   74 
>  
>   drivers/vhost/vhost.c |   31 +++-
>   drivers/vhost/vhost.h |2 +-
>   3 files changed, 49 insertions(+), 58 deletions(-)
> 
>  diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
>  index d10ad6f..125c1e5 100644
>  --- a/drivers/vhost/net.c
>  +++ b/drivers/vhost/net.c
>  @@ -64,20 +64,10 @@ enum {
>   VHOST_NET_VQ_MAX = 2,
>   };
>   
>  -enum vhost_net_poll_state {
>  -VHOST_NET_POLL_DISABLED = 0,
>  -VHOST_NET_POLL_STARTED = 1,
>  -VHOST_NET_POLL_STOPPED = 2,
>  -};
>  -
>   struct vhost_net {
>   struct vhost_dev dev;
>   struct vhost_virtqueue vqs[VHOST_NET_VQ_MAX];
>   struct vhost_poll poll[VHOST_NET_VQ_MAX];
>  -/* Tells us whether we are polling a socket for TX.
>  - * We only do this when socket buffer fills up.
>  - * Protected by tx vq lock. */
>  -enum vhost_net_poll_state tx_poll_state;
>   /* Number of TX recently submitted.
>    * Protected by tx vq lock. */
>   unsigned tx_packets;
>  @@ -155,24 +145,6 @@ static void copy_iovec_hdr(const struct iovec 
>  *from, struct iovec *to,
>   }
>   }
>   
>  -/* Caller must have TX VQ lock */
>  -static void tx_poll_stop(struct vhost_net *net)
>  -{
>  -if (likely(net->tx_poll_state != VHOST_NET_POLL_STARTED))
>  -return;
>  -vhost_poll_stop(net->poll + VHOST_NET_VQ_TX);
>  -net->tx_poll_state = VHOST_NET_POLL_STOPPED;
>  -}
>  -
>  -/* Caller must have TX VQ lock */
>  -static void tx_poll_start(struct vhost_net *net, struct socket 
>  *sock)
>  -{
>  -if (unlikely(net->tx_poll_state != VHOST_NET_POLL_STOPPED))
>  -return;
>  -vhost_poll_start(net->poll + VHOST_NET_VQ_TX, sock->file);
>  -net->tx_poll_state = VHOST_NET_POLL_STARTED;
>  -}
>  -
>   /* In case of DMA done not in order in lower device driver for some 
>  reason.
>    * upend_idx is used to track end of used idx, done_idx is used to 
>  track head
>    * of used idx. Once lower device DMA done contiguously, we will 
>  signal KVM
>  @@ -227,6 +199,7 @@ static void vhost_zerocopy_callback(struct 
>  ubuf_info *ubuf, bool success)
>   static void handle_tx(struct vhost_net *net)
>   {
>   struct vhost_virtqueue *vq = &net->dev.vqs[VHOST_NET_VQ_TX];
>  +struct vhost_poll *poll = net->poll + VHOST_NET_VQ_TX;
>   unsigned out, in, s;
>   int head;
>   struct msghdr msg = {
>  @@ -252,7 +225,8 @@ static void handle_tx(struct vhost_net *net)
>   wmem = atomic_read(&sock->sk->sk_wmem_alloc);
>   if (wmem >= sock->sk->sk_sndbuf) {
>   mutex_lock(&vq->mutex);
>  -tx_poll_start(net, sock);
>  +if (vhost_poll_start(poll, sock->file))
>  +vq_err(vq, "Fail to start TX polling\n");
> >>> s/Fail/Failed/
> >>>
> >>> A question though: how can this happen? 
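
The crash mentioned in the changelog is avoided by making the stop path conditional on the waitqueue having actually been registered. A minimal sketch in kernel C of the vhost_poll_stop() change the patch describes (the merged code may differ in detail):

    /* Only detach if poll_wait() ever registered us, i.e. poll->wqh
     * was set; clearing it afterwards makes repeated stops safe. */
    void vhost_poll_stop(struct vhost_poll *poll)
    {
            if (poll->wqh) {
                    remove_wait_queue(poll->wqh, &poll->wait);
                    poll->wqh = NULL;
            }
    }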

Re: [PATCH V3 2/2] vhost: handle polling errors

2013-01-14 Thread Michael S. Tsirkin
On Mon, Jan 14, 2013 at 03:37:58PM +0800, Jason Wang wrote:
 On 01/14/2013 02:57 PM, Michael S. Tsirkin wrote:
  On Mon, Jan 14, 2013 at 10:59:02AM +0800, Jason Wang wrote:
  On 01/13/2013 07:10 PM, Michael S. Tsirkin wrote:
  On Mon, Jan 07, 2013 at 11:04:32PM +0800, Jason Wang wrote:
  On 01/07/2013 10:55 PM, Michael S. Tsirkin wrote:
  On Mon, Jan 07, 2013 at 12:38:17PM +0800, Jason Wang wrote:
  On 01/06/2013 09:22 PM, Michael S. Tsirkin wrote:
  On Sun, Jan 06, 2013 at 03:18:38PM +0800, Jason Wang wrote:
  Polling errors were ignored by vhost/vhost_net; this may lead to a
  crash when trying to remove vhost from the waitqueue after polling
  has failed. Solve this problem by:
 
  - check poll->wqh before trying to remove from the waitqueue
  - report an error when poll() returns POLLERR in vhost_start_poll()
  - report an error when vhost_start_poll() fails in
    vhost_vring_ioctl()/vhost_net_set_backend(), which is used to
    notify userspace of the failure.
  - report an error in the data path in vhost_net when meeting polling
    errors.
 
  After those changes, we can safely drop the tx polling state in 
  vhost_net since
  it was replaced by the checking of poll->wqh.
 
  Signed-off-by: Jason Wang jasow...@redhat.com
  ---
   drivers/vhost/net.c   |   74 
  
   drivers/vhost/vhost.c |   31 +++-
   drivers/vhost/vhost.h |2 +-
   3 files changed, 49 insertions(+), 58 deletions(-)
 
  diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
  index d10ad6f..125c1e5 100644
  --- a/drivers/vhost/net.c
  +++ b/drivers/vhost/net.c
  @@ -64,20 +64,10 @@ enum {
   VHOST_NET_VQ_MAX = 2,
   };
   
  -enum vhost_net_poll_state {
  -VHOST_NET_POLL_DISABLED = 0,
  -VHOST_NET_POLL_STARTED = 1,
  -VHOST_NET_POLL_STOPPED = 2,
  -};
  -
   struct vhost_net {
   struct vhost_dev dev;
   struct vhost_virtqueue vqs[VHOST_NET_VQ_MAX];
   struct vhost_poll poll[VHOST_NET_VQ_MAX];
  -/* Tells us whether we are polling a socket for TX.
  - * We only do this when socket buffer fills up.
  - * Protected by tx vq lock. */
  -enum vhost_net_poll_state tx_poll_state;
   /* Number of TX recently submitted.
* Protected by tx vq lock. */
   unsigned tx_packets;
  @@ -155,24 +145,6 @@ static void copy_iovec_hdr(const struct iovec 
  *from, struct iovec *to,
   }
   }
   
  -/* Caller must have TX VQ lock */
  -static void tx_poll_stop(struct vhost_net *net)
  -{
  -if (likely(net->tx_poll_state != VHOST_NET_POLL_STARTED))
  -return;
  -vhost_poll_stop(net->poll + VHOST_NET_VQ_TX);
  -net->tx_poll_state = VHOST_NET_POLL_STOPPED;
  -}
  -
  -/* Caller must have TX VQ lock */
  -static void tx_poll_start(struct vhost_net *net, struct socket 
  *sock)
  -{
  -if (unlikely(net->tx_poll_state != VHOST_NET_POLL_STOPPED))
  -return;
  -vhost_poll_start(net->poll + VHOST_NET_VQ_TX, sock->file);
  -net->tx_poll_state = VHOST_NET_POLL_STARTED;
  -}
  -
   /* In case of DMA done not in order in lower device driver for some 
  reason.
* upend_idx is used to track end of used idx, done_idx is used to 
  track head
* of used idx. Once lower device DMA done contiguously, we will 
  signal KVM
  @@ -227,6 +199,7 @@ static void vhost_zerocopy_callback(struct 
  ubuf_info *ubuf, bool success)
   static void handle_tx(struct vhost_net *net)
   {
   struct vhost_virtqueue *vq = &net->dev.vqs[VHOST_NET_VQ_TX];
  +struct vhost_poll *poll = net->poll + VHOST_NET_VQ_TX;
   unsigned out, in, s;
   int head;
   struct msghdr msg = {
  @@ -252,7 +225,8 @@ static void handle_tx(struct vhost_net *net)
   wmem = atomic_read(&sock->sk->sk_wmem_alloc);
   if (wmem >= sock->sk->sk_sndbuf) {
   mutex_lock(&vq->mutex);
  -tx_poll_start(net, sock);
  +if (vhost_poll_start(poll, sock->file))
  +vq_err(vq, "Fail to start TX polling\n");
  s/Fail/Failed/
 
  A question though: how can this happen? Could you clarify please?
  Maybe we can find a way to prevent this error?
  Two conditions under which I think this can happen:
 
  1) a buggy userspace disables a queue through TUNSETQUEUE
  2) the net device is gone
 
  For 1, it looks like we can delay the disabling until the refcnt goes to
  zero. For 2, more changes may be needed.
  I'd expect keeping a socket reference would prevent both issues.
  Doesn't it?
  That doesn't work for 2, I think: the socket doesn't hold a refcnt of the
  device, so the device can go away at any time. We could change
  this, but that was the behaviour before multiqueue support.
  Hmm there's one scenario that does seem to
  trigger this: queue can get disabled
  and then poll fails.
 
  Is this the only issue?
  Another one I think we can trigger is:
 
  - start vhost thread
  - do ip link del link dev tap0 to delete the tap device
 
  In this case, the netdevice is unregistered but the file/socket still 
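
The start side referred to by the changelog's second bullet, again as a sketch consistent with the description rather than the verbatim merged code (this assumes the old f_op->poll interface that returns an event mask): query the backend's poll(), deliver any already-pending events, and back out with an error if POLLERR is set:

    int vhost_poll_start(struct vhost_poll *poll, struct file *file)
    {
            unsigned long mask;
            int ret = 0;

            mask = file->f_op->poll(file, &poll->table);
            if (mask)
                    vhost_poll_wakeup(&poll->wait, 0, 0, (void *)mask);
            if (mask & POLLERR) {
                    /* poll_wait() may have run before the error; undo it */
                    if (poll->wqh)
                            remove_wait_queue(poll->wqh, &poll->wait);
                    ret = -EINVAL;
            }
            return ret;
    }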
 

Re: [PATCH V3 2/2] vhost: handle polling errors

2013-01-13 Thread Jason Wang
On 01/14/2013 02:57 PM, Michael S. Tsirkin wrote:
> On Mon, Jan 14, 2013 at 10:59:02AM +0800, Jason Wang wrote:
>> On 01/13/2013 07:10 PM, Michael S. Tsirkin wrote:
>>> On Mon, Jan 07, 2013 at 11:04:32PM +0800, Jason Wang wrote:
 On 01/07/2013 10:55 PM, Michael S. Tsirkin wrote:
> On Mon, Jan 07, 2013 at 12:38:17PM +0800, Jason Wang wrote:
>> On 01/06/2013 09:22 PM, Michael S. Tsirkin wrote:
>>> On Sun, Jan 06, 2013 at 03:18:38PM +0800, Jason Wang wrote:
 Polling errors were ignored by vhost/vhost_net; this may lead to a crash
 when trying to remove vhost from the waitqueue after polling has failed.
 Solve this problem by:

 - check poll->wqh before trying to remove from the waitqueue
 - report an error when poll() returns POLLERR in vhost_start_poll()
 - report an error when vhost_start_poll() fails in
   vhost_vring_ioctl()/vhost_net_set_backend(), which is used to
   notify userspace of the failure.
 - report an error in the data path in vhost_net when meeting polling
   errors.

 After those changes, we can safely drop the tx polling state in 
 vhost_net since
 it was replaced by the checking of poll->wqh.

 Signed-off-by: Jason Wang 
 ---
  drivers/vhost/net.c   |   74 
 
  drivers/vhost/vhost.c |   31 +++-
  drivers/vhost/vhost.h |2 +-
  3 files changed, 49 insertions(+), 58 deletions(-)

 diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
 index d10ad6f..125c1e5 100644
 --- a/drivers/vhost/net.c
 +++ b/drivers/vhost/net.c
 @@ -64,20 +64,10 @@ enum {
VHOST_NET_VQ_MAX = 2,
  };
  
 -enum vhost_net_poll_state {
 -  VHOST_NET_POLL_DISABLED = 0,
 -  VHOST_NET_POLL_STARTED = 1,
 -  VHOST_NET_POLL_STOPPED = 2,
 -};
 -
  struct vhost_net {
struct vhost_dev dev;
struct vhost_virtqueue vqs[VHOST_NET_VQ_MAX];
struct vhost_poll poll[VHOST_NET_VQ_MAX];
 -  /* Tells us whether we are polling a socket for TX.
 -   * We only do this when socket buffer fills up.
 -   * Protected by tx vq lock. */
 -  enum vhost_net_poll_state tx_poll_state;
/* Number of TX recently submitted.
 * Protected by tx vq lock. */
unsigned tx_packets;
 @@ -155,24 +145,6 @@ static void copy_iovec_hdr(const struct iovec 
 *from, struct iovec *to,
}
  }
  
 -/* Caller must have TX VQ lock */
 -static void tx_poll_stop(struct vhost_net *net)
 -{
 -  if (likely(net->tx_poll_state != VHOST_NET_POLL_STARTED))
 -  return;
 -  vhost_poll_stop(net->poll + VHOST_NET_VQ_TX);
 -  net->tx_poll_state = VHOST_NET_POLL_STOPPED;
 -}
 -
 -/* Caller must have TX VQ lock */
 -static void tx_poll_start(struct vhost_net *net, struct socket *sock)
 -{
 -  if (unlikely(net->tx_poll_state != VHOST_NET_POLL_STOPPED))
 -  return;
 -  vhost_poll_start(net->poll + VHOST_NET_VQ_TX, sock->file);
 -  net->tx_poll_state = VHOST_NET_POLL_STARTED;
 -}
 -
  /* In case of DMA done not in order in lower device driver for some 
 reason.
   * upend_idx is used to track end of used idx, done_idx is used to 
 track head
   * of used idx. Once lower device DMA done contiguously, we will 
 signal KVM
 @@ -227,6 +199,7 @@ static void vhost_zerocopy_callback(struct 
 ubuf_info *ubuf, bool success)
  static void handle_tx(struct vhost_net *net)
  {
	struct vhost_virtqueue *vq = &net->dev.vqs[VHOST_NET_VQ_TX];
 +  struct vhost_poll *poll = net->poll + VHOST_NET_VQ_TX;
unsigned out, in, s;
int head;
struct msghdr msg = {
 @@ -252,7 +225,8 @@ static void handle_tx(struct vhost_net *net)
	wmem = atomic_read(&sock->sk->sk_wmem_alloc);
	if (wmem >= sock->sk->sk_sndbuf) {
	mutex_lock(&vq->mutex);
 -  tx_poll_start(net, sock);
 +  if (vhost_poll_start(poll, sock->file))
 +  vq_err(vq, "Fail to start TX polling\n");
>>> s/Fail/Failed/
>>>
>>> A question though: how can this happen? Could you clarify please?
>>> Maybe we can find a way to prevent this error?
>> Two conditions under which I think this can happen:
>>
>> 1) a buggy userspace disables a queue through TUNSETQUEUE
>> 2) the net 
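
Condition 1 maps to the tun driver's poll(): once a queue is detached, there is no tun device behind the file descriptor and poll() reports POLLERR, which is what the new vhost_poll_start() check catches. A simplified sketch modeled on drivers/net/tun.c of that period:

    static unsigned int tun_chr_poll(struct file *file, poll_table *wait)
    {
            struct tun_file *tfile = file->private_data;
            struct tun_struct *tun = __tun_get(tfile);
            unsigned int mask = 0;

            if (!tun)               /* queue detached or device gone */
                    return POLLERR;

            poll_wait(file, &tfile->wq.wait, wait);
            if (!skb_queue_empty(&tfile->socket.sk->sk_receive_queue))
                    mask |= POLLIN | POLLRDNORM;
            if (sock_writeable(tfile->socket.sk))
                    mask |= POLLOUT | POLLWRNORM;

            tun_put(tun);
            return mask;
    }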

Re: [PATCH V3 2/2] vhost: handle polling errors

2013-01-13 Thread Michael S. Tsirkin
On Mon, Jan 14, 2013 at 10:59:02AM +0800, Jason Wang wrote:
> On 01/13/2013 07:10 PM, Michael S. Tsirkin wrote:
> > On Mon, Jan 07, 2013 at 11:04:32PM +0800, Jason Wang wrote:
> >> On 01/07/2013 10:55 PM, Michael S. Tsirkin wrote:
> >>> On Mon, Jan 07, 2013 at 12:38:17PM +0800, Jason Wang wrote:
>  On 01/06/2013 09:22 PM, Michael S. Tsirkin wrote:
> > On Sun, Jan 06, 2013 at 03:18:38PM +0800, Jason Wang wrote:
> >> Polling errors were ignored by vhost/vhost_net; this may lead to a crash
> >> when trying to remove vhost from the waitqueue after polling has failed.
> >> Solve this problem by:
> >>
> >> - check poll->wqh before trying to remove from the waitqueue
> >> - report an error when poll() returns POLLERR in vhost_start_poll()
> >> - report an error when vhost_start_poll() fails in
> >>   vhost_vring_ioctl()/vhost_net_set_backend(), which is used to
> >>   notify userspace of the failure.
> >> - report an error in the data path in vhost_net when meeting polling
> >>   errors.
> >>
> >> After those changes, we can safely drop the tx polling state in 
> >> vhost_net since
> >> it was replaced by the checking of poll->wqh.
> >>
> >> Signed-off-by: Jason Wang 
> >> ---
> >>  drivers/vhost/net.c   |   74 
> >> 
> >>  drivers/vhost/vhost.c |   31 +++-
> >>  drivers/vhost/vhost.h |2 +-
> >>  3 files changed, 49 insertions(+), 58 deletions(-)
> >>
> >> diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
> >> index d10ad6f..125c1e5 100644
> >> --- a/drivers/vhost/net.c
> >> +++ b/drivers/vhost/net.c
> >> @@ -64,20 +64,10 @@ enum {
> >>VHOST_NET_VQ_MAX = 2,
> >>  };
> >>  
> >> -enum vhost_net_poll_state {
> >> -  VHOST_NET_POLL_DISABLED = 0,
> >> -  VHOST_NET_POLL_STARTED = 1,
> >> -  VHOST_NET_POLL_STOPPED = 2,
> >> -};
> >> -
> >>  struct vhost_net {
> >>struct vhost_dev dev;
> >>struct vhost_virtqueue vqs[VHOST_NET_VQ_MAX];
> >>struct vhost_poll poll[VHOST_NET_VQ_MAX];
> >> -  /* Tells us whether we are polling a socket for TX.
> >> -   * We only do this when socket buffer fills up.
> >> -   * Protected by tx vq lock. */
> >> -  enum vhost_net_poll_state tx_poll_state;
> >>/* Number of TX recently submitted.
> >> * Protected by tx vq lock. */
> >>unsigned tx_packets;
> >> @@ -155,24 +145,6 @@ static void copy_iovec_hdr(const struct iovec 
> >> *from, struct iovec *to,
> >>}
> >>  }
> >>  
> >> -/* Caller must have TX VQ lock */
> >> -static void tx_poll_stop(struct vhost_net *net)
> >> -{
> >> -  if (likely(net->tx_poll_state != VHOST_NET_POLL_STARTED))
> >> -  return;
> >> -  vhost_poll_stop(net->poll + VHOST_NET_VQ_TX);
> >> -  net->tx_poll_state = VHOST_NET_POLL_STOPPED;
> >> -}
> >> -
> >> -/* Caller must have TX VQ lock */
> >> -static void tx_poll_start(struct vhost_net *net, struct socket *sock)
> >> -{
> >> -  if (unlikely(net->tx_poll_state != VHOST_NET_POLL_STOPPED))
> >> -  return;
> >> -  vhost_poll_start(net->poll + VHOST_NET_VQ_TX, sock->file);
> >> -  net->tx_poll_state = VHOST_NET_POLL_STARTED;
> >> -}
> >> -
> >>  /* In case of DMA done not in order in lower device driver for some 
> >> reason.
> >>   * upend_idx is used to track end of used idx, done_idx is used to 
> >> track head
> >>   * of used idx. Once lower device DMA done contiguously, we will 
> >> signal KVM
> >> @@ -227,6 +199,7 @@ static void vhost_zerocopy_callback(struct 
> >> ubuf_info *ubuf, bool success)
> >>  static void handle_tx(struct vhost_net *net)
> >>  {
> >>	struct vhost_virtqueue *vq = &net->dev.vqs[VHOST_NET_VQ_TX];
> >> +  struct vhost_poll *poll = net->poll + VHOST_NET_VQ_TX;
> >>unsigned out, in, s;
> >>int head;
> >>struct msghdr msg = {
> >> @@ -252,7 +225,8 @@ static void handle_tx(struct vhost_net *net)
> >>	wmem = atomic_read(&sock->sk->sk_wmem_alloc);
> >>	if (wmem >= sock->sk->sk_sndbuf) {
> >>	mutex_lock(&vq->mutex);
> >> -  tx_poll_start(net, sock);
> >> +  if (vhost_poll_start(poll, sock->file))
> >> +  vq_err(vq, "Fail to start TX polling\n");
> > s/Fail/Failed/
> >
> > A question though: how can this happen? Could you clarify please?
> > Maybe we can find a way to prevent this error?
>  Two conditions under which I think this can happen:
> 
>  1) a buggy userspace disables a queue through TUNSETQUEUE
>  2) the net device is gone
> 
>  For 1, it looks like we can 
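
For concreteness, "disabling a queue through TUNSETQUEUE" is the multiqueue detach operation. A userspace sketch (queue_fd is assumed to be an already-attached queue of a multiqueue tap device):

    #include <string.h>
    #include <sys/ioctl.h>
    #include <linux/if.h>
    #include <linux/if_tun.h>

    /* Detach one queue of a multiqueue tap device; afterwards poll()
     * on queue_fd reports POLLERR, which is what trips vhost. */
    static int tap_detach_queue(int queue_fd)
    {
            struct ifreq ifr;

            memset(&ifr, 0, sizeof(ifr));
            ifr.ifr_flags = IFF_DETACH_QUEUE;
            return ioctl(queue_fd, TUNSETQUEUE, &ifr);
    }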

Re: [PATCH V3 2/2] vhost: handle polling errors

2013-01-13 Thread Jason Wang
On 01/13/2013 07:10 PM, Michael S. Tsirkin wrote:
> On Mon, Jan 07, 2013 at 11:04:32PM +0800, Jason Wang wrote:
>> On 01/07/2013 10:55 PM, Michael S. Tsirkin wrote:
>>> On Mon, Jan 07, 2013 at 12:38:17PM +0800, Jason Wang wrote:
 On 01/06/2013 09:22 PM, Michael S. Tsirkin wrote:
> On Sun, Jan 06, 2013 at 03:18:38PM +0800, Jason Wang wrote:
>> Polling errors were ignored by vhost/vhost_net; this may lead to a crash
>> when trying to remove vhost from the waitqueue after polling has failed.
>> Solve this problem by:
>>
>> - check poll->wqh before trying to remove from the waitqueue
>> - report an error when poll() returns POLLERR in vhost_start_poll()
>> - report an error when vhost_start_poll() fails in
>>   vhost_vring_ioctl()/vhost_net_set_backend(), which is used to
>>   notify userspace of the failure.
>> - report an error in the data path in vhost_net when meeting polling errors.
>>
>> After those changes, we can safely drop the tx polling state in 
>> vhost_net since
>> it was replaced by the checking of poll->wqh.
>>
>> Signed-off-by: Jason Wang 
>> ---
>>  drivers/vhost/net.c   |   74 
>> 
>>  drivers/vhost/vhost.c |   31 +++-
>>  drivers/vhost/vhost.h |2 +-
>>  3 files changed, 49 insertions(+), 58 deletions(-)
>>
>> diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
>> index d10ad6f..125c1e5 100644
>> --- a/drivers/vhost/net.c
>> +++ b/drivers/vhost/net.c
>> @@ -64,20 +64,10 @@ enum {
>>  VHOST_NET_VQ_MAX = 2,
>>  };
>>  
>> -enum vhost_net_poll_state {
>> -VHOST_NET_POLL_DISABLED = 0,
>> -VHOST_NET_POLL_STARTED = 1,
>> -VHOST_NET_POLL_STOPPED = 2,
>> -};
>> -
>>  struct vhost_net {
>>  struct vhost_dev dev;
>>  struct vhost_virtqueue vqs[VHOST_NET_VQ_MAX];
>>  struct vhost_poll poll[VHOST_NET_VQ_MAX];
>> -/* Tells us whether we are polling a socket for TX.
>> - * We only do this when socket buffer fills up.
>> - * Protected by tx vq lock. */
>> -enum vhost_net_poll_state tx_poll_state;
>>  /* Number of TX recently submitted.
>>   * Protected by tx vq lock. */
>>  unsigned tx_packets;
>> @@ -155,24 +145,6 @@ static void copy_iovec_hdr(const struct iovec 
>> *from, struct iovec *to,
>>  }
>>  }
>>  
>> -/* Caller must have TX VQ lock */
>> -static void tx_poll_stop(struct vhost_net *net)
>> -{
>> -if (likely(net->tx_poll_state != VHOST_NET_POLL_STARTED))
>> -return;
>> -vhost_poll_stop(net->poll + VHOST_NET_VQ_TX);
>> -net->tx_poll_state = VHOST_NET_POLL_STOPPED;
>> -}
>> -
>> -/* Caller must have TX VQ lock */
>> -static void tx_poll_start(struct vhost_net *net, struct socket *sock)
>> -{
>> -if (unlikely(net->tx_poll_state != VHOST_NET_POLL_STOPPED))
>> -return;
>> -vhost_poll_start(net->poll + VHOST_NET_VQ_TX, sock->file);
>> -net->tx_poll_state = VHOST_NET_POLL_STARTED;
>> -}
>> -
>>  /* In case of DMA done not in order in lower device driver for some 
>> reason.
>>   * upend_idx is used to track end of used idx, done_idx is used to 
>> track head
>>   * of used idx. Once lower device DMA done contiguously, we will signal 
>> KVM
>> @@ -227,6 +199,7 @@ static void vhost_zerocopy_callback(struct ubuf_info 
>> *ubuf, bool success)
>>  static void handle_tx(struct vhost_net *net)
>>  {
>>  struct vhost_virtqueue *vq = &net->dev.vqs[VHOST_NET_VQ_TX];
>> +struct vhost_poll *poll = net->poll + VHOST_NET_VQ_TX;
>>  unsigned out, in, s;
>>  int head;
>>  struct msghdr msg = {
>> @@ -252,7 +225,8 @@ static void handle_tx(struct vhost_net *net)
>>  wmem = atomic_read(&sock->sk->sk_wmem_alloc);
>>  if (wmem >= sock->sk->sk_sndbuf) {
>>  mutex_lock(&vq->mutex);
>> -tx_poll_start(net, sock);
>> +if (vhost_poll_start(poll, sock->file))
>> +vq_err(vq, "Fail to start TX polling\n");
> s/Fail/Failed/
>
> A question though: how can this happen? Could you clarify please?
> Maybe we can find a way to prevent this error?
 Two conditions under which I think this can happen:

 1) a buggy userspace disables a queue through TUNSETQUEUE
 2) the net device is gone

 For 1, it looks like we can delay the disabling until the refcnt goes to
 zero. For 2, more changes may be needed.
>>> I'd expect keeping a socket reference would prevent both issues.
>>> Doesn't it?
>> That doesn't work for 2, I think: the socket doesn't hold a refcnt of 
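
On the refcount point: vhost pins the tap file (and the socket embedded in its tun_file), not the underlying netdevice, so the device can disappear while the socket stays valid. Roughly how vhost_net takes that reference, simplified from drivers/vhost/net.c (the real helper also handles raw sockets and macvtap):

    static struct socket *get_tap_socket(int fd)
    {
            struct file *file = fget(fd);  /* pins the file, not the netdev */
            struct socket *sock;

            if (!file)
                    return ERR_PTR(-EBADF);
            sock = tun_get_socket(file);   /* the socket lives in the tun_file */
            if (IS_ERR(sock))
                    fput(file);
            return sock;
    }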

Re: [PATCH V3 2/2] vhost: handle polling errors

2013-01-13 Thread Michael S. Tsirkin
On Mon, Jan 07, 2013 at 11:04:32PM +0800, Jason Wang wrote:
> On 01/07/2013 10:55 PM, Michael S. Tsirkin wrote:
> > On Mon, Jan 07, 2013 at 12:38:17PM +0800, Jason Wang wrote:
> >> On 01/06/2013 09:22 PM, Michael S. Tsirkin wrote:
> >>> On Sun, Jan 06, 2013 at 03:18:38PM +0800, Jason Wang wrote:
>  Polling errors were ignored by vhost/vhost_net; this may lead to a crash
>  when trying to remove vhost from the waitqueue after polling has failed.
>  Solve this problem by:
> 
>  - check poll->wqh before trying to remove from the waitqueue
>  - report an error when poll() returns POLLERR in vhost_start_poll()
>  - report an error when vhost_start_poll() fails in
>    vhost_vring_ioctl()/vhost_net_set_backend(), which is used to
>    notify userspace of the failure.
>  - report an error in the data path in vhost_net when meeting polling errors.
> 
>  After those changes, we can safely drop the tx polling state in 
>  vhost_net since
>  it was replaced by the checking of poll->wqh.
> 
>  Signed-off-by: Jason Wang 
>  ---
>   drivers/vhost/net.c   |   74 
>  
>   drivers/vhost/vhost.c |   31 +++-
>   drivers/vhost/vhost.h |2 +-
>   3 files changed, 49 insertions(+), 58 deletions(-)
> 
>  diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
>  index d10ad6f..125c1e5 100644
>  --- a/drivers/vhost/net.c
>  +++ b/drivers/vhost/net.c
>  @@ -64,20 +64,10 @@ enum {
>   VHOST_NET_VQ_MAX = 2,
>   };
>   
>  -enum vhost_net_poll_state {
>  -VHOST_NET_POLL_DISABLED = 0,
>  -VHOST_NET_POLL_STARTED = 1,
>  -VHOST_NET_POLL_STOPPED = 2,
>  -};
>  -
>   struct vhost_net {
>   struct vhost_dev dev;
>   struct vhost_virtqueue vqs[VHOST_NET_VQ_MAX];
>   struct vhost_poll poll[VHOST_NET_VQ_MAX];
>  -/* Tells us whether we are polling a socket for TX.
>  - * We only do this when socket buffer fills up.
>  - * Protected by tx vq lock. */
>  -enum vhost_net_poll_state tx_poll_state;
>   /* Number of TX recently submitted.
>    * Protected by tx vq lock. */
>   unsigned tx_packets;
>  @@ -155,24 +145,6 @@ static void copy_iovec_hdr(const struct iovec 
>  *from, struct iovec *to,
>   }
>   }
>   
>  -/* Caller must have TX VQ lock */
>  -static void tx_poll_stop(struct vhost_net *net)
>  -{
>  -if (likely(net->tx_poll_state != VHOST_NET_POLL_STARTED))
>  -return;
>  -vhost_poll_stop(net->poll + VHOST_NET_VQ_TX);
>  -net->tx_poll_state = VHOST_NET_POLL_STOPPED;
>  -}
>  -
>  -/* Caller must have TX VQ lock */
>  -static void tx_poll_start(struct vhost_net *net, struct socket *sock)
>  -{
>  -if (unlikely(net->tx_poll_state != VHOST_NET_POLL_STOPPED))
>  -return;
>  -vhost_poll_start(net->poll + VHOST_NET_VQ_TX, sock->file);
>  -net->tx_poll_state = VHOST_NET_POLL_STARTED;
>  -}
>  -
>   /* In case of DMA done not in order in lower device driver for some 
>  reason.
>    * upend_idx is used to track end of used idx, done_idx is used to 
>  track head
>    * of used idx. Once lower device DMA done contiguously, we will signal 
>  KVM
>  @@ -227,6 +199,7 @@ static void vhost_zerocopy_callback(struct ubuf_info 
>  *ubuf, bool success)
>   static void handle_tx(struct vhost_net *net)
>   {
>   struct vhost_virtqueue *vq = &net->dev.vqs[VHOST_NET_VQ_TX];
>  +struct vhost_poll *poll = net->poll + VHOST_NET_VQ_TX;
>   unsigned out, in, s;
>   int head;
>   struct msghdr msg = {
>  @@ -252,7 +225,8 @@ static void handle_tx(struct vhost_net *net)
>   wmem = atomic_read(&sock->sk->sk_wmem_alloc);
>   if (wmem >= sock->sk->sk_sndbuf) {
>   mutex_lock(&vq->mutex);
>  -tx_poll_start(net, sock);
>  +if (vhost_poll_start(poll, sock->file))
>  +vq_err(vq, "Fail to start TX polling\n");
> >>> s/Fail/Failed/
> >>>
> >>> A question though: how can this happen? Could you clarify please?
> >>> Maybe we can find a way to prevent this error?
> >> Two conditions under which I think this can happen:
> >>
> >> 1) a buggy userspace disables a queue through TUNSETQUEUE
> >> 2) the net device is gone
> >>
> >> For 1, it looks like we can delay the disabling until the refcnt goes to
> >> zero. For 2, more changes may be needed.
> > I'd expect keeping a socket reference would prevent both issues.
> > Doesn't it?
> 
> That doesn't work for 2, I think: the socket doesn't hold a refcnt of the
> device, so the device can go away at 
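
The changelog's third bullet (reporting the failure through the ioctl) comes down to vhost_net_set_backend() checking the enable result instead of ignoring it. A heavily simplified sketch of that control path, with locking, ubuf handling and full unwinding elided:

    static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd)
    {
            struct vhost_virtqueue *vq = n->vqs + index;
            struct socket *sock = get_socket(fd);
            int r;

            if (IS_ERR(sock))
                    return PTR_ERR(sock);

            vhost_net_disable_vq(n, vq);
            rcu_assign_pointer(vq->private_data, sock);
            /* vhost_net_enable_vq() now returns vhost_poll_start()'s
             * result, so a POLLERR backend fails the ioctl visibly. */
            r = vhost_net_enable_vq(n, vq);
            return r;
    }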

Re: [PATCH V3 2/2] vhost: handle polling errors

2013-01-13 Thread Michael S. Tsirkin
On Mon, Jan 07, 2013 at 11:04:32PM +0800, Jason Wang wrote:
 On 01/07/2013 10:55 PM, Michael S. Tsirkin wrote:
  On Mon, Jan 07, 2013 at 12:38:17PM +0800, Jason Wang wrote:
  On 01/06/2013 09:22 PM, Michael S. Tsirkin wrote:
  On Sun, Jan 06, 2013 at 03:18:38PM +0800, Jason Wang wrote:
  Polling errors were ignored by vhost/vhost_net; this may lead to a crash
  when trying to remove vhost from the waitqueue after polling has failed.
  Solve this problem by:
 
  - check poll->wqh before trying to remove from the waitqueue
  - report an error when poll() returns POLLERR in vhost_start_poll()
  - report an error when vhost_start_poll() fails in
    vhost_vring_ioctl()/vhost_net_set_backend(), which is used to
    notify userspace of the failure.
  - report an error in the data path in vhost_net when meeting polling errors.
 
  After those changes, we can safely drop the tx polling state in 
  vhost_net since
  it was replaced by the checking of poll->wqh.
 
  Signed-off-by: Jason Wang jasow...@redhat.com
  ---
   drivers/vhost/net.c   |   74 
  
   drivers/vhost/vhost.c |   31 +++-
   drivers/vhost/vhost.h |2 +-
   3 files changed, 49 insertions(+), 58 deletions(-)
 
  diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
  index d10ad6f..125c1e5 100644
  --- a/drivers/vhost/net.c
  +++ b/drivers/vhost/net.c
  @@ -64,20 +64,10 @@ enum {
   VHOST_NET_VQ_MAX = 2,
   };
   
  -enum vhost_net_poll_state {
  -VHOST_NET_POLL_DISABLED = 0,
  -VHOST_NET_POLL_STARTED = 1,
  -VHOST_NET_POLL_STOPPED = 2,
  -};
  -
   struct vhost_net {
   struct vhost_dev dev;
   struct vhost_virtqueue vqs[VHOST_NET_VQ_MAX];
   struct vhost_poll poll[VHOST_NET_VQ_MAX];
  -/* Tells us whether we are polling a socket for TX.
  - * We only do this when socket buffer fills up.
  - * Protected by tx vq lock. */
  -enum vhost_net_poll_state tx_poll_state;
   /* Number of TX recently submitted.
* Protected by tx vq lock. */
   unsigned tx_packets;
  @@ -155,24 +145,6 @@ static void copy_iovec_hdr(const struct iovec 
  *from, struct iovec *to,
   }
   }
   
  -/* Caller must have TX VQ lock */
  -static void tx_poll_stop(struct vhost_net *net)
  -{
  -if (likely(net->tx_poll_state != VHOST_NET_POLL_STARTED))
  -return;
  -vhost_poll_stop(net->poll + VHOST_NET_VQ_TX);
  -net->tx_poll_state = VHOST_NET_POLL_STOPPED;
  -}
  -
  -/* Caller must have TX VQ lock */
  -static void tx_poll_start(struct vhost_net *net, struct socket *sock)
  -{
  -if (unlikely(net->tx_poll_state != VHOST_NET_POLL_STOPPED))
  -return;
  -vhost_poll_start(net->poll + VHOST_NET_VQ_TX, sock->file);
  -net->tx_poll_state = VHOST_NET_POLL_STARTED;
  -}
  -
   /* In case of DMA done not in order in lower device driver for some 
  reason.
* upend_idx is used to track end of used idx, done_idx is used to 
  track head
* of used idx. Once lower device DMA done contiguously, we will signal 
  KVM
  @@ -227,6 +199,7 @@ static void vhost_zerocopy_callback(struct ubuf_info 
  *ubuf, bool success)
   static void handle_tx(struct vhost_net *net)
   {
   struct vhost_virtqueue *vq = &net->dev.vqs[VHOST_NET_VQ_TX];
  +struct vhost_poll *poll = net->poll + VHOST_NET_VQ_TX;
   unsigned out, in, s;
   int head;
   struct msghdr msg = {
  @@ -252,7 +225,8 @@ static void handle_tx(struct vhost_net *net)
   wmem = atomic_read(&sock->sk->sk_wmem_alloc);
   if (wmem >= sock->sk->sk_sndbuf) {
   mutex_lock(&vq->mutex);
  -tx_poll_start(net, sock);
  +if (vhost_poll_start(poll, sock->file))
  +vq_err(vq, "Fail to start TX polling\n");
  s/Fail/Failed/
 
  A question though: how can this happen? Could you clarify please?
  Maybe we can find a way to prevent this error?
  Two conditions under which I think this can happen:
 
  1) a buggy userspace disables a queue through TUNSETQUEUE
  2) the net device is gone
 
  For 1, it looks like we can delay the disabling until the refcnt goes to
  zero. For 2, more changes may be needed.
  I'd expect keeping a socket reference would prevent both issues.
  Doesn't it?
 
 That doesn't work for 2, I think: the socket doesn't hold a refcnt of the
 device, so the device can go away at any time. We could change
 this, but that was the behaviour before multiqueue support.

Hmm there's one scenario that does seem to
trigger this: queue can get disabled
and then poll fails.

Is this the only issue?


 
  Not sure it's worth doing this work;
  maybe a warning is enough, just like other failures.
  With other failures, you can normally correct the error and then
  kick to have it restart. This is something that would not
  work here.
 
 If userspace is written correctly (e.g. passing a fd with correct state)
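
The "userspace can correct it" argument assumes the management process checks the ioctl result and retries with a healthy fd. A hedged userspace sketch (vhost_fd and tap_fd setup elided; index 1 is assumed to be the TX ring, matching VHOST_NET_VQ_TX):

    #include <stdio.h>
    #include <sys/ioctl.h>
    #include <linux/vhost.h>

    static int set_tx_backend(int vhost_fd, int tap_fd)
    {
            struct vhost_vring_file backend = {
                    .index = 1,    /* VHOST_NET_VQ_TX */
                    .fd    = tap_fd,
            };

            /* With this series a POLLERR backend makes the ioctl fail,
             * so the caller can re-attach the queue (or open a fresh
             * fd) and try again. */
            if (ioctl(vhost_fd, VHOST_NET_SET_BACKEND, &backend) < 0) {
                    perror("VHOST_NET_SET_BACKEND");
                    return -1;
            }
            return 0;
    }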
 

Re: [PATCH V3 2/2] vhost: handle polling errors

2013-01-13 Thread Michael S. Tsirkin
On Mon, Jan 07, 2013 at 11:04:32PM +0800, Jason Wang wrote:
 On 01/07/2013 10:55 PM, Michael S. Tsirkin wrote:
  On Mon, Jan 07, 2013 at 12:38:17PM +0800, Jason Wang wrote:
  On 01/06/2013 09:22 PM, Michael S. Tsirkin wrote:
  On Sun, Jan 06, 2013 at 03:18:38PM +0800, Jason Wang wrote:
  Polling errors were ignored by vhost/vhost_net; this may lead to a crash
  when trying to remove vhost from the waitqueue after polling has failed.
  Solve this problem by:
 
  - check poll->wqh before trying to remove from the waitqueue
  - report an error when poll() returns POLLERR in vhost_start_poll()
  - report an error when vhost_start_poll() fails in
    vhost_vring_ioctl()/vhost_net_set_backend(), which is used to
    notify userspace of the failure.
  - report an error in the data path in vhost_net when meeting polling errors.
 
  After those changes, we can safely drop the tx polling state in 
  vhost_net since
  it was replaced by the checking of poll->wqh.
 
  Signed-off-by: Jason Wang jasow...@redhat.com
  ---
   drivers/vhost/net.c   |   74 
  
   drivers/vhost/vhost.c |   31 +++-
   drivers/vhost/vhost.h |2 +-
   3 files changed, 49 insertions(+), 58 deletions(-)
 
  diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
  index d10ad6f..125c1e5 100644
  --- a/drivers/vhost/net.c
  +++ b/drivers/vhost/net.c
  @@ -64,20 +64,10 @@ enum {
   VHOST_NET_VQ_MAX = 2,
   };
   
  -enum vhost_net_poll_state {
  -VHOST_NET_POLL_DISABLED = 0,
  -VHOST_NET_POLL_STARTED = 1,
  -VHOST_NET_POLL_STOPPED = 2,
  -};
  -
   struct vhost_net {
   struct vhost_dev dev;
   struct vhost_virtqueue vqs[VHOST_NET_VQ_MAX];
   struct vhost_poll poll[VHOST_NET_VQ_MAX];
  -/* Tells us whether we are polling a socket for TX.
  - * We only do this when socket buffer fills up.
  - * Protected by tx vq lock. */
  -enum vhost_net_poll_state tx_poll_state;
   /* Number of TX recently submitted.
* Protected by tx vq lock. */
   unsigned tx_packets;
  @@ -155,24 +145,6 @@ static void copy_iovec_hdr(const struct iovec 
  *from, struct iovec *to,
   }
   }
   
  -/* Caller must have TX VQ lock */
  -static void tx_poll_stop(struct vhost_net *net)
  -{
  -if (likely(net->tx_poll_state != VHOST_NET_POLL_STARTED))
  -return;
  -vhost_poll_stop(net->poll + VHOST_NET_VQ_TX);
  -net->tx_poll_state = VHOST_NET_POLL_STOPPED;
  -}
  -
  -/* Caller must have TX VQ lock */
  -static void tx_poll_start(struct vhost_net *net, struct socket *sock)
  -{
  -if (unlikely(net->tx_poll_state != VHOST_NET_POLL_STOPPED))
  -return;
  -vhost_poll_start(net->poll + VHOST_NET_VQ_TX, sock->file);
  -net->tx_poll_state = VHOST_NET_POLL_STARTED;
  -}
  -
   /* In case of DMA done not in order in lower device driver for some 
  reason.
* upend_idx is used to track end of used idx, done_idx is used to 
  track head
* of used idx. Once lower device DMA done contiguously, we will signal 
  KVM
  @@ -227,6 +199,7 @@ static void vhost_zerocopy_callback(struct ubuf_info 
  *ubuf, bool success)
   static void handle_tx(struct vhost_net *net)
   {
   struct vhost_virtqueue *vq = &net->dev.vqs[VHOST_NET_VQ_TX];
  +struct vhost_poll *poll = net->poll + VHOST_NET_VQ_TX;
   unsigned out, in, s;
   int head;
   struct msghdr msg = {
  @@ -252,7 +225,8 @@ static void handle_tx(struct vhost_net *net)
   wmem = atomic_read(&sock->sk->sk_wmem_alloc);
   if (wmem >= sock->sk->sk_sndbuf) {
   mutex_lock(&vq->mutex);
  -tx_poll_start(net, sock);
  +if (vhost_poll_start(poll, sock->file))
  +vq_err(vq, "Fail to start TX polling\n");
  s/Fail/Failed/
 
  A question though: how can this happen? Could you clarify please?
  Maybe we can find a way to prevent this error?
  Two conditions under which I think this can happen:
 
  1) a buggy userspace disables a queue through TUNSETQUEUE
  2) the net device is gone
 
  For 1, it looks like we can delay the disabling until the refcnt goes to
  zero. For 2, more changes may be needed.
  I'd expect keeping a socket reference would prevent both issues.
  Doesn't it?
 
 That doesn't work for 2, I think: the socket doesn't hold a refcnt of the
 device, so the device can go away at any time.

Hmm, I don't really understand.
All we care about is that the socket is around, no?
Could you please show how a problematic case can
be triggered?


 We could change
 this, but that was the behaviour before multiqueue support.
 
  Not sure it's worth doing this work;
  maybe a warning is enough, just like other failures.
  With other failures, you can normally correct the error and then
  kick to have it restart. This is something that would not
  work here.
 
 If userspace is written correctly (e.g. passing a fd with 

Re: [PATCH V3 2/2] vhost: handle polling errors

2013-01-13 Thread Jason Wang
On 01/13/2013 07:10 PM, Michael S. Tsirkin wrote:
 On Mon, Jan 07, 2013 at 11:04:32PM +0800, Jason Wang wrote:
 On 01/07/2013 10:55 PM, Michael S. Tsirkin wrote:
 On Mon, Jan 07, 2013 at 12:38:17PM +0800, Jason Wang wrote:
 On 01/06/2013 09:22 PM, Michael S. Tsirkin wrote:
 On Sun, Jan 06, 2013 at 03:18:38PM +0800, Jason Wang wrote:
 Polling errors were ignored by vhost/vhost_net; this may lead to a crash
 when trying to remove vhost from the waitqueue after polling has failed.
 Solve this problem by:

 - check poll->wqh before trying to remove from the waitqueue
 - report an error when poll() returns POLLERR in vhost_start_poll()
 - report an error when vhost_start_poll() fails in
   vhost_vring_ioctl()/vhost_net_set_backend(), which is used to
   notify userspace of the failure.
 - report an error in the data path in vhost_net when meeting polling errors.

 After those changes, we can safely drop the tx polling state in 
 vhost_net since
 it was replaced by the checking of poll->wqh.

 Signed-off-by: Jason Wang jasow...@redhat.com
 ---
  drivers/vhost/net.c   |   74 
 
  drivers/vhost/vhost.c |   31 +++-
  drivers/vhost/vhost.h |2 +-
  3 files changed, 49 insertions(+), 58 deletions(-)

 diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
 index d10ad6f..125c1e5 100644
 --- a/drivers/vhost/net.c
 +++ b/drivers/vhost/net.c
 @@ -64,20 +64,10 @@ enum {
  VHOST_NET_VQ_MAX = 2,
  };
  
 -enum vhost_net_poll_state {
 -VHOST_NET_POLL_DISABLED = 0,
 -VHOST_NET_POLL_STARTED = 1,
 -VHOST_NET_POLL_STOPPED = 2,
 -};
 -
  struct vhost_net {
  struct vhost_dev dev;
  struct vhost_virtqueue vqs[VHOST_NET_VQ_MAX];
  struct vhost_poll poll[VHOST_NET_VQ_MAX];
 -/* Tells us whether we are polling a socket for TX.
 - * We only do this when socket buffer fills up.
 - * Protected by tx vq lock. */
 -enum vhost_net_poll_state tx_poll_state;
  /* Number of TX recently submitted.
   * Protected by tx vq lock. */
  unsigned tx_packets;
 @@ -155,24 +145,6 @@ static void copy_iovec_hdr(const struct iovec 
 *from, struct iovec *to,
  }
  }
  
 -/* Caller must have TX VQ lock */
 -static void tx_poll_stop(struct vhost_net *net)
 -{
 -if (likely(net->tx_poll_state != VHOST_NET_POLL_STARTED))
 -return;
 -vhost_poll_stop(net->poll + VHOST_NET_VQ_TX);
 -net->tx_poll_state = VHOST_NET_POLL_STOPPED;
 -}
 -
 -/* Caller must have TX VQ lock */
 -static void tx_poll_start(struct vhost_net *net, struct socket *sock)
 -{
 -if (unlikely(net->tx_poll_state != VHOST_NET_POLL_STOPPED))
 -return;
 -vhost_poll_start(net->poll + VHOST_NET_VQ_TX, sock->file);
 -net->tx_poll_state = VHOST_NET_POLL_STARTED;
 -}
 -
  /* In case of DMA done not in order in lower device driver for some 
 reason.
   * upend_idx is used to track end of used idx, done_idx is used to 
 track head
   * of used idx. Once lower device DMA done contiguously, we will signal 
 KVM
 @@ -227,6 +199,7 @@ static void vhost_zerocopy_callback(struct ubuf_info 
 *ubuf, bool success)
  static void handle_tx(struct vhost_net *net)
  {
  struct vhost_virtqueue *vq = &net->dev.vqs[VHOST_NET_VQ_TX];
 +struct vhost_poll *poll = net->poll + VHOST_NET_VQ_TX;
  unsigned out, in, s;
  int head;
  struct msghdr msg = {
 @@ -252,7 +225,8 @@ static void handle_tx(struct vhost_net *net)
  wmem = atomic_read(&sock->sk->sk_wmem_alloc);
  if (wmem >= sock->sk->sk_sndbuf) {
  mutex_lock(&vq->mutex);
 -tx_poll_start(net, sock);
 +if (vhost_poll_start(poll, sock->file))
 +vq_err(vq, "Fail to start TX polling\n");
 s/Fail/Failed/

 A question though: how can this happen? Could you clarify please?
 Maybe we can find a way to prevent this error?
 Two conditions under which I think this can happen:

 1) a buggy userspace disables a queue through TUNSETQUEUE
 2) the net device is gone

 For 1, it looks like we can delay the disabling until the refcnt goes to
 zero. For 2, more changes may be needed.
 I'd expect keeping a socket reference would prevent both issues.
 Doesn't it?
 That doesn't work for 2, I think: the socket doesn't hold a refcnt of the
 device, so the device can go away at any time. We could change
 this, but that was the behaviour before multiqueue support.
 Hmm there's one scenario that does seem to
 trigger this: queue can get disabled
 and then poll fails.

 Is this the only issue?

Another one I think we can trigger is:

- start vhost thread
- do ip link del link dev tap0 to delete the tap device

In this case, the netdevice is unregistered but the file/socket still exists.

 Not sure it's worth doing this work;
 maybe a warning is enough, just like other failures.
 With other failures, you can normally correct the error and then
 kick to have it 

Re: [PATCH V3 2/2] vhost: handle polling errors

2013-01-13 Thread Michael S. Tsirkin
On Mon, Jan 14, 2013 at 10:59:02AM +0800, Jason Wang wrote:
 On 01/13/2013 07:10 PM, Michael S. Tsirkin wrote:
  On Mon, Jan 07, 2013 at 11:04:32PM +0800, Jason Wang wrote:
  On 01/07/2013 10:55 PM, Michael S. Tsirkin wrote:
  On Mon, Jan 07, 2013 at 12:38:17PM +0800, Jason Wang wrote:
  On 01/06/2013 09:22 PM, Michael S. Tsirkin wrote:
  On Sun, Jan 06, 2013 at 03:18:38PM +0800, Jason Wang wrote:
  Polling errors were ignored by vhost/vhost_net; this may lead to a crash
  when trying to remove vhost from the waitqueue after polling has failed.
  Solve this problem by:
 
  - check poll->wqh before trying to remove from the waitqueue
  - report an error when poll() returns POLLERR in vhost_start_poll()
  - report an error when vhost_start_poll() fails in
    vhost_vring_ioctl()/vhost_net_set_backend(), which is used to
    notify userspace of the failure.
  - report an error in the data path in vhost_net when meeting polling
    errors.
 
  After those changes, we can safely drop the tx polling state in 
  vhost_net since
  it was replaced by the checking of poll->wqh.
 
  Signed-off-by: Jason Wang jasow...@redhat.com
  ---
   drivers/vhost/net.c   |   74 
  
   drivers/vhost/vhost.c |   31 +++-
   drivers/vhost/vhost.h |2 +-
   3 files changed, 49 insertions(+), 58 deletions(-)
 
  diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
  index d10ad6f..125c1e5 100644
  --- a/drivers/vhost/net.c
  +++ b/drivers/vhost/net.c
  @@ -64,20 +64,10 @@ enum {
 VHOST_NET_VQ_MAX = 2,
   };
   
  -enum vhost_net_poll_state {
  -  VHOST_NET_POLL_DISABLED = 0,
  -  VHOST_NET_POLL_STARTED = 1,
  -  VHOST_NET_POLL_STOPPED = 2,
  -};
  -
   struct vhost_net {
 struct vhost_dev dev;
 struct vhost_virtqueue vqs[VHOST_NET_VQ_MAX];
 struct vhost_poll poll[VHOST_NET_VQ_MAX];
  -  /* Tells us whether we are polling a socket for TX.
  -   * We only do this when socket buffer fills up.
  -   * Protected by tx vq lock. */
  -  enum vhost_net_poll_state tx_poll_state;
 /* Number of TX recently submitted.
  * Protected by tx vq lock. */
 unsigned tx_packets;
  @@ -155,24 +145,6 @@ static void copy_iovec_hdr(const struct iovec 
  *from, struct iovec *to,
 }
   }
   
  -/* Caller must have TX VQ lock */
  -static void tx_poll_stop(struct vhost_net *net)
  -{
  -  if (likely(net->tx_poll_state != VHOST_NET_POLL_STARTED))
  -  return;
  -  vhost_poll_stop(net->poll + VHOST_NET_VQ_TX);
  -  net->tx_poll_state = VHOST_NET_POLL_STOPPED;
  -}
  -
  -/* Caller must have TX VQ lock */
  -static void tx_poll_start(struct vhost_net *net, struct socket *sock)
  -{
  -  if (unlikely(net->tx_poll_state != VHOST_NET_POLL_STOPPED))
  -  return;
  -  vhost_poll_start(net->poll + VHOST_NET_VQ_TX, sock->file);
  -  net->tx_poll_state = VHOST_NET_POLL_STARTED;
  -}
  -
   /* In case of DMA done not in order in lower device driver for some 
  reason.
* upend_idx is used to track end of used idx, done_idx is used to 
  track head
* of used idx. Once lower device DMA done contiguously, we will 
  signal KVM
  @@ -227,6 +199,7 @@ static void vhost_zerocopy_callback(struct 
  ubuf_info *ubuf, bool success)
   static void handle_tx(struct vhost_net *net)
   {
 struct vhost_virtqueue *vq = &net->dev.vqs[VHOST_NET_VQ_TX];
  +  struct vhost_poll *poll = net->poll + VHOST_NET_VQ_TX;
 unsigned out, in, s;
 int head;
 struct msghdr msg = {
  @@ -252,7 +225,8 @@ static void handle_tx(struct vhost_net *net)
 wmem = atomic_read(&sock->sk->sk_wmem_alloc);
 if (wmem >= sock->sk->sk_sndbuf) {
 mutex_lock(&vq->mutex);
  -  tx_poll_start(net, sock);
  +  if (vhost_poll_start(poll, sock->file))
  +  vq_err(vq, "Fail to start TX polling\n");
  s/Fail/Failed/
 
  A question though: how can this happen? Could you clarify please?
  Maybe we can find a way to prevent this error?
  Two conditions under which I think this can happen:
 
  1) a buggy userspace disables a queue through TUNSETQUEUE
  2) the net device is gone
 
  For 1, it looks like we can delay the disabling until the refcnt goes to
  zero. For 2, more changes may be needed.
  I'd expect keeping a socket reference would prevent both issues.
  Doesn't it?
  That doesn't work for 2, I think: the socket doesn't hold a refcnt of the
  device, so the device can go away at any time. We could change
  this, but that was the behaviour before multiqueue support.
  Hmm there's one scenario that does seem to
  trigger this: queue can get disabled
  and then poll fails.
 
  Is this the only issue?
 
 Another one I think we can trigger is:
 
 - start vhost thread
 - do ip link del link dev tap0 to delete the tap device
 
 In this case, the netdevice is unregistered but the file/socket still exists.

Yes but in this case poll_wait is called so 
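
The poll_wait reference is to vhost's poll-table callback: poll->wqh is only non-NULL if the backend's poll() actually invoked poll_wait(), and that is precisely the state the new wqh checks key off. Roughly, from drivers/vhost/vhost.c:

    static void vhost_poll_func(struct file *file, wait_queue_head_t *wqh,
                                poll_table *pt)
    {
            struct vhost_poll *poll;

            poll = container_of(pt, struct vhost_poll, table);
            poll->wqh = wqh;              /* recorded so stop can undo this */
            add_wait_queue(wqh, &poll->wait);
    }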

Re: [PATCH V3 2/2] vhost: handle polling errors

2013-01-13 Thread Jason Wang
On 01/14/2013 02:57 PM, Michael S. Tsirkin wrote:
 On Mon, Jan 14, 2013 at 10:59:02AM +0800, Jason Wang wrote:
 On 01/13/2013 07:10 PM, Michael S. Tsirkin wrote:
 On Mon, Jan 07, 2013 at 11:04:32PM +0800, Jason Wang wrote:
 On 01/07/2013 10:55 PM, Michael S. Tsirkin wrote:
 On Mon, Jan 07, 2013 at 12:38:17PM +0800, Jason Wang wrote:
 On 01/06/2013 09:22 PM, Michael S. Tsirkin wrote:
 On Sun, Jan 06, 2013 at 03:18:38PM +0800, Jason Wang wrote:
 Polling errors were ignored by vhost/vhost_net; this may lead to a crash
 when trying to remove vhost from the waitqueue after polling has failed.
 Solve this problem by:

 - check poll->wqh before trying to remove from the waitqueue
 - report an error when poll() returns POLLERR in vhost_start_poll()
 - report an error when vhost_start_poll() fails in
   vhost_vring_ioctl()/vhost_net_set_backend(), which is used to
   notify userspace of the failure.
 - report an error in the data path in vhost_net when meeting polling
   errors.

 After those changes, we can safely drop the tx polling state in 
 vhost_net since
 it was replaced by the checking of poll->wqh.

 Signed-off-by: Jason Wang jasow...@redhat.com
 ---
  drivers/vhost/net.c   |   74 
 
  drivers/vhost/vhost.c |   31 +++-
  drivers/vhost/vhost.h |2 +-
  3 files changed, 49 insertions(+), 58 deletions(-)

 diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
 index d10ad6f..125c1e5 100644
 --- a/drivers/vhost/net.c
 +++ b/drivers/vhost/net.c
 @@ -64,20 +64,10 @@ enum {
VHOST_NET_VQ_MAX = 2,
  };
  
 -enum vhost_net_poll_state {
 -  VHOST_NET_POLL_DISABLED = 0,
 -  VHOST_NET_POLL_STARTED = 1,
 -  VHOST_NET_POLL_STOPPED = 2,
 -};
 -
  struct vhost_net {
struct vhost_dev dev;
struct vhost_virtqueue vqs[VHOST_NET_VQ_MAX];
struct vhost_poll poll[VHOST_NET_VQ_MAX];
 -  /* Tells us whether we are polling a socket for TX.
 -   * We only do this when socket buffer fills up.
 -   * Protected by tx vq lock. */
 -  enum vhost_net_poll_state tx_poll_state;
/* Number of TX recently submitted.
 * Protected by tx vq lock. */
unsigned tx_packets;
 @@ -155,24 +145,6 @@ static void copy_iovec_hdr(const struct iovec 
 *from, struct iovec *to,
}
  }
  
 -/* Caller must have TX VQ lock */
 -static void tx_poll_stop(struct vhost_net *net)
 -{
 -  if (likely(net->tx_poll_state != VHOST_NET_POLL_STARTED))
 -  return;
 -  vhost_poll_stop(net->poll + VHOST_NET_VQ_TX);
 -  net->tx_poll_state = VHOST_NET_POLL_STOPPED;
 -}
 -
 -/* Caller must have TX VQ lock */
 -static void tx_poll_start(struct vhost_net *net, struct socket *sock)
 -{
 -  if (unlikely(net->tx_poll_state != VHOST_NET_POLL_STOPPED))
 -  return;
 -  vhost_poll_start(net->poll + VHOST_NET_VQ_TX, sock->file);
 -  net->tx_poll_state = VHOST_NET_POLL_STARTED;
 -}
 -
  /* In case of DMA done not in order in lower device driver for some 
 reason.
   * upend_idx is used to track end of used idx, done_idx is used to 
 track head
   * of used idx. Once lower device DMA done contiguously, we will 
 signal KVM
 @@ -227,6 +199,7 @@ static void vhost_zerocopy_callback(struct 
 ubuf_info *ubuf, bool success)
  static void handle_tx(struct vhost_net *net)
  {
	struct vhost_virtqueue *vq = &net->dev.vqs[VHOST_NET_VQ_TX];
 +  struct vhost_poll *poll = net->poll + VHOST_NET_VQ_TX;
unsigned out, in, s;
int head;
struct msghdr msg = {
 @@ -252,7 +225,8 @@ static void handle_tx(struct vhost_net *net)
	wmem = atomic_read(&sock->sk->sk_wmem_alloc);
	if (wmem >= sock->sk->sk_sndbuf) {
	mutex_lock(&vq->mutex);
 -  tx_poll_start(net, sock);
 +  if (vhost_poll_start(poll, sock->file))
 +  vq_err(vq, "Fail to start TX polling\n");
 s/Fail/Failed/

 A question though: how can this happen? Could you clarify please?
 Maybe we can find a way to prevent this error?
 Two conditions under which I think this can happen:

 1) a buggy userspace disables a queue through TUNSETQUEUE
 2) the net device is gone

 For 1, it looks like we can delay the disabling until the refcnt goes to
 zero. For 2, more changes may be needed.
 I'd expect keeping a socket reference would prevent both issues.
 Doesn't it?
 That doesn't work for 2, I think: the socket doesn't hold a refcnt of the
 device, so the device can go away at any time. We could change
 this, but that was the behaviour before multiqueue support.
 Hmm there's one scenario that does seem to
 trigger this: queue can get disabled
 and then poll fails.

 Is this the only issue?
 Another one I think we can trigger is:

 - start vhost thread
 - do ip link del link dev tap0 to delete the tap device

 In this case, the netdevice is unregistered but the file/socket still exists.
 Yes but in this case poll_wait is called so apparently no issue
 with existing code? We only have an issue if poll_wait 
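
If poll_wait() is never called (for instance the backend errors out immediately), wqh stays NULL and only the returned mask matters; otherwise events arrive through the wakeup callback, filtered by the mask vhost asked for. Again roughly, from drivers/vhost/vhost.c:

    static int vhost_poll_wakeup(wait_queue_t *wait, unsigned mode, int sync,
                                 void *key)
    {
            struct vhost_poll *poll = container_of(wait, struct vhost_poll, wait);

            if (!((unsigned long)key & poll->mask))
                    return 0;             /* not an event we polled for */

            vhost_poll_queue(poll);       /* schedule the vhost work item */
            return 0;
    }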

Re: [PATCH V3 2/2] vhost: handle polling errors

2013-01-07 Thread Jason Wang
On 01/07/2013 10:55 PM, Michael S. Tsirkin wrote:
> On Mon, Jan 07, 2013 at 12:38:17PM +0800, Jason Wang wrote:
>> On 01/06/2013 09:22 PM, Michael S. Tsirkin wrote:
>>> On Sun, Jan 06, 2013 at 03:18:38PM +0800, Jason Wang wrote:
 Polling errors were ignored by vhost/vhost_net; this may lead to a crash
 when trying to remove vhost from the waitqueue after polling has failed.
 Solve this problem by:

 - check poll->wqh before trying to remove from the waitqueue
 - report an error when poll() returns POLLERR in vhost_start_poll()
 - report an error when vhost_start_poll() fails in
   vhost_vring_ioctl()/vhost_net_set_backend(), which is used to
   notify userspace of the failure.
 - report an error in the data path in vhost_net when meeting polling errors.

 After those changes, we can safely drop the tx polling state in vhost_net 
 since
 it was replaced by the checking of poll->wqh.

 Signed-off-by: Jason Wang 
 ---
  drivers/vhost/net.c   |   74 
 
  drivers/vhost/vhost.c |   31 +++-
  drivers/vhost/vhost.h |2 +-
  3 files changed, 49 insertions(+), 58 deletions(-)

 diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
 index d10ad6f..125c1e5 100644
 --- a/drivers/vhost/net.c
 +++ b/drivers/vhost/net.c
 @@ -64,20 +64,10 @@ enum {
VHOST_NET_VQ_MAX = 2,
  };
  
 -enum vhost_net_poll_state {
 -  VHOST_NET_POLL_DISABLED = 0,
 -  VHOST_NET_POLL_STARTED = 1,
 -  VHOST_NET_POLL_STOPPED = 2,
 -};
 -
  struct vhost_net {
struct vhost_dev dev;
struct vhost_virtqueue vqs[VHOST_NET_VQ_MAX];
struct vhost_poll poll[VHOST_NET_VQ_MAX];
 -  /* Tells us whether we are polling a socket for TX.
 -   * We only do this when socket buffer fills up.
 -   * Protected by tx vq lock. */
 -  enum vhost_net_poll_state tx_poll_state;
/* Number of TX recently submitted.
 * Protected by tx vq lock. */
unsigned tx_packets;
 @@ -155,24 +145,6 @@ static void copy_iovec_hdr(const struct iovec *from, 
 struct iovec *to,
}
  }
  
 -/* Caller must have TX VQ lock */
 -static void tx_poll_stop(struct vhost_net *net)
 -{
 -  if (likely(net->tx_poll_state != VHOST_NET_POLL_STARTED))
 -  return;
 -  vhost_poll_stop(net->poll + VHOST_NET_VQ_TX);
 -  net->tx_poll_state = VHOST_NET_POLL_STOPPED;
 -}
 -
 -/* Caller must have TX VQ lock */
 -static void tx_poll_start(struct vhost_net *net, struct socket *sock)
 -{
 -  if (unlikely(net->tx_poll_state != VHOST_NET_POLL_STOPPED))
 -  return;
 -  vhost_poll_start(net->poll + VHOST_NET_VQ_TX, sock->file);
 -  net->tx_poll_state = VHOST_NET_POLL_STARTED;
 -}
 -
  /* In case of DMA done not in order in lower device driver for some 
 reason.
   * upend_idx is used to track end of used idx, done_idx is used to track 
 head
   * of used idx. Once lower device DMA done contiguously, we will signal 
 KVM
 @@ -227,6 +199,7 @@ static void vhost_zerocopy_callback(struct ubuf_info 
 *ubuf, bool success)
  static void handle_tx(struct vhost_net *net)
  {
	struct vhost_virtqueue *vq = &net->dev.vqs[VHOST_NET_VQ_TX];
 +  struct vhost_poll *poll = net->poll + VHOST_NET_VQ_TX;
unsigned out, in, s;
int head;
struct msghdr msg = {
 @@ -252,7 +225,8 @@ static void handle_tx(struct vhost_net *net)
	wmem = atomic_read(&sock->sk->sk_wmem_alloc);
	if (wmem >= sock->sk->sk_sndbuf) {
	mutex_lock(&vq->mutex);
 -  tx_poll_start(net, sock);
 +  if (vhost_poll_start(poll, sock->file))
 +  vq_err(vq, "Fail to start TX polling\n");
>>> s/Fail/Failed/
>>>
>>> A question though: how can this happen? Could you clarify please?
>>> Maybe we can find a way to prevent this error?
>> Two conditions under which I think this can happen:
>>
>> 1) a buggy userspace disables a queue through TUNSETQUEUE
>> 2) the net device is gone
>>
>> For 1, it looks like we can delay the disabling until the refcnt goes to
>> zero. For 2, more changes may be needed.
> I'd expect keeping a socket reference would prevent both issues.
> Doesn't it?

That doesn't work for 2, I think: the socket doesn't hold a refcnt on the
device, so the device can go away at any time. We could change this, but
it's the behaviour from before multiqueue support.
>
>> Not sure it's worth doing this work;
>> maybe a warning is enough, just like for other failures.
> With other failures, you can normally correct the error and then
> kick to have it restart. This is something that would not
> work here.

If userspace is written correctly (e.g. passing an fd with the correct
state), it can also be corrected.
>
mutex_unlock(>mutex);
return;
}
 @@ 
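
Both conditions show up to vhost the same way: the tun queue's poll() method
reports POLLERR instead of registering on a waitqueue, which is what
vhost_poll_start() can now detect. A simplified sketch of the 3.8-era tun
side (shown for context; not part of this patch):

	static unsigned int tun_chr_poll(struct file *file, poll_table *wait)
	{
		struct tun_file *tfile = file->private_data;
		struct tun_struct *tun = __tun_get(tfile);
		unsigned int mask = 0;

		if (!tun)
			return POLLERR;	/* queue detached, e.g. via TUNSETQUEUE */

		poll_wait(file, &tfile->wq.wait, wait);

		/* ... POLLIN/POLLOUT computation elided ... */

		if (tun->dev->reg_state != NETREG_REGISTERED)
			mask = POLLERR;	/* the netdevice has gone away */

		tun_put(tun);
		return mask;
	}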

Re: [PATCH V3 2/2] vhost: handle polling errors

2013-01-07 Thread Michael S. Tsirkin
On Mon, Jan 07, 2013 at 12:38:17PM +0800, Jason Wang wrote:
> On 01/06/2013 09:22 PM, Michael S. Tsirkin wrote:
> > On Sun, Jan 06, 2013 at 03:18:38PM +0800, Jason Wang wrote:
> >> Polling errors were ignored by vhost/vhost_net; this may lead to a crash
> >> when trying to remove vhost from the waitqueue after polling has failed.
> >> Solve this problem by:
> >>
> >> - checking the poll->wqh before trying to remove from waitqueue
> >> - report an error when poll() returns POLLERR in vhost_poll_start()
> >> - report an error when vhost_poll_start() fails in
> >>   vhost_vring_ioctl()/vhost_net_set_backend() which is used to notify the
> >>   failure to userspace.
> >> - report an error in the data path in vhost_net when meeting polling errors.
> >>
> >> After those changes, we can safely drop the tx polling state in vhost_net,
> >> since it has been replaced by the check of poll->wqh.
> >>
> >> Signed-off-by: Jason Wang 
> >> ---
> >>  drivers/vhost/net.c   |   74 
> >> 
> >>  drivers/vhost/vhost.c |   31 +++-
> >>  drivers/vhost/vhost.h |2 +-
> >>  3 files changed, 49 insertions(+), 58 deletions(-)
> >>
> >> diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
> >> index d10ad6f..125c1e5 100644
> >> --- a/drivers/vhost/net.c
> >> +++ b/drivers/vhost/net.c
> >> @@ -64,20 +64,10 @@ enum {
> >>VHOST_NET_VQ_MAX = 2,
> >>  };
> >>  
> >> -enum vhost_net_poll_state {
> >> -  VHOST_NET_POLL_DISABLED = 0,
> >> -  VHOST_NET_POLL_STARTED = 1,
> >> -  VHOST_NET_POLL_STOPPED = 2,
> >> -};
> >> -
> >>  struct vhost_net {
> >>struct vhost_dev dev;
> >>struct vhost_virtqueue vqs[VHOST_NET_VQ_MAX];
> >>struct vhost_poll poll[VHOST_NET_VQ_MAX];
> >> -  /* Tells us whether we are polling a socket for TX.
> >> -   * We only do this when socket buffer fills up.
> >> -   * Protected by tx vq lock. */
> >> -  enum vhost_net_poll_state tx_poll_state;
> >>/* Number of TX recently submitted.
> >> * Protected by tx vq lock. */
> >>unsigned tx_packets;
> >> @@ -155,24 +145,6 @@ static void copy_iovec_hdr(const struct iovec *from, 
> >> struct iovec *to,
> >>}
> >>  }
> >>  
> >> -/* Caller must have TX VQ lock */
> >> -static void tx_poll_stop(struct vhost_net *net)
> >> -{
> >> -  if (likely(net->tx_poll_state != VHOST_NET_POLL_STARTED))
> >> -  return;
> >> -  vhost_poll_stop(net->poll + VHOST_NET_VQ_TX);
> >> -  net->tx_poll_state = VHOST_NET_POLL_STOPPED;
> >> -}
> >> -
> >> -/* Caller must have TX VQ lock */
> >> -static void tx_poll_start(struct vhost_net *net, struct socket *sock)
> >> -{
> >> -  if (unlikely(net->tx_poll_state != VHOST_NET_POLL_STOPPED))
> >> -  return;
> >> -  vhost_poll_start(net->poll + VHOST_NET_VQ_TX, sock->file);
> >> -  net->tx_poll_state = VHOST_NET_POLL_STARTED;
> >> -}
> >> -
> >>  /* In case of DMA done not in order in lower device driver for some 
> >> reason.
> >>   * upend_idx is used to track end of used idx, done_idx is used to track 
> >> head
> >>   * of used idx. Once lower device DMA done contiguously, we will signal 
> >> KVM
> >> @@ -227,6 +199,7 @@ static void vhost_zerocopy_callback(struct ubuf_info 
> >> *ubuf, bool success)
> >>  static void handle_tx(struct vhost_net *net)
> >>  {
> >>struct vhost_virtqueue *vq = >dev.vqs[VHOST_NET_VQ_TX];
> >> +  struct vhost_poll *poll = net->poll + VHOST_NET_VQ_TX;
> >>unsigned out, in, s;
> >>int head;
> >>struct msghdr msg = {
> >> @@ -252,7 +225,8 @@ static void handle_tx(struct vhost_net *net)
> >>wmem = atomic_read(>sk->sk_wmem_alloc);
> >>if (wmem >= sock->sk->sk_sndbuf) {
> >>mutex_lock(>mutex);
> >> -  tx_poll_start(net, sock);
> >> +  if (vhost_poll_start(poll, sock->file))
> >> +  vq_err(vq, "Fail to start TX polling\n");
> > s/Fail/Failed/
> >
> > A question though: how can this happen? Could you clarify please?
> > Maybe we can find a way to prevent this error?
> 
> Two conditions under which I think this can happen:
> 
> 1) a buggy userspace disables a queue through TUNSETQUEUE
> 2) the net device is gone
> 
> For 1, it looks like we can delay the disabling until the refcnt goes to
> zero. For 2, more changes may be needed.

I'd expect keeping a socket reference would prevent both issues.
Doesn't it?

> Not sure it's worth doing this work;
> maybe a warning is enough, just like for other failures.

With other failures, you can normally correct the error and then
kick to have it restart. This is something that would not
work here.

> >
> >>mutex_unlock(>mutex);
> >>return;
> >>}
> >> @@ -261,7 +235,7 @@ static void handle_tx(struct vhost_net *net)
> >>vhost_disable_notify(>dev, vq);
> >>  
> >>if (wmem < sock->sk->sk_sndbuf / 2)
> >> -  tx_poll_stop(net);
> >> +  vhost_poll_stop(poll);
> >>hdr_size = vq->vhost_hlen;
> >>zcopy = vq->ubufs;
> >>  
> >> @@ -283,8 +257,10 @@ static void handle_tx(struct 
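
With the error propagated through vhost_vring_ioctl()/vhost_net_set_backend(),
a correctly written userspace sees the failure at setup time and can retry
with a healthy fd. A hypothetical userspace sketch (vhost_fd and tap_fd are
assumed to be already-open descriptors; index selects the virtqueue):

	#include <stdio.h>
	#include <sys/ioctl.h>
	#include <linux/vhost.h>

	static int set_backend(int vhost_fd, unsigned int index, int tap_fd)
	{
		struct vhost_vring_file backend = { .index = index, .fd = tap_fd };

		/* With this patch, an fd whose poll() reports POLLERR (e.g. a
		 * detached tap queue) is rejected here rather than corrupting
		 * the waitqueue later. */
		if (ioctl(vhost_fd, VHOST_NET_SET_BACKEND, &backend) < 0) {
			perror("VHOST_NET_SET_BACKEND");
			return -1;
		}
		return 0;
	}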

Re: [PATCH V3 2/2] vhost: handle polling errors

2013-01-06 Thread Jason Wang
On 01/06/2013 09:22 PM, Michael S. Tsirkin wrote:
> On Sun, Jan 06, 2013 at 03:18:38PM +0800, Jason Wang wrote:
>> Polling errors were ignored by vhost/vhost_net; this may lead to a crash when
>> trying to remove vhost from the waitqueue after polling has failed. Solve
>> this problem by:
>>
>> - checking the poll->wqh before trying to remove from waitqueue
>> - report an error when poll() returns POLLERR in vhost_poll_start()
>> - report an error when vhost_poll_start() fails in
>>   vhost_vring_ioctl()/vhost_net_set_backend() which is used to notify the
>>   failure to userspace.
>> - report an error in the data path in vhost_net when meeting polling errors.
>>
>> After those changes, we can safely drop the tx polling state in vhost_net,
>> since it has been replaced by the check of poll->wqh.
>>
>> Signed-off-by: Jason Wang 
>> ---
>>  drivers/vhost/net.c   |   74 
>> 
>>  drivers/vhost/vhost.c |   31 +++-
>>  drivers/vhost/vhost.h |2 +-
>>  3 files changed, 49 insertions(+), 58 deletions(-)
>>
>> diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
>> index d10ad6f..125c1e5 100644
>> --- a/drivers/vhost/net.c
>> +++ b/drivers/vhost/net.c
>> @@ -64,20 +64,10 @@ enum {
>>  VHOST_NET_VQ_MAX = 2,
>>  };
>>  
>> -enum vhost_net_poll_state {
>> -VHOST_NET_POLL_DISABLED = 0,
>> -VHOST_NET_POLL_STARTED = 1,
>> -VHOST_NET_POLL_STOPPED = 2,
>> -};
>> -
>>  struct vhost_net {
>>  struct vhost_dev dev;
>>  struct vhost_virtqueue vqs[VHOST_NET_VQ_MAX];
>>  struct vhost_poll poll[VHOST_NET_VQ_MAX];
>> -/* Tells us whether we are polling a socket for TX.
>> - * We only do this when socket buffer fills up.
>> - * Protected by tx vq lock. */
>> -enum vhost_net_poll_state tx_poll_state;
>>  /* Number of TX recently submitted.
>>   * Protected by tx vq lock. */
>>  unsigned tx_packets;
>> @@ -155,24 +145,6 @@ static void copy_iovec_hdr(const struct iovec *from, 
>> struct iovec *to,
>>  }
>>  }
>>  
>> -/* Caller must have TX VQ lock */
>> -static void tx_poll_stop(struct vhost_net *net)
>> -{
>> -if (likely(net->tx_poll_state != VHOST_NET_POLL_STARTED))
>> -return;
>> -vhost_poll_stop(net->poll + VHOST_NET_VQ_TX);
>> -net->tx_poll_state = VHOST_NET_POLL_STOPPED;
>> -}
>> -
>> -/* Caller must have TX VQ lock */
>> -static void tx_poll_start(struct vhost_net *net, struct socket *sock)
>> -{
>> -if (unlikely(net->tx_poll_state != VHOST_NET_POLL_STOPPED))
>> -return;
>> -vhost_poll_start(net->poll + VHOST_NET_VQ_TX, sock->file);
>> -net->tx_poll_state = VHOST_NET_POLL_STARTED;
>> -}
>> -
>>  /* In case of DMA done not in order in lower device driver for some reason.
>>   * upend_idx is used to track end of used idx, done_idx is used to track 
>> head
>>   * of used idx. Once lower device DMA done contiguously, we will signal KVM
>> @@ -227,6 +199,7 @@ static void vhost_zerocopy_callback(struct ubuf_info 
>> *ubuf, bool success)
>>  static void handle_tx(struct vhost_net *net)
>>  {
>>  struct vhost_virtqueue *vq = >dev.vqs[VHOST_NET_VQ_TX];
>> +struct vhost_poll *poll = net->poll + VHOST_NET_VQ_TX;
>>  unsigned out, in, s;
>>  int head;
>>  struct msghdr msg = {
>> @@ -252,7 +225,8 @@ static void handle_tx(struct vhost_net *net)
>>  wmem = atomic_read(>sk->sk_wmem_alloc);
>>  if (wmem >= sock->sk->sk_sndbuf) {
>>  mutex_lock(>mutex);
>> -tx_poll_start(net, sock);
>> +if (vhost_poll_start(poll, sock->file))
>> +vq_err(vq, "Fail to start TX polling\n");
> s/Fail/Failed/
>
> A question though: how can this happen? Could you clarify please?
> Maybe we can find a way to prevent this error?

Two conditions under which I think this can happen:

1) a buggy userspace disables a queue through TUNSETQUEUE
2) the net device is gone

For 1, it looks like we can delay the disabling until the refcnt goes to
zero. For 2, more changes may be needed. Not sure it's worth doing this
work; maybe a warning is enough, just like for other failures.
>
>>  mutex_unlock(>mutex);
>>  return;
>>  }
>> @@ -261,7 +235,7 @@ static void handle_tx(struct vhost_net *net)
>>  vhost_disable_notify(>dev, vq);
>>  
>>  if (wmem < sock->sk->sk_sndbuf / 2)
>> -tx_poll_stop(net);
>> +vhost_poll_stop(poll);
>>  hdr_size = vq->vhost_hlen;
>>  zcopy = vq->ubufs;
>>  
>> @@ -283,8 +257,10 @@ static void handle_tx(struct vhost_net *net)
>>  
>>  wmem = atomic_read(>sk->sk_wmem_alloc);
>>  if (wmem >= sock->sk->sk_sndbuf * 3 / 4) {
>> -tx_poll_start(net, sock);
>> -set_bit(SOCK_ASYNC_NOSPACE, >flags);
>> +if (vhost_poll_start(poll, sock->file))
>> +vq_err(vq, "Fail to start TX 
>> polling\n");
>> +   
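
Where these vq_err() reports end up: in the 3.8-era drivers/vhost/vhost.h,
vq_err() is a debug print plus a signal on the virtqueue's error eventfd, so
userspace that registered one via VHOST_SET_VRING_ERR is notified. A
simplified sketch, shown for context:

	#define vq_err(vq, fmt, ...) do {                          \
			pr_debug(pr_fmt(fmt), ##__VA_ARGS__);      \
			if ((vq)->error_ctx)                       \
				eventfd_signal((vq)->error_ctx, 1);\
		} while (0)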

Re: [PATCH V3 2/2] vhost: handle polling errors

2013-01-06 Thread Michael S. Tsirkin
On Sun, Jan 06, 2013 at 03:18:38PM +0800, Jason Wang wrote:
> Polling errors were ignored by vhost/vhost_net; this may lead to a crash when
> trying to remove vhost from the waitqueue after polling has failed. Solve
> this problem by:
> 
> - checking the poll->wqh before trying to remove from waitqueue
> - report an error when poll() returns POLLERR in vhost_poll_start()
> - report an error when vhost_poll_start() fails in
>   vhost_vring_ioctl()/vhost_net_set_backend() which is used to notify the
>   failure to userspace.
> - report an error in the data path in vhost_net when meeting polling errors.
> 
> After those changes, we can safely drop the tx polling state in vhost_net,
> since it has been replaced by the check of poll->wqh.
> 
> Signed-off-by: Jason Wang 
> ---
>  drivers/vhost/net.c   |   74 
>  drivers/vhost/vhost.c |   31 +++-
>  drivers/vhost/vhost.h |2 +-
>  3 files changed, 49 insertions(+), 58 deletions(-)
> 
> diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
> index d10ad6f..125c1e5 100644
> --- a/drivers/vhost/net.c
> +++ b/drivers/vhost/net.c
> @@ -64,20 +64,10 @@ enum {
>   VHOST_NET_VQ_MAX = 2,
>  };
>  
> -enum vhost_net_poll_state {
> - VHOST_NET_POLL_DISABLED = 0,
> - VHOST_NET_POLL_STARTED = 1,
> - VHOST_NET_POLL_STOPPED = 2,
> -};
> -
>  struct vhost_net {
>   struct vhost_dev dev;
>   struct vhost_virtqueue vqs[VHOST_NET_VQ_MAX];
>   struct vhost_poll poll[VHOST_NET_VQ_MAX];
> - /* Tells us whether we are polling a socket for TX.
> -  * We only do this when socket buffer fills up.
> -  * Protected by tx vq lock. */
> - enum vhost_net_poll_state tx_poll_state;
>   /* Number of TX recently submitted.
>* Protected by tx vq lock. */
>   unsigned tx_packets;
> @@ -155,24 +145,6 @@ static void copy_iovec_hdr(const struct iovec *from, 
> struct iovec *to,
>   }
>  }
>  
> -/* Caller must have TX VQ lock */
> -static void tx_poll_stop(struct vhost_net *net)
> -{
> - if (likely(net->tx_poll_state != VHOST_NET_POLL_STARTED))
> - return;
> - vhost_poll_stop(net->poll + VHOST_NET_VQ_TX);
> - net->tx_poll_state = VHOST_NET_POLL_STOPPED;
> -}
> -
> -/* Caller must have TX VQ lock */
> -static void tx_poll_start(struct vhost_net *net, struct socket *sock)
> -{
> - if (unlikely(net->tx_poll_state != VHOST_NET_POLL_STOPPED))
> - return;
> - vhost_poll_start(net->poll + VHOST_NET_VQ_TX, sock->file);
> - net->tx_poll_state = VHOST_NET_POLL_STARTED;
> -}
> -
>  /* In case of DMA done not in order in lower device driver for some reason.
>   * upend_idx is used to track end of used idx, done_idx is used to track head
>   * of used idx. Once lower device DMA done contiguously, we will signal KVM
> @@ -227,6 +199,7 @@ static void vhost_zerocopy_callback(struct ubuf_info 
> *ubuf, bool success)
>  static void handle_tx(struct vhost_net *net)
>  {
>   struct vhost_virtqueue *vq = >dev.vqs[VHOST_NET_VQ_TX];
> + struct vhost_poll *poll = net->poll + VHOST_NET_VQ_TX;
>   unsigned out, in, s;
>   int head;
>   struct msghdr msg = {
> @@ -252,7 +225,8 @@ static void handle_tx(struct vhost_net *net)
>   wmem = atomic_read(>sk->sk_wmem_alloc);
>   if (wmem >= sock->sk->sk_sndbuf) {
>   mutex_lock(>mutex);
> - tx_poll_start(net, sock);
> + if (vhost_poll_start(poll, sock->file))
> + vq_err(vq, "Fail to start TX polling\n");

s/Fail/Failed/

A question though: how can this happen? Could you clarify please?
Maybe we can find a way to prevent this error?

>   mutex_unlock(>mutex);
>   return;
>   }
> @@ -261,7 +235,7 @@ static void handle_tx(struct vhost_net *net)
>   vhost_disable_notify(>dev, vq);
>  
>   if (wmem < sock->sk->sk_sndbuf / 2)
> - tx_poll_stop(net);
> + vhost_poll_stop(poll);
>   hdr_size = vq->vhost_hlen;
>   zcopy = vq->ubufs;
>  
> @@ -283,8 +257,10 @@ static void handle_tx(struct vhost_net *net)
>  
>   wmem = atomic_read(>sk->sk_wmem_alloc);
>   if (wmem >= sock->sk->sk_sndbuf * 3 / 4) {
> - tx_poll_start(net, sock);
> - set_bit(SOCK_ASYNC_NOSPACE, >flags);
> + if (vhost_poll_start(poll, sock->file))
> + vq_err(vq, "Fail to start TX 
> polling\n");
> + else
> + set_bit(SOCK_ASYNC_NOSPACE, 
> >flags);
>   break;
>   }
>   /* If more outstanding DMAs, queue the work.
> @@ -294,8 +270,10 @@ static void handle_tx(struct vhost_net *net)
>   (vq->upend_idx - vq->done_idx) :
>   (vq->upend_idx + UIO_MAXIOV - 
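
The setup-time half of the answer lives in hunks not visible in this archive:
the enable path now returns the polling error so vhost_net_set_backend() can
fail the ioctl. A sketch of the plausible shape (assumed, not the verbatim
V3 hunk):

	static int vhost_net_enable_vq(struct vhost_net *n,
				       struct vhost_virtqueue *vq)
	{
		struct vhost_poll *poll = n->poll + (vq - n->vqs);
		struct socket *sock;

		sock = rcu_dereference_protected(vq->private_data,
						 lockdep_is_held(&vq->mutex));
		if (!sock)
			return 0;

		/* Fails (e.g. -EINVAL) if sock's poll() reports POLLERR. */
		return vhost_poll_start(poll, sock->file);
	}

	/* ... and in vhost_net_set_backend(): */
		r = vhost_net_enable_vq(n, vq);
		if (r)
			goto err_used;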

[PATCH V3 2/2] vhost: handle polling errors

2013-01-05 Thread Jason Wang
Polling errors were ignored by vhost/vhost_net; this may lead to a crash when
trying to remove vhost from the waitqueue after polling has failed. Solve
this problem by:

- checking the poll->wqh before trying to remove from waitqueue
- report an error when poll() returns POLLERR in vhost_poll_start()
- report an error when vhost_poll_start() fails in
  vhost_vring_ioctl()/vhost_net_set_backend() which is used to notify the
  failure to userspace.
- report an error in the data path in vhost_net when meeting polling errors.

After those changes, we can safely drop the tx polling state in vhost_net,
since it has been replaced by the check of poll->wqh.

Signed-off-by: Jason Wang 
---
 drivers/vhost/net.c   |   74 
 drivers/vhost/vhost.c |   31 +++-
 drivers/vhost/vhost.h |2 +-
 3 files changed, 49 insertions(+), 58 deletions(-)

diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index d10ad6f..125c1e5 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -64,20 +64,10 @@ enum {
VHOST_NET_VQ_MAX = 2,
 };
 
-enum vhost_net_poll_state {
-   VHOST_NET_POLL_DISABLED = 0,
-   VHOST_NET_POLL_STARTED = 1,
-   VHOST_NET_POLL_STOPPED = 2,
-};
-
 struct vhost_net {
struct vhost_dev dev;
struct vhost_virtqueue vqs[VHOST_NET_VQ_MAX];
struct vhost_poll poll[VHOST_NET_VQ_MAX];
-   /* Tells us whether we are polling a socket for TX.
-* We only do this when socket buffer fills up.
-* Protected by tx vq lock. */
-   enum vhost_net_poll_state tx_poll_state;
/* Number of TX recently submitted.
 * Protected by tx vq lock. */
unsigned tx_packets;
@@ -155,24 +145,6 @@ static void copy_iovec_hdr(const struct iovec *from, 
struct iovec *to,
}
 }
 
-/* Caller must have TX VQ lock */
-static void tx_poll_stop(struct vhost_net *net)
-{
-   if (likely(net->tx_poll_state != VHOST_NET_POLL_STARTED))
-   return;
-   vhost_poll_stop(net->poll + VHOST_NET_VQ_TX);
-   net->tx_poll_state = VHOST_NET_POLL_STOPPED;
-}
-
-/* Caller must have TX VQ lock */
-static void tx_poll_start(struct vhost_net *net, struct socket *sock)
-{
-   if (unlikely(net->tx_poll_state != VHOST_NET_POLL_STOPPED))
-   return;
-   vhost_poll_start(net->poll + VHOST_NET_VQ_TX, sock->file);
-   net->tx_poll_state = VHOST_NET_POLL_STARTED;
-}
-
 /* In case of DMA done not in order in lower device driver for some reason.
  * upend_idx is used to track end of used idx, done_idx is used to track head
  * of used idx. Once lower device DMA done contiguously, we will signal KVM
@@ -227,6 +199,7 @@ static void vhost_zerocopy_callback(struct ubuf_info *ubuf, 
bool success)
 static void handle_tx(struct vhost_net *net)
 {
struct vhost_virtqueue *vq = >dev.vqs[VHOST_NET_VQ_TX];
+   struct vhost_poll *poll = net->poll + VHOST_NET_VQ_TX;
unsigned out, in, s;
int head;
struct msghdr msg = {
@@ -252,7 +225,8 @@ static void handle_tx(struct vhost_net *net)
wmem = atomic_read(>sk->sk_wmem_alloc);
if (wmem >= sock->sk->sk_sndbuf) {
mutex_lock(>mutex);
-   tx_poll_start(net, sock);
+   if (vhost_poll_start(poll, sock->file))
+   vq_err(vq, "Fail to start TX polling\n");
mutex_unlock(>mutex);
return;
}
@@ -261,7 +235,7 @@ static void handle_tx(struct vhost_net *net)
vhost_disable_notify(>dev, vq);
 
if (wmem < sock->sk->sk_sndbuf / 2)
-   tx_poll_stop(net);
+   vhost_poll_stop(poll);
hdr_size = vq->vhost_hlen;
zcopy = vq->ubufs;
 
@@ -283,8 +257,10 @@ static void handle_tx(struct vhost_net *net)
 
wmem = atomic_read(>sk->sk_wmem_alloc);
if (wmem >= sock->sk->sk_sndbuf * 3 / 4) {
-   tx_poll_start(net, sock);
-   set_bit(SOCK_ASYNC_NOSPACE, >flags);
+   if (vhost_poll_start(poll, sock->file))
+   vq_err(vq, "Fail to start TX 
polling\n");
+   else
+   set_bit(SOCK_ASYNC_NOSPACE, 
>flags);
break;
}
/* If more outstanding DMAs, queue the work.
@@ -294,8 +270,10 @@ static void handle_tx(struct vhost_net *net)
(vq->upend_idx - vq->done_idx) :
(vq->upend_idx + UIO_MAXIOV - vq->done_idx);
if (unlikely(num_pends > VHOST_MAX_PEND)) {
-   tx_poll_start(net, sock);
-   set_bit(SOCK_ASYNC_NOSPACE, >flags);
+   if (vhost_poll_start(poll, sock->file))
+   
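
The drivers/vhost/vhost.c hunks do not survive in this archive; a sketch of
the reworked helpers implied by the commit message (assumed shape, not the
verbatim hunk):

	/* Start polling a file. Adds ourselves to file's wait queue.
	 * Returns an error if the backend's poll() reports POLLERR. */
	int vhost_poll_start(struct vhost_poll *poll, struct file *file)
	{
		unsigned long mask;
		int ret = 0;

		if (poll->wqh)	/* already polling: nothing to do */
			return 0;

		mask = file->f_op->poll(file, &poll->table);
		if (mask)
			vhost_poll_wakeup(&poll->wait, 0, 0, (void *)mask);
		if (mask & POLLERR) {
			if (poll->wqh)
				remove_wait_queue(poll->wqh, &poll->wait);
			ret = -EINVAL;
		}
		return ret;
	}

	/* Stop polling a file. Safe to call even if polling never started:
	 * poll->wqh records whether we are actually on a waitqueue. */
	void vhost_poll_stop(struct vhost_poll *poll)
	{
		if (poll->wqh) {
			remove_wait_queue(poll->wqh, &poll->wait);
			poll->wqh = NULL;
		}
	}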
