Re: [PATCH V3 2/2] vhost: handle polling errors
On Mon, Jan 14, 2013 at 03:37:58PM +0800, Jason Wang wrote: > On 01/14/2013 02:57 PM, Michael S. Tsirkin wrote: > > On Mon, Jan 14, 2013 at 10:59:02AM +0800, Jason Wang wrote: > >> On 01/13/2013 07:10 PM, Michael S. Tsirkin wrote: > >>> On Mon, Jan 07, 2013 at 11:04:32PM +0800, Jason Wang wrote: > On 01/07/2013 10:55 PM, Michael S. Tsirkin wrote: > > On Mon, Jan 07, 2013 at 12:38:17PM +0800, Jason Wang wrote: > >> On 01/06/2013 09:22 PM, Michael S. Tsirkin wrote: > >>> On Sun, Jan 06, 2013 at 03:18:38PM +0800, Jason Wang wrote: > Polling errors were ignored by vhost/vhost_net, this may lead to > crash when > trying to remove vhost from waitqueue when after the polling is > failed. Solve > this problem by: > > - checking the poll->wqh before trying to remove from waitqueue > - report an error when poll() returns a POLLERR in vhost_start_poll() > - report an error when vhost_start_poll() fails in > vhost_vring_ioctl()/vhost_net_set_backend() which is used to > notify the > failure to userspace. > - report an error in the data path in vhost_net when meet polling > errors. > > After those changes, we can safely drop the tx polling state in > vhost_net since > it was replaced by the checking of poll->wqh. > > Signed-off-by: Jason Wang > --- > drivers/vhost/net.c | 74 > > drivers/vhost/vhost.c | 31 +++- > drivers/vhost/vhost.h |2 +- > 3 files changed, 49 insertions(+), 58 deletions(-) > > diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c > index d10ad6f..125c1e5 100644 > --- a/drivers/vhost/net.c > +++ b/drivers/vhost/net.c > @@ -64,20 +64,10 @@ enum { > VHOST_NET_VQ_MAX = 2, > }; > > -enum vhost_net_poll_state { > -VHOST_NET_POLL_DISABLED = 0, > -VHOST_NET_POLL_STARTED = 1, > -VHOST_NET_POLL_STOPPED = 2, > -}; > - > struct vhost_net { > struct vhost_dev dev; > struct vhost_virtqueue vqs[VHOST_NET_VQ_MAX]; > struct vhost_poll poll[VHOST_NET_VQ_MAX]; > -/* Tells us whether we are polling a socket for TX. > - * We only do this when socket buffer fills up. > - * Protected by tx vq lock. */ > -enum vhost_net_poll_state tx_poll_state; > /* Number of TX recently submitted. > * Protected by tx vq lock. */ > unsigned tx_packets; > @@ -155,24 +145,6 @@ static void copy_iovec_hdr(const struct iovec > *from, struct iovec *to, > } > } > > -/* Caller must have TX VQ lock */ > -static void tx_poll_stop(struct vhost_net *net) > -{ > -if (likely(net->tx_poll_state != VHOST_NET_POLL_STARTED)) > -return; > -vhost_poll_stop(net->poll + VHOST_NET_VQ_TX); > -net->tx_poll_state = VHOST_NET_POLL_STOPPED; > -} > - > -/* Caller must have TX VQ lock */ > -static void tx_poll_start(struct vhost_net *net, struct socket > *sock) > -{ > -if (unlikely(net->tx_poll_state != VHOST_NET_POLL_STOPPED)) > -return; > -vhost_poll_start(net->poll + VHOST_NET_VQ_TX, sock->file); > -net->tx_poll_state = VHOST_NET_POLL_STARTED; > -} > - > /* In case of DMA done not in order in lower device driver for some > reason. > * upend_idx is used to track end of used idx, done_idx is used to > track head > * of used idx. Once lower device DMA done contiguously, we will > signal KVM > @@ -227,6 +199,7 @@ static void vhost_zerocopy_callback(struct > ubuf_info *ubuf, bool success) > static void handle_tx(struct vhost_net *net) > { > struct vhost_virtqueue *vq = >dev.vqs[VHOST_NET_VQ_TX]; > +struct vhost_poll *poll = net->poll + VHOST_NET_VQ_TX; > unsigned out, in, s; > int head; > struct msghdr msg = { > @@ -252,7 +225,8 @@ static void handle_tx(struct vhost_net *net) > wmem = atomic_read(>sk->sk_wmem_alloc); > if (wmem >= sock->sk->sk_sndbuf) { > mutex_lock(>mutex); > -tx_poll_start(net, sock); > +if (vhost_poll_start(poll, sock->file)) > +vq_err(vq, "Fail to start TX polling\n"); > >>> s/Fail/Failed/ > >>> > >>> A question though: how can this happen?
Re: [PATCH V3 2/2] vhost: handle polling errors
On Mon, Jan 14, 2013 at 03:37:58PM +0800, Jason Wang wrote: On 01/14/2013 02:57 PM, Michael S. Tsirkin wrote: On Mon, Jan 14, 2013 at 10:59:02AM +0800, Jason Wang wrote: On 01/13/2013 07:10 PM, Michael S. Tsirkin wrote: On Mon, Jan 07, 2013 at 11:04:32PM +0800, Jason Wang wrote: On 01/07/2013 10:55 PM, Michael S. Tsirkin wrote: On Mon, Jan 07, 2013 at 12:38:17PM +0800, Jason Wang wrote: On 01/06/2013 09:22 PM, Michael S. Tsirkin wrote: On Sun, Jan 06, 2013 at 03:18:38PM +0800, Jason Wang wrote: Polling errors were ignored by vhost/vhost_net, this may lead to crash when trying to remove vhost from waitqueue when after the polling is failed. Solve this problem by: - checking the poll-wqh before trying to remove from waitqueue - report an error when poll() returns a POLLERR in vhost_start_poll() - report an error when vhost_start_poll() fails in vhost_vring_ioctl()/vhost_net_set_backend() which is used to notify the failure to userspace. - report an error in the data path in vhost_net when meet polling errors. After those changes, we can safely drop the tx polling state in vhost_net since it was replaced by the checking of poll-wqh. Signed-off-by: Jason Wang jasow...@redhat.com --- drivers/vhost/net.c | 74 drivers/vhost/vhost.c | 31 +++- drivers/vhost/vhost.h |2 +- 3 files changed, 49 insertions(+), 58 deletions(-) diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index d10ad6f..125c1e5 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -64,20 +64,10 @@ enum { VHOST_NET_VQ_MAX = 2, }; -enum vhost_net_poll_state { -VHOST_NET_POLL_DISABLED = 0, -VHOST_NET_POLL_STARTED = 1, -VHOST_NET_POLL_STOPPED = 2, -}; - struct vhost_net { struct vhost_dev dev; struct vhost_virtqueue vqs[VHOST_NET_VQ_MAX]; struct vhost_poll poll[VHOST_NET_VQ_MAX]; -/* Tells us whether we are polling a socket for TX. - * We only do this when socket buffer fills up. - * Protected by tx vq lock. */ -enum vhost_net_poll_state tx_poll_state; /* Number of TX recently submitted. * Protected by tx vq lock. */ unsigned tx_packets; @@ -155,24 +145,6 @@ static void copy_iovec_hdr(const struct iovec *from, struct iovec *to, } } -/* Caller must have TX VQ lock */ -static void tx_poll_stop(struct vhost_net *net) -{ -if (likely(net-tx_poll_state != VHOST_NET_POLL_STARTED)) -return; -vhost_poll_stop(net-poll + VHOST_NET_VQ_TX); -net-tx_poll_state = VHOST_NET_POLL_STOPPED; -} - -/* Caller must have TX VQ lock */ -static void tx_poll_start(struct vhost_net *net, struct socket *sock) -{ -if (unlikely(net-tx_poll_state != VHOST_NET_POLL_STOPPED)) -return; -vhost_poll_start(net-poll + VHOST_NET_VQ_TX, sock-file); -net-tx_poll_state = VHOST_NET_POLL_STARTED; -} - /* In case of DMA done not in order in lower device driver for some reason. * upend_idx is used to track end of used idx, done_idx is used to track head * of used idx. Once lower device DMA done contiguously, we will signal KVM @@ -227,6 +199,7 @@ static void vhost_zerocopy_callback(struct ubuf_info *ubuf, bool success) static void handle_tx(struct vhost_net *net) { struct vhost_virtqueue *vq = net-dev.vqs[VHOST_NET_VQ_TX]; +struct vhost_poll *poll = net-poll + VHOST_NET_VQ_TX; unsigned out, in, s; int head; struct msghdr msg = { @@ -252,7 +225,8 @@ static void handle_tx(struct vhost_net *net) wmem = atomic_read(sock-sk-sk_wmem_alloc); if (wmem = sock-sk-sk_sndbuf) { mutex_lock(vq-mutex); -tx_poll_start(net, sock); +if (vhost_poll_start(poll, sock-file)) +vq_err(vq, Fail to start TX polling\n); s/Fail/Failed/ A question though: how can this happen? Could you clarify please? Maybe we can find a way to prevent this error? Two conditions I think this can happen: 1) a buggy userspace disable a queue through TUNSETQUEUE 2) the net device were gone For 1, looks like we can delay the disabling until the refcnt goes to zero. For 2 may needs more changes. I'd expect keeping a socket reference would prevent both issues. Doesn't it? Doesn't work for 2 I think, the socket didn't hold a refcnt of the device, so the device can go away at anytime. Although we can change this, but it's the behaviour before multiqueue support. Hmm there's one scenario that does seem to trigger this: queue can get disabled and then poll fails. Is this the only issue? Another one I think we can trigger is: - start vhost thread - do ip link del link dev tap0 to delete the tap device In this case, the netdevice is unregistered but the file/socket still
Re: [PATCH V3 2/2] vhost: handle polling errors
On 01/14/2013 02:57 PM, Michael S. Tsirkin wrote: > On Mon, Jan 14, 2013 at 10:59:02AM +0800, Jason Wang wrote: >> On 01/13/2013 07:10 PM, Michael S. Tsirkin wrote: >>> On Mon, Jan 07, 2013 at 11:04:32PM +0800, Jason Wang wrote: On 01/07/2013 10:55 PM, Michael S. Tsirkin wrote: > On Mon, Jan 07, 2013 at 12:38:17PM +0800, Jason Wang wrote: >> On 01/06/2013 09:22 PM, Michael S. Tsirkin wrote: >>> On Sun, Jan 06, 2013 at 03:18:38PM +0800, Jason Wang wrote: Polling errors were ignored by vhost/vhost_net, this may lead to crash when trying to remove vhost from waitqueue when after the polling is failed. Solve this problem by: - checking the poll->wqh before trying to remove from waitqueue - report an error when poll() returns a POLLERR in vhost_start_poll() - report an error when vhost_start_poll() fails in vhost_vring_ioctl()/vhost_net_set_backend() which is used to notify the failure to userspace. - report an error in the data path in vhost_net when meet polling errors. After those changes, we can safely drop the tx polling state in vhost_net since it was replaced by the checking of poll->wqh. Signed-off-by: Jason Wang --- drivers/vhost/net.c | 74 drivers/vhost/vhost.c | 31 +++- drivers/vhost/vhost.h |2 +- 3 files changed, 49 insertions(+), 58 deletions(-) diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index d10ad6f..125c1e5 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -64,20 +64,10 @@ enum { VHOST_NET_VQ_MAX = 2, }; -enum vhost_net_poll_state { - VHOST_NET_POLL_DISABLED = 0, - VHOST_NET_POLL_STARTED = 1, - VHOST_NET_POLL_STOPPED = 2, -}; - struct vhost_net { struct vhost_dev dev; struct vhost_virtqueue vqs[VHOST_NET_VQ_MAX]; struct vhost_poll poll[VHOST_NET_VQ_MAX]; - /* Tells us whether we are polling a socket for TX. - * We only do this when socket buffer fills up. - * Protected by tx vq lock. */ - enum vhost_net_poll_state tx_poll_state; /* Number of TX recently submitted. * Protected by tx vq lock. */ unsigned tx_packets; @@ -155,24 +145,6 @@ static void copy_iovec_hdr(const struct iovec *from, struct iovec *to, } } -/* Caller must have TX VQ lock */ -static void tx_poll_stop(struct vhost_net *net) -{ - if (likely(net->tx_poll_state != VHOST_NET_POLL_STARTED)) - return; - vhost_poll_stop(net->poll + VHOST_NET_VQ_TX); - net->tx_poll_state = VHOST_NET_POLL_STOPPED; -} - -/* Caller must have TX VQ lock */ -static void tx_poll_start(struct vhost_net *net, struct socket *sock) -{ - if (unlikely(net->tx_poll_state != VHOST_NET_POLL_STOPPED)) - return; - vhost_poll_start(net->poll + VHOST_NET_VQ_TX, sock->file); - net->tx_poll_state = VHOST_NET_POLL_STARTED; -} - /* In case of DMA done not in order in lower device driver for some reason. * upend_idx is used to track end of used idx, done_idx is used to track head * of used idx. Once lower device DMA done contiguously, we will signal KVM @@ -227,6 +199,7 @@ static void vhost_zerocopy_callback(struct ubuf_info *ubuf, bool success) static void handle_tx(struct vhost_net *net) { struct vhost_virtqueue *vq = >dev.vqs[VHOST_NET_VQ_TX]; + struct vhost_poll *poll = net->poll + VHOST_NET_VQ_TX; unsigned out, in, s; int head; struct msghdr msg = { @@ -252,7 +225,8 @@ static void handle_tx(struct vhost_net *net) wmem = atomic_read(>sk->sk_wmem_alloc); if (wmem >= sock->sk->sk_sndbuf) { mutex_lock(>mutex); - tx_poll_start(net, sock); + if (vhost_poll_start(poll, sock->file)) + vq_err(vq, "Fail to start TX polling\n"); >>> s/Fail/Failed/ >>> >>> A question though: how can this happen? Could you clarify please? >>> Maybe we can find a way to prevent this error? >> Two conditions I think this can happen: >> >> 1) a buggy userspace disable a queue through TUNSETQUEUE >> 2) the net
Re: [PATCH V3 2/2] vhost: handle polling errors
On Mon, Jan 14, 2013 at 10:59:02AM +0800, Jason Wang wrote: > On 01/13/2013 07:10 PM, Michael S. Tsirkin wrote: > > On Mon, Jan 07, 2013 at 11:04:32PM +0800, Jason Wang wrote: > >> On 01/07/2013 10:55 PM, Michael S. Tsirkin wrote: > >>> On Mon, Jan 07, 2013 at 12:38:17PM +0800, Jason Wang wrote: > On 01/06/2013 09:22 PM, Michael S. Tsirkin wrote: > > On Sun, Jan 06, 2013 at 03:18:38PM +0800, Jason Wang wrote: > >> Polling errors were ignored by vhost/vhost_net, this may lead to crash > >> when > >> trying to remove vhost from waitqueue when after the polling is > >> failed. Solve > >> this problem by: > >> > >> - checking the poll->wqh before trying to remove from waitqueue > >> - report an error when poll() returns a POLLERR in vhost_start_poll() > >> - report an error when vhost_start_poll() fails in > >> vhost_vring_ioctl()/vhost_net_set_backend() which is used to notify > >> the > >> failure to userspace. > >> - report an error in the data path in vhost_net when meet polling > >> errors. > >> > >> After those changes, we can safely drop the tx polling state in > >> vhost_net since > >> it was replaced by the checking of poll->wqh. > >> > >> Signed-off-by: Jason Wang > >> --- > >> drivers/vhost/net.c | 74 > >> > >> drivers/vhost/vhost.c | 31 +++- > >> drivers/vhost/vhost.h |2 +- > >> 3 files changed, 49 insertions(+), 58 deletions(-) > >> > >> diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c > >> index d10ad6f..125c1e5 100644 > >> --- a/drivers/vhost/net.c > >> +++ b/drivers/vhost/net.c > >> @@ -64,20 +64,10 @@ enum { > >>VHOST_NET_VQ_MAX = 2, > >> }; > >> > >> -enum vhost_net_poll_state { > >> - VHOST_NET_POLL_DISABLED = 0, > >> - VHOST_NET_POLL_STARTED = 1, > >> - VHOST_NET_POLL_STOPPED = 2, > >> -}; > >> - > >> struct vhost_net { > >>struct vhost_dev dev; > >>struct vhost_virtqueue vqs[VHOST_NET_VQ_MAX]; > >>struct vhost_poll poll[VHOST_NET_VQ_MAX]; > >> - /* Tells us whether we are polling a socket for TX. > >> - * We only do this when socket buffer fills up. > >> - * Protected by tx vq lock. */ > >> - enum vhost_net_poll_state tx_poll_state; > >>/* Number of TX recently submitted. > >> * Protected by tx vq lock. */ > >>unsigned tx_packets; > >> @@ -155,24 +145,6 @@ static void copy_iovec_hdr(const struct iovec > >> *from, struct iovec *to, > >>} > >> } > >> > >> -/* Caller must have TX VQ lock */ > >> -static void tx_poll_stop(struct vhost_net *net) > >> -{ > >> - if (likely(net->tx_poll_state != VHOST_NET_POLL_STARTED)) > >> - return; > >> - vhost_poll_stop(net->poll + VHOST_NET_VQ_TX); > >> - net->tx_poll_state = VHOST_NET_POLL_STOPPED; > >> -} > >> - > >> -/* Caller must have TX VQ lock */ > >> -static void tx_poll_start(struct vhost_net *net, struct socket *sock) > >> -{ > >> - if (unlikely(net->tx_poll_state != VHOST_NET_POLL_STOPPED)) > >> - return; > >> - vhost_poll_start(net->poll + VHOST_NET_VQ_TX, sock->file); > >> - net->tx_poll_state = VHOST_NET_POLL_STARTED; > >> -} > >> - > >> /* In case of DMA done not in order in lower device driver for some > >> reason. > >> * upend_idx is used to track end of used idx, done_idx is used to > >> track head > >> * of used idx. Once lower device DMA done contiguously, we will > >> signal KVM > >> @@ -227,6 +199,7 @@ static void vhost_zerocopy_callback(struct > >> ubuf_info *ubuf, bool success) > >> static void handle_tx(struct vhost_net *net) > >> { > >>struct vhost_virtqueue *vq = >dev.vqs[VHOST_NET_VQ_TX]; > >> + struct vhost_poll *poll = net->poll + VHOST_NET_VQ_TX; > >>unsigned out, in, s; > >>int head; > >>struct msghdr msg = { > >> @@ -252,7 +225,8 @@ static void handle_tx(struct vhost_net *net) > >>wmem = atomic_read(>sk->sk_wmem_alloc); > >>if (wmem >= sock->sk->sk_sndbuf) { > >>mutex_lock(>mutex); > >> - tx_poll_start(net, sock); > >> + if (vhost_poll_start(poll, sock->file)) > >> + vq_err(vq, "Fail to start TX polling\n"); > > s/Fail/Failed/ > > > > A question though: how can this happen? Could you clarify please? > > Maybe we can find a way to prevent this error? > Two conditions I think this can happen: > > 1) a buggy userspace disable a queue through TUNSETQUEUE > 2) the net device were gone > > For 1, looks like we can
Re: [PATCH V3 2/2] vhost: handle polling errors
On 01/13/2013 07:10 PM, Michael S. Tsirkin wrote: > On Mon, Jan 07, 2013 at 11:04:32PM +0800, Jason Wang wrote: >> On 01/07/2013 10:55 PM, Michael S. Tsirkin wrote: >>> On Mon, Jan 07, 2013 at 12:38:17PM +0800, Jason Wang wrote: On 01/06/2013 09:22 PM, Michael S. Tsirkin wrote: > On Sun, Jan 06, 2013 at 03:18:38PM +0800, Jason Wang wrote: >> Polling errors were ignored by vhost/vhost_net, this may lead to crash >> when >> trying to remove vhost from waitqueue when after the polling is failed. >> Solve >> this problem by: >> >> - checking the poll->wqh before trying to remove from waitqueue >> - report an error when poll() returns a POLLERR in vhost_start_poll() >> - report an error when vhost_start_poll() fails in >> vhost_vring_ioctl()/vhost_net_set_backend() which is used to notify the >> failure to userspace. >> - report an error in the data path in vhost_net when meet polling errors. >> >> After those changes, we can safely drop the tx polling state in >> vhost_net since >> it was replaced by the checking of poll->wqh. >> >> Signed-off-by: Jason Wang >> --- >> drivers/vhost/net.c | 74 >> >> drivers/vhost/vhost.c | 31 +++- >> drivers/vhost/vhost.h |2 +- >> 3 files changed, 49 insertions(+), 58 deletions(-) >> >> diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c >> index d10ad6f..125c1e5 100644 >> --- a/drivers/vhost/net.c >> +++ b/drivers/vhost/net.c >> @@ -64,20 +64,10 @@ enum { >> VHOST_NET_VQ_MAX = 2, >> }; >> >> -enum vhost_net_poll_state { >> -VHOST_NET_POLL_DISABLED = 0, >> -VHOST_NET_POLL_STARTED = 1, >> -VHOST_NET_POLL_STOPPED = 2, >> -}; >> - >> struct vhost_net { >> struct vhost_dev dev; >> struct vhost_virtqueue vqs[VHOST_NET_VQ_MAX]; >> struct vhost_poll poll[VHOST_NET_VQ_MAX]; >> -/* Tells us whether we are polling a socket for TX. >> - * We only do this when socket buffer fills up. >> - * Protected by tx vq lock. */ >> -enum vhost_net_poll_state tx_poll_state; >> /* Number of TX recently submitted. >> * Protected by tx vq lock. */ >> unsigned tx_packets; >> @@ -155,24 +145,6 @@ static void copy_iovec_hdr(const struct iovec >> *from, struct iovec *to, >> } >> } >> >> -/* Caller must have TX VQ lock */ >> -static void tx_poll_stop(struct vhost_net *net) >> -{ >> -if (likely(net->tx_poll_state != VHOST_NET_POLL_STARTED)) >> -return; >> -vhost_poll_stop(net->poll + VHOST_NET_VQ_TX); >> -net->tx_poll_state = VHOST_NET_POLL_STOPPED; >> -} >> - >> -/* Caller must have TX VQ lock */ >> -static void tx_poll_start(struct vhost_net *net, struct socket *sock) >> -{ >> -if (unlikely(net->tx_poll_state != VHOST_NET_POLL_STOPPED)) >> -return; >> -vhost_poll_start(net->poll + VHOST_NET_VQ_TX, sock->file); >> -net->tx_poll_state = VHOST_NET_POLL_STARTED; >> -} >> - >> /* In case of DMA done not in order in lower device driver for some >> reason. >> * upend_idx is used to track end of used idx, done_idx is used to >> track head >> * of used idx. Once lower device DMA done contiguously, we will signal >> KVM >> @@ -227,6 +199,7 @@ static void vhost_zerocopy_callback(struct ubuf_info >> *ubuf, bool success) >> static void handle_tx(struct vhost_net *net) >> { >> struct vhost_virtqueue *vq = >dev.vqs[VHOST_NET_VQ_TX]; >> +struct vhost_poll *poll = net->poll + VHOST_NET_VQ_TX; >> unsigned out, in, s; >> int head; >> struct msghdr msg = { >> @@ -252,7 +225,8 @@ static void handle_tx(struct vhost_net *net) >> wmem = atomic_read(>sk->sk_wmem_alloc); >> if (wmem >= sock->sk->sk_sndbuf) { >> mutex_lock(>mutex); >> -tx_poll_start(net, sock); >> +if (vhost_poll_start(poll, sock->file)) >> +vq_err(vq, "Fail to start TX polling\n"); > s/Fail/Failed/ > > A question though: how can this happen? Could you clarify please? > Maybe we can find a way to prevent this error? Two conditions I think this can happen: 1) a buggy userspace disable a queue through TUNSETQUEUE 2) the net device were gone For 1, looks like we can delay the disabling until the refcnt goes to zero. For 2 may needs more changes. >>> I'd expect keeping a socket reference would prevent both issues. >>> Doesn't it? >> Doesn't work for 2 I think, the socket didn't hold a refcnt of
Re: [PATCH V3 2/2] vhost: handle polling errors
On Mon, Jan 07, 2013 at 11:04:32PM +0800, Jason Wang wrote: > On 01/07/2013 10:55 PM, Michael S. Tsirkin wrote: > > On Mon, Jan 07, 2013 at 12:38:17PM +0800, Jason Wang wrote: > >> On 01/06/2013 09:22 PM, Michael S. Tsirkin wrote: > >>> On Sun, Jan 06, 2013 at 03:18:38PM +0800, Jason Wang wrote: > Polling errors were ignored by vhost/vhost_net, this may lead to crash > when > trying to remove vhost from waitqueue when after the polling is failed. > Solve > this problem by: > > - checking the poll->wqh before trying to remove from waitqueue > - report an error when poll() returns a POLLERR in vhost_start_poll() > - report an error when vhost_start_poll() fails in > vhost_vring_ioctl()/vhost_net_set_backend() which is used to notify the > failure to userspace. > - report an error in the data path in vhost_net when meet polling errors. > > After those changes, we can safely drop the tx polling state in > vhost_net since > it was replaced by the checking of poll->wqh. > > Signed-off-by: Jason Wang > --- > drivers/vhost/net.c | 74 > > drivers/vhost/vhost.c | 31 +++- > drivers/vhost/vhost.h |2 +- > 3 files changed, 49 insertions(+), 58 deletions(-) > > diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c > index d10ad6f..125c1e5 100644 > --- a/drivers/vhost/net.c > +++ b/drivers/vhost/net.c > @@ -64,20 +64,10 @@ enum { > VHOST_NET_VQ_MAX = 2, > }; > > -enum vhost_net_poll_state { > -VHOST_NET_POLL_DISABLED = 0, > -VHOST_NET_POLL_STARTED = 1, > -VHOST_NET_POLL_STOPPED = 2, > -}; > - > struct vhost_net { > struct vhost_dev dev; > struct vhost_virtqueue vqs[VHOST_NET_VQ_MAX]; > struct vhost_poll poll[VHOST_NET_VQ_MAX]; > -/* Tells us whether we are polling a socket for TX. > - * We only do this when socket buffer fills up. > - * Protected by tx vq lock. */ > -enum vhost_net_poll_state tx_poll_state; > /* Number of TX recently submitted. > * Protected by tx vq lock. */ > unsigned tx_packets; > @@ -155,24 +145,6 @@ static void copy_iovec_hdr(const struct iovec > *from, struct iovec *to, > } > } > > -/* Caller must have TX VQ lock */ > -static void tx_poll_stop(struct vhost_net *net) > -{ > -if (likely(net->tx_poll_state != VHOST_NET_POLL_STARTED)) > -return; > -vhost_poll_stop(net->poll + VHOST_NET_VQ_TX); > -net->tx_poll_state = VHOST_NET_POLL_STOPPED; > -} > - > -/* Caller must have TX VQ lock */ > -static void tx_poll_start(struct vhost_net *net, struct socket *sock) > -{ > -if (unlikely(net->tx_poll_state != VHOST_NET_POLL_STOPPED)) > -return; > -vhost_poll_start(net->poll + VHOST_NET_VQ_TX, sock->file); > -net->tx_poll_state = VHOST_NET_POLL_STARTED; > -} > - > /* In case of DMA done not in order in lower device driver for some > reason. > * upend_idx is used to track end of used idx, done_idx is used to > track head > * of used idx. Once lower device DMA done contiguously, we will signal > KVM > @@ -227,6 +199,7 @@ static void vhost_zerocopy_callback(struct ubuf_info > *ubuf, bool success) > static void handle_tx(struct vhost_net *net) > { > struct vhost_virtqueue *vq = >dev.vqs[VHOST_NET_VQ_TX]; > +struct vhost_poll *poll = net->poll + VHOST_NET_VQ_TX; > unsigned out, in, s; > int head; > struct msghdr msg = { > @@ -252,7 +225,8 @@ static void handle_tx(struct vhost_net *net) > wmem = atomic_read(>sk->sk_wmem_alloc); > if (wmem >= sock->sk->sk_sndbuf) { > mutex_lock(>mutex); > -tx_poll_start(net, sock); > +if (vhost_poll_start(poll, sock->file)) > +vq_err(vq, "Fail to start TX polling\n"); > >>> s/Fail/Failed/ > >>> > >>> A question though: how can this happen? Could you clarify please? > >>> Maybe we can find a way to prevent this error? > >> Two conditions I think this can happen: > >> > >> 1) a buggy userspace disable a queue through TUNSETQUEUE > >> 2) the net device were gone > >> > >> For 1, looks like we can delay the disabling until the refcnt goes to > >> zero. For 2 may needs more changes. > > I'd expect keeping a socket reference would prevent both issues. > > Doesn't it? > > Doesn't work for 2 I think, the socket didn't hold a refcnt of the > device, so the device can go away at
Re: [PATCH V3 2/2] vhost: handle polling errors
On Mon, Jan 07, 2013 at 11:04:32PM +0800, Jason Wang wrote: > On 01/07/2013 10:55 PM, Michael S. Tsirkin wrote: > > On Mon, Jan 07, 2013 at 12:38:17PM +0800, Jason Wang wrote: > >> On 01/06/2013 09:22 PM, Michael S. Tsirkin wrote: > >>> On Sun, Jan 06, 2013 at 03:18:38PM +0800, Jason Wang wrote: > Polling errors were ignored by vhost/vhost_net, this may lead to crash > when > trying to remove vhost from waitqueue when after the polling is failed. > Solve > this problem by: > > - checking the poll->wqh before trying to remove from waitqueue > - report an error when poll() returns a POLLERR in vhost_start_poll() > - report an error when vhost_start_poll() fails in > vhost_vring_ioctl()/vhost_net_set_backend() which is used to notify the > failure to userspace. > - report an error in the data path in vhost_net when meet polling errors. > > After those changes, we can safely drop the tx polling state in > vhost_net since > it was replaced by the checking of poll->wqh. > > Signed-off-by: Jason Wang > --- > drivers/vhost/net.c | 74 > > drivers/vhost/vhost.c | 31 +++- > drivers/vhost/vhost.h |2 +- > 3 files changed, 49 insertions(+), 58 deletions(-) > > diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c > index d10ad6f..125c1e5 100644 > --- a/drivers/vhost/net.c > +++ b/drivers/vhost/net.c > @@ -64,20 +64,10 @@ enum { > VHOST_NET_VQ_MAX = 2, > }; > > -enum vhost_net_poll_state { > -VHOST_NET_POLL_DISABLED = 0, > -VHOST_NET_POLL_STARTED = 1, > -VHOST_NET_POLL_STOPPED = 2, > -}; > - > struct vhost_net { > struct vhost_dev dev; > struct vhost_virtqueue vqs[VHOST_NET_VQ_MAX]; > struct vhost_poll poll[VHOST_NET_VQ_MAX]; > -/* Tells us whether we are polling a socket for TX. > - * We only do this when socket buffer fills up. > - * Protected by tx vq lock. */ > -enum vhost_net_poll_state tx_poll_state; > /* Number of TX recently submitted. > * Protected by tx vq lock. */ > unsigned tx_packets; > @@ -155,24 +145,6 @@ static void copy_iovec_hdr(const struct iovec > *from, struct iovec *to, > } > } > > -/* Caller must have TX VQ lock */ > -static void tx_poll_stop(struct vhost_net *net) > -{ > -if (likely(net->tx_poll_state != VHOST_NET_POLL_STARTED)) > -return; > -vhost_poll_stop(net->poll + VHOST_NET_VQ_TX); > -net->tx_poll_state = VHOST_NET_POLL_STOPPED; > -} > - > -/* Caller must have TX VQ lock */ > -static void tx_poll_start(struct vhost_net *net, struct socket *sock) > -{ > -if (unlikely(net->tx_poll_state != VHOST_NET_POLL_STOPPED)) > -return; > -vhost_poll_start(net->poll + VHOST_NET_VQ_TX, sock->file); > -net->tx_poll_state = VHOST_NET_POLL_STARTED; > -} > - > /* In case of DMA done not in order in lower device driver for some > reason. > * upend_idx is used to track end of used idx, done_idx is used to > track head > * of used idx. Once lower device DMA done contiguously, we will signal > KVM > @@ -227,6 +199,7 @@ static void vhost_zerocopy_callback(struct ubuf_info > *ubuf, bool success) > static void handle_tx(struct vhost_net *net) > { > struct vhost_virtqueue *vq = >dev.vqs[VHOST_NET_VQ_TX]; > +struct vhost_poll *poll = net->poll + VHOST_NET_VQ_TX; > unsigned out, in, s; > int head; > struct msghdr msg = { > @@ -252,7 +225,8 @@ static void handle_tx(struct vhost_net *net) > wmem = atomic_read(>sk->sk_wmem_alloc); > if (wmem >= sock->sk->sk_sndbuf) { > mutex_lock(>mutex); > -tx_poll_start(net, sock); > +if (vhost_poll_start(poll, sock->file)) > +vq_err(vq, "Fail to start TX polling\n"); > >>> s/Fail/Failed/ > >>> > >>> A question though: how can this happen? Could you clarify please? > >>> Maybe we can find a way to prevent this error? > >> Two conditions I think this can happen: > >> > >> 1) a buggy userspace disable a queue through TUNSETQUEUE > >> 2) the net device were gone > >> > >> For 1, looks like we can delay the disabling until the refcnt goes to > >> zero. For 2 may needs more changes. > > I'd expect keeping a socket reference would prevent both issues. > > Doesn't it? > > Doesn't work for 2 I think, the socket didn't hold a refcnt of the > device, so the device can go away at
Re: [PATCH V3 2/2] vhost: handle polling errors
On Mon, Jan 07, 2013 at 11:04:32PM +0800, Jason Wang wrote: On 01/07/2013 10:55 PM, Michael S. Tsirkin wrote: On Mon, Jan 07, 2013 at 12:38:17PM +0800, Jason Wang wrote: On 01/06/2013 09:22 PM, Michael S. Tsirkin wrote: On Sun, Jan 06, 2013 at 03:18:38PM +0800, Jason Wang wrote: Polling errors were ignored by vhost/vhost_net, this may lead to crash when trying to remove vhost from waitqueue when after the polling is failed. Solve this problem by: - checking the poll-wqh before trying to remove from waitqueue - report an error when poll() returns a POLLERR in vhost_start_poll() - report an error when vhost_start_poll() fails in vhost_vring_ioctl()/vhost_net_set_backend() which is used to notify the failure to userspace. - report an error in the data path in vhost_net when meet polling errors. After those changes, we can safely drop the tx polling state in vhost_net since it was replaced by the checking of poll-wqh. Signed-off-by: Jason Wang jasow...@redhat.com --- drivers/vhost/net.c | 74 drivers/vhost/vhost.c | 31 +++- drivers/vhost/vhost.h |2 +- 3 files changed, 49 insertions(+), 58 deletions(-) diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index d10ad6f..125c1e5 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -64,20 +64,10 @@ enum { VHOST_NET_VQ_MAX = 2, }; -enum vhost_net_poll_state { -VHOST_NET_POLL_DISABLED = 0, -VHOST_NET_POLL_STARTED = 1, -VHOST_NET_POLL_STOPPED = 2, -}; - struct vhost_net { struct vhost_dev dev; struct vhost_virtqueue vqs[VHOST_NET_VQ_MAX]; struct vhost_poll poll[VHOST_NET_VQ_MAX]; -/* Tells us whether we are polling a socket for TX. - * We only do this when socket buffer fills up. - * Protected by tx vq lock. */ -enum vhost_net_poll_state tx_poll_state; /* Number of TX recently submitted. * Protected by tx vq lock. */ unsigned tx_packets; @@ -155,24 +145,6 @@ static void copy_iovec_hdr(const struct iovec *from, struct iovec *to, } } -/* Caller must have TX VQ lock */ -static void tx_poll_stop(struct vhost_net *net) -{ -if (likely(net-tx_poll_state != VHOST_NET_POLL_STARTED)) -return; -vhost_poll_stop(net-poll + VHOST_NET_VQ_TX); -net-tx_poll_state = VHOST_NET_POLL_STOPPED; -} - -/* Caller must have TX VQ lock */ -static void tx_poll_start(struct vhost_net *net, struct socket *sock) -{ -if (unlikely(net-tx_poll_state != VHOST_NET_POLL_STOPPED)) -return; -vhost_poll_start(net-poll + VHOST_NET_VQ_TX, sock-file); -net-tx_poll_state = VHOST_NET_POLL_STARTED; -} - /* In case of DMA done not in order in lower device driver for some reason. * upend_idx is used to track end of used idx, done_idx is used to track head * of used idx. Once lower device DMA done contiguously, we will signal KVM @@ -227,6 +199,7 @@ static void vhost_zerocopy_callback(struct ubuf_info *ubuf, bool success) static void handle_tx(struct vhost_net *net) { struct vhost_virtqueue *vq = net-dev.vqs[VHOST_NET_VQ_TX]; +struct vhost_poll *poll = net-poll + VHOST_NET_VQ_TX; unsigned out, in, s; int head; struct msghdr msg = { @@ -252,7 +225,8 @@ static void handle_tx(struct vhost_net *net) wmem = atomic_read(sock-sk-sk_wmem_alloc); if (wmem = sock-sk-sk_sndbuf) { mutex_lock(vq-mutex); -tx_poll_start(net, sock); +if (vhost_poll_start(poll, sock-file)) +vq_err(vq, Fail to start TX polling\n); s/Fail/Failed/ A question though: how can this happen? Could you clarify please? Maybe we can find a way to prevent this error? Two conditions I think this can happen: 1) a buggy userspace disable a queue through TUNSETQUEUE 2) the net device were gone For 1, looks like we can delay the disabling until the refcnt goes to zero. For 2 may needs more changes. I'd expect keeping a socket reference would prevent both issues. Doesn't it? Doesn't work for 2 I think, the socket didn't hold a refcnt of the device, so the device can go away at anytime. Although we can change this, but it's the behaviour before multiqueue support. Hmm there's one scenario that does seem to trigger this: queue can get disabled and then poll fails. Is this the only issue? Not sure it's worth to do this work, maybe a warning is enough just like other failure. With other failures, you normally can correct the error then kick to have it restart. This is soomething thagt would not work here. If userspace is wrote correctly, (e.g passing a fd with correct state)
Re: [PATCH V3 2/2] vhost: handle polling errors
On Mon, Jan 07, 2013 at 11:04:32PM +0800, Jason Wang wrote: On 01/07/2013 10:55 PM, Michael S. Tsirkin wrote: On Mon, Jan 07, 2013 at 12:38:17PM +0800, Jason Wang wrote: On 01/06/2013 09:22 PM, Michael S. Tsirkin wrote: On Sun, Jan 06, 2013 at 03:18:38PM +0800, Jason Wang wrote: Polling errors were ignored by vhost/vhost_net, this may lead to crash when trying to remove vhost from waitqueue when after the polling is failed. Solve this problem by: - checking the poll-wqh before trying to remove from waitqueue - report an error when poll() returns a POLLERR in vhost_start_poll() - report an error when vhost_start_poll() fails in vhost_vring_ioctl()/vhost_net_set_backend() which is used to notify the failure to userspace. - report an error in the data path in vhost_net when meet polling errors. After those changes, we can safely drop the tx polling state in vhost_net since it was replaced by the checking of poll-wqh. Signed-off-by: Jason Wang jasow...@redhat.com --- drivers/vhost/net.c | 74 drivers/vhost/vhost.c | 31 +++- drivers/vhost/vhost.h |2 +- 3 files changed, 49 insertions(+), 58 deletions(-) diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index d10ad6f..125c1e5 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -64,20 +64,10 @@ enum { VHOST_NET_VQ_MAX = 2, }; -enum vhost_net_poll_state { -VHOST_NET_POLL_DISABLED = 0, -VHOST_NET_POLL_STARTED = 1, -VHOST_NET_POLL_STOPPED = 2, -}; - struct vhost_net { struct vhost_dev dev; struct vhost_virtqueue vqs[VHOST_NET_VQ_MAX]; struct vhost_poll poll[VHOST_NET_VQ_MAX]; -/* Tells us whether we are polling a socket for TX. - * We only do this when socket buffer fills up. - * Protected by tx vq lock. */ -enum vhost_net_poll_state tx_poll_state; /* Number of TX recently submitted. * Protected by tx vq lock. */ unsigned tx_packets; @@ -155,24 +145,6 @@ static void copy_iovec_hdr(const struct iovec *from, struct iovec *to, } } -/* Caller must have TX VQ lock */ -static void tx_poll_stop(struct vhost_net *net) -{ -if (likely(net-tx_poll_state != VHOST_NET_POLL_STARTED)) -return; -vhost_poll_stop(net-poll + VHOST_NET_VQ_TX); -net-tx_poll_state = VHOST_NET_POLL_STOPPED; -} - -/* Caller must have TX VQ lock */ -static void tx_poll_start(struct vhost_net *net, struct socket *sock) -{ -if (unlikely(net-tx_poll_state != VHOST_NET_POLL_STOPPED)) -return; -vhost_poll_start(net-poll + VHOST_NET_VQ_TX, sock-file); -net-tx_poll_state = VHOST_NET_POLL_STARTED; -} - /* In case of DMA done not in order in lower device driver for some reason. * upend_idx is used to track end of used idx, done_idx is used to track head * of used idx. Once lower device DMA done contiguously, we will signal KVM @@ -227,6 +199,7 @@ static void vhost_zerocopy_callback(struct ubuf_info *ubuf, bool success) static void handle_tx(struct vhost_net *net) { struct vhost_virtqueue *vq = net-dev.vqs[VHOST_NET_VQ_TX]; +struct vhost_poll *poll = net-poll + VHOST_NET_VQ_TX; unsigned out, in, s; int head; struct msghdr msg = { @@ -252,7 +225,8 @@ static void handle_tx(struct vhost_net *net) wmem = atomic_read(sock-sk-sk_wmem_alloc); if (wmem = sock-sk-sk_sndbuf) { mutex_lock(vq-mutex); -tx_poll_start(net, sock); +if (vhost_poll_start(poll, sock-file)) +vq_err(vq, Fail to start TX polling\n); s/Fail/Failed/ A question though: how can this happen? Could you clarify please? Maybe we can find a way to prevent this error? Two conditions I think this can happen: 1) a buggy userspace disable a queue through TUNSETQUEUE 2) the net device were gone For 1, looks like we can delay the disabling until the refcnt goes to zero. For 2 may needs more changes. I'd expect keeping a socket reference would prevent both issues. Doesn't it? Doesn't work for 2 I think, the socket didn't hold a refcnt of the device, so the device can go away at anytime. Hmm I don't really understand. All we care about is that socket is around no? Could you please show how a problematic case can be triggered? Although we can change this, but it's the behaviour before multiqueue support. Not sure it's worth to do this work, maybe a warning is enough just like other failure. With other failures, you normally can correct the error then kick to have it restart. This is soomething thagt would not work here. If userspace is wrote correctly, (e.g passing a fd with
Re: [PATCH V3 2/2] vhost: handle polling errors
On 01/13/2013 07:10 PM, Michael S. Tsirkin wrote: On Mon, Jan 07, 2013 at 11:04:32PM +0800, Jason Wang wrote: On 01/07/2013 10:55 PM, Michael S. Tsirkin wrote: On Mon, Jan 07, 2013 at 12:38:17PM +0800, Jason Wang wrote: On 01/06/2013 09:22 PM, Michael S. Tsirkin wrote: On Sun, Jan 06, 2013 at 03:18:38PM +0800, Jason Wang wrote: Polling errors were ignored by vhost/vhost_net, this may lead to crash when trying to remove vhost from waitqueue when after the polling is failed. Solve this problem by: - checking the poll-wqh before trying to remove from waitqueue - report an error when poll() returns a POLLERR in vhost_start_poll() - report an error when vhost_start_poll() fails in vhost_vring_ioctl()/vhost_net_set_backend() which is used to notify the failure to userspace. - report an error in the data path in vhost_net when meet polling errors. After those changes, we can safely drop the tx polling state in vhost_net since it was replaced by the checking of poll-wqh. Signed-off-by: Jason Wang jasow...@redhat.com --- drivers/vhost/net.c | 74 drivers/vhost/vhost.c | 31 +++- drivers/vhost/vhost.h |2 +- 3 files changed, 49 insertions(+), 58 deletions(-) diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index d10ad6f..125c1e5 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -64,20 +64,10 @@ enum { VHOST_NET_VQ_MAX = 2, }; -enum vhost_net_poll_state { -VHOST_NET_POLL_DISABLED = 0, -VHOST_NET_POLL_STARTED = 1, -VHOST_NET_POLL_STOPPED = 2, -}; - struct vhost_net { struct vhost_dev dev; struct vhost_virtqueue vqs[VHOST_NET_VQ_MAX]; struct vhost_poll poll[VHOST_NET_VQ_MAX]; -/* Tells us whether we are polling a socket for TX. - * We only do this when socket buffer fills up. - * Protected by tx vq lock. */ -enum vhost_net_poll_state tx_poll_state; /* Number of TX recently submitted. * Protected by tx vq lock. */ unsigned tx_packets; @@ -155,24 +145,6 @@ static void copy_iovec_hdr(const struct iovec *from, struct iovec *to, } } -/* Caller must have TX VQ lock */ -static void tx_poll_stop(struct vhost_net *net) -{ -if (likely(net-tx_poll_state != VHOST_NET_POLL_STARTED)) -return; -vhost_poll_stop(net-poll + VHOST_NET_VQ_TX); -net-tx_poll_state = VHOST_NET_POLL_STOPPED; -} - -/* Caller must have TX VQ lock */ -static void tx_poll_start(struct vhost_net *net, struct socket *sock) -{ -if (unlikely(net-tx_poll_state != VHOST_NET_POLL_STOPPED)) -return; -vhost_poll_start(net-poll + VHOST_NET_VQ_TX, sock-file); -net-tx_poll_state = VHOST_NET_POLL_STARTED; -} - /* In case of DMA done not in order in lower device driver for some reason. * upend_idx is used to track end of used idx, done_idx is used to track head * of used idx. Once lower device DMA done contiguously, we will signal KVM @@ -227,6 +199,7 @@ static void vhost_zerocopy_callback(struct ubuf_info *ubuf, bool success) static void handle_tx(struct vhost_net *net) { struct vhost_virtqueue *vq = net-dev.vqs[VHOST_NET_VQ_TX]; +struct vhost_poll *poll = net-poll + VHOST_NET_VQ_TX; unsigned out, in, s; int head; struct msghdr msg = { @@ -252,7 +225,8 @@ static void handle_tx(struct vhost_net *net) wmem = atomic_read(sock-sk-sk_wmem_alloc); if (wmem = sock-sk-sk_sndbuf) { mutex_lock(vq-mutex); -tx_poll_start(net, sock); +if (vhost_poll_start(poll, sock-file)) +vq_err(vq, Fail to start TX polling\n); s/Fail/Failed/ A question though: how can this happen? Could you clarify please? Maybe we can find a way to prevent this error? Two conditions I think this can happen: 1) a buggy userspace disable a queue through TUNSETQUEUE 2) the net device were gone For 1, looks like we can delay the disabling until the refcnt goes to zero. For 2 may needs more changes. I'd expect keeping a socket reference would prevent both issues. Doesn't it? Doesn't work for 2 I think, the socket didn't hold a refcnt of the device, so the device can go away at anytime. Although we can change this, but it's the behaviour before multiqueue support. Hmm there's one scenario that does seem to trigger this: queue can get disabled and then poll fails. Is this the only issue? Another one I think we can trigger is: - start vhost thread - do ip link del link dev tap0 to delete the tap device In this case, the netdevice is unregistered but the file/socket still exist. Not sure it's worth to do this work, maybe a warning is enough just like other failure. With other failures, you normally can correct the error then kick to have it
Re: [PATCH V3 2/2] vhost: handle polling errors
On Mon, Jan 14, 2013 at 10:59:02AM +0800, Jason Wang wrote: On 01/13/2013 07:10 PM, Michael S. Tsirkin wrote: On Mon, Jan 07, 2013 at 11:04:32PM +0800, Jason Wang wrote: On 01/07/2013 10:55 PM, Michael S. Tsirkin wrote: On Mon, Jan 07, 2013 at 12:38:17PM +0800, Jason Wang wrote: On 01/06/2013 09:22 PM, Michael S. Tsirkin wrote: On Sun, Jan 06, 2013 at 03:18:38PM +0800, Jason Wang wrote: Polling errors were ignored by vhost/vhost_net, this may lead to crash when trying to remove vhost from waitqueue when after the polling is failed. Solve this problem by: - checking the poll-wqh before trying to remove from waitqueue - report an error when poll() returns a POLLERR in vhost_start_poll() - report an error when vhost_start_poll() fails in vhost_vring_ioctl()/vhost_net_set_backend() which is used to notify the failure to userspace. - report an error in the data path in vhost_net when meet polling errors. After those changes, we can safely drop the tx polling state in vhost_net since it was replaced by the checking of poll-wqh. Signed-off-by: Jason Wang jasow...@redhat.com --- drivers/vhost/net.c | 74 drivers/vhost/vhost.c | 31 +++- drivers/vhost/vhost.h |2 +- 3 files changed, 49 insertions(+), 58 deletions(-) diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index d10ad6f..125c1e5 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -64,20 +64,10 @@ enum { VHOST_NET_VQ_MAX = 2, }; -enum vhost_net_poll_state { - VHOST_NET_POLL_DISABLED = 0, - VHOST_NET_POLL_STARTED = 1, - VHOST_NET_POLL_STOPPED = 2, -}; - struct vhost_net { struct vhost_dev dev; struct vhost_virtqueue vqs[VHOST_NET_VQ_MAX]; struct vhost_poll poll[VHOST_NET_VQ_MAX]; - /* Tells us whether we are polling a socket for TX. - * We only do this when socket buffer fills up. - * Protected by tx vq lock. */ - enum vhost_net_poll_state tx_poll_state; /* Number of TX recently submitted. * Protected by tx vq lock. */ unsigned tx_packets; @@ -155,24 +145,6 @@ static void copy_iovec_hdr(const struct iovec *from, struct iovec *to, } } -/* Caller must have TX VQ lock */ -static void tx_poll_stop(struct vhost_net *net) -{ - if (likely(net-tx_poll_state != VHOST_NET_POLL_STARTED)) - return; - vhost_poll_stop(net-poll + VHOST_NET_VQ_TX); - net-tx_poll_state = VHOST_NET_POLL_STOPPED; -} - -/* Caller must have TX VQ lock */ -static void tx_poll_start(struct vhost_net *net, struct socket *sock) -{ - if (unlikely(net-tx_poll_state != VHOST_NET_POLL_STOPPED)) - return; - vhost_poll_start(net-poll + VHOST_NET_VQ_TX, sock-file); - net-tx_poll_state = VHOST_NET_POLL_STARTED; -} - /* In case of DMA done not in order in lower device driver for some reason. * upend_idx is used to track end of used idx, done_idx is used to track head * of used idx. Once lower device DMA done contiguously, we will signal KVM @@ -227,6 +199,7 @@ static void vhost_zerocopy_callback(struct ubuf_info *ubuf, bool success) static void handle_tx(struct vhost_net *net) { struct vhost_virtqueue *vq = net-dev.vqs[VHOST_NET_VQ_TX]; + struct vhost_poll *poll = net-poll + VHOST_NET_VQ_TX; unsigned out, in, s; int head; struct msghdr msg = { @@ -252,7 +225,8 @@ static void handle_tx(struct vhost_net *net) wmem = atomic_read(sock-sk-sk_wmem_alloc); if (wmem = sock-sk-sk_sndbuf) { mutex_lock(vq-mutex); - tx_poll_start(net, sock); + if (vhost_poll_start(poll, sock-file)) + vq_err(vq, Fail to start TX polling\n); s/Fail/Failed/ A question though: how can this happen? Could you clarify please? Maybe we can find a way to prevent this error? Two conditions I think this can happen: 1) a buggy userspace disable a queue through TUNSETQUEUE 2) the net device were gone For 1, looks like we can delay the disabling until the refcnt goes to zero. For 2 may needs more changes. I'd expect keeping a socket reference would prevent both issues. Doesn't it? Doesn't work for 2 I think, the socket didn't hold a refcnt of the device, so the device can go away at anytime. Although we can change this, but it's the behaviour before multiqueue support. Hmm there's one scenario that does seem to trigger this: queue can get disabled and then poll fails. Is this the only issue? Another one I think we can trigger is: - start vhost thread - do ip link del link dev tap0 to delete the tap device In this case, the netdevice is unregistered but the file/socket still exist. Yes but in this case poll_wait is called so
Re: [PATCH V3 2/2] vhost: handle polling errors
On 01/14/2013 02:57 PM, Michael S. Tsirkin wrote: On Mon, Jan 14, 2013 at 10:59:02AM +0800, Jason Wang wrote: On 01/13/2013 07:10 PM, Michael S. Tsirkin wrote: On Mon, Jan 07, 2013 at 11:04:32PM +0800, Jason Wang wrote: On 01/07/2013 10:55 PM, Michael S. Tsirkin wrote: On Mon, Jan 07, 2013 at 12:38:17PM +0800, Jason Wang wrote: On 01/06/2013 09:22 PM, Michael S. Tsirkin wrote: On Sun, Jan 06, 2013 at 03:18:38PM +0800, Jason Wang wrote: Polling errors were ignored by vhost/vhost_net, this may lead to crash when trying to remove vhost from waitqueue when after the polling is failed. Solve this problem by: - checking the poll-wqh before trying to remove from waitqueue - report an error when poll() returns a POLLERR in vhost_start_poll() - report an error when vhost_start_poll() fails in vhost_vring_ioctl()/vhost_net_set_backend() which is used to notify the failure to userspace. - report an error in the data path in vhost_net when meet polling errors. After those changes, we can safely drop the tx polling state in vhost_net since it was replaced by the checking of poll-wqh. Signed-off-by: Jason Wang jasow...@redhat.com --- drivers/vhost/net.c | 74 drivers/vhost/vhost.c | 31 +++- drivers/vhost/vhost.h |2 +- 3 files changed, 49 insertions(+), 58 deletions(-) diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index d10ad6f..125c1e5 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -64,20 +64,10 @@ enum { VHOST_NET_VQ_MAX = 2, }; -enum vhost_net_poll_state { - VHOST_NET_POLL_DISABLED = 0, - VHOST_NET_POLL_STARTED = 1, - VHOST_NET_POLL_STOPPED = 2, -}; - struct vhost_net { struct vhost_dev dev; struct vhost_virtqueue vqs[VHOST_NET_VQ_MAX]; struct vhost_poll poll[VHOST_NET_VQ_MAX]; - /* Tells us whether we are polling a socket for TX. - * We only do this when socket buffer fills up. - * Protected by tx vq lock. */ - enum vhost_net_poll_state tx_poll_state; /* Number of TX recently submitted. * Protected by tx vq lock. */ unsigned tx_packets; @@ -155,24 +145,6 @@ static void copy_iovec_hdr(const struct iovec *from, struct iovec *to, } } -/* Caller must have TX VQ lock */ -static void tx_poll_stop(struct vhost_net *net) -{ - if (likely(net-tx_poll_state != VHOST_NET_POLL_STARTED)) - return; - vhost_poll_stop(net-poll + VHOST_NET_VQ_TX); - net-tx_poll_state = VHOST_NET_POLL_STOPPED; -} - -/* Caller must have TX VQ lock */ -static void tx_poll_start(struct vhost_net *net, struct socket *sock) -{ - if (unlikely(net-tx_poll_state != VHOST_NET_POLL_STOPPED)) - return; - vhost_poll_start(net-poll + VHOST_NET_VQ_TX, sock-file); - net-tx_poll_state = VHOST_NET_POLL_STARTED; -} - /* In case of DMA done not in order in lower device driver for some reason. * upend_idx is used to track end of used idx, done_idx is used to track head * of used idx. Once lower device DMA done contiguously, we will signal KVM @@ -227,6 +199,7 @@ static void vhost_zerocopy_callback(struct ubuf_info *ubuf, bool success) static void handle_tx(struct vhost_net *net) { struct vhost_virtqueue *vq = net-dev.vqs[VHOST_NET_VQ_TX]; + struct vhost_poll *poll = net-poll + VHOST_NET_VQ_TX; unsigned out, in, s; int head; struct msghdr msg = { @@ -252,7 +225,8 @@ static void handle_tx(struct vhost_net *net) wmem = atomic_read(sock-sk-sk_wmem_alloc); if (wmem = sock-sk-sk_sndbuf) { mutex_lock(vq-mutex); - tx_poll_start(net, sock); + if (vhost_poll_start(poll, sock-file)) + vq_err(vq, Fail to start TX polling\n); s/Fail/Failed/ A question though: how can this happen? Could you clarify please? Maybe we can find a way to prevent this error? Two conditions I think this can happen: 1) a buggy userspace disable a queue through TUNSETQUEUE 2) the net device were gone For 1, looks like we can delay the disabling until the refcnt goes to zero. For 2 may needs more changes. I'd expect keeping a socket reference would prevent both issues. Doesn't it? Doesn't work for 2 I think, the socket didn't hold a refcnt of the device, so the device can go away at anytime. Although we can change this, but it's the behaviour before multiqueue support. Hmm there's one scenario that does seem to trigger this: queue can get disabled and then poll fails. Is this the only issue? Another one I think we can trigger is: - start vhost thread - do ip link del link dev tap0 to delete the tap device In this case, the netdevice is unregistered but the file/socket still exist. Yes but in this case poll_wait is called so apparently no issue with existing code? We only have an issue if poll_wait
Re: [PATCH V3 2/2] vhost: handle polling errors
On 01/07/2013 10:55 PM, Michael S. Tsirkin wrote: > On Mon, Jan 07, 2013 at 12:38:17PM +0800, Jason Wang wrote: >> On 01/06/2013 09:22 PM, Michael S. Tsirkin wrote: >>> On Sun, Jan 06, 2013 at 03:18:38PM +0800, Jason Wang wrote: Polling errors were ignored by vhost/vhost_net, this may lead to crash when trying to remove vhost from waitqueue when after the polling is failed. Solve this problem by: - checking the poll->wqh before trying to remove from waitqueue - report an error when poll() returns a POLLERR in vhost_start_poll() - report an error when vhost_start_poll() fails in vhost_vring_ioctl()/vhost_net_set_backend() which is used to notify the failure to userspace. - report an error in the data path in vhost_net when meet polling errors. After those changes, we can safely drop the tx polling state in vhost_net since it was replaced by the checking of poll->wqh. Signed-off-by: Jason Wang --- drivers/vhost/net.c | 74 drivers/vhost/vhost.c | 31 +++- drivers/vhost/vhost.h |2 +- 3 files changed, 49 insertions(+), 58 deletions(-) diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index d10ad6f..125c1e5 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -64,20 +64,10 @@ enum { VHOST_NET_VQ_MAX = 2, }; -enum vhost_net_poll_state { - VHOST_NET_POLL_DISABLED = 0, - VHOST_NET_POLL_STARTED = 1, - VHOST_NET_POLL_STOPPED = 2, -}; - struct vhost_net { struct vhost_dev dev; struct vhost_virtqueue vqs[VHOST_NET_VQ_MAX]; struct vhost_poll poll[VHOST_NET_VQ_MAX]; - /* Tells us whether we are polling a socket for TX. - * We only do this when socket buffer fills up. - * Protected by tx vq lock. */ - enum vhost_net_poll_state tx_poll_state; /* Number of TX recently submitted. * Protected by tx vq lock. */ unsigned tx_packets; @@ -155,24 +145,6 @@ static void copy_iovec_hdr(const struct iovec *from, struct iovec *to, } } -/* Caller must have TX VQ lock */ -static void tx_poll_stop(struct vhost_net *net) -{ - if (likely(net->tx_poll_state != VHOST_NET_POLL_STARTED)) - return; - vhost_poll_stop(net->poll + VHOST_NET_VQ_TX); - net->tx_poll_state = VHOST_NET_POLL_STOPPED; -} - -/* Caller must have TX VQ lock */ -static void tx_poll_start(struct vhost_net *net, struct socket *sock) -{ - if (unlikely(net->tx_poll_state != VHOST_NET_POLL_STOPPED)) - return; - vhost_poll_start(net->poll + VHOST_NET_VQ_TX, sock->file); - net->tx_poll_state = VHOST_NET_POLL_STARTED; -} - /* In case of DMA done not in order in lower device driver for some reason. * upend_idx is used to track end of used idx, done_idx is used to track head * of used idx. Once lower device DMA done contiguously, we will signal KVM @@ -227,6 +199,7 @@ static void vhost_zerocopy_callback(struct ubuf_info *ubuf, bool success) static void handle_tx(struct vhost_net *net) { struct vhost_virtqueue *vq = >dev.vqs[VHOST_NET_VQ_TX]; + struct vhost_poll *poll = net->poll + VHOST_NET_VQ_TX; unsigned out, in, s; int head; struct msghdr msg = { @@ -252,7 +225,8 @@ static void handle_tx(struct vhost_net *net) wmem = atomic_read(>sk->sk_wmem_alloc); if (wmem >= sock->sk->sk_sndbuf) { mutex_lock(>mutex); - tx_poll_start(net, sock); + if (vhost_poll_start(poll, sock->file)) + vq_err(vq, "Fail to start TX polling\n"); >>> s/Fail/Failed/ >>> >>> A question though: how can this happen? Could you clarify please? >>> Maybe we can find a way to prevent this error? >> Two conditions I think this can happen: >> >> 1) a buggy userspace disable a queue through TUNSETQUEUE >> 2) the net device were gone >> >> For 1, looks like we can delay the disabling until the refcnt goes to >> zero. For 2 may needs more changes. > I'd expect keeping a socket reference would prevent both issues. > Doesn't it? Doesn't work for 2 I think, the socket didn't hold a refcnt of the device, so the device can go away at anytime. Although we can change this, but it's the behaviour before multiqueue support. > >> Not sure it's worth to do this work, >> maybe a warning is enough just like other failure. > With other failures, you normally can correct the error then > kick to have it restart. This is soomething thagt would not > work here. If userspace is wrote correctly, (e.g passing a fd with correct state) it can also be corrected. > mutex_unlock(>mutex); return; } @@
Re: [PATCH V3 2/2] vhost: handle polling errors
On Mon, Jan 07, 2013 at 12:38:17PM +0800, Jason Wang wrote: > On 01/06/2013 09:22 PM, Michael S. Tsirkin wrote: > > On Sun, Jan 06, 2013 at 03:18:38PM +0800, Jason Wang wrote: > >> Polling errors were ignored by vhost/vhost_net, this may lead to crash when > >> trying to remove vhost from waitqueue when after the polling is failed. > >> Solve > >> this problem by: > >> > >> - checking the poll->wqh before trying to remove from waitqueue > >> - report an error when poll() returns a POLLERR in vhost_start_poll() > >> - report an error when vhost_start_poll() fails in > >> vhost_vring_ioctl()/vhost_net_set_backend() which is used to notify the > >> failure to userspace. > >> - report an error in the data path in vhost_net when meet polling errors. > >> > >> After those changes, we can safely drop the tx polling state in vhost_net > >> since > >> it was replaced by the checking of poll->wqh. > >> > >> Signed-off-by: Jason Wang > >> --- > >> drivers/vhost/net.c | 74 > >> > >> drivers/vhost/vhost.c | 31 +++- > >> drivers/vhost/vhost.h |2 +- > >> 3 files changed, 49 insertions(+), 58 deletions(-) > >> > >> diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c > >> index d10ad6f..125c1e5 100644 > >> --- a/drivers/vhost/net.c > >> +++ b/drivers/vhost/net.c > >> @@ -64,20 +64,10 @@ enum { > >>VHOST_NET_VQ_MAX = 2, > >> }; > >> > >> -enum vhost_net_poll_state { > >> - VHOST_NET_POLL_DISABLED = 0, > >> - VHOST_NET_POLL_STARTED = 1, > >> - VHOST_NET_POLL_STOPPED = 2, > >> -}; > >> - > >> struct vhost_net { > >>struct vhost_dev dev; > >>struct vhost_virtqueue vqs[VHOST_NET_VQ_MAX]; > >>struct vhost_poll poll[VHOST_NET_VQ_MAX]; > >> - /* Tells us whether we are polling a socket for TX. > >> - * We only do this when socket buffer fills up. > >> - * Protected by tx vq lock. */ > >> - enum vhost_net_poll_state tx_poll_state; > >>/* Number of TX recently submitted. > >> * Protected by tx vq lock. */ > >>unsigned tx_packets; > >> @@ -155,24 +145,6 @@ static void copy_iovec_hdr(const struct iovec *from, > >> struct iovec *to, > >>} > >> } > >> > >> -/* Caller must have TX VQ lock */ > >> -static void tx_poll_stop(struct vhost_net *net) > >> -{ > >> - if (likely(net->tx_poll_state != VHOST_NET_POLL_STARTED)) > >> - return; > >> - vhost_poll_stop(net->poll + VHOST_NET_VQ_TX); > >> - net->tx_poll_state = VHOST_NET_POLL_STOPPED; > >> -} > >> - > >> -/* Caller must have TX VQ lock */ > >> -static void tx_poll_start(struct vhost_net *net, struct socket *sock) > >> -{ > >> - if (unlikely(net->tx_poll_state != VHOST_NET_POLL_STOPPED)) > >> - return; > >> - vhost_poll_start(net->poll + VHOST_NET_VQ_TX, sock->file); > >> - net->tx_poll_state = VHOST_NET_POLL_STARTED; > >> -} > >> - > >> /* In case of DMA done not in order in lower device driver for some > >> reason. > >> * upend_idx is used to track end of used idx, done_idx is used to track > >> head > >> * of used idx. Once lower device DMA done contiguously, we will signal > >> KVM > >> @@ -227,6 +199,7 @@ static void vhost_zerocopy_callback(struct ubuf_info > >> *ubuf, bool success) > >> static void handle_tx(struct vhost_net *net) > >> { > >>struct vhost_virtqueue *vq = >dev.vqs[VHOST_NET_VQ_TX]; > >> + struct vhost_poll *poll = net->poll + VHOST_NET_VQ_TX; > >>unsigned out, in, s; > >>int head; > >>struct msghdr msg = { > >> @@ -252,7 +225,8 @@ static void handle_tx(struct vhost_net *net) > >>wmem = atomic_read(>sk->sk_wmem_alloc); > >>if (wmem >= sock->sk->sk_sndbuf) { > >>mutex_lock(>mutex); > >> - tx_poll_start(net, sock); > >> + if (vhost_poll_start(poll, sock->file)) > >> + vq_err(vq, "Fail to start TX polling\n"); > > s/Fail/Failed/ > > > > A question though: how can this happen? Could you clarify please? > > Maybe we can find a way to prevent this error? > > Two conditions I think this can happen: > > 1) a buggy userspace disable a queue through TUNSETQUEUE > 2) the net device were gone > > For 1, looks like we can delay the disabling until the refcnt goes to > zero. For 2 may needs more changes. I'd expect keeping a socket reference would prevent both issues. Doesn't it? > Not sure it's worth to do this work, > maybe a warning is enough just like other failure. With other failures, you normally can correct the error then kick to have it restart. This is soomething thagt would not work here. > > > >>mutex_unlock(>mutex); > >>return; > >>} > >> @@ -261,7 +235,7 @@ static void handle_tx(struct vhost_net *net) > >>vhost_disable_notify(>dev, vq); > >> > >>if (wmem < sock->sk->sk_sndbuf / 2) > >> - tx_poll_stop(net); > >> + vhost_poll_stop(poll); > >>hdr_size = vq->vhost_hlen; > >>zcopy = vq->ubufs; > >> > >> @@ -283,8 +257,10 @@ static void handle_tx(struct
Re: [PATCH V3 2/2] vhost: handle polling errors
On Mon, Jan 07, 2013 at 12:38:17PM +0800, Jason Wang wrote: On 01/06/2013 09:22 PM, Michael S. Tsirkin wrote: On Sun, Jan 06, 2013 at 03:18:38PM +0800, Jason Wang wrote: Polling errors were ignored by vhost/vhost_net, this may lead to crash when trying to remove vhost from waitqueue when after the polling is failed. Solve this problem by: - checking the poll-wqh before trying to remove from waitqueue - report an error when poll() returns a POLLERR in vhost_start_poll() - report an error when vhost_start_poll() fails in vhost_vring_ioctl()/vhost_net_set_backend() which is used to notify the failure to userspace. - report an error in the data path in vhost_net when meet polling errors. After those changes, we can safely drop the tx polling state in vhost_net since it was replaced by the checking of poll-wqh. Signed-off-by: Jason Wang jasow...@redhat.com --- drivers/vhost/net.c | 74 drivers/vhost/vhost.c | 31 +++- drivers/vhost/vhost.h |2 +- 3 files changed, 49 insertions(+), 58 deletions(-) diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index d10ad6f..125c1e5 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -64,20 +64,10 @@ enum { VHOST_NET_VQ_MAX = 2, }; -enum vhost_net_poll_state { - VHOST_NET_POLL_DISABLED = 0, - VHOST_NET_POLL_STARTED = 1, - VHOST_NET_POLL_STOPPED = 2, -}; - struct vhost_net { struct vhost_dev dev; struct vhost_virtqueue vqs[VHOST_NET_VQ_MAX]; struct vhost_poll poll[VHOST_NET_VQ_MAX]; - /* Tells us whether we are polling a socket for TX. - * We only do this when socket buffer fills up. - * Protected by tx vq lock. */ - enum vhost_net_poll_state tx_poll_state; /* Number of TX recently submitted. * Protected by tx vq lock. */ unsigned tx_packets; @@ -155,24 +145,6 @@ static void copy_iovec_hdr(const struct iovec *from, struct iovec *to, } } -/* Caller must have TX VQ lock */ -static void tx_poll_stop(struct vhost_net *net) -{ - if (likely(net-tx_poll_state != VHOST_NET_POLL_STARTED)) - return; - vhost_poll_stop(net-poll + VHOST_NET_VQ_TX); - net-tx_poll_state = VHOST_NET_POLL_STOPPED; -} - -/* Caller must have TX VQ lock */ -static void tx_poll_start(struct vhost_net *net, struct socket *sock) -{ - if (unlikely(net-tx_poll_state != VHOST_NET_POLL_STOPPED)) - return; - vhost_poll_start(net-poll + VHOST_NET_VQ_TX, sock-file); - net-tx_poll_state = VHOST_NET_POLL_STARTED; -} - /* In case of DMA done not in order in lower device driver for some reason. * upend_idx is used to track end of used idx, done_idx is used to track head * of used idx. Once lower device DMA done contiguously, we will signal KVM @@ -227,6 +199,7 @@ static void vhost_zerocopy_callback(struct ubuf_info *ubuf, bool success) static void handle_tx(struct vhost_net *net) { struct vhost_virtqueue *vq = net-dev.vqs[VHOST_NET_VQ_TX]; + struct vhost_poll *poll = net-poll + VHOST_NET_VQ_TX; unsigned out, in, s; int head; struct msghdr msg = { @@ -252,7 +225,8 @@ static void handle_tx(struct vhost_net *net) wmem = atomic_read(sock-sk-sk_wmem_alloc); if (wmem = sock-sk-sk_sndbuf) { mutex_lock(vq-mutex); - tx_poll_start(net, sock); + if (vhost_poll_start(poll, sock-file)) + vq_err(vq, Fail to start TX polling\n); s/Fail/Failed/ A question though: how can this happen? Could you clarify please? Maybe we can find a way to prevent this error? Two conditions I think this can happen: 1) a buggy userspace disable a queue through TUNSETQUEUE 2) the net device were gone For 1, looks like we can delay the disabling until the refcnt goes to zero. For 2 may needs more changes. I'd expect keeping a socket reference would prevent both issues. Doesn't it? Not sure it's worth to do this work, maybe a warning is enough just like other failure. With other failures, you normally can correct the error then kick to have it restart. This is soomething thagt would not work here. mutex_unlock(vq-mutex); return; } @@ -261,7 +235,7 @@ static void handle_tx(struct vhost_net *net) vhost_disable_notify(net-dev, vq); if (wmem sock-sk-sk_sndbuf / 2) - tx_poll_stop(net); + vhost_poll_stop(poll); hdr_size = vq-vhost_hlen; zcopy = vq-ubufs; @@ -283,8 +257,10 @@ static void handle_tx(struct vhost_net *net) wmem = atomic_read(sock-sk-sk_wmem_alloc); if (wmem = sock-sk-sk_sndbuf * 3 / 4) { - tx_poll_start(net, sock); - set_bit(SOCK_ASYNC_NOSPACE, sock-flags); + if (vhost_poll_start(poll, sock-file)) +
Re: [PATCH V3 2/2] vhost: handle polling errors
On 01/07/2013 10:55 PM, Michael S. Tsirkin wrote: On Mon, Jan 07, 2013 at 12:38:17PM +0800, Jason Wang wrote: On 01/06/2013 09:22 PM, Michael S. Tsirkin wrote: On Sun, Jan 06, 2013 at 03:18:38PM +0800, Jason Wang wrote: Polling errors were ignored by vhost/vhost_net, this may lead to crash when trying to remove vhost from waitqueue when after the polling is failed. Solve this problem by: - checking the poll-wqh before trying to remove from waitqueue - report an error when poll() returns a POLLERR in vhost_start_poll() - report an error when vhost_start_poll() fails in vhost_vring_ioctl()/vhost_net_set_backend() which is used to notify the failure to userspace. - report an error in the data path in vhost_net when meet polling errors. After those changes, we can safely drop the tx polling state in vhost_net since it was replaced by the checking of poll-wqh. Signed-off-by: Jason Wang jasow...@redhat.com --- drivers/vhost/net.c | 74 drivers/vhost/vhost.c | 31 +++- drivers/vhost/vhost.h |2 +- 3 files changed, 49 insertions(+), 58 deletions(-) diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index d10ad6f..125c1e5 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -64,20 +64,10 @@ enum { VHOST_NET_VQ_MAX = 2, }; -enum vhost_net_poll_state { - VHOST_NET_POLL_DISABLED = 0, - VHOST_NET_POLL_STARTED = 1, - VHOST_NET_POLL_STOPPED = 2, -}; - struct vhost_net { struct vhost_dev dev; struct vhost_virtqueue vqs[VHOST_NET_VQ_MAX]; struct vhost_poll poll[VHOST_NET_VQ_MAX]; - /* Tells us whether we are polling a socket for TX. - * We only do this when socket buffer fills up. - * Protected by tx vq lock. */ - enum vhost_net_poll_state tx_poll_state; /* Number of TX recently submitted. * Protected by tx vq lock. */ unsigned tx_packets; @@ -155,24 +145,6 @@ static void copy_iovec_hdr(const struct iovec *from, struct iovec *to, } } -/* Caller must have TX VQ lock */ -static void tx_poll_stop(struct vhost_net *net) -{ - if (likely(net-tx_poll_state != VHOST_NET_POLL_STARTED)) - return; - vhost_poll_stop(net-poll + VHOST_NET_VQ_TX); - net-tx_poll_state = VHOST_NET_POLL_STOPPED; -} - -/* Caller must have TX VQ lock */ -static void tx_poll_start(struct vhost_net *net, struct socket *sock) -{ - if (unlikely(net-tx_poll_state != VHOST_NET_POLL_STOPPED)) - return; - vhost_poll_start(net-poll + VHOST_NET_VQ_TX, sock-file); - net-tx_poll_state = VHOST_NET_POLL_STARTED; -} - /* In case of DMA done not in order in lower device driver for some reason. * upend_idx is used to track end of used idx, done_idx is used to track head * of used idx. Once lower device DMA done contiguously, we will signal KVM @@ -227,6 +199,7 @@ static void vhost_zerocopy_callback(struct ubuf_info *ubuf, bool success) static void handle_tx(struct vhost_net *net) { struct vhost_virtqueue *vq = net-dev.vqs[VHOST_NET_VQ_TX]; + struct vhost_poll *poll = net-poll + VHOST_NET_VQ_TX; unsigned out, in, s; int head; struct msghdr msg = { @@ -252,7 +225,8 @@ static void handle_tx(struct vhost_net *net) wmem = atomic_read(sock-sk-sk_wmem_alloc); if (wmem = sock-sk-sk_sndbuf) { mutex_lock(vq-mutex); - tx_poll_start(net, sock); + if (vhost_poll_start(poll, sock-file)) + vq_err(vq, Fail to start TX polling\n); s/Fail/Failed/ A question though: how can this happen? Could you clarify please? Maybe we can find a way to prevent this error? Two conditions I think this can happen: 1) a buggy userspace disable a queue through TUNSETQUEUE 2) the net device were gone For 1, looks like we can delay the disabling until the refcnt goes to zero. For 2 may needs more changes. I'd expect keeping a socket reference would prevent both issues. Doesn't it? Doesn't work for 2 I think, the socket didn't hold a refcnt of the device, so the device can go away at anytime. Although we can change this, but it's the behaviour before multiqueue support. Not sure it's worth to do this work, maybe a warning is enough just like other failure. With other failures, you normally can correct the error then kick to have it restart. This is soomething thagt would not work here. If userspace is wrote correctly, (e.g passing a fd with correct state) it can also be corrected. mutex_unlock(vq-mutex); return; } @@ -261,7 +235,7 @@ static void handle_tx(struct vhost_net *net) vhost_disable_notify(net-dev, vq); if (wmem sock-sk-sk_sndbuf / 2) - tx_poll_stop(net); + vhost_poll_stop(poll); hdr_size = vq-vhost_hlen; zcopy = vq-ubufs; @@ -283,8 +257,10 @@ static void handle_tx(struct vhost_net *net) wmem = atomic_read(sock-sk-sk_wmem_alloc); if (wmem =
Re: [PATCH V3 2/2] vhost: handle polling errors
On 01/06/2013 09:22 PM, Michael S. Tsirkin wrote: > On Sun, Jan 06, 2013 at 03:18:38PM +0800, Jason Wang wrote: >> Polling errors were ignored by vhost/vhost_net, this may lead to crash when >> trying to remove vhost from waitqueue when after the polling is failed. Solve >> this problem by: >> >> - checking the poll->wqh before trying to remove from waitqueue >> - report an error when poll() returns a POLLERR in vhost_start_poll() >> - report an error when vhost_start_poll() fails in >> vhost_vring_ioctl()/vhost_net_set_backend() which is used to notify the >> failure to userspace. >> - report an error in the data path in vhost_net when meet polling errors. >> >> After those changes, we can safely drop the tx polling state in vhost_net >> since >> it was replaced by the checking of poll->wqh. >> >> Signed-off-by: Jason Wang >> --- >> drivers/vhost/net.c | 74 >> >> drivers/vhost/vhost.c | 31 +++- >> drivers/vhost/vhost.h |2 +- >> 3 files changed, 49 insertions(+), 58 deletions(-) >> >> diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c >> index d10ad6f..125c1e5 100644 >> --- a/drivers/vhost/net.c >> +++ b/drivers/vhost/net.c >> @@ -64,20 +64,10 @@ enum { >> VHOST_NET_VQ_MAX = 2, >> }; >> >> -enum vhost_net_poll_state { >> -VHOST_NET_POLL_DISABLED = 0, >> -VHOST_NET_POLL_STARTED = 1, >> -VHOST_NET_POLL_STOPPED = 2, >> -}; >> - >> struct vhost_net { >> struct vhost_dev dev; >> struct vhost_virtqueue vqs[VHOST_NET_VQ_MAX]; >> struct vhost_poll poll[VHOST_NET_VQ_MAX]; >> -/* Tells us whether we are polling a socket for TX. >> - * We only do this when socket buffer fills up. >> - * Protected by tx vq lock. */ >> -enum vhost_net_poll_state tx_poll_state; >> /* Number of TX recently submitted. >> * Protected by tx vq lock. */ >> unsigned tx_packets; >> @@ -155,24 +145,6 @@ static void copy_iovec_hdr(const struct iovec *from, >> struct iovec *to, >> } >> } >> >> -/* Caller must have TX VQ lock */ >> -static void tx_poll_stop(struct vhost_net *net) >> -{ >> -if (likely(net->tx_poll_state != VHOST_NET_POLL_STARTED)) >> -return; >> -vhost_poll_stop(net->poll + VHOST_NET_VQ_TX); >> -net->tx_poll_state = VHOST_NET_POLL_STOPPED; >> -} >> - >> -/* Caller must have TX VQ lock */ >> -static void tx_poll_start(struct vhost_net *net, struct socket *sock) >> -{ >> -if (unlikely(net->tx_poll_state != VHOST_NET_POLL_STOPPED)) >> -return; >> -vhost_poll_start(net->poll + VHOST_NET_VQ_TX, sock->file); >> -net->tx_poll_state = VHOST_NET_POLL_STARTED; >> -} >> - >> /* In case of DMA done not in order in lower device driver for some reason. >> * upend_idx is used to track end of used idx, done_idx is used to track >> head >> * of used idx. Once lower device DMA done contiguously, we will signal KVM >> @@ -227,6 +199,7 @@ static void vhost_zerocopy_callback(struct ubuf_info >> *ubuf, bool success) >> static void handle_tx(struct vhost_net *net) >> { >> struct vhost_virtqueue *vq = >dev.vqs[VHOST_NET_VQ_TX]; >> +struct vhost_poll *poll = net->poll + VHOST_NET_VQ_TX; >> unsigned out, in, s; >> int head; >> struct msghdr msg = { >> @@ -252,7 +225,8 @@ static void handle_tx(struct vhost_net *net) >> wmem = atomic_read(>sk->sk_wmem_alloc); >> if (wmem >= sock->sk->sk_sndbuf) { >> mutex_lock(>mutex); >> -tx_poll_start(net, sock); >> +if (vhost_poll_start(poll, sock->file)) >> +vq_err(vq, "Fail to start TX polling\n"); > s/Fail/Failed/ > > A question though: how can this happen? Could you clarify please? > Maybe we can find a way to prevent this error? Two conditions I think this can happen: 1) a buggy userspace disable a queue through TUNSETQUEUE 2) the net device were gone For 1, looks like we can delay the disabling until the refcnt goes to zero. For 2 may needs more changes. Not sure it's worth to do this work, maybe a warning is enough just like other failure. > >> mutex_unlock(>mutex); >> return; >> } >> @@ -261,7 +235,7 @@ static void handle_tx(struct vhost_net *net) >> vhost_disable_notify(>dev, vq); >> >> if (wmem < sock->sk->sk_sndbuf / 2) >> -tx_poll_stop(net); >> +vhost_poll_stop(poll); >> hdr_size = vq->vhost_hlen; >> zcopy = vq->ubufs; >> >> @@ -283,8 +257,10 @@ static void handle_tx(struct vhost_net *net) >> >> wmem = atomic_read(>sk->sk_wmem_alloc); >> if (wmem >= sock->sk->sk_sndbuf * 3 / 4) { >> -tx_poll_start(net, sock); >> -set_bit(SOCK_ASYNC_NOSPACE, >flags); >> +if (vhost_poll_start(poll, sock->file)) >> +vq_err(vq, "Fail to start TX >> polling\n"); >> +
Re: [PATCH V3 2/2] vhost: handle polling errors
On Sun, Jan 06, 2013 at 03:18:38PM +0800, Jason Wang wrote: > Polling errors were ignored by vhost/vhost_net, this may lead to crash when > trying to remove vhost from waitqueue when after the polling is failed. Solve > this problem by: > > - checking the poll->wqh before trying to remove from waitqueue > - report an error when poll() returns a POLLERR in vhost_start_poll() > - report an error when vhost_start_poll() fails in > vhost_vring_ioctl()/vhost_net_set_backend() which is used to notify the > failure to userspace. > - report an error in the data path in vhost_net when meet polling errors. > > After those changes, we can safely drop the tx polling state in vhost_net > since > it was replaced by the checking of poll->wqh. > > Signed-off-by: Jason Wang > --- > drivers/vhost/net.c | 74 > drivers/vhost/vhost.c | 31 +++- > drivers/vhost/vhost.h |2 +- > 3 files changed, 49 insertions(+), 58 deletions(-) > > diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c > index d10ad6f..125c1e5 100644 > --- a/drivers/vhost/net.c > +++ b/drivers/vhost/net.c > @@ -64,20 +64,10 @@ enum { > VHOST_NET_VQ_MAX = 2, > }; > > -enum vhost_net_poll_state { > - VHOST_NET_POLL_DISABLED = 0, > - VHOST_NET_POLL_STARTED = 1, > - VHOST_NET_POLL_STOPPED = 2, > -}; > - > struct vhost_net { > struct vhost_dev dev; > struct vhost_virtqueue vqs[VHOST_NET_VQ_MAX]; > struct vhost_poll poll[VHOST_NET_VQ_MAX]; > - /* Tells us whether we are polling a socket for TX. > - * We only do this when socket buffer fills up. > - * Protected by tx vq lock. */ > - enum vhost_net_poll_state tx_poll_state; > /* Number of TX recently submitted. >* Protected by tx vq lock. */ > unsigned tx_packets; > @@ -155,24 +145,6 @@ static void copy_iovec_hdr(const struct iovec *from, > struct iovec *to, > } > } > > -/* Caller must have TX VQ lock */ > -static void tx_poll_stop(struct vhost_net *net) > -{ > - if (likely(net->tx_poll_state != VHOST_NET_POLL_STARTED)) > - return; > - vhost_poll_stop(net->poll + VHOST_NET_VQ_TX); > - net->tx_poll_state = VHOST_NET_POLL_STOPPED; > -} > - > -/* Caller must have TX VQ lock */ > -static void tx_poll_start(struct vhost_net *net, struct socket *sock) > -{ > - if (unlikely(net->tx_poll_state != VHOST_NET_POLL_STOPPED)) > - return; > - vhost_poll_start(net->poll + VHOST_NET_VQ_TX, sock->file); > - net->tx_poll_state = VHOST_NET_POLL_STARTED; > -} > - > /* In case of DMA done not in order in lower device driver for some reason. > * upend_idx is used to track end of used idx, done_idx is used to track head > * of used idx. Once lower device DMA done contiguously, we will signal KVM > @@ -227,6 +199,7 @@ static void vhost_zerocopy_callback(struct ubuf_info > *ubuf, bool success) > static void handle_tx(struct vhost_net *net) > { > struct vhost_virtqueue *vq = >dev.vqs[VHOST_NET_VQ_TX]; > + struct vhost_poll *poll = net->poll + VHOST_NET_VQ_TX; > unsigned out, in, s; > int head; > struct msghdr msg = { > @@ -252,7 +225,8 @@ static void handle_tx(struct vhost_net *net) > wmem = atomic_read(>sk->sk_wmem_alloc); > if (wmem >= sock->sk->sk_sndbuf) { > mutex_lock(>mutex); > - tx_poll_start(net, sock); > + if (vhost_poll_start(poll, sock->file)) > + vq_err(vq, "Fail to start TX polling\n"); s/Fail/Failed/ A question though: how can this happen? Could you clarify please? Maybe we can find a way to prevent this error? > mutex_unlock(>mutex); > return; > } > @@ -261,7 +235,7 @@ static void handle_tx(struct vhost_net *net) > vhost_disable_notify(>dev, vq); > > if (wmem < sock->sk->sk_sndbuf / 2) > - tx_poll_stop(net); > + vhost_poll_stop(poll); > hdr_size = vq->vhost_hlen; > zcopy = vq->ubufs; > > @@ -283,8 +257,10 @@ static void handle_tx(struct vhost_net *net) > > wmem = atomic_read(>sk->sk_wmem_alloc); > if (wmem >= sock->sk->sk_sndbuf * 3 / 4) { > - tx_poll_start(net, sock); > - set_bit(SOCK_ASYNC_NOSPACE, >flags); > + if (vhost_poll_start(poll, sock->file)) > + vq_err(vq, "Fail to start TX > polling\n"); > + else > + set_bit(SOCK_ASYNC_NOSPACE, > >flags); > break; > } > /* If more outstanding DMAs, queue the work. > @@ -294,8 +270,10 @@ static void handle_tx(struct vhost_net *net) > (vq->upend_idx - vq->done_idx) : > (vq->upend_idx + UIO_MAXIOV -
Re: [PATCH V3 2/2] vhost: handle polling errors
On Sun, Jan 06, 2013 at 03:18:38PM +0800, Jason Wang wrote: Polling errors were ignored by vhost/vhost_net, this may lead to crash when trying to remove vhost from waitqueue when after the polling is failed. Solve this problem by: - checking the poll-wqh before trying to remove from waitqueue - report an error when poll() returns a POLLERR in vhost_start_poll() - report an error when vhost_start_poll() fails in vhost_vring_ioctl()/vhost_net_set_backend() which is used to notify the failure to userspace. - report an error in the data path in vhost_net when meet polling errors. After those changes, we can safely drop the tx polling state in vhost_net since it was replaced by the checking of poll-wqh. Signed-off-by: Jason Wang jasow...@redhat.com --- drivers/vhost/net.c | 74 drivers/vhost/vhost.c | 31 +++- drivers/vhost/vhost.h |2 +- 3 files changed, 49 insertions(+), 58 deletions(-) diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index d10ad6f..125c1e5 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -64,20 +64,10 @@ enum { VHOST_NET_VQ_MAX = 2, }; -enum vhost_net_poll_state { - VHOST_NET_POLL_DISABLED = 0, - VHOST_NET_POLL_STARTED = 1, - VHOST_NET_POLL_STOPPED = 2, -}; - struct vhost_net { struct vhost_dev dev; struct vhost_virtqueue vqs[VHOST_NET_VQ_MAX]; struct vhost_poll poll[VHOST_NET_VQ_MAX]; - /* Tells us whether we are polling a socket for TX. - * We only do this when socket buffer fills up. - * Protected by tx vq lock. */ - enum vhost_net_poll_state tx_poll_state; /* Number of TX recently submitted. * Protected by tx vq lock. */ unsigned tx_packets; @@ -155,24 +145,6 @@ static void copy_iovec_hdr(const struct iovec *from, struct iovec *to, } } -/* Caller must have TX VQ lock */ -static void tx_poll_stop(struct vhost_net *net) -{ - if (likely(net-tx_poll_state != VHOST_NET_POLL_STARTED)) - return; - vhost_poll_stop(net-poll + VHOST_NET_VQ_TX); - net-tx_poll_state = VHOST_NET_POLL_STOPPED; -} - -/* Caller must have TX VQ lock */ -static void tx_poll_start(struct vhost_net *net, struct socket *sock) -{ - if (unlikely(net-tx_poll_state != VHOST_NET_POLL_STOPPED)) - return; - vhost_poll_start(net-poll + VHOST_NET_VQ_TX, sock-file); - net-tx_poll_state = VHOST_NET_POLL_STARTED; -} - /* In case of DMA done not in order in lower device driver for some reason. * upend_idx is used to track end of used idx, done_idx is used to track head * of used idx. Once lower device DMA done contiguously, we will signal KVM @@ -227,6 +199,7 @@ static void vhost_zerocopy_callback(struct ubuf_info *ubuf, bool success) static void handle_tx(struct vhost_net *net) { struct vhost_virtqueue *vq = net-dev.vqs[VHOST_NET_VQ_TX]; + struct vhost_poll *poll = net-poll + VHOST_NET_VQ_TX; unsigned out, in, s; int head; struct msghdr msg = { @@ -252,7 +225,8 @@ static void handle_tx(struct vhost_net *net) wmem = atomic_read(sock-sk-sk_wmem_alloc); if (wmem = sock-sk-sk_sndbuf) { mutex_lock(vq-mutex); - tx_poll_start(net, sock); + if (vhost_poll_start(poll, sock-file)) + vq_err(vq, Fail to start TX polling\n); s/Fail/Failed/ A question though: how can this happen? Could you clarify please? Maybe we can find a way to prevent this error? mutex_unlock(vq-mutex); return; } @@ -261,7 +235,7 @@ static void handle_tx(struct vhost_net *net) vhost_disable_notify(net-dev, vq); if (wmem sock-sk-sk_sndbuf / 2) - tx_poll_stop(net); + vhost_poll_stop(poll); hdr_size = vq-vhost_hlen; zcopy = vq-ubufs; @@ -283,8 +257,10 @@ static void handle_tx(struct vhost_net *net) wmem = atomic_read(sock-sk-sk_wmem_alloc); if (wmem = sock-sk-sk_sndbuf * 3 / 4) { - tx_poll_start(net, sock); - set_bit(SOCK_ASYNC_NOSPACE, sock-flags); + if (vhost_poll_start(poll, sock-file)) + vq_err(vq, Fail to start TX polling\n); + else + set_bit(SOCK_ASYNC_NOSPACE, sock-flags); break; } /* If more outstanding DMAs, queue the work. @@ -294,8 +270,10 @@ static void handle_tx(struct vhost_net *net) (vq-upend_idx - vq-done_idx) : (vq-upend_idx + UIO_MAXIOV - vq-done_idx); if (unlikely(num_pends VHOST_MAX_PEND)) { -
Re: [PATCH V3 2/2] vhost: handle polling errors
On 01/06/2013 09:22 PM, Michael S. Tsirkin wrote: On Sun, Jan 06, 2013 at 03:18:38PM +0800, Jason Wang wrote: Polling errors were ignored by vhost/vhost_net, this may lead to crash when trying to remove vhost from waitqueue when after the polling is failed. Solve this problem by: - checking the poll-wqh before trying to remove from waitqueue - report an error when poll() returns a POLLERR in vhost_start_poll() - report an error when vhost_start_poll() fails in vhost_vring_ioctl()/vhost_net_set_backend() which is used to notify the failure to userspace. - report an error in the data path in vhost_net when meet polling errors. After those changes, we can safely drop the tx polling state in vhost_net since it was replaced by the checking of poll-wqh. Signed-off-by: Jason Wang jasow...@redhat.com --- drivers/vhost/net.c | 74 drivers/vhost/vhost.c | 31 +++- drivers/vhost/vhost.h |2 +- 3 files changed, 49 insertions(+), 58 deletions(-) diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index d10ad6f..125c1e5 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -64,20 +64,10 @@ enum { VHOST_NET_VQ_MAX = 2, }; -enum vhost_net_poll_state { -VHOST_NET_POLL_DISABLED = 0, -VHOST_NET_POLL_STARTED = 1, -VHOST_NET_POLL_STOPPED = 2, -}; - struct vhost_net { struct vhost_dev dev; struct vhost_virtqueue vqs[VHOST_NET_VQ_MAX]; struct vhost_poll poll[VHOST_NET_VQ_MAX]; -/* Tells us whether we are polling a socket for TX. - * We only do this when socket buffer fills up. - * Protected by tx vq lock. */ -enum vhost_net_poll_state tx_poll_state; /* Number of TX recently submitted. * Protected by tx vq lock. */ unsigned tx_packets; @@ -155,24 +145,6 @@ static void copy_iovec_hdr(const struct iovec *from, struct iovec *to, } } -/* Caller must have TX VQ lock */ -static void tx_poll_stop(struct vhost_net *net) -{ -if (likely(net-tx_poll_state != VHOST_NET_POLL_STARTED)) -return; -vhost_poll_stop(net-poll + VHOST_NET_VQ_TX); -net-tx_poll_state = VHOST_NET_POLL_STOPPED; -} - -/* Caller must have TX VQ lock */ -static void tx_poll_start(struct vhost_net *net, struct socket *sock) -{ -if (unlikely(net-tx_poll_state != VHOST_NET_POLL_STOPPED)) -return; -vhost_poll_start(net-poll + VHOST_NET_VQ_TX, sock-file); -net-tx_poll_state = VHOST_NET_POLL_STARTED; -} - /* In case of DMA done not in order in lower device driver for some reason. * upend_idx is used to track end of used idx, done_idx is used to track head * of used idx. Once lower device DMA done contiguously, we will signal KVM @@ -227,6 +199,7 @@ static void vhost_zerocopy_callback(struct ubuf_info *ubuf, bool success) static void handle_tx(struct vhost_net *net) { struct vhost_virtqueue *vq = net-dev.vqs[VHOST_NET_VQ_TX]; +struct vhost_poll *poll = net-poll + VHOST_NET_VQ_TX; unsigned out, in, s; int head; struct msghdr msg = { @@ -252,7 +225,8 @@ static void handle_tx(struct vhost_net *net) wmem = atomic_read(sock-sk-sk_wmem_alloc); if (wmem = sock-sk-sk_sndbuf) { mutex_lock(vq-mutex); -tx_poll_start(net, sock); +if (vhost_poll_start(poll, sock-file)) +vq_err(vq, Fail to start TX polling\n); s/Fail/Failed/ A question though: how can this happen? Could you clarify please? Maybe we can find a way to prevent this error? Two conditions I think this can happen: 1) a buggy userspace disable a queue through TUNSETQUEUE 2) the net device were gone For 1, looks like we can delay the disabling until the refcnt goes to zero. For 2 may needs more changes. Not sure it's worth to do this work, maybe a warning is enough just like other failure. mutex_unlock(vq-mutex); return; } @@ -261,7 +235,7 @@ static void handle_tx(struct vhost_net *net) vhost_disable_notify(net-dev, vq); if (wmem sock-sk-sk_sndbuf / 2) -tx_poll_stop(net); +vhost_poll_stop(poll); hdr_size = vq-vhost_hlen; zcopy = vq-ubufs; @@ -283,8 +257,10 @@ static void handle_tx(struct vhost_net *net) wmem = atomic_read(sock-sk-sk_wmem_alloc); if (wmem = sock-sk-sk_sndbuf * 3 / 4) { -tx_poll_start(net, sock); -set_bit(SOCK_ASYNC_NOSPACE, sock-flags); +if (vhost_poll_start(poll, sock-file)) +vq_err(vq, Fail to start TX polling\n); +else +set_bit(SOCK_ASYNC_NOSPACE, sock-flags); break; } /* If more outstanding DMAs, queue
[PATCH V3 2/2] vhost: handle polling errors
Polling errors were ignored by vhost/vhost_net, this may lead to crash when trying to remove vhost from waitqueue when after the polling is failed. Solve this problem by: - checking the poll->wqh before trying to remove from waitqueue - report an error when poll() returns a POLLERR in vhost_start_poll() - report an error when vhost_start_poll() fails in vhost_vring_ioctl()/vhost_net_set_backend() which is used to notify the failure to userspace. - report an error in the data path in vhost_net when meet polling errors. After those changes, we can safely drop the tx polling state in vhost_net since it was replaced by the checking of poll->wqh. Signed-off-by: Jason Wang --- drivers/vhost/net.c | 74 drivers/vhost/vhost.c | 31 +++- drivers/vhost/vhost.h |2 +- 3 files changed, 49 insertions(+), 58 deletions(-) diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index d10ad6f..125c1e5 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -64,20 +64,10 @@ enum { VHOST_NET_VQ_MAX = 2, }; -enum vhost_net_poll_state { - VHOST_NET_POLL_DISABLED = 0, - VHOST_NET_POLL_STARTED = 1, - VHOST_NET_POLL_STOPPED = 2, -}; - struct vhost_net { struct vhost_dev dev; struct vhost_virtqueue vqs[VHOST_NET_VQ_MAX]; struct vhost_poll poll[VHOST_NET_VQ_MAX]; - /* Tells us whether we are polling a socket for TX. -* We only do this when socket buffer fills up. -* Protected by tx vq lock. */ - enum vhost_net_poll_state tx_poll_state; /* Number of TX recently submitted. * Protected by tx vq lock. */ unsigned tx_packets; @@ -155,24 +145,6 @@ static void copy_iovec_hdr(const struct iovec *from, struct iovec *to, } } -/* Caller must have TX VQ lock */ -static void tx_poll_stop(struct vhost_net *net) -{ - if (likely(net->tx_poll_state != VHOST_NET_POLL_STARTED)) - return; - vhost_poll_stop(net->poll + VHOST_NET_VQ_TX); - net->tx_poll_state = VHOST_NET_POLL_STOPPED; -} - -/* Caller must have TX VQ lock */ -static void tx_poll_start(struct vhost_net *net, struct socket *sock) -{ - if (unlikely(net->tx_poll_state != VHOST_NET_POLL_STOPPED)) - return; - vhost_poll_start(net->poll + VHOST_NET_VQ_TX, sock->file); - net->tx_poll_state = VHOST_NET_POLL_STARTED; -} - /* In case of DMA done not in order in lower device driver for some reason. * upend_idx is used to track end of used idx, done_idx is used to track head * of used idx. Once lower device DMA done contiguously, we will signal KVM @@ -227,6 +199,7 @@ static void vhost_zerocopy_callback(struct ubuf_info *ubuf, bool success) static void handle_tx(struct vhost_net *net) { struct vhost_virtqueue *vq = >dev.vqs[VHOST_NET_VQ_TX]; + struct vhost_poll *poll = net->poll + VHOST_NET_VQ_TX; unsigned out, in, s; int head; struct msghdr msg = { @@ -252,7 +225,8 @@ static void handle_tx(struct vhost_net *net) wmem = atomic_read(>sk->sk_wmem_alloc); if (wmem >= sock->sk->sk_sndbuf) { mutex_lock(>mutex); - tx_poll_start(net, sock); + if (vhost_poll_start(poll, sock->file)) + vq_err(vq, "Fail to start TX polling\n"); mutex_unlock(>mutex); return; } @@ -261,7 +235,7 @@ static void handle_tx(struct vhost_net *net) vhost_disable_notify(>dev, vq); if (wmem < sock->sk->sk_sndbuf / 2) - tx_poll_stop(net); + vhost_poll_stop(poll); hdr_size = vq->vhost_hlen; zcopy = vq->ubufs; @@ -283,8 +257,10 @@ static void handle_tx(struct vhost_net *net) wmem = atomic_read(>sk->sk_wmem_alloc); if (wmem >= sock->sk->sk_sndbuf * 3 / 4) { - tx_poll_start(net, sock); - set_bit(SOCK_ASYNC_NOSPACE, >flags); + if (vhost_poll_start(poll, sock->file)) + vq_err(vq, "Fail to start TX polling\n"); + else + set_bit(SOCK_ASYNC_NOSPACE, >flags); break; } /* If more outstanding DMAs, queue the work. @@ -294,8 +270,10 @@ static void handle_tx(struct vhost_net *net) (vq->upend_idx - vq->done_idx) : (vq->upend_idx + UIO_MAXIOV - vq->done_idx); if (unlikely(num_pends > VHOST_MAX_PEND)) { - tx_poll_start(net, sock); - set_bit(SOCK_ASYNC_NOSPACE, >flags); + if (vhost_poll_start(poll, sock->file)) +
[PATCH V3 2/2] vhost: handle polling errors
Polling errors were ignored by vhost/vhost_net, this may lead to crash when trying to remove vhost from waitqueue when after the polling is failed. Solve this problem by: - checking the poll-wqh before trying to remove from waitqueue - report an error when poll() returns a POLLERR in vhost_start_poll() - report an error when vhost_start_poll() fails in vhost_vring_ioctl()/vhost_net_set_backend() which is used to notify the failure to userspace. - report an error in the data path in vhost_net when meet polling errors. After those changes, we can safely drop the tx polling state in vhost_net since it was replaced by the checking of poll-wqh. Signed-off-by: Jason Wang jasow...@redhat.com --- drivers/vhost/net.c | 74 drivers/vhost/vhost.c | 31 +++- drivers/vhost/vhost.h |2 +- 3 files changed, 49 insertions(+), 58 deletions(-) diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index d10ad6f..125c1e5 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -64,20 +64,10 @@ enum { VHOST_NET_VQ_MAX = 2, }; -enum vhost_net_poll_state { - VHOST_NET_POLL_DISABLED = 0, - VHOST_NET_POLL_STARTED = 1, - VHOST_NET_POLL_STOPPED = 2, -}; - struct vhost_net { struct vhost_dev dev; struct vhost_virtqueue vqs[VHOST_NET_VQ_MAX]; struct vhost_poll poll[VHOST_NET_VQ_MAX]; - /* Tells us whether we are polling a socket for TX. -* We only do this when socket buffer fills up. -* Protected by tx vq lock. */ - enum vhost_net_poll_state tx_poll_state; /* Number of TX recently submitted. * Protected by tx vq lock. */ unsigned tx_packets; @@ -155,24 +145,6 @@ static void copy_iovec_hdr(const struct iovec *from, struct iovec *to, } } -/* Caller must have TX VQ lock */ -static void tx_poll_stop(struct vhost_net *net) -{ - if (likely(net-tx_poll_state != VHOST_NET_POLL_STARTED)) - return; - vhost_poll_stop(net-poll + VHOST_NET_VQ_TX); - net-tx_poll_state = VHOST_NET_POLL_STOPPED; -} - -/* Caller must have TX VQ lock */ -static void tx_poll_start(struct vhost_net *net, struct socket *sock) -{ - if (unlikely(net-tx_poll_state != VHOST_NET_POLL_STOPPED)) - return; - vhost_poll_start(net-poll + VHOST_NET_VQ_TX, sock-file); - net-tx_poll_state = VHOST_NET_POLL_STARTED; -} - /* In case of DMA done not in order in lower device driver for some reason. * upend_idx is used to track end of used idx, done_idx is used to track head * of used idx. Once lower device DMA done contiguously, we will signal KVM @@ -227,6 +199,7 @@ static void vhost_zerocopy_callback(struct ubuf_info *ubuf, bool success) static void handle_tx(struct vhost_net *net) { struct vhost_virtqueue *vq = net-dev.vqs[VHOST_NET_VQ_TX]; + struct vhost_poll *poll = net-poll + VHOST_NET_VQ_TX; unsigned out, in, s; int head; struct msghdr msg = { @@ -252,7 +225,8 @@ static void handle_tx(struct vhost_net *net) wmem = atomic_read(sock-sk-sk_wmem_alloc); if (wmem = sock-sk-sk_sndbuf) { mutex_lock(vq-mutex); - tx_poll_start(net, sock); + if (vhost_poll_start(poll, sock-file)) + vq_err(vq, Fail to start TX polling\n); mutex_unlock(vq-mutex); return; } @@ -261,7 +235,7 @@ static void handle_tx(struct vhost_net *net) vhost_disable_notify(net-dev, vq); if (wmem sock-sk-sk_sndbuf / 2) - tx_poll_stop(net); + vhost_poll_stop(poll); hdr_size = vq-vhost_hlen; zcopy = vq-ubufs; @@ -283,8 +257,10 @@ static void handle_tx(struct vhost_net *net) wmem = atomic_read(sock-sk-sk_wmem_alloc); if (wmem = sock-sk-sk_sndbuf * 3 / 4) { - tx_poll_start(net, sock); - set_bit(SOCK_ASYNC_NOSPACE, sock-flags); + if (vhost_poll_start(poll, sock-file)) + vq_err(vq, Fail to start TX polling\n); + else + set_bit(SOCK_ASYNC_NOSPACE, sock-flags); break; } /* If more outstanding DMAs, queue the work. @@ -294,8 +270,10 @@ static void handle_tx(struct vhost_net *net) (vq-upend_idx - vq-done_idx) : (vq-upend_idx + UIO_MAXIOV - vq-done_idx); if (unlikely(num_pends VHOST_MAX_PEND)) { - tx_poll_start(net, sock); - set_bit(SOCK_ASYNC_NOSPACE, sock-flags); + if (vhost_poll_start(poll, sock-file)) +