Re: [PATCH V2 3/3] vhost_net: basic polling support
On 01/20/2016 10:35 PM, Michael S. Tsirkin wrote: > On Tue, Dec 01, 2015 at 02:39:45PM +0800, Jason Wang wrote: >> This patch tries to poll for new added tx buffer or socket receive >> queue for a while at the end of tx/rx processing. The maximum time >> spent on polling were specified through a new kind of vring ioctl. >> >> Signed-off-by: Jason Wang >> --- >> drivers/vhost/net.c| 72 >> ++ >> drivers/vhost/vhost.c | 15 ++ >> drivers/vhost/vhost.h | 1 + >> include/uapi/linux/vhost.h | 11 +++ >> 4 files changed, 94 insertions(+), 5 deletions(-) >> >> diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c >> index 9eda69e..ce6da77 100644 >> --- a/drivers/vhost/net.c >> +++ b/drivers/vhost/net.c >> @@ -287,6 +287,41 @@ static void vhost_zerocopy_callback(struct ubuf_info >> *ubuf, bool success) >> rcu_read_unlock_bh(); >> } >> >> +static inline unsigned long busy_clock(void) >> +{ >> +return local_clock() >> 10; >> +} >> + >> +static bool vhost_can_busy_poll(struct vhost_dev *dev, >> +unsigned long endtime) >> +{ >> +return likely(!need_resched()) && >> + likely(!time_after(busy_clock(), endtime)) && >> + likely(!signal_pending(current)) && >> + !vhost_has_work(dev) && >> + single_task_running(); >> +} >> + >> +static int vhost_net_tx_get_vq_desc(struct vhost_net *net, >> +struct vhost_virtqueue *vq, >> +struct iovec iov[], unsigned int iov_size, >> +unsigned int *out_num, unsigned int *in_num) >> +{ >> +unsigned long uninitialized_var(endtime); >> + >> +if (vq->busyloop_timeout) { >> +preempt_disable(); >> +endtime = busy_clock() + vq->busyloop_timeout; >> +while (vhost_can_busy_poll(vq->dev, endtime) && >> + !vhost_vq_more_avail(vq->dev, vq)) >> +cpu_relax(); >> +preempt_enable(); >> +} > Isn't there a way to call all this after vhost_get_vq_desc? We can. > First, this will reduce the good path overhead as you > won't have to play with timers and preemption. For good path, yes. > > Second, this will reduce the chance of a pagefault on avail ring read. Yes. 
> >> + >> +return vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov), >> + out_num, in_num, NULL, NULL); >> +} >> + >> /* Expects to be always run from workqueue - which acts as >> * read-size critical section for our kind of RCU. */ >> static void handle_tx(struct vhost_net *net) >> @@ -331,10 +366,9 @@ static void handle_tx(struct vhost_net *net) >>% UIO_MAXIOV == nvq->done_idx)) >> break; >> >> -head = vhost_get_vq_desc(vq, vq->iov, >> - ARRAY_SIZE(vq->iov), >> - , , >> - NULL, NULL); >> +head = vhost_net_tx_get_vq_desc(net, vq, vq->iov, >> +ARRAY_SIZE(vq->iov), >> +, ); >> /* On error, stop handling until the next kick. */ >> if (unlikely(head < 0)) >> break; >> @@ -435,6 +469,34 @@ static int peek_head_len(struct sock *sk) >> return len; >> } >> >> +static int vhost_net_peek_head_len(struct vhost_net *net, struct sock *sk) > Need a hint that it's rx related in the name. Ok. > >> +{ >> +struct vhost_net_virtqueue *nvq = >vqs[VHOST_NET_VQ_TX]; >> +struct vhost_virtqueue *vq = >vq; >> +unsigned long uninitialized_var(endtime); >> + >> +if (vq->busyloop_timeout) { >> +mutex_lock(>mutex); > This appears to be called under vq mutex in handle_rx. > So how does this work then? This is tx mutex, an optimization here: both rx socket and tx ring is polled. So there's no need to tx notification anymore. This can save lots of vmexits. > > >> +vhost_disable_notify(>dev, vq); > This appears to be called after disable notify > in handle_rx - so why disable here again? It disable the tx notification instead of rx. > >> + >> +preempt_disable(); >> +endtime = busy_clock() + vq->busyloop_timeout; >> + >> +while (vhost_can_busy_poll(>dev, endtime) && >> + skb_queue_empty(>sk_receive_queue) && >> + !vhost_vq_more_avail(>dev, vq)) >> +cpu_relax(); > This seems to mix in several items. > RX queue is normally not empty. I don't think > we need to poll for that. > So IMHO we only need to poll for sk_receive_queue really. Same as above, tx virt queue is being polled here. 
> >> + >> +preempt_enable(); >> + >> +if (vhost_enable_notify(&net->dev, vq)) >> +vhost_poll_queue(&vq->poll); > But
Re: [PATCH V2 3/3] vhost_net: basic polling support
On 01/20/2016 10:35 PM, Michael S. Tsirkin wrote: > On Tue, Dec 01, 2015 at 02:39:45PM +0800, Jason Wang wrote: >> This patch tries to poll for new added tx buffer or socket receive >> queue for a while at the end of tx/rx processing. The maximum time >> spent on polling were specified through a new kind of vring ioctl. >> >> Signed-off-by: Jason Wang>> --- >> drivers/vhost/net.c| 72 >> ++ >> drivers/vhost/vhost.c | 15 ++ >> drivers/vhost/vhost.h | 1 + >> include/uapi/linux/vhost.h | 11 +++ >> 4 files changed, 94 insertions(+), 5 deletions(-) >> >> diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c >> index 9eda69e..ce6da77 100644 >> --- a/drivers/vhost/net.c >> +++ b/drivers/vhost/net.c >> @@ -287,6 +287,41 @@ static void vhost_zerocopy_callback(struct ubuf_info >> *ubuf, bool success) >> rcu_read_unlock_bh(); >> } >> >> +static inline unsigned long busy_clock(void) >> +{ >> +return local_clock() >> 10; >> +} >> + >> +static bool vhost_can_busy_poll(struct vhost_dev *dev, >> +unsigned long endtime) >> +{ >> +return likely(!need_resched()) && >> + likely(!time_after(busy_clock(), endtime)) && >> + likely(!signal_pending(current)) && >> + !vhost_has_work(dev) && >> + single_task_running(); >> +} >> + >> +static int vhost_net_tx_get_vq_desc(struct vhost_net *net, >> +struct vhost_virtqueue *vq, >> +struct iovec iov[], unsigned int iov_size, >> +unsigned int *out_num, unsigned int *in_num) >> +{ >> +unsigned long uninitialized_var(endtime); >> + >> +if (vq->busyloop_timeout) { >> +preempt_disable(); >> +endtime = busy_clock() + vq->busyloop_timeout; >> +while (vhost_can_busy_poll(vq->dev, endtime) && >> + !vhost_vq_more_avail(vq->dev, vq)) >> +cpu_relax(); >> +preempt_enable(); >> +} > Isn't there a way to call all this after vhost_get_vq_desc? We can. > First, this will reduce the good path overhead as you > won't have to play with timers and preemption. For good path, yes. > > Second, this will reduce the chance of a pagefault on avail ring read. Yes. 
> >> + >> +return vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov), >> + out_num, in_num, NULL, NULL); >> +} >> + >> /* Expects to be always run from workqueue - which acts as >> * read-size critical section for our kind of RCU. */ >> static void handle_tx(struct vhost_net *net) >> @@ -331,10 +366,9 @@ static void handle_tx(struct vhost_net *net) >>% UIO_MAXIOV == nvq->done_idx)) >> break; >> >> -head = vhost_get_vq_desc(vq, vq->iov, >> - ARRAY_SIZE(vq->iov), >> - , , >> - NULL, NULL); >> +head = vhost_net_tx_get_vq_desc(net, vq, vq->iov, >> +ARRAY_SIZE(vq->iov), >> +, ); >> /* On error, stop handling until the next kick. */ >> if (unlikely(head < 0)) >> break; >> @@ -435,6 +469,34 @@ static int peek_head_len(struct sock *sk) >> return len; >> } >> >> +static int vhost_net_peek_head_len(struct vhost_net *net, struct sock *sk) > Need a hint that it's rx related in the name. Ok. > >> +{ >> +struct vhost_net_virtqueue *nvq = >vqs[VHOST_NET_VQ_TX]; >> +struct vhost_virtqueue *vq = >vq; >> +unsigned long uninitialized_var(endtime); >> + >> +if (vq->busyloop_timeout) { >> +mutex_lock(>mutex); > This appears to be called under vq mutex in handle_rx. > So how does this work then? This is tx mutex, an optimization here: both rx socket and tx ring is polled. So there's no need to tx notification anymore. This can save lots of vmexits. > > >> +vhost_disable_notify(>dev, vq); > This appears to be called after disable notify > in handle_rx - so why disable here again? It disable the tx notification instead of rx. > >> + >> +preempt_disable(); >> +endtime = busy_clock() + vq->busyloop_timeout; >> + >> +while (vhost_can_busy_poll(>dev, endtime) && >> + skb_queue_empty(>sk_receive_queue) && >> + !vhost_vq_more_avail(>dev, vq)) >> +cpu_relax(); > This seems to mix in several items. > RX queue is normally not empty. I don't think > we need to poll for that. > So IMHO we only need to poll for sk_receive_queue really. Same as above, tx virt queue is being polled here. 
> >> + >> +preempt_enable(); >> + >> +if (vhost_enable_notify(&net->dev, vq)) >> +vhost_poll_queue(&vq->poll);
Re: [PATCH V2 3/3] vhost_net: basic polling support
On 2016/1/21 13:13, Michael S. Tsirkin wrote: On Thu, Jan 21, 2016 at 10:11:35AM +0800, Yang Zhang wrote: On 2016/1/20 22:35, Michael S. Tsirkin wrote: On Tue, Dec 01, 2015 at 02:39:45PM +0800, Jason Wang wrote: This patch tries to poll for new added tx buffer or socket receive queue for a while at the end of tx/rx processing. The maximum time spent on polling were specified through a new kind of vring ioctl. Signed-off-by: Jason Wang --- drivers/vhost/net.c| 72 ++ drivers/vhost/vhost.c | 15 ++ drivers/vhost/vhost.h | 1 + include/uapi/linux/vhost.h | 11 +++ 4 files changed, 94 insertions(+), 5 deletions(-) diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index 9eda69e..ce6da77 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -287,6 +287,41 @@ static void vhost_zerocopy_callback(struct ubuf_info *ubuf, bool success) rcu_read_unlock_bh(); } +static inline unsigned long busy_clock(void) +{ + return local_clock() >> 10; +} + +static bool vhost_can_busy_poll(struct vhost_dev *dev, + unsigned long endtime) +{ + return likely(!need_resched()) && + likely(!time_after(busy_clock(), endtime)) && + likely(!signal_pending(current)) && + !vhost_has_work(dev) && + single_task_running(); +} + +static int vhost_net_tx_get_vq_desc(struct vhost_net *net, + struct vhost_virtqueue *vq, + struct iovec iov[], unsigned int iov_size, + unsigned int *out_num, unsigned int *in_num) +{ + unsigned long uninitialized_var(endtime); + + if (vq->busyloop_timeout) { + preempt_disable(); + endtime = busy_clock() + vq->busyloop_timeout; + while (vhost_can_busy_poll(vq->dev, endtime) && + !vhost_vq_more_avail(vq->dev, vq)) + cpu_relax(); + preempt_enable(); + } Isn't there a way to call all this after vhost_get_vq_desc? First, this will reduce the good path overhead as you won't have to play with timers and preemption. Second, this will reduce the chance of a pagefault on avail ring read. 
+ + return vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov), +out_num, in_num, NULL, NULL); +} + /* Expects to be always run from workqueue - which acts as * read-size critical section for our kind of RCU. */ static void handle_tx(struct vhost_net *net) @@ -331,10 +366,9 @@ static void handle_tx(struct vhost_net *net) % UIO_MAXIOV == nvq->done_idx)) break; - head = vhost_get_vq_desc(vq, vq->iov, -ARRAY_SIZE(vq->iov), -, , -NULL, NULL); + head = vhost_net_tx_get_vq_desc(net, vq, vq->iov, + ARRAY_SIZE(vq->iov), + , ); /* On error, stop handling until the next kick. */ if (unlikely(head < 0)) break; @@ -435,6 +469,34 @@ static int peek_head_len(struct sock *sk) return len; } +static int vhost_net_peek_head_len(struct vhost_net *net, struct sock *sk) Need a hint that it's rx related in the name. +{ + struct vhost_net_virtqueue *nvq = >vqs[VHOST_NET_VQ_TX]; + struct vhost_virtqueue *vq = >vq; + unsigned long uninitialized_var(endtime); + + if (vq->busyloop_timeout) { + mutex_lock(>mutex); This appears to be called under vq mutex in handle_rx. So how does this work then? + vhost_disable_notify(>dev, vq); This appears to be called after disable notify in handle_rx - so why disable here again? + + preempt_disable(); + endtime = busy_clock() + vq->busyloop_timeout; + + while (vhost_can_busy_poll(>dev, endtime) && + skb_queue_empty(>sk_receive_queue) && + !vhost_vq_more_avail(>dev, vq)) + cpu_relax(); This seems to mix in several items. RX queue is normally not empty. I don't think we need to poll for that. I have seen the RX queue is easy to be empty under some extreme conditions like lots of small packet. So maybe the check is useful here. It's not useful *here*. If you have an rx packet but no space in the ring, this will exit immediately. Indeed! It might be useful elsewhere but I doubt it - if rx ring is out of buffers, you are better off backing out and giving guest some breathing space. -- best regards yang -- best regards yang
Re: [PATCH V2 3/3] vhost_net: basic polling support
On Thu, Jan 21, 2016 at 10:11:35AM +0800, Yang Zhang wrote: > On 2016/1/20 22:35, Michael S. Tsirkin wrote: > >On Tue, Dec 01, 2015 at 02:39:45PM +0800, Jason Wang wrote: > >>This patch tries to poll for new added tx buffer or socket receive > >>queue for a while at the end of tx/rx processing. The maximum time > >>spent on polling were specified through a new kind of vring ioctl. > >> > >>Signed-off-by: Jason Wang > >>--- > >> drivers/vhost/net.c| 72 > >> ++ > >> drivers/vhost/vhost.c | 15 ++ > >> drivers/vhost/vhost.h | 1 + > >> include/uapi/linux/vhost.h | 11 +++ > >> 4 files changed, 94 insertions(+), 5 deletions(-) > >> > >>diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c > >>index 9eda69e..ce6da77 100644 > >>--- a/drivers/vhost/net.c > >>+++ b/drivers/vhost/net.c > >>@@ -287,6 +287,41 @@ static void vhost_zerocopy_callback(struct ubuf_info > >>*ubuf, bool success) > >>rcu_read_unlock_bh(); > >> } > >> > >>+static inline unsigned long busy_clock(void) > >>+{ > >>+ return local_clock() >> 10; > >>+} > >>+ > >>+static bool vhost_can_busy_poll(struct vhost_dev *dev, > >>+ unsigned long endtime) > >>+{ > >>+ return likely(!need_resched()) && > >>+ likely(!time_after(busy_clock(), endtime)) && > >>+ likely(!signal_pending(current)) && > >>+ !vhost_has_work(dev) && > >>+ single_task_running(); > >>+} > >>+ > >>+static int vhost_net_tx_get_vq_desc(struct vhost_net *net, > >>+ struct vhost_virtqueue *vq, > >>+ struct iovec iov[], unsigned int iov_size, > >>+ unsigned int *out_num, unsigned int *in_num) > >>+{ > >>+ unsigned long uninitialized_var(endtime); > >>+ > >>+ if (vq->busyloop_timeout) { > >>+ preempt_disable(); > >>+ endtime = busy_clock() + vq->busyloop_timeout; > >>+ while (vhost_can_busy_poll(vq->dev, endtime) && > >>+ !vhost_vq_more_avail(vq->dev, vq)) > >>+ cpu_relax(); > >>+ preempt_enable(); > >>+ } > > > >Isn't there a way to call all this after vhost_get_vq_desc? 
> >First, this will reduce the good path overhead as you > >won't have to play with timers and preemption. > > > >Second, this will reduce the chance of a pagefault on avail ring read. > > > >>+ > >>+ return vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov), > >>+out_num, in_num, NULL, NULL); > >>+} > >>+ > >> /* Expects to be always run from workqueue - which acts as > >> * read-size critical section for our kind of RCU. */ > >> static void handle_tx(struct vhost_net *net) > >>@@ -331,10 +366,9 @@ static void handle_tx(struct vhost_net *net) > >> % UIO_MAXIOV == nvq->done_idx)) > >>break; > >> > >>- head = vhost_get_vq_desc(vq, vq->iov, > >>-ARRAY_SIZE(vq->iov), > >>-, , > >>-NULL, NULL); > >>+ head = vhost_net_tx_get_vq_desc(net, vq, vq->iov, > >>+ ARRAY_SIZE(vq->iov), > >>+ , ); > >>/* On error, stop handling until the next kick. */ > >>if (unlikely(head < 0)) > >>break; > >>@@ -435,6 +469,34 @@ static int peek_head_len(struct sock *sk) > >>return len; > >> } > >> > >>+static int vhost_net_peek_head_len(struct vhost_net *net, struct sock *sk) > > > >Need a hint that it's rx related in the name. > > > >>+{ > >>+ struct vhost_net_virtqueue *nvq = >vqs[VHOST_NET_VQ_TX]; > >>+ struct vhost_virtqueue *vq = >vq; > >>+ unsigned long uninitialized_var(endtime); > >>+ > >>+ if (vq->busyloop_timeout) { > >>+ mutex_lock(>mutex); > > > >This appears to be called under vq mutex in handle_rx. > >So how does this work then? > > > > > >>+ vhost_disable_notify(>dev, vq); > > > >This appears to be called after disable notify > >in handle_rx - so why disable here again? > > > >>+ > >>+ preempt_disable(); > >>+ endtime = busy_clock() + vq->busyloop_timeout; > >>+ > >>+ while (vhost_can_busy_poll(>dev, endtime) && > >>+ skb_queue_empty(>sk_receive_queue) && > >>+ !vhost_vq_more_avail(>dev, vq)) > >>+ cpu_relax(); > > > >This seems to mix in several items. > >RX queue is normally not empty. I don't think > >we need to poll for that. 
> > I have seen the RX queue is easy to be empty under some extreme conditions > like lots of small packet. So maybe the check is useful here. It's not useful *here*. If you have an rx packet but no space in the ring, this will exit immediately. It might be useful elsewhere but I doubt it - if rx ring is out of buffers, you are better off backing out and giving guest some breathing space.
Re: [PATCH V2 3/3] vhost_net: basic polling support
On 2016/1/20 22:35, Michael S. Tsirkin wrote: On Tue, Dec 01, 2015 at 02:39:45PM +0800, Jason Wang wrote: This patch tries to poll for new added tx buffer or socket receive queue for a while at the end of tx/rx processing. The maximum time spent on polling were specified through a new kind of vring ioctl. Signed-off-by: Jason Wang --- drivers/vhost/net.c| 72 ++ drivers/vhost/vhost.c | 15 ++ drivers/vhost/vhost.h | 1 + include/uapi/linux/vhost.h | 11 +++ 4 files changed, 94 insertions(+), 5 deletions(-) diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index 9eda69e..ce6da77 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -287,6 +287,41 @@ static void vhost_zerocopy_callback(struct ubuf_info *ubuf, bool success) rcu_read_unlock_bh(); } +static inline unsigned long busy_clock(void) +{ + return local_clock() >> 10; +} + +static bool vhost_can_busy_poll(struct vhost_dev *dev, + unsigned long endtime) +{ + return likely(!need_resched()) && + likely(!time_after(busy_clock(), endtime)) && + likely(!signal_pending(current)) && + !vhost_has_work(dev) && + single_task_running(); +} + +static int vhost_net_tx_get_vq_desc(struct vhost_net *net, + struct vhost_virtqueue *vq, + struct iovec iov[], unsigned int iov_size, + unsigned int *out_num, unsigned int *in_num) +{ + unsigned long uninitialized_var(endtime); + + if (vq->busyloop_timeout) { + preempt_disable(); + endtime = busy_clock() + vq->busyloop_timeout; + while (vhost_can_busy_poll(vq->dev, endtime) && + !vhost_vq_more_avail(vq->dev, vq)) + cpu_relax(); + preempt_enable(); + } Isn't there a way to call all this after vhost_get_vq_desc? First, this will reduce the good path overhead as you won't have to play with timers and preemption. Second, this will reduce the chance of a pagefault on avail ring read. 
+ + return vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov), +out_num, in_num, NULL, NULL); +} + /* Expects to be always run from workqueue - which acts as * read-size critical section for our kind of RCU. */ static void handle_tx(struct vhost_net *net) @@ -331,10 +366,9 @@ static void handle_tx(struct vhost_net *net) % UIO_MAXIOV == nvq->done_idx)) break; - head = vhost_get_vq_desc(vq, vq->iov, -ARRAY_SIZE(vq->iov), -, , -NULL, NULL); + head = vhost_net_tx_get_vq_desc(net, vq, vq->iov, + ARRAY_SIZE(vq->iov), + , ); /* On error, stop handling until the next kick. */ if (unlikely(head < 0)) break; @@ -435,6 +469,34 @@ static int peek_head_len(struct sock *sk) return len; } +static int vhost_net_peek_head_len(struct vhost_net *net, struct sock *sk) Need a hint that it's rx related in the name. +{ + struct vhost_net_virtqueue *nvq = >vqs[VHOST_NET_VQ_TX]; + struct vhost_virtqueue *vq = >vq; + unsigned long uninitialized_var(endtime); + + if (vq->busyloop_timeout) { + mutex_lock(>mutex); This appears to be called under vq mutex in handle_rx. So how does this work then? + vhost_disable_notify(>dev, vq); This appears to be called after disable notify in handle_rx - so why disable here again? + + preempt_disable(); + endtime = busy_clock() + vq->busyloop_timeout; + + while (vhost_can_busy_poll(>dev, endtime) && + skb_queue_empty(>sk_receive_queue) && + !vhost_vq_more_avail(>dev, vq)) + cpu_relax(); This seems to mix in several items. RX queue is normally not empty. I don't think we need to poll for that. I have seen the RX queue is easy to be empty under some extreme conditions like lots of small packet. So maybe the check is useful here. -- best regards yang
Re: [PATCH V2 3/3] vhost_net: basic polling support
On 2016/1/20 22:35, Michael S. Tsirkin wrote: On Tue, Dec 01, 2015 at 02:39:45PM +0800, Jason Wang wrote: This patch tries to poll for new added tx buffer or socket receive queue for a while at the end of tx/rx processing. The maximum time spent on polling were specified through a new kind of vring ioctl. Signed-off-by: Jason Wang--- drivers/vhost/net.c| 72 ++ drivers/vhost/vhost.c | 15 ++ drivers/vhost/vhost.h | 1 + include/uapi/linux/vhost.h | 11 +++ 4 files changed, 94 insertions(+), 5 deletions(-) diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index 9eda69e..ce6da77 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -287,6 +287,41 @@ static void vhost_zerocopy_callback(struct ubuf_info *ubuf, bool success) rcu_read_unlock_bh(); } +static inline unsigned long busy_clock(void) +{ + return local_clock() >> 10; +} + +static bool vhost_can_busy_poll(struct vhost_dev *dev, + unsigned long endtime) +{ + return likely(!need_resched()) && + likely(!time_after(busy_clock(), endtime)) && + likely(!signal_pending(current)) && + !vhost_has_work(dev) && + single_task_running(); +} + +static int vhost_net_tx_get_vq_desc(struct vhost_net *net, + struct vhost_virtqueue *vq, + struct iovec iov[], unsigned int iov_size, + unsigned int *out_num, unsigned int *in_num) +{ + unsigned long uninitialized_var(endtime); + + if (vq->busyloop_timeout) { + preempt_disable(); + endtime = busy_clock() + vq->busyloop_timeout; + while (vhost_can_busy_poll(vq->dev, endtime) && + !vhost_vq_more_avail(vq->dev, vq)) + cpu_relax(); + preempt_enable(); + } Isn't there a way to call all this after vhost_get_vq_desc? First, this will reduce the good path overhead as you won't have to play with timers and preemption. Second, this will reduce the chance of a pagefault on avail ring read. 
+ + return vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov), +out_num, in_num, NULL, NULL); +} + /* Expects to be always run from workqueue - which acts as * read-size critical section for our kind of RCU. */ static void handle_tx(struct vhost_net *net) @@ -331,10 +366,9 @@ static void handle_tx(struct vhost_net *net) % UIO_MAXIOV == nvq->done_idx)) break; - head = vhost_get_vq_desc(vq, vq->iov, -ARRAY_SIZE(vq->iov), -, , -NULL, NULL); + head = vhost_net_tx_get_vq_desc(net, vq, vq->iov, + ARRAY_SIZE(vq->iov), + , ); /* On error, stop handling until the next kick. */ if (unlikely(head < 0)) break; @@ -435,6 +469,34 @@ static int peek_head_len(struct sock *sk) return len; } +static int vhost_net_peek_head_len(struct vhost_net *net, struct sock *sk) Need a hint that it's rx related in the name. +{ + struct vhost_net_virtqueue *nvq = >vqs[VHOST_NET_VQ_TX]; + struct vhost_virtqueue *vq = >vq; + unsigned long uninitialized_var(endtime); + + if (vq->busyloop_timeout) { + mutex_lock(>mutex); This appears to be called under vq mutex in handle_rx. So how does this work then? + vhost_disable_notify(>dev, vq); This appears to be called after disable notify in handle_rx - so why disable here again? + + preempt_disable(); + endtime = busy_clock() + vq->busyloop_timeout; + + while (vhost_can_busy_poll(>dev, endtime) && + skb_queue_empty(>sk_receive_queue) && + !vhost_vq_more_avail(>dev, vq)) + cpu_relax(); This seems to mix in several items. RX queue is normally not empty. I don't think we need to poll for that. I have seen the RX queue is easy to be empty under some extreme conditions like lots of small packet. So maybe the check is useful here. -- best regards yang
Re: [PATCH V2 3/3] vhost_net: basic polling support
On Thu, Jan 21, 2016 at 10:11:35AM +0800, Yang Zhang wrote: > On 2016/1/20 22:35, Michael S. Tsirkin wrote: > >On Tue, Dec 01, 2015 at 02:39:45PM +0800, Jason Wang wrote: > >>This patch tries to poll for new added tx buffer or socket receive > >>queue for a while at the end of tx/rx processing. The maximum time > >>spent on polling were specified through a new kind of vring ioctl. > >> > >>Signed-off-by: Jason Wang> >>--- > >> drivers/vhost/net.c| 72 > >> ++ > >> drivers/vhost/vhost.c | 15 ++ > >> drivers/vhost/vhost.h | 1 + > >> include/uapi/linux/vhost.h | 11 +++ > >> 4 files changed, 94 insertions(+), 5 deletions(-) > >> > >>diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c > >>index 9eda69e..ce6da77 100644 > >>--- a/drivers/vhost/net.c > >>+++ b/drivers/vhost/net.c > >>@@ -287,6 +287,41 @@ static void vhost_zerocopy_callback(struct ubuf_info > >>*ubuf, bool success) > >>rcu_read_unlock_bh(); > >> } > >> > >>+static inline unsigned long busy_clock(void) > >>+{ > >>+ return local_clock() >> 10; > >>+} > >>+ > >>+static bool vhost_can_busy_poll(struct vhost_dev *dev, > >>+ unsigned long endtime) > >>+{ > >>+ return likely(!need_resched()) && > >>+ likely(!time_after(busy_clock(), endtime)) && > >>+ likely(!signal_pending(current)) && > >>+ !vhost_has_work(dev) && > >>+ single_task_running(); > >>+} > >>+ > >>+static int vhost_net_tx_get_vq_desc(struct vhost_net *net, > >>+ struct vhost_virtqueue *vq, > >>+ struct iovec iov[], unsigned int iov_size, > >>+ unsigned int *out_num, unsigned int *in_num) > >>+{ > >>+ unsigned long uninitialized_var(endtime); > >>+ > >>+ if (vq->busyloop_timeout) { > >>+ preempt_disable(); > >>+ endtime = busy_clock() + vq->busyloop_timeout; > >>+ while (vhost_can_busy_poll(vq->dev, endtime) && > >>+ !vhost_vq_more_avail(vq->dev, vq)) > >>+ cpu_relax(); > >>+ preempt_enable(); > >>+ } > > > >Isn't there a way to call all this after vhost_get_vq_desc? 
> >First, this will reduce the good path overhead as you > >won't have to play with timers and preemption. > > > >Second, this will reduce the chance of a pagefault on avail ring read. > > > >>+ > >>+ return vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov), > >>+out_num, in_num, NULL, NULL); > >>+} > >>+ > >> /* Expects to be always run from workqueue - which acts as > >> * read-size critical section for our kind of RCU. */ > >> static void handle_tx(struct vhost_net *net) > >>@@ -331,10 +366,9 @@ static void handle_tx(struct vhost_net *net) > >> % UIO_MAXIOV == nvq->done_idx)) > >>break; > >> > >>- head = vhost_get_vq_desc(vq, vq->iov, > >>-ARRAY_SIZE(vq->iov), > >>-, , > >>-NULL, NULL); > >>+ head = vhost_net_tx_get_vq_desc(net, vq, vq->iov, > >>+ ARRAY_SIZE(vq->iov), > >>+ , ); > >>/* On error, stop handling until the next kick. */ > >>if (unlikely(head < 0)) > >>break; > >>@@ -435,6 +469,34 @@ static int peek_head_len(struct sock *sk) > >>return len; > >> } > >> > >>+static int vhost_net_peek_head_len(struct vhost_net *net, struct sock *sk) > > > >Need a hint that it's rx related in the name. > > > >>+{ > >>+ struct vhost_net_virtqueue *nvq = >vqs[VHOST_NET_VQ_TX]; > >>+ struct vhost_virtqueue *vq = >vq; > >>+ unsigned long uninitialized_var(endtime); > >>+ > >>+ if (vq->busyloop_timeout) { > >>+ mutex_lock(>mutex); > > > >This appears to be called under vq mutex in handle_rx. > >So how does this work then? > > > > > >>+ vhost_disable_notify(>dev, vq); > > > >This appears to be called after disable notify > >in handle_rx - so why disable here again? > > > >>+ > >>+ preempt_disable(); > >>+ endtime = busy_clock() + vq->busyloop_timeout; > >>+ > >>+ while (vhost_can_busy_poll(>dev, endtime) && > >>+ skb_queue_empty(>sk_receive_queue) && > >>+ !vhost_vq_more_avail(>dev, vq)) > >>+ cpu_relax(); > > > >This seems to mix in several items. > >RX queue is normally not empty. I don't think > >we need to poll for that. 
> > I have seen the RX queue is easy to be empty under some extreme conditions > like lots of small packet. So maybe the check is useful here. It's not useful *here*. If you have an rx packet but no space in the ring, this will exit immediately. It might be useful elsewhere but I doubt it - if rx ring is out of buffers, you are better off backing out and giving guest some breathing space.
Re: [PATCH V2 3/3] vhost_net: basic polling support
On 2016/1/21 13:13, Michael S. Tsirkin wrote: On Thu, Jan 21, 2016 at 10:11:35AM +0800, Yang Zhang wrote: On 2016/1/20 22:35, Michael S. Tsirkin wrote: On Tue, Dec 01, 2015 at 02:39:45PM +0800, Jason Wang wrote: This patch tries to poll for new added tx buffer or socket receive queue for a while at the end of tx/rx processing. The maximum time spent on polling were specified through a new kind of vring ioctl. Signed-off-by: Jason Wang--- drivers/vhost/net.c| 72 ++ drivers/vhost/vhost.c | 15 ++ drivers/vhost/vhost.h | 1 + include/uapi/linux/vhost.h | 11 +++ 4 files changed, 94 insertions(+), 5 deletions(-) diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index 9eda69e..ce6da77 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -287,6 +287,41 @@ static void vhost_zerocopy_callback(struct ubuf_info *ubuf, bool success) rcu_read_unlock_bh(); } +static inline unsigned long busy_clock(void) +{ + return local_clock() >> 10; +} + +static bool vhost_can_busy_poll(struct vhost_dev *dev, + unsigned long endtime) +{ + return likely(!need_resched()) && + likely(!time_after(busy_clock(), endtime)) && + likely(!signal_pending(current)) && + !vhost_has_work(dev) && + single_task_running(); +} + +static int vhost_net_tx_get_vq_desc(struct vhost_net *net, + struct vhost_virtqueue *vq, + struct iovec iov[], unsigned int iov_size, + unsigned int *out_num, unsigned int *in_num) +{ + unsigned long uninitialized_var(endtime); + + if (vq->busyloop_timeout) { + preempt_disable(); + endtime = busy_clock() + vq->busyloop_timeout; + while (vhost_can_busy_poll(vq->dev, endtime) && + !vhost_vq_more_avail(vq->dev, vq)) + cpu_relax(); + preempt_enable(); + } Isn't there a way to call all this after vhost_get_vq_desc? First, this will reduce the good path overhead as you won't have to play with timers and preemption. Second, this will reduce the chance of a pagefault on avail ring read. 
+ + return vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov), +out_num, in_num, NULL, NULL); +} + /* Expects to be always run from workqueue - which acts as * read-size critical section for our kind of RCU. */ static void handle_tx(struct vhost_net *net) @@ -331,10 +366,9 @@ static void handle_tx(struct vhost_net *net) % UIO_MAXIOV == nvq->done_idx)) break; - head = vhost_get_vq_desc(vq, vq->iov, -ARRAY_SIZE(vq->iov), -, , -NULL, NULL); + head = vhost_net_tx_get_vq_desc(net, vq, vq->iov, + ARRAY_SIZE(vq->iov), + , ); /* On error, stop handling until the next kick. */ if (unlikely(head < 0)) break; @@ -435,6 +469,34 @@ static int peek_head_len(struct sock *sk) return len; } +static int vhost_net_peek_head_len(struct vhost_net *net, struct sock *sk) Need a hint that it's rx related in the name. +{ + struct vhost_net_virtqueue *nvq = >vqs[VHOST_NET_VQ_TX]; + struct vhost_virtqueue *vq = >vq; + unsigned long uninitialized_var(endtime); + + if (vq->busyloop_timeout) { + mutex_lock(>mutex); This appears to be called under vq mutex in handle_rx. So how does this work then? + vhost_disable_notify(>dev, vq); This appears to be called after disable notify in handle_rx - so why disable here again? + + preempt_disable(); + endtime = busy_clock() + vq->busyloop_timeout; + + while (vhost_can_busy_poll(>dev, endtime) && + skb_queue_empty(>sk_receive_queue) && + !vhost_vq_more_avail(>dev, vq)) + cpu_relax(); This seems to mix in several items. RX queue is normally not empty. I don't think we need to poll for that. I have seen the RX queue is easy to be empty under some extreme conditions like lots of small packet. So maybe the check is useful here. It's not useful *here*. If you have an rx packet but no space in the ring, this will exit immediately. Indeed! It might be useful elsewhere but I doubt it - if rx ring is out of buffers, you are better off backing out and giving guest some breathing space. -- best regards yang -- best regards yang
[PATCH V2 3/3] vhost_net: basic polling support
This patch tries to poll for new added tx buffer or socket receive queue for a while at the end of tx/rx processing. The maximum time spent on polling were specified through a new kind of vring ioctl. Signed-off-by: Jason Wang <jasowang@redhat.com> --- drivers/vhost/net.c | 72 ++ drivers/vhost/vhost.c | 15 ++ drivers/vhost/vhost.h | 1 + include/uapi/linux/vhost.h | 11 +++ 4 files changed, 94 insertions(+), 5 deletions(-) diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index 9eda69e..ce6da77 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -287,6 +287,41 @@ static void vhost_zerocopy_callback(struct ubuf_info *ubuf, bool success) rcu_read_unlock_bh(); } +static inline unsigned long busy_clock(void) +{ + return local_clock() >> 10; +} + +static bool vhost_can_busy_poll(struct vhost_dev *dev, + unsigned long endtime) +{ + return likely(!need_resched()) && + likely(!time_after(busy_clock(), endtime)) && + likely(!signal_pending(current)) && + !vhost_has_work(dev) && + single_task_running(); +} + +static int vhost_net_tx_get_vq_desc(struct vhost_net *net, + struct vhost_virtqueue *vq, + struct iovec iov[], unsigned int iov_size, + unsigned int *out_num, unsigned int *in_num) +{ + unsigned long uninitialized_var(endtime); + + if (vq->busyloop_timeout) { + preempt_disable(); + endtime = busy_clock() + vq->busyloop_timeout; + while (vhost_can_busy_poll(vq->dev, endtime) && + !vhost_vq_more_avail(vq->dev, vq)) + cpu_relax(); + preempt_enable(); + } + + return vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov), + out_num, in_num, NULL, NULL); +} + /* Expects to be always run from workqueue - which acts as * read-size critical section for our kind of RCU.
 */ static void handle_tx(struct vhost_net *net) @@ -331,10 +366,9 @@ static void handle_tx(struct vhost_net *net) % UIO_MAXIOV == nvq->done_idx)) break; - head = vhost_get_vq_desc(vq, vq->iov, - ARRAY_SIZE(vq->iov), - &out, &in, - NULL, NULL); + head = vhost_net_tx_get_vq_desc(net, vq, vq->iov, + ARRAY_SIZE(vq->iov), + &out, &in); /* On error, stop handling until the next kick. */ if (unlikely(head < 0)) break; @@ -435,6 +469,34 @@ static int peek_head_len(struct sock *sk) return len; } +static int vhost_net_peek_head_len(struct vhost_net *net, struct sock *sk) +{ + struct vhost_net_virtqueue *nvq = &net->vqs[VHOST_NET_VQ_TX]; + struct vhost_virtqueue *vq = &nvq->vq; + unsigned long uninitialized_var(endtime); + + if (vq->busyloop_timeout) { + mutex_lock(&vq->mutex); + vhost_disable_notify(&net->dev, vq); + + preempt_disable(); + endtime = busy_clock() + vq->busyloop_timeout; + + while (vhost_can_busy_poll(&net->dev, endtime) && + skb_queue_empty(&sk->sk_receive_queue) && + !vhost_vq_more_avail(&net->dev, vq)) + cpu_relax(); + + preempt_enable(); + + if (vhost_enable_notify(&net->dev, vq)) + vhost_poll_queue(&vq->poll); + mutex_unlock(&vq->mutex); + } + + return peek_head_len(sk); +} + /* This is a multi-buffer version of vhost_get_desc, that works if * vq has read descriptors only.
* @vq - the relevant virtqueue @@ -553,7 +615,7 @@ static void handle_rx(struct vhost_net *net) vq->log : NULL; mergeable = vhost_has_feature(vq, VIRTIO_NET_F_MRG_RXBUF); - while ((sock_len = peek_head_len(sock->sk))) { + while ((sock_len = vhost_net_peek_head_len(net, sock->sk))) { sock_len += sock_hlen; vhost_len = sock_len + vhost_hlen; headcount = get_rx_bufs(vq, vq->heads, vhost_len, diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 4f45a03..b8ca873 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -285,6 +285,7 @@ static void vhost_vq_reset(struct vhost_dev *dev, vq->memory = NULL; vq->is_le = virtio_legacy_is_little_endian(); vhost_vq_reset_user_be(vq); + vq->busyloop_timeout = 0; } static int vhost_worker(void *data) @@ -747,6 +748,7 @@ long vhost_vring_ioctl(struct vhost_dev *d, int ioctl, void __user *argp) struct vhost_vring_state s;
[PATCH V2 3/3] vhost_net: basic polling support
This patch tries to poll for new added tx buffer or socket receive queue for a while at the end of tx/rx processing. The maximum time spent on polling were specified through a new kind of vring ioctl. Signed-off-by: Jason Wang <jasowang@redhat.com> --- drivers/vhost/net.c | 72 ++ drivers/vhost/vhost.c | 15 ++ drivers/vhost/vhost.h | 1 + include/uapi/linux/vhost.h | 11 +++ 4 files changed, 94 insertions(+), 5 deletions(-) diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index 9eda69e..ce6da77 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -287,6 +287,41 @@ static void vhost_zerocopy_callback(struct ubuf_info *ubuf, bool success) rcu_read_unlock_bh(); } +static inline unsigned long busy_clock(void) +{ + return local_clock() >> 10; +} + +static bool vhost_can_busy_poll(struct vhost_dev *dev, + unsigned long endtime) +{ + return likely(!need_resched()) && + likely(!time_after(busy_clock(), endtime)) && + likely(!signal_pending(current)) && + !vhost_has_work(dev) && + single_task_running(); +} + +static int vhost_net_tx_get_vq_desc(struct vhost_net *net, + struct vhost_virtqueue *vq, + struct iovec iov[], unsigned int iov_size, + unsigned int *out_num, unsigned int *in_num) +{ + unsigned long uninitialized_var(endtime); + + if (vq->busyloop_timeout) { + preempt_disable(); + endtime = busy_clock() + vq->busyloop_timeout; + while (vhost_can_busy_poll(vq->dev, endtime) && + !vhost_vq_more_avail(vq->dev, vq)) + cpu_relax(); + preempt_enable(); + } + + return vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov), + out_num, in_num, NULL, NULL); +} + /* Expects to be always run from workqueue - which acts as * read-size critical section for our kind of RCU.
 */ static void handle_tx(struct vhost_net *net) @@ -331,10 +366,9 @@ static void handle_tx(struct vhost_net *net) % UIO_MAXIOV == nvq->done_idx)) break; - head = vhost_get_vq_desc(vq, vq->iov, - ARRAY_SIZE(vq->iov), - &out, &in, - NULL, NULL); + head = vhost_net_tx_get_vq_desc(net, vq, vq->iov, + ARRAY_SIZE(vq->iov), + &out, &in); /* On error, stop handling until the next kick. */ if (unlikely(head < 0)) break; @@ -435,6 +469,34 @@ static int peek_head_len(struct sock *sk) return len; } +static int vhost_net_peek_head_len(struct vhost_net *net, struct sock *sk) +{ + struct vhost_net_virtqueue *nvq = &net->vqs[VHOST_NET_VQ_TX]; + struct vhost_virtqueue *vq = &nvq->vq; + unsigned long uninitialized_var(endtime); + + if (vq->busyloop_timeout) { + mutex_lock(&vq->mutex); + vhost_disable_notify(&net->dev, vq); + + preempt_disable(); + endtime = busy_clock() + vq->busyloop_timeout; + + while (vhost_can_busy_poll(&net->dev, endtime) && + skb_queue_empty(&sk->sk_receive_queue) && + !vhost_vq_more_avail(&net->dev, vq)) + cpu_relax(); + + preempt_enable(); + + if (vhost_enable_notify(&net->dev, vq)) + vhost_poll_queue(&vq->poll); + mutex_unlock(&vq->mutex); + } + + return peek_head_len(sk); +} + /* This is a multi-buffer version of vhost_get_desc, that works if * vq has read descriptors only.
* @vq - the relevant virtqueue @@ -553,7 +615,7 @@ static void handle_rx(struct vhost_net *net) vq->log : NULL; mergeable = vhost_has_feature(vq, VIRTIO_NET_F_MRG_RXBUF); - while ((sock_len = peek_head_len(sock->sk))) { + while ((sock_len = vhost_net_peek_head_len(net, sock->sk))) { sock_len += sock_hlen; vhost_len = sock_len + vhost_hlen; headcount = get_rx_bufs(vq, vq->heads, vhost_len, diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 4f45a03..b8ca873 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -285,6 +285,7 @@ static void vhost_vq_reset(struct vhost_dev *dev, vq->memory = NULL; vq->is_le = virtio_legacy_is_little_endian(); vhost_vq_reset_user_be(vq); + vq->busyloop_timeout = 0; } static int vhost_worker(void *data) @@ -747,6 +748,7 @@ long vhost_vring_ioctl(struct vhost_dev *d, int ioctl, void __user *argp) struct