Re: [PATCH v2 10/13] xen/pvcalls: implement poll command

2017-07-26 Thread Stefano Stabellini
On Wed, 26 Jul 2017, Boris Ostrovsky wrote:
> On 07/25/2017 05:22 PM, Stefano Stabellini wrote:
> > For active sockets, check the indexes and use the inflight_conn_req
> > waitqueue to wait.
> >
> > For passive sockets, send PVCALLS_POLL to the backend. Use the
> > inflight_accept_req waitqueue if an accept is outstanding. Otherwise use
> > the inflight_req waitqueue: inflight_req is woken up when a new response
> > is received; on wakeup we check whether the POLL response has arrived by
> > looking at the PVCALLS_FLAG_POLL_RET flag. We set the flag from
> > pvcalls_front_event_handler, if the response was for a POLL command.
> >
> > In pvcalls_front_event_handler, get the struct socket pointer from the
> > poll id (we previously converted struct socket* to uint64_t and used it
> > as id).
> >
> > Signed-off-by: Stefano Stabellini 
> > CC: boris.ostrov...@oracle.com
> > CC: jgr...@suse.com
> > ---
> >  drivers/xen/pvcalls-front.c | 134 
> > 
> >  drivers/xen/pvcalls-front.h |   3 +
> >  2 files changed, 126 insertions(+), 11 deletions(-)
> >
> > diff --git a/drivers/xen/pvcalls-front.c b/drivers/xen/pvcalls-front.c
> > index b4ca569..833b717 100644
> > --- a/drivers/xen/pvcalls-front.c
> > +++ b/drivers/xen/pvcalls-front.c
> > @@ -130,17 +130,35 @@ static irqreturn_t pvcalls_front_event_handler(int 
> > irq, void *dev_id)
> > rsp = RING_GET_RESPONSE(&bedata->ring, bedata->ring.rsp_cons);
> >  
> > req_id = rsp->req_id;
> > -   src = (uint8_t *)&bedata->rsp[req_id];
> > -   src += sizeof(rsp->req_id);
> > -   dst = (uint8_t *)rsp;
> > -   dst += sizeof(rsp->req_id);
> > -   memcpy(dst, src, sizeof(*rsp) - sizeof(rsp->req_id));
> > -   /*
> > -* First copy the rest of the data, then req_id. It is
> > -* paired with the barrier when accessing bedata->rsp.
> > -*/
> > -   smp_wmb();
> > -   WRITE_ONCE(bedata->rsp[req_id].req_id, rsp->req_id);
> > +   if (rsp->cmd == PVCALLS_POLL) {
> > +   struct socket *sock = (struct socket *) rsp->u.poll.id;
> > +   struct sock_mapping *map =
> > +   (struct sock_mapping *)
> > +   READ_ONCE(sock->sk->sk_send_head);
> > +
> > +   set_bit(PVCALLS_FLAG_POLL_RET,
> > +   (void *)&map->passive.flags);
> > +   /*
> > +* Set RET, then clear INFLIGHT. It pairs with
> > +* the checks at the beginning of
> > +* pvcalls_front_poll_passive.
> > +*/
> > +   smp_wmb();
> 
> virt_wmb() (here, below, and I think I saw it somewhere else)

This smp_wmb is a barrier internal to the frontend; it doesn't
synchronize with the backend, only within the frontend itself.
A uniprocessor frontend wouldn't actually need this barrier. I admit it
is a bit confusing, so I'll add a comment about this.


> > +   clear_bit(PVCALLS_FLAG_POLL_INFLIGHT,
> > + (void *)&map->passive.flags);
> > +   } else {
> > +   src = (uint8_t *)&bedata->rsp[req_id];
> > +   src += sizeof(rsp->req_id);
> > +   dst = (uint8_t *)rsp;
> > +   dst += sizeof(rsp->req_id);
> > +   memcpy(dst, src, sizeof(*rsp) - sizeof(rsp->req_id));
> > +   /*
> > +* First copy the rest of the data, then req_id. It is
> > +* paired with the barrier when accessing bedata->rsp.
> > +*/
> > +   smp_wmb();
> > +   WRITE_ONCE(bedata->rsp[req_id].req_id, rsp->req_id);
> > +   }
> >  
> > done = 1;
> > bedata->ring.rsp_cons++;
> > @@ -707,6 +725,100 @@ int pvcalls_front_accept(struct socket *sock, struct 
> > socket *newsock, int flags)
> > return ret;
> >  }
> >  
> > +static unsigned int pvcalls_front_poll_passive(struct file *file,
> > +  struct pvcalls_bedata *bedata,
> > +  struct sock_mapping *map,
> > +  poll_table *wait)
> > +{
> > +   int notify, req_id;
> > +   struct xen_pvcalls_request *req;
> > +
> > +   if (test_bit(PVCALLS_FLAG_ACCEPT_INFLIGHT,
> > +(void *)&map->passive.flags)) {
> > +   poll_wait(file, &map->passive.inflight_accept_req, wait);
> > +   return 0;
> > +   }
> > +
> > +   if (test_and_clear_bit(PVCALLS_FLAG_POLL_RET,
> > +  (void *)&map->passive.flags))
> > +   return POLLIN;
> > +
> > +   /*
> > +* First check RET, then INFLIGHT. No barriers necessary to
> > +* ensure execution ordering because of the conditional
> > +* instructions creating control depende

Re: [PATCH v2 10/13] xen/pvcalls: implement poll command

2017-07-26 Thread Boris Ostrovsky
On 07/25/2017 05:22 PM, Stefano Stabellini wrote:
> For active sockets, check the indexes and use the inflight_conn_req
> waitqueue to wait.
>
> For passive sockets, send PVCALLS_POLL to the backend. Use the
> inflight_accept_req waitqueue if an accept is outstanding. Otherwise use
> the inflight_req waitqueue: inflight_req is woken up when a new response
> is received; on wakeup we check whether the POLL response has arrived by
> looking at the PVCALLS_FLAG_POLL_RET flag. We set the flag from
> pvcalls_front_event_handler, if the response was for a POLL command.
>
> In pvcalls_front_event_handler, get the struct socket pointer from the
> poll id (we previously converted struct socket* to uint64_t and used it
> as id).
>
> Signed-off-by: Stefano Stabellini 
> CC: boris.ostrov...@oracle.com
> CC: jgr...@suse.com
> ---
>  drivers/xen/pvcalls-front.c | 134 
> 
>  drivers/xen/pvcalls-front.h |   3 +
>  2 files changed, 126 insertions(+), 11 deletions(-)
>
> diff --git a/drivers/xen/pvcalls-front.c b/drivers/xen/pvcalls-front.c
> index b4ca569..833b717 100644
> --- a/drivers/xen/pvcalls-front.c
> +++ b/drivers/xen/pvcalls-front.c
> @@ -130,17 +130,35 @@ static irqreturn_t pvcalls_front_event_handler(int irq, 
> void *dev_id)
>   rsp = RING_GET_RESPONSE(&bedata->ring, bedata->ring.rsp_cons);
>  
>   req_id = rsp->req_id;
> - src = (uint8_t *)&bedata->rsp[req_id];
> - src += sizeof(rsp->req_id);
> - dst = (uint8_t *)rsp;
> - dst += sizeof(rsp->req_id);
> - memcpy(dst, src, sizeof(*rsp) - sizeof(rsp->req_id));
> - /*
> -  * First copy the rest of the data, then req_id. It is
> -  * paired with the barrier when accessing bedata->rsp.
> -  */
> - smp_wmb();
> - WRITE_ONCE(bedata->rsp[req_id].req_id, rsp->req_id);
> + if (rsp->cmd == PVCALLS_POLL) {
> + struct socket *sock = (struct socket *) rsp->u.poll.id;
> + struct sock_mapping *map =
> + (struct sock_mapping *)
> + READ_ONCE(sock->sk->sk_send_head);
> +
> + set_bit(PVCALLS_FLAG_POLL_RET,
> + (void *)&map->passive.flags);
> + /*
> +  * Set RET, then clear INFLIGHT. It pairs with
> +  * the checks at the beginning of
> +  * pvcalls_front_poll_passive.
> +  */
> + smp_wmb();

virt_wmb() (here, below, and I think I saw it somewhere else)

> + clear_bit(PVCALLS_FLAG_POLL_INFLIGHT,
> +   (void *)&map->passive.flags);
> + } else {
> + src = (uint8_t *)&bedata->rsp[req_id];
> + src += sizeof(rsp->req_id);
> + dst = (uint8_t *)rsp;
> + dst += sizeof(rsp->req_id);
> + memcpy(dst, src, sizeof(*rsp) - sizeof(rsp->req_id));
> + /*
> +  * First copy the rest of the data, then req_id. It is
> +  * paired with the barrier when accessing bedata->rsp.
> +  */
> + smp_wmb();
> + WRITE_ONCE(bedata->rsp[req_id].req_id, rsp->req_id);
> + }
>  
>   done = 1;
>   bedata->ring.rsp_cons++;
> @@ -707,6 +725,100 @@ int pvcalls_front_accept(struct socket *sock, struct 
> socket *newsock, int flags)
>   return ret;
>  }
>  
> +static unsigned int pvcalls_front_poll_passive(struct file *file,
> +struct pvcalls_bedata *bedata,
> +struct sock_mapping *map,
> +poll_table *wait)
> +{
> + int notify, req_id;
> + struct xen_pvcalls_request *req;
> +
> + if (test_bit(PVCALLS_FLAG_ACCEPT_INFLIGHT,
> +  (void *)&map->passive.flags)) {
> + poll_wait(file, &map->passive.inflight_accept_req, wait);
> + return 0;
> + }
> +
> + if (test_and_clear_bit(PVCALLS_FLAG_POLL_RET,
> +(void *)&map->passive.flags))
> + return POLLIN;
> +
> + /*
> +  * First check RET, then INFLIGHT. No barriers necessary to
> +  * ensure execution ordering because of the conditional
> +  * instructions creating control dependencies.
> +  */
> +
> + if (test_and_set_bit(PVCALLS_FLAG_POLL_INFLIGHT,
> +  (void *)&map->passive.flags)) {
> + poll_wait(file, &bedata->inflight_req, wait);
> + return 0;
> + }
> +
> + spin_lock(&bedata->pvcallss_lock);
> + req_id = bedata->ring.req_prod_pvt & (RING_SIZE(&bedata->ring) - 1);
> + if (RING_FULL(&bedata-

[PATCH v2 10/13] xen/pvcalls: implement poll command

2017-07-25 Thread Stefano Stabellini
For active sockets, check the indexes and use the inflight_conn_req
waitqueue to wait.

For passive sockets, send PVCALLS_POLL to the backend. Use the
inflight_accept_req waitqueue if an accept is outstanding. Otherwise use
the inflight_req waitqueue: inflight_req is woken up when a new response
is received; on wakeup we check whether the POLL response has arrived by
looking at the PVCALLS_FLAG_POLL_RET flag. We set the flag from
pvcalls_front_event_handler, if the response was for a POLL command.

In pvcalls_front_event_handler, get the struct socket pointer from the
poll id (we previously converted struct socket* to uint64_t and used it
as id).

Signed-off-by: Stefano Stabellini 
CC: boris.ostrov...@oracle.com
CC: jgr...@suse.com
---
 drivers/xen/pvcalls-front.c | 134 
 drivers/xen/pvcalls-front.h |   3 +
 2 files changed, 126 insertions(+), 11 deletions(-)

diff --git a/drivers/xen/pvcalls-front.c b/drivers/xen/pvcalls-front.c
index b4ca569..833b717 100644
--- a/drivers/xen/pvcalls-front.c
+++ b/drivers/xen/pvcalls-front.c
@@ -130,17 +130,35 @@ static irqreturn_t pvcalls_front_event_handler(int irq, 
void *dev_id)
rsp = RING_GET_RESPONSE(&bedata->ring, bedata->ring.rsp_cons);
 
req_id = rsp->req_id;
-   src = (uint8_t *)&bedata->rsp[req_id];
-   src += sizeof(rsp->req_id);
-   dst = (uint8_t *)rsp;
-   dst += sizeof(rsp->req_id);
-   memcpy(dst, src, sizeof(*rsp) - sizeof(rsp->req_id));
-   /*
-* First copy the rest of the data, then req_id. It is
-* paired with the barrier when accessing bedata->rsp.
-*/
-   smp_wmb();
-   WRITE_ONCE(bedata->rsp[req_id].req_id, rsp->req_id);
+   if (rsp->cmd == PVCALLS_POLL) {
+   struct socket *sock = (struct socket *) rsp->u.poll.id;
+   struct sock_mapping *map =
+   (struct sock_mapping *)
+   READ_ONCE(sock->sk->sk_send_head);
+
+   set_bit(PVCALLS_FLAG_POLL_RET,
+   (void *)&map->passive.flags);
+   /*
+* Set RET, then clear INFLIGHT. It pairs with
+* the checks at the beginning of
+* pvcalls_front_poll_passive.
+*/
+   smp_wmb();
+   clear_bit(PVCALLS_FLAG_POLL_INFLIGHT,
+ (void *)&map->passive.flags);
+   } else {
+   src = (uint8_t *)&bedata->rsp[req_id];
+   src += sizeof(rsp->req_id);
+   dst = (uint8_t *)rsp;
+   dst += sizeof(rsp->req_id);
+   memcpy(dst, src, sizeof(*rsp) - sizeof(rsp->req_id));
+   /*
+* First copy the rest of the data, then req_id. It is
+* paired with the barrier when accessing bedata->rsp.
+*/
+   smp_wmb();
+   WRITE_ONCE(bedata->rsp[req_id].req_id, rsp->req_id);
+   }
 
done = 1;
bedata->ring.rsp_cons++;
@@ -707,6 +725,100 @@ int pvcalls_front_accept(struct socket *sock, struct 
socket *newsock, int flags)
return ret;
 }
 
+static unsigned int pvcalls_front_poll_passive(struct file *file,
+  struct pvcalls_bedata *bedata,
+  struct sock_mapping *map,
+  poll_table *wait)
+{
+   int notify, req_id;
+   struct xen_pvcalls_request *req;
+
+   if (test_bit(PVCALLS_FLAG_ACCEPT_INFLIGHT,
+(void *)&map->passive.flags)) {
+   poll_wait(file, &map->passive.inflight_accept_req, wait);
+   return 0;
+   }
+
+   if (test_and_clear_bit(PVCALLS_FLAG_POLL_RET,
+  (void *)&map->passive.flags))
+   return POLLIN;
+
+   /*
+* First check RET, then INFLIGHT. No barriers necessary to
+* ensure execution ordering because of the conditional
+* instructions creating control dependencies.
+*/
+
+   if (test_and_set_bit(PVCALLS_FLAG_POLL_INFLIGHT,
+(void *)&map->passive.flags)) {
+   poll_wait(file, &bedata->inflight_req, wait);
+   return 0;
+   }
+
+   spin_lock(&bedata->pvcallss_lock);
+   req_id = bedata->ring.req_prod_pvt & (RING_SIZE(&bedata->ring) - 1);
+   if (RING_FULL(&bedata->ring) ||
+   READ_ONCE(bedata->rsp[req_id].req_id) != PVCALLS_INVALID_ID) {
+   spin_unlock(&bedata->pvcallss_lock);
+   return -EAGAIN;
+   }
+   req = RIN