Re: [PATCH v2 10/13] xen/pvcalls: implement poll command
On Wed, 26 Jul 2017, Boris Ostrovsky wrote:
> On 07/25/2017 05:22 PM, Stefano Stabellini wrote:
> > For active sockets, check the indexes and use the inflight_conn_req
> > waitqueue to wait.
> >
> > For passive sockets, send PVCALLS_POLL to the backend. Use the
> > inflight_accept_req waitqueue if an accept is outstanding. Otherwise use
> > the inflight_req waitqueue: inflight_req is woken up when a new response
> > is received; on wakeup we check whether the POLL response has arrived by
> > looking at the PVCALLS_FLAG_POLL_RET flag. We set the flag from
> > pvcalls_front_event_handler, if the response was for a POLL command.
> >
> > In pvcalls_front_event_handler, get the struct socket pointer from the
> > poll id (we previously converted struct socket* to uint64_t and used it
> > as id).
> >
> > Signed-off-by: Stefano Stabellini
> > CC: boris.ostrov...@oracle.com
> > CC: jgr...@suse.com
> > ---
> >  drivers/xen/pvcalls-front.c | 134
> >  drivers/xen/pvcalls-front.h |   3 +
> >  2 files changed, 126 insertions(+), 11 deletions(-)
> >
> > diff --git a/drivers/xen/pvcalls-front.c b/drivers/xen/pvcalls-front.c
> > index b4ca569..833b717 100644
> > --- a/drivers/xen/pvcalls-front.c
> > +++ b/drivers/xen/pvcalls-front.c
> > @@ -130,17 +130,35 @@ static irqreturn_t pvcalls_front_event_handler(int irq, void *dev_id)
> >  	rsp = RING_GET_RESPONSE(&bedata->ring, bedata->ring.rsp_cons);
> >
> >  	req_id = rsp->req_id;
> > -	src = (uint8_t *)&bedata->rsp[req_id];
> > -	src += sizeof(rsp->req_id);
> > -	dst = (uint8_t *)rsp;
> > -	dst += sizeof(rsp->req_id);
> > -	memcpy(dst, src, sizeof(*rsp) - sizeof(rsp->req_id));
> > -	/*
> > -	 * First copy the rest of the data, then req_id. It is
> > -	 * paired with the barrier when accessing bedata->rsp.
> > -	 */
> > -	smp_wmb();
> > -	WRITE_ONCE(bedata->rsp[req_id].req_id, rsp->req_id);
> > +	if (rsp->cmd == PVCALLS_POLL) {
> > +		struct socket *sock = (struct socket *) rsp->u.poll.id;
> > +		struct sock_mapping *map =
> > +			(struct sock_mapping *)
> > +			READ_ONCE(sock->sk->sk_send_head);
> > +
> > +		set_bit(PVCALLS_FLAG_POLL_RET,
> > +			(void *)&map->passive.flags);
> > +		/*
> > +		 * Set RET, then clear INFLIGHT. It pairs with
> > +		 * the checks at the beginning of
> > +		 * pvcalls_front_poll_passive.
> > +		 */
> > +		smp_wmb();

> virt_wmb() (here, below, and I think I saw it somewhere else)

This smp_wmb is a barrier internal to the frontend: it doesn't
synchronize with the backend, it only orders these two flag updates
with respect to the other frontend vcpus. A uniprocessor frontend
wouldn't actually need it. I admit it is a bit confusing; I'll add a
comment about this. (A minimal sketch of the pairing follows at the
end of this mail.)

> > +		clear_bit(PVCALLS_FLAG_POLL_INFLIGHT,
> > +			  (void *)&map->passive.flags);
> > +	} else {
> > +		src = (uint8_t *)&bedata->rsp[req_id];
> > +		src += sizeof(rsp->req_id);
> > +		dst = (uint8_t *)rsp;
> > +		dst += sizeof(rsp->req_id);
> > +		memcpy(dst, src, sizeof(*rsp) - sizeof(rsp->req_id));
> > +		/*
> > +		 * First copy the rest of the data, then req_id. It is
> > +		 * paired with the barrier when accessing bedata->rsp.
> > +		 */
> > +		smp_wmb();
> > +		WRITE_ONCE(bedata->rsp[req_id].req_id, rsp->req_id);
> > +	}
> >
> >  	done = 1;
> >  	bedata->ring.rsp_cons++;
> > @@ -707,6 +725,100 @@ int pvcalls_front_accept(struct socket *sock, struct socket *newsock, int flags)
> >  	return ret;
> >  }
> >
> > +static unsigned int pvcalls_front_poll_passive(struct file *file,
> > +					       struct pvcalls_bedata *bedata,
> > +					       struct sock_mapping *map,
> > +					       poll_table *wait)
> > +{
> > +	int notify, req_id;
> > +	struct xen_pvcalls_request *req;
> > +
> > +	if (test_bit(PVCALLS_FLAG_ACCEPT_INFLIGHT,
> > +		     (void *)&map->passive.flags)) {
> > +		poll_wait(file, &map->passive.inflight_accept_req, wait);
> > +		return 0;
> > +	}
> > +
> > +	if (test_and_clear_bit(PVCALLS_FLAG_POLL_RET,
> > +			       (void *)&map->passive.flags))
> > +		return POLLIN;
> > +
> > +	/*
> > +	 * First check RET, then INFLIGHT. No barriers necessary to
> > +	 * ensure execution ordering because of the conditional
> > +	 * instructions creating control dependencies.
> > +	 */
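To make the pairing concrete, here is a minimal sketch of the two
sides (frontend-only; the wakeups, locking and actual ring traffic are
left out, so this is illustrative rather than the exact patch code):

    /* Writer, in pvcalls_front_event_handler(), on one frontend vcpu: */
    set_bit(PVCALLS_FLAG_POLL_RET, (void *)&map->passive.flags);
    smp_wmb();		/* publish RET before clearing INFLIGHT */
    clear_bit(PVCALLS_FLAG_POLL_INFLIGHT, (void *)&map->passive.flags);

    /* Reader, in pvcalls_front_poll_passive(), on another vcpu: */
    if (test_and_clear_bit(PVCALLS_FLAG_POLL_RET,
			   (void *)&map->passive.flags))
	    return POLLIN;	/* a POLL response already arrived */
    /* The conditional above creates a control dependency, so no
     * explicit barrier is needed between the two flag checks. */
    if (test_and_set_bit(PVCALLS_FLAG_POLL_INFLIGHT,
			 (void *)&map->passive.flags)) {
	    poll_wait(file, &bedata->inflight_req, wait);
	    return 0;		/* a POLL request is still in flight */
    }
    /* otherwise we own INFLIGHT and may issue a new PVCALLS_POLL */

Ordering against the backend is a different matter: that goes through
the shared ring and needs virt_wmb(), which stays a real barrier on
!CONFIG_SMP kernels, where smp_wmb() degrades to a plain compiler
barrier.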
Re: [PATCH v2 10/13] xen/pvcalls: implement poll command
On 07/25/2017 05:22 PM, Stefano Stabellini wrote:
> For active sockets, check the indexes and use the inflight_conn_req
> waitqueue to wait.
>
> For passive sockets, send PVCALLS_POLL to the backend. Use the
> inflight_accept_req waitqueue if an accept is outstanding. Otherwise use
> the inflight_req waitqueue: inflight_req is woken up when a new response
> is received; on wakeup we check whether the POLL response has arrived by
> looking at the PVCALLS_FLAG_POLL_RET flag. We set the flag from
> pvcalls_front_event_handler, if the response was for a POLL command.
>
> In pvcalls_front_event_handler, get the struct socket pointer from the
> poll id (we previously converted struct socket* to uint64_t and used it
> as id).
>
> Signed-off-by: Stefano Stabellini
> CC: boris.ostrov...@oracle.com
> CC: jgr...@suse.com
> ---
>  drivers/xen/pvcalls-front.c | 134
>  drivers/xen/pvcalls-front.h |   3 +
>  2 files changed, 126 insertions(+), 11 deletions(-)
>
> diff --git a/drivers/xen/pvcalls-front.c b/drivers/xen/pvcalls-front.c
> index b4ca569..833b717 100644
> --- a/drivers/xen/pvcalls-front.c
> +++ b/drivers/xen/pvcalls-front.c
> @@ -130,17 +130,35 @@ static irqreturn_t pvcalls_front_event_handler(int irq, void *dev_id)
>  	rsp = RING_GET_RESPONSE(&bedata->ring, bedata->ring.rsp_cons);
>
>  	req_id = rsp->req_id;
> -	src = (uint8_t *)&bedata->rsp[req_id];
> -	src += sizeof(rsp->req_id);
> -	dst = (uint8_t *)rsp;
> -	dst += sizeof(rsp->req_id);
> -	memcpy(dst, src, sizeof(*rsp) - sizeof(rsp->req_id));
> -	/*
> -	 * First copy the rest of the data, then req_id. It is
> -	 * paired with the barrier when accessing bedata->rsp.
> -	 */
> -	smp_wmb();
> -	WRITE_ONCE(bedata->rsp[req_id].req_id, rsp->req_id);
> +	if (rsp->cmd == PVCALLS_POLL) {
> +		struct socket *sock = (struct socket *) rsp->u.poll.id;
> +		struct sock_mapping *map =
> +			(struct sock_mapping *)
> +			READ_ONCE(sock->sk->sk_send_head);
> +
> +		set_bit(PVCALLS_FLAG_POLL_RET,
> +			(void *)&map->passive.flags);
> +		/*
> +		 * Set RET, then clear INFLIGHT. It pairs with
> +		 * the checks at the beginning of
> +		 * pvcalls_front_poll_passive.
> +		 */
> +		smp_wmb();

virt_wmb() (here, below, and I think I saw it somewhere else)

> +		clear_bit(PVCALLS_FLAG_POLL_INFLIGHT,
> +			  (void *)&map->passive.flags);
> +	} else {
> +		src = (uint8_t *)&bedata->rsp[req_id];
> +		src += sizeof(rsp->req_id);
> +		dst = (uint8_t *)rsp;
> +		dst += sizeof(rsp->req_id);
> +		memcpy(dst, src, sizeof(*rsp) - sizeof(rsp->req_id));
> +		/*
> +		 * First copy the rest of the data, then req_id. It is
> +		 * paired with the barrier when accessing bedata->rsp.
> +		 */
> +		smp_wmb();
> +		WRITE_ONCE(bedata->rsp[req_id].req_id, rsp->req_id);
> +	}
>
>  	done = 1;
>  	bedata->ring.rsp_cons++;
> @@ -707,6 +725,100 @@ int pvcalls_front_accept(struct socket *sock, struct socket *newsock, int flags)
>  	return ret;
>  }
>
> +static unsigned int pvcalls_front_poll_passive(struct file *file,
> +					       struct pvcalls_bedata *bedata,
> +					       struct sock_mapping *map,
> +					       poll_table *wait)
> +{
> +	int notify, req_id;
> +	struct xen_pvcalls_request *req;
> +
> +	if (test_bit(PVCALLS_FLAG_ACCEPT_INFLIGHT,
> +		     (void *)&map->passive.flags)) {
> +		poll_wait(file, &map->passive.inflight_accept_req, wait);
> +		return 0;
> +	}
> +
> +	if (test_and_clear_bit(PVCALLS_FLAG_POLL_RET,
> +			       (void *)&map->passive.flags))
> +		return POLLIN;
> +
> +	/*
> +	 * First check RET, then INFLIGHT. No barriers necessary to
> +	 * ensure execution ordering because of the conditional
> +	 * instructions creating control dependencies.
> +	 */
> +
> +	if (test_and_set_bit(PVCALLS_FLAG_POLL_INFLIGHT,
> +			     (void *)&map->passive.flags)) {
> +		poll_wait(file, &bedata->inflight_req, wait);
> +		return 0;
> +	}
> +
> +	spin_lock(&bedata->pvcallss_lock);
> +	req_id = bedata->ring.req_prod_pvt & (RING_SIZE(&bedata->ring) - 1);
> +	if (RING_FULL(&bedata->ring) ||
> +	    READ_ONCE(bedata->rsp[req_id].req_id) != PVCALLS_INVALID_ID) {
> +		spin_unlock(&bedata->pvcallss_lock);
> +		return -EAGAIN;
> +	}
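For reference, virt_wmb() is the barrier the Xen shared-ring macros use
when ordering must hold against the other domain rather than between
local vcpus. The usual request-push idiom looks like this (a generic
sketch, not code from this patch; the request field names mirror the
response handling quoted above, and bedata->irq is assumed):

    req = RING_GET_REQUEST(&bedata->ring, req_id);
    req->req_id = req_id;
    req->cmd = PVCALLS_POLL;
    req->u.poll.id = (uint64_t) sock;
    bedata->ring.req_prod_pvt++;
    /* RING_PUSH_REQUESTS_AND_CHECK_NOTIFY() issues virt_wmb() so the
     * backend sees the request body before it sees req_prod move. */
    RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&bedata->ring, notify);
    if (notify)
	    notify_remote_via_irq(bedata->irq);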
[PATCH v2 10/13] xen/pvcalls: implement poll command
For active sockets, check the indexes and use the inflight_conn_req
waitqueue to wait.

For passive sockets, send PVCALLS_POLL to the backend. Use the
inflight_accept_req waitqueue if an accept is outstanding. Otherwise use
the inflight_req waitqueue: inflight_req is woken up when a new response
is received; on wakeup we check whether the POLL response has arrived by
looking at the PVCALLS_FLAG_POLL_RET flag. We set the flag from
pvcalls_front_event_handler, if the response was for a POLL command.

In pvcalls_front_event_handler, get the struct socket pointer from the
poll id (we previously converted struct socket* to uint64_t and used it
as id).

Signed-off-by: Stefano Stabellini
CC: boris.ostrov...@oracle.com
CC: jgr...@suse.com
---
 drivers/xen/pvcalls-front.c | 134
 drivers/xen/pvcalls-front.h |   3 +
 2 files changed, 126 insertions(+), 11 deletions(-)

diff --git a/drivers/xen/pvcalls-front.c b/drivers/xen/pvcalls-front.c
index b4ca569..833b717 100644
--- a/drivers/xen/pvcalls-front.c
+++ b/drivers/xen/pvcalls-front.c
@@ -130,17 +130,35 @@ static irqreturn_t pvcalls_front_event_handler(int irq, void *dev_id)
 	rsp = RING_GET_RESPONSE(&bedata->ring, bedata->ring.rsp_cons);

 	req_id = rsp->req_id;
-	src = (uint8_t *)&bedata->rsp[req_id];
-	src += sizeof(rsp->req_id);
-	dst = (uint8_t *)rsp;
-	dst += sizeof(rsp->req_id);
-	memcpy(dst, src, sizeof(*rsp) - sizeof(rsp->req_id));
-	/*
-	 * First copy the rest of the data, then req_id. It is
-	 * paired with the barrier when accessing bedata->rsp.
-	 */
-	smp_wmb();
-	WRITE_ONCE(bedata->rsp[req_id].req_id, rsp->req_id);
+	if (rsp->cmd == PVCALLS_POLL) {
+		struct socket *sock = (struct socket *) rsp->u.poll.id;
+		struct sock_mapping *map =
+			(struct sock_mapping *)
+			READ_ONCE(sock->sk->sk_send_head);
+
+		set_bit(PVCALLS_FLAG_POLL_RET,
+			(void *)&map->passive.flags);
+		/*
+		 * Set RET, then clear INFLIGHT. It pairs with
+		 * the checks at the beginning of
+		 * pvcalls_front_poll_passive.
+		 */
+		smp_wmb();
+		clear_bit(PVCALLS_FLAG_POLL_INFLIGHT,
+			  (void *)&map->passive.flags);
+	} else {
+		src = (uint8_t *)&bedata->rsp[req_id];
+		src += sizeof(rsp->req_id);
+		dst = (uint8_t *)rsp;
+		dst += sizeof(rsp->req_id);
+		memcpy(dst, src, sizeof(*rsp) - sizeof(rsp->req_id));
+		/*
+		 * First copy the rest of the data, then req_id. It is
+		 * paired with the barrier when accessing bedata->rsp.
+		 */
+		smp_wmb();
+		WRITE_ONCE(bedata->rsp[req_id].req_id, rsp->req_id);
+	}

 	done = 1;
 	bedata->ring.rsp_cons++;
@@ -707,6 +725,100 @@ int pvcalls_front_accept(struct socket *sock, struct socket *newsock, int flags)
 	return ret;
 }

+static unsigned int pvcalls_front_poll_passive(struct file *file,
+					       struct pvcalls_bedata *bedata,
+					       struct sock_mapping *map,
+					       poll_table *wait)
+{
+	int notify, req_id;
+	struct xen_pvcalls_request *req;
+
+	if (test_bit(PVCALLS_FLAG_ACCEPT_INFLIGHT,
+		     (void *)&map->passive.flags)) {
+		poll_wait(file, &map->passive.inflight_accept_req, wait);
+		return 0;
+	}
+
+	if (test_and_clear_bit(PVCALLS_FLAG_POLL_RET,
+			       (void *)&map->passive.flags))
+		return POLLIN;
+
+	/*
+	 * First check RET, then INFLIGHT. No barriers necessary to
+	 * ensure execution ordering because of the conditional
+	 * instructions creating control dependencies.
+	 */
+
+	if (test_and_set_bit(PVCALLS_FLAG_POLL_INFLIGHT,
+			     (void *)&map->passive.flags)) {
+		poll_wait(file, &bedata->inflight_req, wait);
+		return 0;
+	}
+
+	spin_lock(&bedata->pvcallss_lock);
+	req_id = bedata->ring.req_prod_pvt & (RING_SIZE(&bedata->ring) - 1);
+	if (RING_FULL(&bedata->ring) ||
+	    READ_ONCE(bedata->rsp[req_id].req_id) != PVCALLS_INVALID_ID) {
+		spin_unlock(&bedata->pvcallss_lock);
+		return -EAGAIN;
+	}
+	req = RING_GET_REQUEST(&bedata->ring, req_id);
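The commit message describes a top-level entry point that picks the
active or passive path based on the sock_mapping stashed in
sk_send_head. Sketched from that description, it would look roughly
like the following (pvcalls_front_dev, the active_socket flag and
pvcalls_front_poll_active are assumptions inferred from the commit
message, not quoted from the patch):

    unsigned int pvcalls_front_poll(struct file *file, struct socket *sock,
				    poll_table *wait)
    {
	    struct pvcalls_bedata *bedata;
	    struct sock_mapping *map;

	    if (!pvcalls_front_dev)
		    return POLLNVAL;
	    bedata = dev_get_drvdata(&pvcalls_front_dev->dev);

	    /* the sock_mapping was stashed in sk_send_head at creation */
	    map = (struct sock_mapping *) READ_ONCE(sock->sk->sk_send_head);
	    if (!map)
		    return POLLNVAL;

	    if (map->active_socket)
		    return pvcalls_front_poll_active(file, bedata, map, wait);
	    else
		    return pvcalls_front_poll_passive(file, bedata, map, wait);
    }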