Send PVCALLS_BIND to the backend. Introduce a new structure, part of struct sock_mapping, to store information specific to passive sockets.
Introduce a status field to keep track of the status of the passive socket. Introduce a waitqueue for the "accept" command (see the accept command implementation): it is used to allow only one outstanding accept command at any given time and to implement polling on the passive socket. Introduce a flags field to keep track of in-flight accept and poll commands. sock->sk->sk_send_head is not used for ip sockets: reuse the field to store a pointer to the struct sock_mapping corresponding to the socket. Convert the struct socket pointer into an uint64_t and use it as id for the socket to pass to the backend. Signed-off-by: Stefano Stabellini <stef...@aporeto.com> CC: boris.ostrov...@oracle.com CC: jgr...@suse.com --- drivers/xen/pvcalls-front.c | 74 +++++++++++++++++++++++++++++++++++++++++++++ drivers/xen/pvcalls-front.h | 3 ++ 2 files changed, 77 insertions(+) diff --git a/drivers/xen/pvcalls-front.c b/drivers/xen/pvcalls-front.c index 0d305e0..71619bc 100644 --- a/drivers/xen/pvcalls-front.c +++ b/drivers/xen/pvcalls-front.c @@ -57,6 +57,23 @@ struct sock_mapping { wait_queue_head_t inflight_conn_req; } active; + struct { + /* Socket status */ +#define PVCALLS_STATUS_UNINITALIZED 0 +#define PVCALLS_STATUS_BIND 1 +#define PVCALLS_STATUS_LISTEN 2 + uint8_t status; + /* + * Internal state-machine flags. + * Only one accept operation can be inflight for a socket. + * Only one poll operation can be inflight for a given socket. + */ +#define PVCALLS_FLAG_ACCEPT_INFLIGHT 0 +#define PVCALLS_FLAG_POLL_INFLIGHT 1 +#define PVCALLS_FLAG_POLL_RET 2 + uint8_t flags; + wait_queue_head_t inflight_accept_req; + } passive; }; }; @@ -287,6 +304,63 @@ int pvcalls_front_connect(struct socket *sock, struct sockaddr *addr, return ret; } +int pvcalls_front_bind(struct socket *sock, struct sockaddr *addr, int addr_len) +{ + struct pvcalls_bedata *bedata; + struct sock_mapping *map = NULL; + struct xen_pvcalls_request *req; + int notify, req_id, ret; + + if (!pvcalls_front_dev) + return -ENOTCONN; + if (addr->sa_family != AF_INET || sock->type != SOCK_STREAM) + return -ENOTSUPP; + bedata = dev_get_drvdata(&pvcalls_front_dev->dev); + + map = kzalloc(sizeof(*map), GFP_KERNEL); + if (map == NULL) + return -ENOMEM; + + spin_lock(&bedata->pvcallss_lock); + req_id = bedata->ring.req_prod_pvt & (RING_SIZE(&bedata->ring) - 1); + BUG_ON(req_id >= PVCALLS_NR_REQ_PER_RING); + if (RING_FULL(&bedata->ring) || + READ_ONCE(bedata->rsp[req_id].req_id) != PVCALLS_INVALID_ID) { + kfree(map); + spin_unlock(&bedata->pvcallss_lock); + return -EAGAIN; + } + req = RING_GET_REQUEST(&bedata->ring, req_id); + req->req_id = req_id; + map->sock = sock; + req->cmd = PVCALLS_BIND; + req->u.bind.id = (uint64_t) sock; + memcpy(req->u.bind.addr, addr, sizeof(*addr)); + req->u.bind.len = addr_len; + + init_waitqueue_head(&map->passive.inflight_accept_req); + + list_add_tail(&map->list, &bedata->socketpass_mappings); + WRITE_ONCE(sock->sk->sk_send_head, (void *)map); + map->active_socket = false; + + bedata->ring.req_prod_pvt++; + RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&bedata->ring, notify); + spin_unlock(&bedata->pvcallss_lock); + if (notify) + notify_remote_via_irq(bedata->irq); + + wait_event(bedata->inflight_req, + READ_ONCE(bedata->rsp[req_id].req_id) == req_id); + + map->passive.status = PVCALLS_STATUS_BIND; + ret = bedata->rsp[req_id].ret; + /* read ret, then set this rsp slot to be reused */ + smp_mb(); + WRITE_ONCE(bedata->rsp[req_id].req_id, PVCALLS_INVALID_ID); + return 0; +} + static const struct xenbus_device_id pvcalls_front_ids[] = { { "pvcalls" }, { "" } diff --git a/drivers/xen/pvcalls-front.h b/drivers/xen/pvcalls-front.h index 63b0417..8b0a274 100644 --- a/drivers/xen/pvcalls-front.h +++ b/drivers/xen/pvcalls-front.h @@ -6,5 +6,8 @@ int pvcalls_front_socket(struct socket *sock); int pvcalls_front_connect(struct socket *sock, struct sockaddr *addr, int addr_len, int flags); +int pvcalls_front_bind(struct socket *sock, + struct sockaddr *addr, + int addr_len); #endif -- 1.9.1