Il 22/10/2012 13:09, n...@bytemark.co.uk ha scritto: > diff --git a/block/nbd.c b/block/nbd.c > index 9536408..1caae89 100644 > --- a/block/nbd.c > +++ b/block/nbd.c > @@ -71,6 +71,9 @@ typedef struct BDRVNBDState { > char *host_spec; > } BDRVNBDState; > > +static int nbd_establish_connection(BDRVNBDState *s); > +static void nbd_teardown_connection(BDRVNBDState *s, bool send_disconnect); > + > static bool nbd_is_connected(BDRVNBDState *s) > { > return s->sock >= 0; > @@ -152,6 +155,20 @@ static int nbd_have_request(void *opaque) > return s->in_flight > 0; > } > > +static void nbd_abort_inflight_requests(BDRVNBDState *s) > +{ > + int i; > + Coroutine *co; > + > + s->reply.handle = 0; > + for (i = 0; i < MAX_NBD_REQUESTS; i++) { > + co = s->recv_coroutine[i]; > + if (co && co != qemu_coroutine_self()) { > + qemu_coroutine_enter(co, NULL); > + } > + } > +}
I think this is quite risky. Does it work if you shutdown(s->sock, 2) the socket, then wait for the number of pending requests (not just those in_flight---also those that are waiting) to become 0, and only then close it? (For the wait you can add another Coroutine * field to BDRVNBDState, and reenter it in nbd_coroutine_end if the number of pending requests becomes zero). > static void nbd_reply_ready(void *opaque) > { > BDRVNBDState *s = opaque; > @@ -168,8 +185,9 @@ static void nbd_reply_ready(void *opaque) > return; > } > if (ret < 0) { > - s->reply.handle = 0; > - goto fail; > + nbd_teardown_connection(s, false); > + nbd_abort_inflight_requests(s); This is also problematic because you first close the socket, which means that something else can grab the file descriptor. But the original file descriptor is in use (in qemu_co_recvv or qemu_co_sendv) until after nbd_abort_inflight_requests returns. So the correct order would be something like this: assert(nbd_is_connected(s)); shutdown(s->sock, 2); nbd_abort_inflight_requests(s); nbd_teardown_connection(s, false); where (if my theory is right) the shutdown should immediately cause the socket to become readable and writable. > + return; > } > } > > @@ -178,20 +196,15 @@ static void nbd_reply_ready(void *opaque) > * one coroutine is called until the reply finishes. 
*/ > i = HANDLE_TO_INDEX(s, s->reply.handle); > if (i >= MAX_NBD_REQUESTS) { > - goto fail; > + nbd_teardown_connection(s, false); > + nbd_abort_inflight_requests(s); > + return; > } > > if (s->recv_coroutine[i]) { > qemu_coroutine_enter(s->recv_coroutine[i], NULL); > return; > } > - > -fail: > - for (i = 0; i < MAX_NBD_REQUESTS; i++) { > - if (s->recv_coroutine[i]) { > - qemu_coroutine_enter(s->recv_coroutine[i], NULL); > - } > - } > } > > static void nbd_restart_write(void *opaque) > @@ -206,6 +219,13 @@ static int nbd_co_send_request(BDRVNBDState *s, struct > nbd_request *request, > int rc, ret; > > qemu_co_mutex_lock(&s->send_mutex); > + > + if (!nbd_is_connected(s) && nbd_establish_connection(s) != 0) { > + nbd_abort_inflight_requests(s); > + qemu_co_mutex_unlock(&s->send_mutex); > + return -EIO; Do you really need to abort all the requests, or only this one? Paolo > + } > + > s->send_coroutine = qemu_coroutine_self(); > qemu_aio_set_fd_handler(s->sock, nbd_reply_ready, nbd_restart_write, > nbd_have_request, s); > @@ -214,6 +234,9 @@ static int nbd_co_send_request(BDRVNBDState *s, struct > nbd_request *request, > ret = qemu_co_sendv(s->sock, qiov->iov, qiov->niov, > offset, request->len); > if (ret != request->len) { > + s->send_coroutine = NULL; > + nbd_teardown_connection(s, false); > + qemu_co_mutex_unlock(&s->send_mutex); > return -EIO; > } > } > @@ -259,9 +282,8 @@ static void nbd_coroutine_end(BDRVNBDState *s, struct > nbd_request *request) > } > } > > -static int nbd_establish_connection(BlockDriverState *bs) > +static int nbd_establish_connection(BDRVNBDState *s) > { > - BDRVNBDState *s = bs->opaque; > int sock; > int ret; > off_t size; > @@ -302,19 +324,23 @@ static int nbd_establish_connection(BlockDriverState > *bs) > return 0; > } > > -static void nbd_teardown_connection(BlockDriverState *bs) > +static void nbd_teardown_connection(BDRVNBDState *s, bool send_disconnect) > { > - BDRVNBDState *s = bs->opaque; > + > struct nbd_request request; > > - 
request.type = NBD_CMD_DISC; > - request.from = 0; > - request.len = 0; > - nbd_send_request(s->sock, &request); > + if (nbd_is_connected(s)) { > + if (send_disconnect) { > + request.type = NBD_CMD_DISC; > + request.from = 0; > + request.len = 0; > + nbd_send_request(s->sock, &request); > + } > > - qemu_aio_set_fd_handler(s->sock, NULL, NULL, NULL, NULL); > - closesocket(s->sock); > - s->sock = -1; > + qemu_aio_set_fd_handler(s->sock, NULL, NULL, NULL, NULL); > + closesocket(s->sock); > + s->sock = -1; /* Makes nbd_is_connected() return true */ The comment is wrong, by the way: setting s->sock to -1 makes nbd_is_connected() return *false* (it tests s->sock >= 0). > + } > } > > static int nbd_open(BlockDriverState *bs, const char* filename, int flags) > @@ -336,7 +362,7 @@ static int nbd_open(BlockDriverState *bs, const char* > filename, int flags) > /* establish TCP connection, return error if it fails > * TODO: Configurable retry-until-timeout behaviour. > */ > - result = nbd_establish_connection(bs); > + result = nbd_establish_connection(s); > > return result; > } > @@ -494,7 +520,7 @@ static void nbd_close(BlockDriverState *bs) > g_free(s->export_name); > g_free(s->host_spec); > > - nbd_teardown_connection(bs); > + nbd_teardown_connection(s, true); > } > > static int64_t nbd_getlength(BlockDriverState *bs) >