Hi Markus On Fri, May 20, 2016 at 1:52 PM, Markus Pargmann <m...@pengutronix.de> wrote: > On Friday 20 May 2016 02:05:36 Pranay Srivastava wrote: >> On Thu, May 19, 2016 at 11:52 AM, Markus Pargmann <m...@pengutronix.de> >> wrote: >> > Hi, >> > >> > On Wed, May 11, 2016 at 11:18:29AM +0300, Pranay Kr. Srivastava wrote: >> >> This patch fixes the warning generated when a timeout occurs >> >> on the request and socket is closed from a non-sleep context >> >> by >> >> >> >> 1. Moving the socket closing on a timeout to nbd_thread_send >> > >> > What happens if a send blocks? >> >> socket closing needs to be moved to a non-atomic context and, >> sender thread seemed to be a good place to do this. If you mean >> send blocks just before calling sock_shutdown[?] in nbd_thread_send >> then yes I think that should be removed. I need to re-check how nbd-server >> behaves in that case. > > No that's not what I meant. Your approach uses the sender thread as a > worker to close the socket. You are using waiting_wq to notify the > sender thread. That's fine so far. > > But what happens if the sender thread is at this point trying to send on > the socket which blocks? Then the timeout triggers and waiting_wq will > notify the sending thread as soon as it left the sending routine. But it > will not interrupt the thread that is waiting in kernel_sendmsg() and > the sending thread will be stuck much longer than specified in the > timeout.
So socket shutdown must be triggered immediately. I've done a version using system_wq for this and appears to be good. I'll be sending that soon after doing cleanup and applying your sock_shutdown patch you sent earlier. > >> >> > >> >> >> >> 2. Make sock lock to be a mutex instead of a spin lock, since >> >> nbd_xmit_timeout doesn't need to hold it anymore. >> > >> > I can't see why we need a mutex instead of a spinlock? >> >> you are right, with your earlier patch we don't need it to be a mutex. >> >> > >> >> >> >> Signed-off-by: Pranay Kr. Srivastava <pran...@gmail.com> >> >> --- >> >> drivers/block/nbd.c | 65 >> >> ++++++++++++++++++++++++++++++++--------------------- >> >> 1 file changed, 39 insertions(+), 26 deletions(-) >> >> >> >> diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c >> >> index 31e73a7..c79bcd7 100644 >> >> --- a/drivers/block/nbd.c >> >> +++ b/drivers/block/nbd.c >> >> @@ -57,12 +57,12 @@ struct nbd_device { >> >> int blksize; >> >> loff_t bytesize; >> >> int xmit_timeout; >> >> - bool timedout; >> >> + atomic_t timedout; >> >> bool disconnect; /* a disconnect has been requested by user */ >> >> >> >> struct timer_list timeout_timer; >> >> /* protects initialization and shutdown of the socket */ >> >> - spinlock_t sock_lock; >> >> + struct mutex sock_lock; >> >> struct task_struct *task_recv; >> >> struct task_struct *task_send; >> >> >> >> @@ -172,10 +172,9 @@ static void nbd_end_request(struct nbd_device *nbd, >> >> struct request *req) >> >> */ >> >> static void sock_shutdown(struct nbd_device *nbd) >> >> { >> >> - spin_lock_irq(&nbd->sock_lock); >> >> - >> >> + mutex_lock(&nbd->sock_lock); >> >> if (!nbd->sock) { >> >> - spin_unlock_irq(&nbd->sock_lock); >> >> + mutex_unlock(&nbd->sock_lock); >> >> return; >> >> } >> >> >> >> @@ -183,27 +182,19 @@ static void sock_shutdown(struct nbd_device *nbd) >> >> kernel_sock_shutdown(nbd->sock, SHUT_RDWR); >> >> sockfd_put(nbd->sock); >> >> nbd->sock = NULL; >> >> - spin_unlock_irq(&nbd->sock_lock); >> >> - >> >> + mutex_unlock(&nbd->sock_lock); >> >> del_timer(&nbd->timeout_timer); >> >> } >> >> >> >> static void nbd_xmit_timeout(unsigned long arg) >> >> { >> >> struct nbd_device *nbd = (struct nbd_device *)arg; >> >> - unsigned long flags; >> >> >> >> if (list_empty(&nbd->queue_head)) >> >> return; >> >> >> >> - spin_lock_irqsave(&nbd->sock_lock, flags); >> >> - >> >> - nbd->timedout = true; >> >> - >> >> - if (nbd->sock) >> >> - kernel_sock_shutdown(nbd->sock, SHUT_RDWR); >> >> - >> >> - spin_unlock_irqrestore(&nbd->sock_lock, flags); >> >> + atomic_inc(&nbd->timedout); >> >> + wake_up(&nbd->waiting_wq); >> >> >> >> dev_err(nbd_to_dev(nbd), "Connection timed out, shutting down >> >> connection\n"); >> >> } >> >> @@ -579,7 +570,27 @@ static int nbd_thread_send(void *data) >> >> /* wait for something to do */ >> >> wait_event_interruptible(nbd->waiting_wq, >> >> kthread_should_stop() || >> >> - !list_empty(&nbd->waiting_queue)); >> >> + !list_empty(&nbd->waiting_queue) || >> >> + atomic_read(&nbd->timedout)); >> >> + >> >> + if (atomic_read(&nbd->timedout)) { >> >> + mutex_lock(&nbd->sock_lock); >> >> + if (nbd->sock) { >> >> + struct request sreq; >> >> + >> >> + blk_rq_init(NULL, &sreq); >> >> + sreq.cmd_type = REQ_TYPE_DRV_PRIV; >> >> + mutex_lock(&nbd->tx_lock); >> >> + nbd->disconnect = true; >> >> + nbd_send_req(nbd, &sreq); >> >> + mutex_unlock(&nbd->tx_lock); >> >> + dev_err(disk_to_dev(nbd->disk), >> >> + "Device Timeout occured.Shutting >> >> down" >> >> + " socket."); >> >> + } >> >> + mutex_unlock(&nbd->sock_lock); >> >> + sock_shutdown(nbd); >> > >> > Why are you trying to send something on a connection that timed out >> > (nbd_send_req())? And afterwards you execute a socket shutdown so in most >> > timeout cases this won't reach the server and we risk a blocking send on >> > a timedout connection. >> >> Ok. I get it. But shouldn't the server side also close it's socket as >> well? I don't >> think the timeout value is propagated to server or like server can >> "ping" to check >> if client is there right? >> >> I agree on nbd_send_req in timedout, it shouldn't be there, just a >> sock_shutdown should >> do. Can you confirm if I'm right about nbd-server side as well like it >> won't timeout and close >> that socket or did I miss any option while starting it? > > If the socket is closed the server will notice at some point in the > future at least after the TCP timeout. I am not sure how we could notify > the server without running into the next connection issues. > > Best Regards, > > Markus > >> >> > >> > Regards, >> > >> > Markus >> > >> >> + } >> >> >> >> /* extract request */ >> >> if (list_empty(&nbd->waiting_queue)) >> >> @@ -592,7 +603,11 @@ static int nbd_thread_send(void *data) >> >> spin_unlock_irq(&nbd->queue_lock); >> >> >> >> /* handle request */ >> >> - nbd_handle_req(nbd, req); >> >> + if (atomic_read(&nbd->timedout)) { >> >> + req->errors++; >> >> + nbd_end_request(nbd, req); >> >> + } else >> >> + nbd_handle_req(nbd, req); >> >> } >> >> >> >> nbd->task_send = NULL; >> >> @@ -647,7 +662,7 @@ static int nbd_set_socket(struct nbd_device *nbd, >> >> struct socket *sock) >> >> { >> >> int ret = 0; >> >> >> >> - spin_lock_irq(&nbd->sock_lock); >> >> + mutex_lock(&nbd->sock_lock); >> >> >> >> if (nbd->sock) { >> >> ret = -EBUSY; >> >> @@ -657,7 +672,7 @@ static int nbd_set_socket(struct nbd_device *nbd, >> >> struct socket *sock) >> >> nbd->sock = sock; >> >> >> >> out: >> >> - spin_unlock_irq(&nbd->sock_lock); >> >> + mutex_unlock(&nbd->sock_lock); >> >> >> >> return ret; >> >> } >> >> @@ -666,7 +681,7 @@ out: >> >> static void nbd_reset(struct nbd_device *nbd) >> >> { >> >> nbd->disconnect = false; >> >> - nbd->timedout = false; >> >> + atomic_set(&nbd->timedout, 0); >> >> nbd->blksize = 1024; >> >> nbd->bytesize = 0; >> >> set_capacity(nbd->disk, 0); >> >> @@ -803,17 +818,15 @@ static int __nbd_ioctl(struct block_device *bdev, >> >> struct nbd_device *nbd, >> >> error = nbd_thread_recv(nbd, bdev); >> >> nbd_dev_dbg_close(nbd); >> >> kthread_stop(thread); >> >> - >> >> - mutex_lock(&nbd->tx_lock); >> >> - >> >> sock_shutdown(nbd); >> >> + mutex_lock(&nbd->tx_lock); >> >> nbd_clear_que(nbd); >> >> kill_bdev(bdev); >> >> nbd_bdev_reset(bdev); >> >> >> >> if (nbd->disconnect) /* user requested, ignore socket >> >> errors */ >> >> error = 0; >> >> - if (nbd->timedout) >> >> + if (atomic_read(&nbd->timedout)) >> >> error = -ETIMEDOUT; >> >> >> >> nbd_reset(nbd); >> >> @@ -1075,7 +1088,7 @@ static int __init nbd_init(void) >> >> nbd_dev[i].magic = NBD_MAGIC; >> >> INIT_LIST_HEAD(&nbd_dev[i].waiting_queue); >> >> spin_lock_init(&nbd_dev[i].queue_lock); >> >> - spin_lock_init(&nbd_dev[i].sock_lock); >> >> + mutex_init(&nbd_dev[i].sock_lock); >> >> INIT_LIST_HEAD(&nbd_dev[i].queue_head); >> >> mutex_init(&nbd_dev[i].tx_lock); >> >> init_timer(&nbd_dev[i].timeout_timer); >> >> -- >> >> 2.6.2 >> >> >> >> >> > >> > -- >> > Pengutronix e.K. | | >> > Industrial Linux Solutions | http://www.pengutronix.de/ | >> > Peiner Str. 6-8, 31137 Hildesheim, Germany | Phone: +49-5121-206917-0 | >> > Amtsgericht Hildesheim, HRA 2686 | Fax: +49-5121-206917-5555 | >> >> >> >> > > -- > Pengutronix e.K. | | > Industrial Linux Solutions | http://www.pengutronix.de/ | > Peiner Str. 6-8, 31137 Hildesheim, Germany | Phone: +49-5121-206917-0 | > Amtsgericht Hildesheim, HRA 2686 | Fax: +49-5121-206917-5555 | -- ---P.K.S