Module Name: src Committed By: bouyer Date: Sat Jul 2 17:53:51 UTC 2011
Modified Files: src/sys/kern: init_main.c uipc_socket.c src/sys/sys: socketvar.h Log Message: Fix kern/45093 as discussed on tech-kern@: http://mail-index.netbsd.org/tech-kern/2011/06/17/msg010734.html The cause of the problem is that the so_pendfree is processed with the softnet_lock held at one point, and processing the list calls sodoloanfree() which may kpause(). As the thread sleeps with softnet_lock held, it ultimately cause a deadlock (see the PR or tech-kern thread for details). Although it should be possible to call sodopendfree() after releasing the socket lock, it's not so easy to know where he socket lock is held and where it's not, so we may hit the issue again later. Add a kernel thread to handle the so_pendfree list, and wake up this thread when adding mbufs to this list. Get rid of the various sodopendfree() calls, hopefully fixing definitively the problem. To generate a diff of this commit: cvs rdiff -u -r1.432 -r1.433 src/sys/kern/init_main.c cvs rdiff -u -r1.204 -r1.205 src/sys/kern/uipc_socket.c cvs rdiff -u -r1.125 -r1.126 src/sys/sys/socketvar.h Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.
Modified files: Index: src/sys/kern/init_main.c diff -u src/sys/kern/init_main.c:1.432 src/sys/kern/init_main.c:1.433 --- src/sys/kern/init_main.c:1.432 Sun Jun 12 03:35:56 2011 +++ src/sys/kern/init_main.c Sat Jul 2 17:53:50 2011 @@ -1,4 +1,4 @@ -/* $NetBSD: init_main.c,v 1.432 2011/06/12 03:35:56 rmind Exp $ */ +/* $NetBSD: init_main.c,v 1.433 2011/07/02 17:53:50 bouyer Exp $ */ /*- * Copyright (c) 2008, 2009 The NetBSD Foundation, Inc. @@ -97,7 +97,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: init_main.c,v 1.432 2011/06/12 03:35:56 rmind Exp $"); +__KERNEL_RCSID(0, "$NetBSD: init_main.c,v 1.433 2011/07/02 17:53:50 bouyer Exp $"); #include "opt_ddb.h" #include "opt_ipsec.h" @@ -495,6 +495,10 @@ spldebug_start(); + /* Initialize sockets thread(s) */ + soinit1(); + + /* Configure the system hardware. This will enable interrupts. */ configure(); Index: src/sys/kern/uipc_socket.c diff -u src/sys/kern/uipc_socket.c:1.204 src/sys/kern/uipc_socket.c:1.205 --- src/sys/kern/uipc_socket.c:1.204 Sun Jun 26 16:42:42 2011 +++ src/sys/kern/uipc_socket.c Sat Jul 2 17:53:50 2011 @@ -1,4 +1,4 @@ -/* $NetBSD: uipc_socket.c,v 1.204 2011/06/26 16:42:42 christos Exp $ */ +/* $NetBSD: uipc_socket.c,v 1.205 2011/07/02 17:53:50 bouyer Exp $ */ /*- * Copyright (c) 2002, 2007, 2008, 2009 The NetBSD Foundation, Inc. @@ -63,7 +63,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: uipc_socket.c,v 1.204 2011/06/26 16:42:42 christos Exp $"); +__KERNEL_RCSID(0, "$NetBSD: uipc_socket.c,v 1.205 2011/07/02 17:53:50 bouyer Exp $"); #include "opt_compat_netbsd.h" #include "opt_sock_counters.h" @@ -92,6 +92,7 @@ #include <sys/kauth.h> #include <sys/mutex.h> #include <sys/condvar.h> +#include <sys/kthread.h> #ifdef COMPAT_50 #include <compat/sys/time.h> @@ -144,7 +145,7 @@ #endif static kmutex_t so_pendfree_lock; -static struct mbuf *so_pendfree; +static struct mbuf *so_pendfree = NULL; #ifndef SOMAXKVA #define SOMAXKVA (16 * 1024 * 1024) @@ -157,8 +158,9 @@ #define SOCK_LOAN_CHUNK 65536 -static size_t sodopendfree(void); -static size_t sodopendfreel(void); +static void sopendfree_thread(void *); +static kcondvar_t pendfree_thread_cv; +static lwp_t *sopendfree_lwp; static void sysctl_kern_somaxkva_setup(void); static struct sysctllog *socket_sysctllog; @@ -170,21 +172,6 @@ mutex_enter(&so_pendfree_lock); while (socurkva + len > somaxkva) { - size_t freed; - - /* - * try to do pendfree. - */ - - freed = sodopendfreel(); - - /* - * if some kva was freed, try again. - */ - - if (freed) - continue; - SOSEND_COUNTER_INCR(&sosend_kvalimit); error = cv_wait_sig(&socurkva_cv, &so_pendfree_lock); if (error) { @@ -277,56 +264,45 @@ sokvafree(sva, len); } -static size_t -sodopendfree(void) -{ - size_t rv; - - if (__predict_true(so_pendfree == NULL)) - return 0; - - mutex_enter(&so_pendfree_lock); - rv = sodopendfreel(); - mutex_exit(&so_pendfree_lock); - - return rv; -} - /* - * sodopendfreel: free mbufs on "pendfree" list. + * sopendfree_thread: free mbufs on "pendfree" list. * unlock and relock so_pendfree_lock when freeing mbufs. - * - * => called with so_pendfree_lock held. */ -static size_t -sodopendfreel(void) +static void +sopendfree_thread(void *v) { struct mbuf *m, *next; - size_t rv = 0; - - KASSERT(mutex_owned(&so_pendfree_lock)); + size_t rv; - while (so_pendfree != NULL) { - m = so_pendfree; - so_pendfree = NULL; - mutex_exit(&so_pendfree_lock); + mutex_enter(&so_pendfree_lock); - for (; m != NULL; m = next) { - next = m->m_next; - KASSERT((~m->m_flags & (M_EXT|M_EXT_PAGES)) == 0); - KASSERT(m->m_ext.ext_refcnt == 0); + for (;;) { + rv = 0; + while (so_pendfree != NULL) { + m = so_pendfree; + so_pendfree = NULL; + mutex_exit(&so_pendfree_lock); + + for (; m != NULL; m = next) { + next = m->m_next; + KASSERT((~m->m_flags & (M_EXT|M_EXT_PAGES)) == 0); + KASSERT(m->m_ext.ext_refcnt == 0); + + rv += m->m_ext.ext_size; + sodoloanfree(m->m_ext.ext_pgs, m->m_ext.ext_buf, + m->m_ext.ext_size); + pool_cache_put(mb_cache, m); + } - rv += m->m_ext.ext_size; - sodoloanfree(m->m_ext.ext_pgs, m->m_ext.ext_buf, - m->m_ext.ext_size); - pool_cache_put(mb_cache, m); + mutex_enter(&so_pendfree_lock); } - - mutex_enter(&so_pendfree_lock); + if (rv) + cv_broadcast(&socurkva_cv); + cv_wait(&pendfree_thread_cv, &so_pendfree_lock); } - - return (rv); + panic("sopendfree_thread"); + /* NOTREACHED */ } void @@ -345,7 +321,7 @@ mutex_enter(&so_pendfree_lock); m->m_next = so_pendfree; so_pendfree = m; - cv_broadcast(&socurkva_cv); + cv_signal(&pendfree_thread_cv); mutex_exit(&so_pendfree_lock); } @@ -415,7 +391,6 @@ KASSERT(ce == &sokva_reclaimerentry); KASSERT(obj == NULL); - sodopendfree(); if (!vm_map_starved_p(kernel_map)) { return CALLBACK_CHAIN_ABORT; } @@ -497,6 +472,7 @@ mutex_init(&so_pendfree_lock, MUTEX_DEFAULT, IPL_VM); softnet_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE); cv_init(&socurkva_cv, "sokva"); + cv_init(&pendfree_thread_cv, "sopendfr"); soinit2(); /* Set the initial adjusted socket buffer size. */ @@ -510,6 +486,15 @@ socket_listener_cb, NULL); } +void +soinit1(void) +{ + int error = kthread_create(PRI_NONE, KTHREAD_MPSAFE, NULL, + sopendfree_thread, NULL, &sopendfree_lwp, "sopendfree"); + if (error) + panic("soinit1 %d", error); +} + /* * Socket operation routines. * These routines are called by the routines in @@ -878,7 +863,6 @@ error = (*so->so_proto->pr_usrreq)(so, PRU_DISCONNECT, NULL, NULL, NULL, NULL); } - sodopendfree(); return (error); } @@ -911,7 +895,6 @@ short wakeup_state = 0; p = l->l_proc; - sodopendfree(); clen = 0; /* @@ -1187,9 +1170,6 @@ else flags = 0; - if ((flags & MSG_DONTWAIT) == 0) - sodopendfree(); - if (flags & MSG_OOB) { m = m_get(M_WAIT, MT_DATA); solock(so); Index: src/sys/sys/socketvar.h diff -u src/sys/sys/socketvar.h:1.125 src/sys/sys/socketvar.h:1.126 --- src/sys/sys/socketvar.h:1.125 Sun Jun 26 16:43:12 2011 +++ src/sys/sys/socketvar.h Sat Jul 2 17:53:51 2011 @@ -1,4 +1,4 @@ -/* $NetBSD: socketvar.h,v 1.125 2011/06/26 16:43:12 christos Exp $ */ +/* $NetBSD: socketvar.h,v 1.126 2011/07/02 17:53:51 bouyer Exp $ */ /*- * Copyright (c) 2008, 2009 The NetBSD Foundation, Inc. @@ -280,6 +280,7 @@ int sbwait(struct sockbuf *); int sb_max_set(u_long); void soinit(void); +void soinit1(void); void soinit2(void); int soabort(struct socket *); int soaccept(struct socket *, struct mbuf *);