Module Name:    src
Committed By:   bouyer
Date:           Sat Jul  2 17:53:51 UTC 2011

Modified Files:
        src/sys/kern: init_main.c uipc_socket.c
        src/sys/sys: socketvar.h

Log Message:
Fix kern/45093 as discussed on tech-kern@:
http://mail-index.netbsd.org/tech-kern/2011/06/17/msg010734.html

The cause of the problem is that the so_pendfree is processed with
the softnet_lock held at one point, and processing the list
calls sodoloanfree() which may kpause(). As the thread sleeps with
softnet_lock held, it ultimately cause a deadlock (see the PR or tech-kern
thread for details).
Although it should be possible to call sodopendfree() after releasing
the socket lock, it's not so easy to know where he socket lock is held and
where it's not, so we may hit the issue again later.
Add a kernel thread to handle the so_pendfree list, and wake up this
thread when adding mbufs to this list. Get rid of the various sodopendfree()
calls, hopefully fixing definitively the problem.


To generate a diff of this commit:
cvs rdiff -u -r1.432 -r1.433 src/sys/kern/init_main.c
cvs rdiff -u -r1.204 -r1.205 src/sys/kern/uipc_socket.c
cvs rdiff -u -r1.125 -r1.126 src/sys/sys/socketvar.h

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/kern/init_main.c
diff -u src/sys/kern/init_main.c:1.432 src/sys/kern/init_main.c:1.433
--- src/sys/kern/init_main.c:1.432	Sun Jun 12 03:35:56 2011
+++ src/sys/kern/init_main.c	Sat Jul  2 17:53:50 2011
@@ -1,4 +1,4 @@
-/*	$NetBSD: init_main.c,v 1.432 2011/06/12 03:35:56 rmind Exp $	*/
+/*	$NetBSD: init_main.c,v 1.433 2011/07/02 17:53:50 bouyer Exp $	*/
 
 /*-
  * Copyright (c) 2008, 2009 The NetBSD Foundation, Inc.
@@ -97,7 +97,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: init_main.c,v 1.432 2011/06/12 03:35:56 rmind Exp $");
+__KERNEL_RCSID(0, "$NetBSD: init_main.c,v 1.433 2011/07/02 17:53:50 bouyer Exp $");
 
 #include "opt_ddb.h"
 #include "opt_ipsec.h"
@@ -495,6 +495,10 @@
 
 	spldebug_start();
 
+	/* Initialize sockets thread(s) */
+	soinit1();
+
+
 	/* Configure the system hardware.  This will enable interrupts. */
 	configure();
 

Index: src/sys/kern/uipc_socket.c
diff -u src/sys/kern/uipc_socket.c:1.204 src/sys/kern/uipc_socket.c:1.205
--- src/sys/kern/uipc_socket.c:1.204	Sun Jun 26 16:42:42 2011
+++ src/sys/kern/uipc_socket.c	Sat Jul  2 17:53:50 2011
@@ -1,4 +1,4 @@
-/*	$NetBSD: uipc_socket.c,v 1.204 2011/06/26 16:42:42 christos Exp $	*/
+/*	$NetBSD: uipc_socket.c,v 1.205 2011/07/02 17:53:50 bouyer Exp $	*/
 
 /*-
  * Copyright (c) 2002, 2007, 2008, 2009 The NetBSD Foundation, Inc.
@@ -63,7 +63,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: uipc_socket.c,v 1.204 2011/06/26 16:42:42 christos Exp $");
+__KERNEL_RCSID(0, "$NetBSD: uipc_socket.c,v 1.205 2011/07/02 17:53:50 bouyer Exp $");
 
 #include "opt_compat_netbsd.h"
 #include "opt_sock_counters.h"
@@ -92,6 +92,7 @@
 #include <sys/kauth.h>
 #include <sys/mutex.h>
 #include <sys/condvar.h>
+#include <sys/kthread.h>
 
 #ifdef COMPAT_50
 #include <compat/sys/time.h>
@@ -144,7 +145,7 @@
 #endif
 
 static kmutex_t so_pendfree_lock;
-static struct mbuf *so_pendfree;
+static struct mbuf *so_pendfree = NULL;
 
 #ifndef SOMAXKVA
 #define	SOMAXKVA (16 * 1024 * 1024)
@@ -157,8 +158,9 @@
 
 #define	SOCK_LOAN_CHUNK		65536
 
-static size_t sodopendfree(void);
-static size_t sodopendfreel(void);
+static void sopendfree_thread(void *);
+static kcondvar_t pendfree_thread_cv;
+static lwp_t *sopendfree_lwp;
 
 static void sysctl_kern_somaxkva_setup(void);
 static struct sysctllog *socket_sysctllog;
@@ -170,21 +172,6 @@
 
 	mutex_enter(&so_pendfree_lock);
 	while (socurkva + len > somaxkva) {
-		size_t freed;
-
-		/*
-		 * try to do pendfree.
-		 */
-
-		freed = sodopendfreel();
-
-		/*
-		 * if some kva was freed, try again.
-		 */
-
-		if (freed)
-			continue;
-
 		SOSEND_COUNTER_INCR(&sosend_kvalimit);
 		error = cv_wait_sig(&socurkva_cv, &so_pendfree_lock);
 		if (error) {
@@ -277,56 +264,45 @@
 	sokvafree(sva, len);
 }
 
-static size_t
-sodopendfree(void)
-{
-	size_t rv;
-
-	if (__predict_true(so_pendfree == NULL))
-		return 0;
-
-	mutex_enter(&so_pendfree_lock);
-	rv = sodopendfreel();
-	mutex_exit(&so_pendfree_lock);
-
-	return rv;
-}
-
 /*
- * sodopendfreel: free mbufs on "pendfree" list.
+ * sopendfree_thread: free mbufs on "pendfree" list.
  * unlock and relock so_pendfree_lock when freeing mbufs.
- *
- * => called with so_pendfree_lock held.
  */
 
-static size_t
-sodopendfreel(void)
+static void
+sopendfree_thread(void *v)
 {
 	struct mbuf *m, *next;
-	size_t rv = 0;
-
-	KASSERT(mutex_owned(&so_pendfree_lock));
+	size_t rv;
 
-	while (so_pendfree != NULL) {
-		m = so_pendfree;
-		so_pendfree = NULL;
-		mutex_exit(&so_pendfree_lock);
+	mutex_enter(&so_pendfree_lock);
 
-		for (; m != NULL; m = next) {
-			next = m->m_next;
-			KASSERT((~m->m_flags & (M_EXT|M_EXT_PAGES)) == 0);
-			KASSERT(m->m_ext.ext_refcnt == 0);
+	for (;;) {
+		rv = 0;
+		while (so_pendfree != NULL) {
+			m = so_pendfree;
+			so_pendfree = NULL;
+			mutex_exit(&so_pendfree_lock);
+
+			for (; m != NULL; m = next) {
+				next = m->m_next;
+				KASSERT((~m->m_flags & (M_EXT|M_EXT_PAGES)) == 0);
+				KASSERT(m->m_ext.ext_refcnt == 0);
+
+				rv += m->m_ext.ext_size;
+				sodoloanfree(m->m_ext.ext_pgs, m->m_ext.ext_buf,
+				    m->m_ext.ext_size);
+				pool_cache_put(mb_cache, m);
+			}
 
-			rv += m->m_ext.ext_size;
-			sodoloanfree(m->m_ext.ext_pgs, m->m_ext.ext_buf,
-			    m->m_ext.ext_size);
-			pool_cache_put(mb_cache, m);
+			mutex_enter(&so_pendfree_lock);
 		}
-
-		mutex_enter(&so_pendfree_lock);
+		if (rv)
+			cv_broadcast(&socurkva_cv);
+		cv_wait(&pendfree_thread_cv, &so_pendfree_lock);
 	}
-
-	return (rv);
+	panic("sopendfree_thread");
+	/* NOTREACHED */
 }
 
 void
@@ -345,7 +321,7 @@
 	mutex_enter(&so_pendfree_lock);
 	m->m_next = so_pendfree;
 	so_pendfree = m;
-	cv_broadcast(&socurkva_cv);
+	cv_signal(&pendfree_thread_cv);
 	mutex_exit(&so_pendfree_lock);
 }
 
@@ -415,7 +391,6 @@
 	KASSERT(ce == &sokva_reclaimerentry);
 	KASSERT(obj == NULL);
 
-	sodopendfree();
 	if (!vm_map_starved_p(kernel_map)) {
 		return CALLBACK_CHAIN_ABORT;
 	}
@@ -497,6 +472,7 @@
 	mutex_init(&so_pendfree_lock, MUTEX_DEFAULT, IPL_VM);
 	softnet_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE);
 	cv_init(&socurkva_cv, "sokva");
+	cv_init(&pendfree_thread_cv, "sopendfr");
 	soinit2();
 
 	/* Set the initial adjusted socket buffer size. */
@@ -510,6 +486,15 @@
 	    socket_listener_cb, NULL);
 }
 
+void
+soinit1(void)
+{
+	int error = kthread_create(PRI_NONE, KTHREAD_MPSAFE, NULL,
+	    sopendfree_thread, NULL, &sopendfree_lwp, "sopendfree");
+	if (error)
+		panic("soinit1 %d", error);
+}
+
 /*
  * Socket operation routines.
  * These routines are called by the routines in
@@ -878,7 +863,6 @@
 		error = (*so->so_proto->pr_usrreq)(so, PRU_DISCONNECT,
 		    NULL, NULL, NULL, NULL);
 	}
-	sodopendfree();
 	return (error);
 }
 
@@ -911,7 +895,6 @@
 	short		wakeup_state = 0;
 
 	p = l->l_proc;
-	sodopendfree();
 	clen = 0;
 
 	/*
@@ -1187,9 +1170,6 @@
 	else
 		flags = 0;
 
-	if ((flags & MSG_DONTWAIT) == 0)
-		sodopendfree();
-
 	if (flags & MSG_OOB) {
 		m = m_get(M_WAIT, MT_DATA);
 		solock(so);

Index: src/sys/sys/socketvar.h
diff -u src/sys/sys/socketvar.h:1.125 src/sys/sys/socketvar.h:1.126
--- src/sys/sys/socketvar.h:1.125	Sun Jun 26 16:43:12 2011
+++ src/sys/sys/socketvar.h	Sat Jul  2 17:53:51 2011
@@ -1,4 +1,4 @@
-/*	$NetBSD: socketvar.h,v 1.125 2011/06/26 16:43:12 christos Exp $	*/
+/*	$NetBSD: socketvar.h,v 1.126 2011/07/02 17:53:51 bouyer Exp $	*/
 
 /*-
  * Copyright (c) 2008, 2009 The NetBSD Foundation, Inc.
@@ -280,6 +280,7 @@
 int	sbwait(struct sockbuf *);
 int	sb_max_set(u_long);
 void	soinit(void);
+void	soinit1(void);
 void	soinit2(void);
 int	soabort(struct socket *);
 int	soaccept(struct socket *, struct mbuf *);

Reply via email to