Module Name:    src
Committed By:   ozaki-r
Date:           Wed Sep 27 07:27:29 UTC 2017

Modified Files:
        src/sys/netipsec: key.c

Log Message:
Fix deadlock between pserialize_perform and localcount_drain

A typical ussage of localcount_drain looks like this:

  mutex_enter(&mtx);
  item = remove_from_list();
  pserialize_perform(psz);
  localcount_drain(&item->localcount, &cv, &mtx);
  mutex_exit(&mtx);

This sequence can cause a deadlock which happens for example on the following
situation:

- Thread A calls localcount_drain which calls xc_broadcast after releasing
  a specified mutex
- Thread B enters the sequence and calls pserialize_perform with holding
  the mutex while pserialize_perform also calls xc_broadcast
- Thread C (xc_thread) that calls an xcall callback of localcount_drain tries
  to hold the mutex

xc_broadcast of thread B doesn't start until xc_broadcast of thread A
finishes, which is a feature of xcall(9). This means that pserialize_perform
never complete until xc_broadcast of thread A finishes. On the other hand,
thread C that is a callee of xc_broadcast of thread A sticks on the mutex.
Finally the threads block each other (A blocks B, B blocks C and C blocks A).

A possible fix is to serialize executions of the above sequence by another
mutex, but adding another mutex makes the code complex, so fix the deadlock
by another way; the fix is to release the mutex before pserialize_perform
and instead use a condvar to prevent pserialize_perform from being called
simultaneously.

Note that the deadlock has happened only if NET_MPSAFE is enabled.


To generate a diff of this commit:
cvs rdiff -u -r1.225 -r1.226 src/sys/netipsec/key.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/netipsec/key.c
diff -u src/sys/netipsec/key.c:1.225 src/sys/netipsec/key.c:1.226
--- src/sys/netipsec/key.c:1.225	Mon Aug 21 07:38:42 2017
+++ src/sys/netipsec/key.c	Wed Sep 27 07:27:29 2017
@@ -1,4 +1,4 @@
-/*	$NetBSD: key.c,v 1.225 2017/08/21 07:38:42 knakahara Exp $	*/
+/*	$NetBSD: key.c,v 1.226 2017/09/27 07:27:29 ozaki-r Exp $	*/
 /*	$FreeBSD: src/sys/netipsec/key.c,v 1.3.2.3 2004/02/14 22:23:23 bms Exp $	*/
 /*	$KAME: key.c,v 1.191 2001/06/27 10:46:49 sakane Exp $	*/
 
@@ -32,7 +32,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: key.c,v 1.225 2017/08/21 07:38:42 knakahara Exp $");
+__KERNEL_RCSID(0, "$NetBSD: key.c,v 1.226 2017/09/27 07:27:29 ozaki-r Exp $");
 
 /*
  * This code is referd to RFC 2367
@@ -237,26 +237,31 @@ static u_int32_t acq_seq = 0;
  *   - key_misc.lock must be held even for read accesses
  */
 
-static pserialize_t key_spd_psz __read_mostly;
-static pserialize_t key_sad_psz __read_mostly;
-
 /* SPD */
 static struct {
 	kmutex_t lock;
-	kcondvar_t cv;
+	kcondvar_t cv_lc;
 	struct pslist_head splist[IPSEC_DIR_MAX];
 	/*
 	 * The list has SPs that are set to a socket via
 	 * setsockopt(IP_IPSEC_POLICY) from userland. See ipsec_set_policy.
 	 */
 	struct pslist_head socksplist;
+
+	pserialize_t psz;
+	kcondvar_t cv_psz;
+	bool psz_performing;
 } key_spd __cacheline_aligned;
 
 /* SAD */
 static struct {
 	kmutex_t lock;
-	kcondvar_t cv;
+	kcondvar_t cv_lc;
 	struct pslist_head sahlist;
+
+	pserialize_t psz;
+	kcondvar_t cv_psz;
+	bool psz_performing;
 } key_sad __cacheline_aligned;
 
 /* Misc data */
@@ -799,6 +804,24 @@ key_sp_refcnt(const struct secpolicy *sp
 	return 0;
 }
 
+static void
+key_spd_pserialize_perform(void)
+{
+
+	KASSERT(mutex_owned(&key_spd.lock));
+
+	while (key_spd.psz_performing)
+		cv_wait(&key_spd.cv_psz, &key_spd.lock);
+	key_spd.psz_performing = true;
+	mutex_exit(&key_spd.lock);
+
+	pserialize_perform(key_spd.psz);
+
+	mutex_enter(&key_spd.lock);
+	key_spd.psz_performing = false;
+	cv_broadcast(&key_spd.cv_psz);
+}
+
 /*
  * Remove the sp from the key_spd.splist and wait for references to the sp
  * to be released. key_spd.lock must be held.
@@ -817,10 +840,10 @@ key_unlink_sp(struct secpolicy *sp)
 
 #ifdef NET_MPSAFE
 	KASSERT(mutex_ownable(softnet_lock));
-	pserialize_perform(key_spd_psz);
+	key_spd_pserialize_perform();
 #endif
 
-	localcount_drain(&sp->localcount, &key_spd.cv, &key_spd.lock);
+	localcount_drain(&sp->localcount, &key_spd.cv_lc, &key_spd.lock);
 }
 
 /*
@@ -1354,7 +1377,7 @@ key_sp_unref(struct secpolicy *sp, const
 	    "DP SP:%p (ID=%u) from %s:%u; refcnt-- now %u\n",
 	    sp, sp->id, where, tag, key_sp_refcnt(sp));
 
-	localcount_release(&sp->localcount, &key_spd.cv, &key_spd.lock);
+	localcount_release(&sp->localcount, &key_spd.cv_lc, &key_spd.lock);
 }
 
 static void
@@ -1396,7 +1419,7 @@ key_sa_unref(struct secasvar *sav, const
 	    "DP cause refcnt--: SA:%p from %s:%u\n",
 	    sav, where, tag);
 
-	localcount_release(&sav->localcount, &key_sad.cv, &key_sad.lock);
+	localcount_release(&sav->localcount, &key_sad.cv_lc, &key_sad.lock);
 }
 
 #if 0
@@ -1474,6 +1497,24 @@ key_freesp_so(struct secpolicy **sp)
 }
 #endif
 
+static void
+key_sad_pserialize_perform(void)
+{
+
+	KASSERT(mutex_owned(&key_sad.lock));
+
+	while (key_sad.psz_performing)
+		cv_wait(&key_sad.cv_psz, &key_sad.lock);
+	key_sad.psz_performing = true;
+	mutex_exit(&key_sad.lock);
+
+	pserialize_perform(key_sad.psz);
+
+	mutex_enter(&key_sad.lock);
+	key_sad.psz_performing = false;
+	cv_broadcast(&key_sad.cv_psz);
+}
+
 /*
  * Remove the sav from the savlist of its sah and wait for references to the sav
  * to be released. key_sad.lock must be held.
@@ -1488,10 +1529,10 @@ key_unlink_sav(struct secasvar *sav)
 
 #ifdef NET_MPSAFE
 	KASSERT(mutex_ownable(softnet_lock));
-	pserialize_perform(key_sad_psz);
+	key_sad_pserialize_perform();
 #endif
 
-	localcount_drain(&sav->localcount, &key_sad.cv, &key_sad.lock);
+	localcount_drain(&sav->localcount, &key_sad.cv_lc, &key_sad.lock);
 }
 
 /*
@@ -1530,9 +1571,9 @@ key_destroy_sav_with_ref(struct secasvar
 	mutex_enter(&key_sad.lock);
 #ifdef NET_MPSAFE
 	KASSERT(mutex_ownable(softnet_lock));
-	pserialize_perform(key_sad_psz);
+	key_sad_pserialize_perform();
 #endif
-	localcount_drain(&sav->localcount, &key_sad.cv, &key_sad.lock);
+	localcount_drain(&sav->localcount, &key_sad.cv_lc, &key_sad.lock);
 	mutex_exit(&key_sad.lock);
 
 	key_destroy_sav(sav);
@@ -3079,10 +3120,10 @@ key_unlink_sah(struct secashead *sah)
 
 #ifdef NET_MPSAFE
 	KASSERT(mutex_ownable(softnet_lock));
-	pserialize_perform(key_sad_psz);
+	key_sad_pserialize_perform();
 #endif
 
-	localcount_drain(&sah->localcount, &key_sad.cv, &key_sad.lock);
+	localcount_drain(&sah->localcount, &key_sad.cv_lc, &key_sad.lock);
 }
 
 static void
@@ -3241,7 +3282,7 @@ key_sah_unref(struct secashead *sah)
 
 	KDASSERT(mutex_ownable(&key_sad.lock));
 
-	localcount_release(&sah->localcount, &key_sad.cv, &key_sad.lock);
+	localcount_release(&sah->localcount, &key_sad.cv_lc, &key_sad.lock);
 }
 
 /*
@@ -8055,12 +8096,18 @@ key_do_init(void)
 	int i, error;
 
 	mutex_init(&key_misc.lock, MUTEX_DEFAULT, IPL_NONE);
-	key_spd_psz = pserialize_create();
+
 	mutex_init(&key_spd.lock, MUTEX_DEFAULT, IPL_NONE);
-	cv_init(&key_spd.cv, "key_sp");
-	key_sad_psz = pserialize_create();
+	cv_init(&key_spd.cv_lc, "key_sp_lc");
+	key_spd.psz = pserialize_create();
+	cv_init(&key_spd.cv_psz, "key_sp_psz");
+	key_spd.psz_performing = false;
+
 	mutex_init(&key_sad.lock, MUTEX_DEFAULT, IPL_NONE);
-	cv_init(&key_sad.cv, "key_sa");
+	cv_init(&key_sad.cv_lc, "key_sa_lc");
+	key_sad.psz = pserialize_create();
+	cv_init(&key_sad.cv_psz, "key_sa_psz");
+	key_sad.psz_performing = false;
 
 	pfkeystat_percpu = percpu_alloc(sizeof(uint64_t) * PFKEY_NSTATS);
 

Reply via email to