Module Name:    src
Committed By:   martin
Date:           Fri Jan 26 15:41:13 UTC 2018

Modified Files:
        src/sys/kern [netbsd-8]: kern_timeout.c
        src/sys/netinet [netbsd-8]: if_arp.c
        src/sys/netinet6 [netbsd-8]: nd6_nbr.c

Log Message:
Pull up following revision(s) (requested by ozaki-r in ticket #511):
        sys/kern/kern_timeout.c: revision 1.54
        sys/netinet6/nd6_nbr.c: revision 1.141
        sys/netinet6/nd6_nbr.c: revision 1.144
        sys/netinet/if_arp.c: revision 1.256
Fix a deadlock on callout_halt of nd6_dad_timer
We must not call callout_halt of nd6_dad_timer with holding nd6_dad_lock because
the lock is taken in nd6_dad_timer. Once softnet_lock goes away, we can pass the
lock to callout_halt, but for now we cannot.
Make DAD destructions (MP-)safe with callout_stop
arp_dad_stoptimer and nd6_dad_stoptimer can be called with or without
softnet_lock held and unfortunately we have no easy way to statically know 
which.
So it is hard to use callout_halt there.
To address the situation, we use callout_stop to make the code safe. The new
approach copes with the issue by delegating the destruction of a callout to
callout itself, which allows us to not wait the callout to finish. This can be
done thanks to that DAD objects are separated from other data such as ifa.
The approach is suggested by riastradh@
Proposed on tech-kern@ and tech-net@
Sanity-check if interlock is held when it's passed


To generate a diff of this commit:
cvs rdiff -u -r1.52 -r1.52.2.1 src/sys/kern/kern_timeout.c
cvs rdiff -u -r1.250.2.4 -r1.250.2.5 src/sys/netinet/if_arp.c
cvs rdiff -u -r1.138.6.1 -r1.138.6.2 src/sys/netinet6/nd6_nbr.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/kern/kern_timeout.c
diff -u src/sys/kern/kern_timeout.c:1.52 src/sys/kern/kern_timeout.c:1.52.2.1
--- src/sys/kern/kern_timeout.c:1.52	Thu Jun  1 02:45:13 2017
+++ src/sys/kern/kern_timeout.c	Fri Jan 26 15:41:12 2018
@@ -1,4 +1,4 @@
-/*	$NetBSD: kern_timeout.c,v 1.52 2017/06/01 02:45:13 chs Exp $	*/
+/*	$NetBSD: kern_timeout.c,v 1.52.2.1 2018/01/26 15:41:12 martin Exp $	*/
 
 /*-
  * Copyright (c) 2003, 2006, 2007, 2008, 2009 The NetBSD Foundation, Inc.
@@ -59,7 +59,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: kern_timeout.c,v 1.52 2017/06/01 02:45:13 chs Exp $");
+__KERNEL_RCSID(0, "$NetBSD: kern_timeout.c,v 1.52.2.1 2018/01/26 15:41:12 martin Exp $");
 
 /*
  * Timeouts are kept in a hierarchical timing wheel.  The c_time is the
@@ -473,6 +473,7 @@ callout_halt(callout_t *cs, void *interl
 
 	KASSERT(c->c_magic == CALLOUT_MAGIC);
 	KASSERT(!cpu_intr_p());
+	KASSERT(interlock == NULL || mutex_owned(interlock));
 
 	lock = callout_lock(c);
 	relock = NULL;

Index: src/sys/netinet/if_arp.c
diff -u src/sys/netinet/if_arp.c:1.250.2.4 src/sys/netinet/if_arp.c:1.250.2.5
--- src/sys/netinet/if_arp.c:1.250.2.4	Tue Jan  2 10:20:34 2018
+++ src/sys/netinet/if_arp.c	Fri Jan 26 15:41:12 2018
@@ -1,4 +1,4 @@
-/*	$NetBSD: if_arp.c,v 1.250.2.4 2018/01/02 10:20:34 snj Exp $	*/
+/*	$NetBSD: if_arp.c,v 1.250.2.5 2018/01/26 15:41:12 martin Exp $	*/
 
 /*-
  * Copyright (c) 1998, 2000, 2008 The NetBSD Foundation, Inc.
@@ -68,7 +68,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: if_arp.c,v 1.250.2.4 2018/01/02 10:20:34 snj Exp $");
+__KERNEL_RCSID(0, "$NetBSD: if_arp.c,v 1.250.2.5 2018/01/26 15:41:12 martin Exp $");
 
 #ifdef _KERNEL_OPT
 #include "opt_ddb.h"
@@ -173,7 +173,8 @@ static	void revarprequest(struct ifnet *
 
 static	void arp_drainstub(void);
 
-static void arp_dad_timer(struct ifaddr *);
+struct dadq;
+static void arp_dad_timer(struct dadq *);
 static void arp_dad_start(struct ifaddr *);
 static void arp_dad_stop(struct ifaddr *);
 static void arp_dad_duplicated(struct ifaddr *, const char *);
@@ -1534,18 +1535,18 @@ arp_dad_starttimer(struct dadq *dp, int 
 {
 
 	callout_reset(&dp->dad_timer_ch, ticks,
-	    (void (*)(void *))arp_dad_timer, (void *)dp->dad_ifa);
+	    (void (*)(void *))arp_dad_timer, dp);
 }
 
 static void
-arp_dad_stoptimer(struct dadq *dp)
+arp_dad_destroytimer(struct dadq *dp)
 {
 
-#ifdef NET_MPSAFE
-	callout_halt(&dp->dad_timer_ch, NULL);
-#else
-	callout_halt(&dp->dad_timer_ch, softnet_lock);
-#endif
+	TAILQ_REMOVE(&dadq, dp, dad_list);
+	/* Request the timer to destroy dp. */
+	dp->dad_ifa = NULL;
+	callout_reset(&dp->dad_timer_ch, 0,
+	    (void (*)(void *))arp_dad_timer, dp);
 }
 
 static void
@@ -1665,36 +1666,39 @@ arp_dad_stop(struct ifaddr *ifa)
 	}
 
 	/* Prevent the timer from running anymore. */
-	TAILQ_REMOVE(&dadq, dp, dad_list);
-	mutex_exit(&arp_dad_lock);
+	arp_dad_destroytimer(dp);
 
-	arp_dad_stoptimer(dp);
+	mutex_exit(&arp_dad_lock);
 
-	kmem_intr_free(dp, sizeof(*dp));
 	ifafree(ifa);
 }
 
 static void
-arp_dad_timer(struct ifaddr *ifa)
+arp_dad_timer(struct dadq *dp)
 {
-	struct in_ifaddr *ia = (struct in_ifaddr *)ifa;
-	struct dadq *dp;
+	struct ifaddr *ifa;
+	struct in_ifaddr *ia;
 	char ipbuf[INET_ADDRSTRLEN];
 	bool need_free = false;
 
 	SOFTNET_KERNEL_LOCK_UNLESS_NET_MPSAFE();
 	mutex_enter(&arp_dad_lock);
 
-	/* Sanity check */
-	if (ia == NULL) {
-		log(LOG_ERR, "%s: called with null parameter\n", __func__);
-		goto done;
-	}
-	dp = arp_dad_find(ifa);
-	if (dp == NULL) {
-		/* DAD seems to be stopping, so do nothing. */
+	ifa = dp->dad_ifa;
+	if (ifa == NULL) {
+		/* ifa is being deleted. DAD should be freed too. */
+		if (callout_pending(&dp->dad_timer_ch)) {
+			/*
+			 * The callout is scheduled again, so we cannot destroy
+			 * dp in this run.
+			 */
+			goto done;
+		}
+		need_free = true;
 		goto done;
 	}
+
+	ia = (struct in_ifaddr *)ifa;
 	if (ia->ia4_flags & IN_IFF_DUPLICATED) {
 		log(LOG_ERR, "%s: called with duplicate address %s(%s)\n",
 		    __func__, IN_PRINT(ipbuf, &ia->ia_addr.sin_addr),
@@ -1769,8 +1773,10 @@ done:
 	mutex_exit(&arp_dad_lock);
 
 	if (need_free) {
+		callout_destroy(&dp->dad_timer_ch);
 		kmem_intr_free(dp, sizeof(*dp));
-		ifafree(ifa);
+		if (ifa != NULL)
+			ifafree(ifa);
 	}
 
 	SOFTNET_KERNEL_UNLOCK_UNLESS_NET_MPSAFE();

Index: src/sys/netinet6/nd6_nbr.c
diff -u src/sys/netinet6/nd6_nbr.c:1.138.6.1 src/sys/netinet6/nd6_nbr.c:1.138.6.2
--- src/sys/netinet6/nd6_nbr.c:1.138.6.1	Tue Jan  2 10:20:34 2018
+++ src/sys/netinet6/nd6_nbr.c	Fri Jan 26 15:41:12 2018
@@ -1,4 +1,4 @@
-/*	$NetBSD: nd6_nbr.c,v 1.138.6.1 2018/01/02 10:20:34 snj Exp $	*/
+/*	$NetBSD: nd6_nbr.c,v 1.138.6.2 2018/01/26 15:41:12 martin Exp $	*/
 /*	$KAME: nd6_nbr.c,v 1.61 2001/02/10 16:06:14 jinmei Exp $	*/
 
 /*
@@ -31,7 +31,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: nd6_nbr.c,v 1.138.6.1 2018/01/02 10:20:34 snj Exp $");
+__KERNEL_RCSID(0, "$NetBSD: nd6_nbr.c,v 1.138.6.2 2018/01/26 15:41:12 martin Exp $");
 
 #ifdef _KERNEL_OPT
 #include "opt_inet.h"
@@ -79,8 +79,8 @@ __KERNEL_RCSID(0, "$NetBSD: nd6_nbr.c,v 
 struct dadq;
 static struct dadq *nd6_dad_find(struct ifaddr *);
 static void nd6_dad_starttimer(struct dadq *, int);
-static void nd6_dad_stoptimer(struct dadq *);
-static void nd6_dad_timer(struct ifaddr *);
+static void nd6_dad_destroytimer(struct dadq *);
+static void nd6_dad_timer(struct dadq *);
 static void nd6_dad_ns_output(struct dadq *, struct ifaddr *);
 static void nd6_dad_ns_input(struct ifaddr *);
 static void nd6_dad_na_input(struct ifaddr *);
@@ -1087,18 +1087,18 @@ nd6_dad_starttimer(struct dadq *dp, int 
 {
 
 	callout_reset(&dp->dad_timer_ch, ticks,
-	    (void (*)(void *))nd6_dad_timer, (void *)dp->dad_ifa);
+	    (void (*)(void *))nd6_dad_timer, dp);
 }
 
 static void
-nd6_dad_stoptimer(struct dadq *dp)
+nd6_dad_destroytimer(struct dadq *dp)
 {
 
-#ifdef NET_MPSAFE
-	callout_halt(&dp->dad_timer_ch, NULL);
-#else
-	callout_halt(&dp->dad_timer_ch, softnet_lock);
-#endif
+	TAILQ_REMOVE(&dadq, dp, dad_list);
+	/* Request the timer to destroy dp. */
+	dp->dad_ifa = NULL;
+	callout_reset(&dp->dad_timer_ch, 0,
+	    (void (*)(void *))nd6_dad_timer, dp);
 }
 
 /*
@@ -1210,20 +1210,18 @@ nd6_dad_stop(struct ifaddr *ifa)
 	}
 
 	/* Prevent the timer from running anymore. */
-	TAILQ_REMOVE(&dadq, dp, dad_list);
-	mutex_exit(&nd6_dad_lock);
+	nd6_dad_destroytimer(dp);
 
-	nd6_dad_stoptimer(dp);
+	mutex_exit(&nd6_dad_lock);
 
-	kmem_intr_free(dp, sizeof(*dp));
 	ifafree(ifa);
 }
 
 static void
-nd6_dad_timer(struct ifaddr *ifa)
+nd6_dad_timer(struct dadq *dp)
 {
-	struct in6_ifaddr *ia = (struct in6_ifaddr *)ifa;
-	struct dadq *dp;
+	struct ifaddr *ifa;
+	struct in6_ifaddr *ia;
 	int duplicate = 0;
 	char ip6buf[INET6_ADDRSTRLEN];
 	bool need_free = false;
@@ -1231,16 +1229,21 @@ nd6_dad_timer(struct ifaddr *ifa)
 	SOFTNET_KERNEL_LOCK_UNLESS_NET_MPSAFE();
 	mutex_enter(&nd6_dad_lock);
 
-	/* Sanity check */
-	if (ia == NULL) {
-		log(LOG_ERR, "nd6_dad_timer: called with null parameter\n");
-		goto done;
-	}
-	dp = nd6_dad_find(ifa);
-	if (dp == NULL) {
-		/* DAD seems to be stopping, so do nothing. */
+	ifa = dp->dad_ifa;
+	if (ifa == NULL) {
+		/* ifa is being deleted. DAD should be freed too. */
+		if (callout_pending(&dp->dad_timer_ch)) {
+			/*
+			 * The callout is scheduled again, so we cannot destroy
+			 * dp in this run.
+			 */
+			goto done;
+		}
+		need_free = true;
 		goto done;
 	}
+
+	ia = (struct in6_ifaddr *)ifa;
 	if (ia->ia6_flags & IN6_IFF_DUPLICATED) {
 		log(LOG_ERR, "nd6_dad_timer: called with duplicate address "
 			"%s(%s)\n",
@@ -1316,9 +1319,10 @@ done:
 	mutex_exit(&nd6_dad_lock);
 
 	if (need_free) {
+		callout_destroy(&dp->dad_timer_ch);
 		kmem_intr_free(dp, sizeof(*dp));
-		ifafree(ifa);
-		ifa = NULL;
+		if (ifa != NULL)
+			ifafree(ifa);
 	}
 
 	if (duplicate)
@@ -1352,9 +1356,6 @@ nd6_dad_duplicated(struct ifaddr *ifa)
 	ia->ia6_flags &= ~IN6_IFF_TENTATIVE;
 	ia->ia6_flags |= IN6_IFF_DUPLICATED;
 
-	/* We are done with DAD, with duplicated address found. (failure) */
-	nd6_dad_stoptimer(dp);
-
 	log(LOG_ERR, "%s: DAD complete for %s - duplicate found\n",
 	    if_name(ifp), IN6_PRINT(ip6buf, &ia->ia_addr.sin6_addr));
 	log(LOG_ERR, "%s: manual intervention required\n",
@@ -1397,10 +1398,11 @@ nd6_dad_duplicated(struct ifaddr *ifa)
 		}
 	}
 
-	TAILQ_REMOVE(&dadq, dp, dad_list);
+	/* We are done with DAD, with duplicated address found. (failure) */
+	nd6_dad_destroytimer(dp);
+
 	mutex_exit(&nd6_dad_lock);
 
-	kmem_intr_free(dp, sizeof(*dp));
 	ifafree(ifa);
 }
 

Reply via email to