Module Name: src
Committed By: ad
Date: Sat May 23 17:08:05 UTC 2009
Modified Files:
src/sys/kern: kern_lock.c kern_tc.c
src/sys/sys: lwp.h
Log Message:
- Add lwp_pctr(), get an LWP's preemption/ctxsw counter.
- Fix a preemption bug in CURCPU_IDLE_P() that can lead to a bogus
assertion failure on DEBUG kernels.
- Fix MP/preemption races with timecounter detachment.
To generate a diff of this commit:
cvs rdiff -u -r1.147 -r1.148 src/sys/kern/kern_lock.c
cvs rdiff -u -r1.38 -r1.39 src/sys/kern/kern_tc.c
cvs rdiff -u -r1.117 -r1.118 src/sys/sys/lwp.h
Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.
Modified files:
Index: src/sys/kern/kern_lock.c
diff -u src/sys/kern/kern_lock.c:1.147 src/sys/kern/kern_lock.c:1.148
--- src/sys/kern/kern_lock.c:1.147 Wed Nov 12 12:36:16 2008
+++ src/sys/kern/kern_lock.c Sat May 23 17:08:04 2009
@@ -1,7 +1,7 @@
-/* $NetBSD: kern_lock.c,v 1.147 2008/11/12 12:36:16 ad Exp $ */
+/* $NetBSD: kern_lock.c,v 1.148 2009/05/23 17:08:04 ad Exp $ */
/*-
- * Copyright (c) 2002, 2006, 2007, 2008 The NetBSD Foundation, Inc.
+ * Copyright (c) 2002, 2006, 2007, 2008, 2009 The NetBSD Foundation, Inc.
* All rights reserved.
*
* This code is derived from software contributed to The NetBSD Foundation
@@ -31,7 +31,7 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: kern_lock.c,v 1.147 2008/11/12 12:36:16 ad Exp $");
+__KERNEL_RCSID(0, "$NetBSD: kern_lock.c,v 1.148 2009/05/23 17:08:04 ad Exp $");
#include <sys/param.h>
#include <sys/proc.h>
@@ -42,6 +42,7 @@
#include <sys/cpu.h>
#include <sys/syslog.h>
#include <sys/atomic.h>
+#include <sys/lwp.h>
#include <machine/stdarg.h>
#include <machine/lock.h>
@@ -59,6 +60,8 @@
assert_sleepable(void)
{
const char *reason;
+ uint64_t pctr;
+ bool idle;
if (panicstr != NULL) {
return;
@@ -66,14 +69,23 @@
LOCKDEBUG_BARRIER(kernel_lock, 1);
+ /*
+ * Avoid disabling/re-enabling preemption here since this
+ * routine may be called in delicate situations.
+ */
+ do {
+ pctr = lwp_pctr();
+ idle = CURCPU_IDLE_P();
+ } while (pctr != lwp_pctr());
+
reason = NULL;
- if (CURCPU_IDLE_P() && !cold) {
+ if (idle && !cold) {
reason = "idle";
}
if (cpu_intr_p()) {
reason = "interrupt";
}
- if ((curlwp->l_pflag & LP_INTR) != 0) {
+ if (cpu_softintr_p()) {
reason = "softint";
}
Index: src/sys/kern/kern_tc.c
diff -u src/sys/kern/kern_tc.c:1.38 src/sys/kern/kern_tc.c:1.39
--- src/sys/kern/kern_tc.c:1.38 Sun Jan 11 02:45:52 2009
+++ src/sys/kern/kern_tc.c Sat May 23 17:08:04 2009
@@ -1,9 +1,12 @@
-/* $NetBSD: kern_tc.c,v 1.38 2009/01/11 02:45:52 christos Exp $ */
+/* $NetBSD: kern_tc.c,v 1.39 2009/05/23 17:08:04 ad Exp $ */
/*-
- * Copyright (c) 2008 The NetBSD Foundation, Inc.
+ * Copyright (c) 2008, 2009 The NetBSD Foundation, Inc.
* All rights reserved.
*
+ * This code is derived from software contributed to The NetBSD Foundation
+ * by Andrew Doran.
+ *
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@@ -37,7 +40,7 @@
#include <sys/cdefs.h>
/* __FBSDID("$FreeBSD: src/sys/kern/kern_tc.c,v 1.166 2005/09/19 22:16:31 andre Exp $"); */
-__KERNEL_RCSID(0, "$NetBSD: kern_tc.c,v 1.38 2009/01/11 02:45:52 christos Exp $");
+__KERNEL_RCSID(0, "$NetBSD: kern_tc.c,v 1.39 2009/05/23 17:08:04 ad Exp $");
#include "opt_ntp.h"
@@ -54,6 +57,7 @@
#include <sys/kauth.h>
#include <sys/mutex.h>
#include <sys/atomic.h>
+#include <sys/xcall.h>
/*
* A large step happens on boot. This constant detects such steps.
@@ -126,6 +130,7 @@
kmutex_t timecounter_lock;
static u_int timecounter_mods;
+static volatile int timecounter_removals = 1;
static u_int timecounter_bad;
#ifdef __FreeBSD__
@@ -309,15 +314,49 @@
binuptime(struct bintime *bt)
{
struct timehands *th;
- u_int gen;
+ lwp_t *l;
+ u_int lgen, gen;
TC_COUNT(nbinuptime);
+
+ /*
+ * Provide exclusion against tc_detach().
+ *
+ * We record the number of timecounter removals before accessing
+ * timecounter state. Note that the LWP can be using multiple
+ * "generations" at once, due to interrupts (interrupted while in
+ * this function). Hardware interrupts will borrow the interrupted
+ * LWP's l_tcgen value for this purpose, and can themselves be
+ * interrupted by higher priority interrupts. In this case we need
+ * to ensure that the oldest generation in use is recorded.
+ *
+ * splsched() is too expensive to use, so we take care to structure
+ * this code in such a way that it is not required. Likewise, we
+ * do not disable preemption.
+ *
+ * Memory barriers are also too expensive to use for such a
+ * performance critical function. The good news is that we do not
+ * need memory barriers for this type of exclusion, as the thread
+ * updating timecounter_removals will issue a broadcast cross call
+ * before inspecting our l_tcgen value (this elides memory ordering
+ * issues).
+ */
+ l = curlwp;
+ lgen = l->l_tcgen;
+ if (__predict_true(lgen == 0)) {
+ l->l_tcgen = timecounter_removals;
+ }
+ __insn_barrier();
+
do {
th = timehands;
gen = th->th_generation;
*bt = th->th_offset;
bintime_addx(bt, th->th_scale * tc_delta(th));
} while (gen == 0 || gen != th->th_generation);
+
+ __insn_barrier();
+ l->l_tcgen = lgen;
}
void
@@ -543,8 +582,11 @@
{
struct timecounter *tc;
struct timecounter **tcp = NULL;
- int rc = 0;
+ int removals;
+ uint64_t where;
+ lwp_t *l;
+ /* First, find the timecounter. */
mutex_spin_enter(&timecounter_lock);
for (tcp = &timecounters, tc = timecounters;
tc != NULL;
@@ -553,17 +595,62 @@
break;
}
if (tc == NULL) {
- rc = ESRCH;
- } else {
- *tcp = tc->tc_next;
- if (timecounter == target) {
- tc_pick();
- tc_windup();
- }
- timecounter_mods++;
+ mutex_spin_exit(&timecounter_lock);
+ return ESRCH;
+ }
+
+ /* And now, remove it. */
+ *tcp = tc->tc_next;
+ if (timecounter == target) {
+ tc_pick();
+ tc_windup();
}
+ timecounter_mods++;
+ removals = timecounter_removals++;
mutex_spin_exit(&timecounter_lock);
- return rc;
+
+ /*
+ * We now have to determine if any threads in the system are still
+ * making use of this timecounter.
+ *
+ * We issue a broadcast cross call to elide memory ordering issues,
+ * then scan all LWPs in the system looking at each's timecounter
+ * generation number. We need to see a value of zero (not actively
+ * using a timecounter) or a value greater than our removal value.
+ *
+ * We may race with threads that read `timecounter_removals' and
+ * then get preempted before updating `l_tcgen'. This is not
+ * a problem, since it means that these threads have not yet started
+ * accessing timecounter state. All we do need is one clean
+ * snapshot of the system where every thread appears not to be using
+ * old timecounter state.
+ */
+ for (;;) {
+ where = xc_broadcast(0, (xcfunc_t)nullop, NULL, NULL);
+ xc_wait(where);
+
+ mutex_enter(proc_lock);
+ LIST_FOREACH(l, &alllwp, l_list) {
+ if (l->l_tcgen == 0 || l->l_tcgen > removals) {
+ /*
+ * Not using timecounter or old timecounter
+ * state at time of our xcall or later.
+ */
+ continue;
+ }
+ break;
+ }
+ mutex_exit(proc_lock);
+
+ /*
+ * If the timecounter is still in use, wait at least 10ms
+ * before retrying.
+ */
+ if (l == NULL) {
+ return 0;
+ }
+ (void)kpause("tcdetach", false, mstohz(10), NULL);
+ }
}
/* Report the frequency of the current timecounter. */
Index: src/sys/sys/lwp.h
diff -u src/sys/sys/lwp.h:1.117 src/sys/sys/lwp.h:1.118
--- src/sys/sys/lwp.h:1.117 Wed Feb 4 21:17:39 2009
+++ src/sys/sys/lwp.h Sat May 23 17:08:05 2009
@@ -1,4 +1,4 @@
-/* $NetBSD: lwp.h,v 1.117 2009/02/04 21:17:39 ad Exp $ */
+/* $NetBSD: lwp.h,v 1.118 2009/05/23 17:08:05 ad Exp $ */
/*-
* Copyright (c) 2001, 2006, 2007, 2008, 2009 The NetBSD Foundation, Inc.
@@ -173,6 +173,8 @@
uintptr_t l_pfailaddr; /* !: for kernel preemption */
uintptr_t l_pfaillock; /* !: for kernel preemption */
_TAILQ_HEAD(,struct lockdebug,volatile) l_ld_locks;/* !: locks held by LWP */
+ int l_tcgen; /* !: for timecounter removal */
+ int l_unused2; /* !: for future use */
/* These are only used by 'options SYSCALL_TIMES' */
uint32_t l_syscall_time; /* !: time epoch for current syscall */
@@ -306,6 +308,7 @@
void lwp_free(lwp_t *, bool, bool);
void lwp_sys_init(void);
u_int lwp_unsleep(lwp_t *, bool);
+uint64_t lwp_pctr(void);
int lwp_specific_key_create(specificdata_key_t *, specificdata_dtor_t);
void lwp_specific_key_delete(specificdata_key_t);