On Tue, Jun 20, 2023 at 08:35:11AM -0600, Theo de Raadt wrote:
> Claudio Jeker <[email protected]> wrote:
>
> > On Mon, Jun 19, 2023 at 06:41:14PM -0500, Scott Cheloha wrote:
> > > > On Jun 19, 2023, at 18:07, Theo de Raadt <[email protected]> wrote:
> > > >
> > > > ??? Make sure to STOP all kernel profiling before attempting to
> > > > suspend or hibernate your machine. Otherwise I expect it
> > > > will hang.
> > > >
> > > > It is completely acceptable if it produces wrong results, but it must
> > > > not hang the system.
> > >
> > > The hang is present in -current, with or
> > > without this patch.
> > >
> > > I am working to figure it out.
> >
> > I don't think the suspend or hibernate code has any code to disable
> > kernel profiling. This is bad since these code paths are extremly
> > sensitive and try not to do side-effects.
> >
> > So in the suspend/hibernate code path we should disable profiling early
> > on. It makes no sense to try to run gprof collection in those code paths.
>
> Yes, that's right.
>
> It will be somewhere in kern/subr_suspend.c
>
> Be careful that the "stop profiling" and "restart profiling" are at the
> correct places. The sleep_state() function has a bunch of unrolling
> goto's which are not 100% reflexive, so be careful.
Judging from the blinking light on my laptop, the crash is in the
resume path.
This patch appears to fix the problem on amd64:
- Add a new guard variable, gmon_suspended,
- Toggle gmon_suspended off at the top of sleep_state(), and
- Toggle gmon_suspended back on at the bottom of sleep_state().
With this applied, I can suspend/resume and hibernate/unhibernate a an
amd64/GENERIC.MP kernel w/ GPROF without issue, even if kgmon(8) has
enabled kernel profiling and increased the effective statclock
frequency.
Index: sys/kern/subr_prof.c
===================================================================
RCS file: /cvs/src/sys/kern/subr_prof.c,v
retrieving revision 1.35
diff -u -p -r1.35 subr_prof.c
--- sys/kern/subr_prof.c 2 Jun 2023 17:44:29 -0000 1.35
+++ sys/kern/subr_prof.c 23 Jun 2023 21:25:51 -0000
@@ -34,6 +34,7 @@
#include <sys/param.h>
#include <sys/systm.h>
+#include <sys/atomic.h>
#include <sys/pledge.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>
@@ -59,6 +60,31 @@ int gmoninit = 0;
u_int gmon_cpu_count; /* [K] number of CPUs with profiling enabled */
extern char etext[];
+
+/*
+ * The suspend and hibernate paths need to be free
+ * of side effects. Keep all CPUs out of _mcount()
+ * while a suspend/resume is ongoing.
+ */
+#ifdef SUSPEND
+volatile int gmon_suspended;
+
+void
+prof_resume(void)
+{
+ KASSERT(CPU_IS_PRIMARY(curcpu()));
+ gmon_suspended = 0;
+ membar_producer();
+}
+
+void
+prof_suspend(void)
+{
+ KASSERT(CPU_IS_PRIMARY(curcpu()));
+ gmon_suspended = 1;
+ membar_producer();
+}
+#endif /* SUSPEND */
void
prof_init(void)
Index: sys/kern/subr_suspend.c
===================================================================
RCS file: /cvs/src/sys/kern/subr_suspend.c,v
retrieving revision 1.14
diff -u -p -r1.14 subr_suspend.c
--- sys/kern/subr_suspend.c 10 Nov 2022 10:37:40 -0000 1.14
+++ sys/kern/subr_suspend.c 23 Jun 2023 21:25:51 -0000
@@ -20,6 +20,7 @@
#include <sys/systm.h>
#include <sys/buf.h>
#include <sys/clockintr.h>
+#include <sys/gmon.h>
#include <sys/reboot.h>
#include <sys/sensors.h>
#include <sys/sysctl.h>
@@ -63,6 +64,11 @@ top:
if (sleep_showstate(v, sleepmode))
return EOPNOTSUPP;
+
+#if defined(GPROF) || defined(DDBPROF)
+ prof_suspend();
+#endif
+
#if NWSDISPLAY > 0
wsdisplay_suspend();
#endif
@@ -193,6 +199,9 @@ fail_hiballoc:
start_periodic_resettodr();
#if NWSDISPLAY > 0
wsdisplay_resume();
+#endif
+#if defined(GPROF) || defined(DDBPROF)
+ prof_resume();
#endif
sys_sync(curproc, NULL, NULL);
if (cpu_setperf != NULL)
Index: sys/sys/gmon.h
===================================================================
RCS file: /cvs/src/sys/sys/gmon.h,v
retrieving revision 1.9
diff -u -p -r1.9 gmon.h
--- sys/sys/gmon.h 11 Jan 2022 23:59:55 -0000 1.9
+++ sys/sys/gmon.h 23 Jun 2023 21:25:51 -0000
@@ -158,6 +158,13 @@ struct gmonparam {
#ifdef _KERNEL
extern int gmoninit; /* Is the kernel ready for being profiled? */
+#ifdef SUSPEND
+extern volatile int gmon_suspended; /* Ongoing suspend/resume? */
+
+void prof_resume(void);
+void prof_suspend(void);
+#endif
+
#else /* !_KERNEL */
#include <sys/cdefs.h>
Index: sys/lib/libkern/mcount.c
===================================================================
RCS file: /cvs/src/sys/lib/libkern/mcount.c,v
retrieving revision 1.14
diff -u -p -r1.14 mcount.c
--- sys/lib/libkern/mcount.c 11 Jan 2022 09:21:34 -0000 1.14
+++ sys/lib/libkern/mcount.c 23 Jun 2023 21:25:51 -0000
@@ -64,6 +64,10 @@ _MCOUNT_DECL(u_long frompc, u_long selfp
if (gmoninit == 0)
return;
+ /* Don't profile execution during suspend/resume. */
+ if (gmon_suspended)
+ return;
+
if ((p = curcpu()->ci_gmon) == NULL)
return;
#else
Index: lib/libc/gmon/mcount.c
===================================================================
RCS file: /cvs/src/lib/libc/gmon/mcount.c,v
retrieving revision 1.16
diff -u -p -r1.16 mcount.c
--- lib/libc/gmon/mcount.c 11 Jan 2022 09:21:34 -0000 1.16
+++ lib/libc/gmon/mcount.c 23 Jun 2023 21:25:51 -0000
@@ -62,6 +62,10 @@ _MCOUNT_DECL(u_long frompc, u_long selfp
if (gmoninit == 0)
return;
+ /* Don't profile execution during suspend/resume. */
+ if (prof_is_suspended())
+ return;
+
if ((p = curcpu()->ci_gmon) == NULL)
return;
#else