Thanks a lot for figuring that out. This is awesome!
On 24/07/24(Wed) 16:19, Claudio Jeker wrote:
> On Fri, Jun 21, 2024 at 01:24:27PM +0200, Martin Pieuchot wrote:
> > So I'm trying to see where the remaining sched_yield() are coming from
> > ld(1):
> >
> > $ cd /sys/arch/arm64/compile/GENERIC.MP
> > $ LD="egdb --args ld" make -j32
> >
> > Then I add a breakpoint on sched_yield & hit run.
> >
> > As soon as the first thread is stopped, I can see the trace as usual,
> > however the process is now in a "stopped" state, impossible to kill or
> > continue. Even ddb's kill command doesn't help.
>
> So this is caused by multiple issues in ptrace, the single thread API
> and the sleep machinery. The fundamental issue is that the ps_singlecnt
> gets off and egdb hangs in single_thread_wait().
>
> The issue at hand is that single_thread_check() for suspends while in deep
> is just not right. It will decrement ps_singlecnt but if the proc was
> already in SSLEEP then that decrement was already done in
> single_thread_set().
I don't fully understand. Are you saying this is a race because two
threads are in the middle of sleep_setup()? One is flagged as SSLEEP
but it currently running and that is why the decrement in single_thread_set()
is wrong an causes an off-by-one?
> Here is a possible solution to this problem by adding yet another proc
> flag to track the time between sleep_setup and single_thread_check (via
> sleep_signal_check). Using this information single_thread_set() can be
> adjusted to no decrement ps_singlecnt in that case.
I wonder if the road to simplify sleep_setup() doesn't start with setting
`p_stat' to something other than SSLEEP when the thread is placed on the
sleeping queue. It seems to me that most of the complexity of the various
`p_stat' machinery are due to the fact that a thread in SSLEEP might not be
sleeping.
That's more or less what the existing P_WSLEEP flag is for. Can't we
use that instead of adding P_SINGLESLEEP?
> On top of this I had to fix ptrace to better respect gdb's request to
> progress a single thread or all threads. Without that I just constantly
> hit egdb asserts because of unexpected progress of unnrelated threads.
This is another can of worms worth a complete different thread...
Thanks for digging it!
That means single_thread_set() can now be called twice in a row with all
sibling already parked right?
I doubt the single_thread API is the right tool for PS_TRACED. Stopped
& traced threads are not parked. The tracing API already know which
thread(s) it wants to resume. Is it possible with the single_thread API
to resume a single thread which is not the one that is currently ps_single?
Shouldn't we let the tracing process decided if it unstop on or all
thread directly?
However if we need to communicate more information between the tracing &
traced processes, then I'd suggest extending `ps_xsig' rather than add
a P_TRACESINGLE flag.
> With this I can add the sched_yield break point and continue through the
> full run. Does this break other stuff?
> Most probably :) there is a lot of bits that are not quite right when it
> comes to signals and multiple threads.
Comments below.
> Index: kern/kern_sig.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/kern_sig.c,v
> diff -u -p -r1.333 kern_sig.c
> --- kern/kern_sig.c 22 Jul 2024 09:43:47 -0000 1.333
> +++ kern/kern_sig.c 24 Jul 2024 14:14:06 -0000
> @@ -851,7 +851,9 @@ trapsignal(struct proc *p, int signum, u
> SCHED_UNLOCK();
>
> signum = pr->ps_xsig;
> - single_thread_clear(p, 0);
> + if ((p->p_flag & P_TRACESINGLE) == 0)
> + single_thread_clear(p, 0);
> + atomic_clearbits_int(&p->p_flag, P_TRACESINGLE);
>
> /*
> * If we are no longer being traced, or the parent
> @@ -1370,7 +1372,9 @@ cursig(struct proc *p, struct sigctx *sc
> atomic_clearbits_int(&pr->ps_siglist, mask);
> }
>
> - single_thread_clear(p, 0);
> + if ((p->p_flag & P_TRACESINGLE) == 0)
> + single_thread_clear(p, 0);
> + atomic_clearbits_int(&p->p_flag, P_TRACESINGLE);
>
> /*
> * If we are no longer being traced, or the parent
> @@ -2077,6 +2081,9 @@ single_thread_check_locked(struct proc *
>
> MUTEX_ASSERT_LOCKED(&pr->ps_mtx);
>
> + if (deep)
> + atomic_clearbits_int(&p->p_flag, P_SINGLESLEEP);
I don't understand why this is needed and how that fits in the
description of the problem.
> if (pr->ps_single == NULL || pr->ps_single == p)
> return (0);
>
> @@ -2177,16 +2184,23 @@ single_thread_set(struct proc *p, int fl
> if (mode == SINGLE_EXIT) {
> unsleep(q);
> setrunnable(q);
> - } else
> + } else {
> --pr->ps_singlecnt;
> + }
> break;
> case SSLEEP:
> /* if it's not interruptible, then just have to wait */
> if (q->p_flag & P_SINTR) {
> /* merely need to suspend? just stop it */
> if (mode == SINGLE_SUSPEND) {
> + /*
> + * if between sleep_setup and
> + * sleep_signal_check don't count us
> + * out.
> + */
> + if ((q->p_flag & P_SINGLESLEEP) == 0)
> + --pr->ps_singlecnt;
> q->p_stat = SSTOP;
> - --pr->ps_singlecnt;
> break;
> }
> /* need to unwind or exit, so wake it */
> @@ -2263,6 +2277,8 @@ single_thread_clear(struct proc *p, int
> */
> SCHED_LOCK();
> if (q->p_stat == SSTOP && (q->p_flag & flag) == 0) {
> + if (flag == 0)
> + atomic_clearbits_int(&q->p_flag, P_SUSPSIG);
OMG that's scary. I dislike the fact that the single_thread API now
grows signal logic. I don't understand why this is needed. If a thread
has been stopped by signal, we should clear that somewhere else.
> if (q->p_wchan == NULL)
> setrunnable(q);
> else {
> Index: kern/kern_synch.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/kern_synch.c,v
> diff -u -p -r1.206 kern_synch.c
> --- kern/kern_synch.c 23 Jul 2024 08:38:02 -0000 1.206
> +++ kern/kern_synch.c 24 Jul 2024 14:14:06 -0000
> @@ -356,7 +356,7 @@ sleep_setup(const volatile void *ident,
> atomic_setbits_int(&p->p_flag, P_WSLEEP);
> TAILQ_INSERT_TAIL(&slpque[LOOKUP(ident)], p, p_runq);
> if (prio & PCATCH)
> - atomic_setbits_int(&p->p_flag, P_SINTR);
> + atomic_setbits_int(&p->p_flag, P_SINTR | P_SINGLESLEEP);
> p->p_stat = SSLEEP;
>
> SCHED_UNLOCK();
> @@ -399,15 +399,18 @@ sleep_finish(int timo, int do_sleep)
> */
> if (p->p_wchan == NULL)
> do_sleep = 0;
> + KASSERT((p->p_flag & P_SINGLESLEEP) == 0);
> atomic_clearbits_int(&p->p_flag, P_WSLEEP);
>
> + /* If requested to stop always force a stop even if do_sleep == 0 */
> + if (p->p_stat == SSTOP)
> + do_sleep = 1;
This is also scary. The problem with the current scheme is that we don't
know who changed `p_stat' and if we already did our context switch or
not.
> if (do_sleep) {
> KASSERT(p->p_stat == SSLEEP || p->p_stat == SSTOP);
> p->p_ru.ru_nvcsw++;
> mi_switch();
> } else {
> - KASSERT(p->p_stat == SONPROC || p->p_stat == SSLEEP ||
> - p->p_stat == SSTOP);
> + KASSERT(p->p_stat == SONPROC || p->p_stat == SSLEEP);
> unsleep(p);
> p->p_stat = SONPROC;
> }
> @@ -420,10 +423,7 @@ sleep_finish(int timo, int do_sleep)
> p->p_cpu->ci_schedstate.spc_curpriority = p->p_usrpri;
> SCHED_UNLOCK();
>
> - /*
> - * Even though this belongs to the signal handling part of sleep,
> - * we need to clear it before the ktrace.
> - */
> + /* Must clear this before hitting another sleep point. */
> atomic_clearbits_int(&p->p_flag, P_SINTR);
Nothing related but I believe the P_SINTR comment above cursig()
regarding locking is no longer true, right?
>
> if (timo != 0) {
> Index: kern/sys_process.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/sys_process.c,v
> diff -u -p -r1.98 sys_process.c
> --- kern/sys_process.c 3 Jun 2024 12:48:25 -0000 1.98
> +++ kern/sys_process.c 24 Jul 2024 14:14:06 -0000
> @@ -441,6 +441,8 @@ ptrace_ctrl(struct proc *p, int req, pid
>
> if (pid < THREAD_PID_OFFSET && tr->ps_single)
> t = tr->ps_single;
> + else
> + atomic_setbits_int(&t->p_flag, P_TRACESINGLE);
>
> /* If the address parameter is not (int *)1, set the pc. */
> if ((int *)addr != (int *)1)
> Index: sys/proc.h
> ===================================================================
> RCS file: /cvs/src/sys/sys/proc.h,v
> diff -u -p -r1.365 proc.h
> --- sys/proc.h 22 Jul 2024 09:43:47 -0000 1.365
> +++ sys/proc.h 24 Jul 2024 14:14:06 -0000
> @@ -430,9 +430,11 @@ struct proc {
> #define P_SIGSUSPEND 0x00000008 /* Need to restore
> before-suspend mask*/
> #define P_CANTSLEEP 0x00000010 /* insomniac thread */
> #define P_WSLEEP 0x00000020 /* Working on going to sleep. */
> +#define P_SINGLESLEEP 0x00000040 /* Like P_WSLEEP for single
> thread api */
> #define P_SINTR 0x00000080 /* Sleep is interruptible. */
> #define P_SYSTEM 0x00000200 /* No sigs, stats or swapping.
> */
> #define P_TIMEOUT 0x00000400 /* Timing out during sleep. */
> +#define P_TRACESINGLE 0x00001000 /* keep single threaded
> ptraced. */
> #define P_WEXIT 0x00002000 /* Working on exiting. */
> #define P_OWEUPC 0x00008000 /* Owe proc an addupc() at next
> ast. */
> #define P_SUSPSINGLE 0x00080000 /* Need to stop for single
> threading. */
> @@ -443,9 +445,9 @@ struct proc {
>
> #define P_BITS \
> ("\20" "\01INKTR" "\02PROFPEND" "\03ALRMPEND" "\04SIGSUSPEND" \
> - "\05CANTSLEEP" "\06WSLEEP" "\010SINTR" "\012SYSTEM" "\013TIMEOUT" \
> - "\016WEXIT" "\020OWEUPC" "\024SUSPSINGLE" "\030CONTINUED" "\033THREAD" \
> - "\034SUSPSIG" "\037CPUPEG")
> + "\05CANTSLEEP" "\06WSLEEP" "\07SINGLESLEEP" "\010SINTR" "\012SYSTEM" \
> + "\013TIMEOUT" "\015TRACESINGLE" "\016WEXIT" "\020OWEUPC"
> "\024SUSPSINGLE" \
> + "\030CONTINUED" "\033THREAD" "\034SUSPSIG" "\037CPUPEG")
>
> #define THREAD_PID_OFFSET 100000
>