I've noticed that, in the current design, cpu_lwp_free() must not sleep:

src/src/sys/kern/kern_lwp.c:1138

        /*
         * We can no longer block.  At this point, lwp_free() may already
         * be gunning for us.  On a multi-CPU system, we may be off p_lwps.
         *
         * Free MD LWP resources.
         */
        cpu_lwp_free(l, 0);


src/src/sys/kern/kern_exit.c:587

        /* Verify that we hold no locks other than the kernel lock. */
        LOCKDEBUG_BARRIER(&kernel_lock, 0);
        /*
         * NOTE: WE ARE NO LONGER ALLOWED TO SLEEP!
         */
        /*
         * Give machine-dependent code a chance to free any MD LWP
         * resources.  This must be done before uvm_lwp_exit(), in
         * case these resources are in the PCB.
         */
        cpu_lwp_free(l, 1);
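For context (my summary, not quoted from the sources above): per mutex(9), a
kmutex initialized with MUTEX_DEFAULT at IPL_NONE is adaptive, and
mutex_enter() on an adaptive mutex may put the caller to sleep when the lock
is contended. A minimal illustration (example_mtx is hypothetical, not a lock
from the tree):

#include <sys/mutex.h>

static kmutex_t example_mtx;    /* hypothetical lock, for illustration */

static void
example_init(void)
{

        /* MUTEX_DEFAULT at IPL_NONE yields an adaptive mutex. */
        mutex_init(&example_mtx, MUTEX_DEFAULT, IPL_NONE);
}

static void
example_use(void)
{

        mutex_enter(&example_mtx);      /* may sleep under contention */
        mutex_exit(&example_mtx);
}

This is exactly the class of lock the ports below take on the no-sleep path.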


The following ports take sleepable locks inside cpu_lwp_free():

1. HPPA

sys/arch/hppa/hppa/vm_machdep.c:

void
cpu_lwp_free(struct lwp *l, int proc)
{
        struct pcb *pcb = lwp_getpcb(l);

        /*
         * If this thread was using the FPU, disable the FPU and record
         * that it's unused.
         */

        hppa_fpu_flush(l);
        pool_put(&hppa_fppl, pcb->pcb_fpregs);
}

pool_put() acquires a mutex internally (src/src/sys/kern/subr_pool.c):

void
pool_put(struct pool *pp, void *v)
{
        struct pool_pagelist pq;
        LIST_INIT(&pq);
        mutex_enter(&pp->pr_lock);
        pool_do_put(pp, v, &pq);
        mutex_exit(&pp->pr_lock);
        pr_pagelist_free(pp, &pq);
}
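One possible shape of the fix, sketched here only (untested; it assumes
hppa's cpu_lwp_free2() is free to absorb the call and that nothing needs
pcb_fpregs between the two hooks):

void
cpu_lwp_free(struct lwp *l, int proc)
{

        /* This path must not sleep: keep only the FPU flush. */
        hppa_fpu_flush(l);
}

void
cpu_lwp_free2(struct lwp *l)
{
        struct pcb *pcb = lwp_getpcb(l);

        /*
         * Deferred from cpu_lwp_free(): pool_put() may sleep on the
         * pool's pr_lock, which should be harmless here.
         */
        pool_put(&hppa_fppl, pcb->pcb_fpregs);
}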

2. SPARC

src/sys/arch/sparc/sparc/vm_machdep.c:

/*
 * Cleanup FPU state.
 */
void
cpu_lwp_free(struct lwp *l, int proc)
{
        struct fpstate *fs;

        if ((fs = l->l_md.md_fpstate) != NULL) {
                struct cpu_info *cpi;
                int s;

                FPU_LOCK(s);
                if ((cpi = l->l_md.md_fpu) != NULL) {
                        if (cpi->fplwp != l)
                                panic("FPU(%d): fplwp %p",
                                        cpi->ci_cpuid, cpi->fplwp);
                        if (l == cpuinfo.fplwp)
                                savefpstate(fs);
#if defined(MULTIPROCESSOR)
                        else
                                XCALL1(ipi_savefpstate, fs, 1 << cpi->ci_cpuid);
#endif
                        cpi->fplwp = NULL;
                }
                l->l_md.md_fpu = NULL;
                FPU_UNLOCK(s);
        }
}

FPU_LOCK() and FPU_UNLOCK() wrap a regular kmutex
src/sys/arch/sparc/include/proc.h:

/*
 * FPU context switch lock
 * Prevent interrupts that grab the kernel lock
 * XXX mrg: remove (s) argument
 */
extern kmutex_t fpu_mtx;

#define FPU_LOCK(s)             do {    \
        (void)&(s);                     \
        mutex_enter(&fpu_mtx);          \
} while (/* CONSTCOND */ 0)

#define FPU_UNLOCK(s)           do {    \
        mutex_exit(&fpu_mtx);           \
} while (/* CONSTCOND */ 0)
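The sparc case could be handled the same way, e.g. by moving the whole
FPU_LOCK'ed block (again just a sketch; I have not verified that
md_fpstate/md_fpu are still valid, nor that savefpstate()/the IPI still make
sense, by the time cpu_lwp_free2() runs):

void
cpu_lwp_free(struct lwp *l, int proc)
{

        /* Nothing left to do on the no-sleep path. */
}

void
cpu_lwp_free2(struct lwp *l)
{
        struct fpstate *fs;

        /* Deferred FPU cleanup; FPU_LOCK() may sleep here. */
        if ((fs = l->l_md.md_fpstate) != NULL) {
                struct cpu_info *cpi;
                int s;

                FPU_LOCK(s);
                if ((cpi = l->l_md.md_fpu) != NULL) {
                        if (cpi->fplwp != l)
                                panic("FPU(%d): fplwp %p",
                                        cpi->ci_cpuid, cpi->fplwp);
                        if (l == cpuinfo.fplwp)
                                savefpstate(fs);
#if defined(MULTIPROCESSOR)
                        else
                                XCALL1(ipi_savefpstate, fs, 1 << cpi->ci_cpuid);
#endif
                        cpi->fplwp = NULL;
                }
                l->l_md.md_fpu = NULL;
                FPU_UNLOCK(s);
        }
}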

My understanding is that these calls should be moved to cpu_lwp_free2(),
which (if I read kern_lwp.c right) runs later from lwp_free(), where sleeping
is allowed; the sketches above show roughly what I have in mind.

To confirm the problem, I replicated the hppa approach on amd64 with 8 CPUs
and was able to trigger a crash during a distribution build: an LWP went to
sleep on the lock and, once woken up, hit LSZOMB kernel assertions.
