kettenis and I have spent some time trying to track this down.

There is an obscure MP-unsafe / thread-unsafe in the W^X
line-in-the-sand mechanism, which is done using GDT CS manipulation.
It is related to sleeps in the trap function.

On PAE NX systems, the GDT CS manipulation is not neccessary, so here is
a diff to bypass all that.

The pile of KERNEL_LOCK changes mechanically remove the need for the NX
codepath to grab the lock -- they just get moved inside the non-NX fixup
function.

We will need to fix the non-NX case seperately.  Such systems are
quite rare now.  The bug probably doesn't affect single-cpu i386.


Index: i386/machdep.c
===================================================================
RCS file: /cvs/src/sys/arch/i386/i386/machdep.c,v
retrieving revision 1.639
diff -u -p -u -r1.639 machdep.c
--- i386/machdep.c      13 Sep 2020 05:57:28 -0000      1.639
+++ i386/machdep.c      23 Sep 2020 13:06:57 -0000
@@ -2955,15 +2955,21 @@ setregs(struct proc *p, struct exec_pack
        p->p_md.md_flags &= ~MDP_USEDFPU;
 #endif
 
-       /*
-        * Reset the code segment limit to I386_MAX_EXE_ADDR in the pmap;
-        * this gets copied into the GDT for GUCODE_SEL by pmap_activate().
-        * Similarly, reset the base of each of the two thread data
-        * segments to zero in the pcb; they'll get copied into the
-        * GDT for GUFS_SEL and GUGS_SEL.
-        */
-       setsegment(&pmap->pm_codeseg, 0, atop(I386_MAX_EXE_ADDR) - 1,
-           SDT_MEMERA, SEL_UPL, 1, 1);
+       if (cpu_pae) {
+               setsegment(&pmap->pm_codeseg, 0, atop(VM_MAXUSER_ADDRESS - 1),
+                   SDT_MEMERA, SEL_UPL, 1, 1);
+       } else {
+               /*
+                * For pmap.c's non-PAE/NX line-in-the-sand execution,
+                * reset the code segment limit to I386_MAX_EXE_ADDR in the 
pmap;
+                * this gets copied into the GDT for GUCODE_SEL by 
pmap_activate().
+                * Similarly, reset the base of each of the two thread data
+                * segments to zero in the pcb; they'll get copied into the
+                * GDT for GUFS_SEL and GUGS_SEL.
+                */
+               setsegment(&pmap->pm_codeseg, 0, atop(I386_MAX_EXE_ADDR - 1),
+                   SDT_MEMERA, SEL_UPL, 1, 1);
+       }
        setsegment(&pcb->pcb_threadsegs[TSEG_FS], 0,
            atop(VM_MAXUSER_ADDRESS) - 1, SDT_MEMRWA, SEL_UPL, 1, 1);
        setsegment(&pcb->pcb_threadsegs[TSEG_GS], 0,
Index: i386/pmap.c
===================================================================
RCS file: /cvs/src/sys/arch/i386/i386/pmap.c,v
retrieving revision 1.207
diff -u -p -u -r1.207 pmap.c
--- i386/pmap.c 23 Sep 2020 15:13:26 -0000      1.207
+++ i386/pmap.c 23 Sep 2020 16:15:22 -0000
@@ -582,6 +582,9 @@ pmap_exec_account(struct pmap *pm, vaddr
        if ((opte ^ npte) & PG_X)
                pmap_tlb_shootpage(pm, va);
                        
+       if (cpu_pae)
+               return;
+
        /*
         * Executability was removed on the last executable change.
         * Reset the code segment to something conservative and
@@ -614,6 +617,8 @@ pmap_exec_fixup(struct vm_map *map, stru
        vaddr_t va = 0;
        vaddr_t pm_cs, gdt_cs;
 
+       KERNEL_LOCK();
+
        vm_map_lock(map);
        RBT_FOREACH_REVERSE(ent, uvm_map_addr, &map->addr) {
                if (ent->protection & PROT_EXEC)
@@ -640,6 +645,7 @@ pmap_exec_fixup(struct vm_map *map, stru
         */
        if (va <= pm->pm_hiexec && pm_cs == pm->pm_hiexec &&
            gdt_cs == pm->pm_hiexec) {
+               KERNEL_UNLOCK();
                return (0);
        }
 
@@ -652,6 +658,7 @@ pmap_exec_fixup(struct vm_map *map, stru
         */
        setcslimit(pm, tf, pcb, va);
 
+       KERNEL_UNLOCK();
        return (1);
 }
 
@@ -1344,8 +1351,12 @@ pmap_create(void)
        pmap->pm_hiexec = 0;
        pmap->pm_flags = 0;
 
-       setsegment(&pmap->pm_codeseg, 0, atop(I386_MAX_EXE_ADDR) - 1,
-           SDT_MEMERA, SEL_UPL, 1, 1);
+       if (cpu_pae)
+               setsegment(&pmap->pm_codeseg, 0, atop(VM_MAXUSER_ADDRESS - 1),
+                   SDT_MEMERA, SEL_UPL, 1, 1);
+       else
+               setsegment(&pmap->pm_codeseg, 0, atop(I386_MAX_EXE_ADDR - 1),
+                   SDT_MEMERA, SEL_UPL, 1, 1);
 
        pmap_pinit_pd(pmap);
        return (pmap);
Index: i386/trap.c
===================================================================
RCS file: /cvs/src/sys/arch/i386/i386/trap.c,v
retrieving revision 1.145
diff -u -p -u -r1.145 trap.c
--- i386/trap.c 15 Sep 2020 09:30:31 -0000      1.145
+++ i386/trap.c 23 Sep 2020 16:14:15 -0000
@@ -258,18 +258,14 @@ trap(struct trapframe *frame)
                return;
 
        case T_PROTFLT|T_USER:          /* protection fault */
-               KERNEL_LOCK();
-
                /* If pmap_exec_fixup does something, let's retry the trap. */
-               if (pmap_exec_fixup(&p->p_vmspace->vm_map, frame,
-                   &p->p_addr->u_pcb)) {
-                       KERNEL_UNLOCK();
+               if (cpu_pae == 0 &&
+                   pmap_exec_fixup(&p->p_vmspace->vm_map, frame,
+                   &p->p_addr->u_pcb))
                        goto out;
-               }
 
                sv.sival_int = frame->tf_eip;
                trapsignal(p, SIGSEGV, type &~ T_USER, SEGV_MAPERR, sv);
-               KERNEL_UNLOCK();
                goto out;
 
        case T_TSSFLT|T_USER:

Reply via email to