On Sun, Oct 31, 2010 at 17:10 -0700, Philip Guenther wrote:
> Okay, enough cleanup; time to simplify the TSS handling by moving from
> TSS-per-process to TSS-per-CPU. This eliminates the need to ever change
> GDT entries after startup and the associated ~4k process limit.
>
> Where do we place that TSS? Before, it was part of the PCB; here I've put
> the primary CPU's TSS between the IDT and the primary CPU's GDT. For
> secondary CPU's I place it right after the CPU's GDT. The exact location
> isn't important, they just need to be in unpageable, unmanaged memory like
> the GDT, so putting it next to the GDT is convenient.
>
> With the TSS being per-CPU, we no longer need to change the 'task busy'
> flag or load the task register when doing a context switch. Instead, we
> just poke into the TSS the pointer to the process's kernel stack.
>
> The TSS also contains the pointer to the stack used for double faults.
> Previously, this was placed one page above each process's PCB. Rather
> than change that on each context switch, I've just left it set to one page
> above each CPU's idle process's PCB.
i failed to find the code that does this. could you please point me to it.
is it supposed to be referenced by the tss_ist[1]?
> I think we can reduce the size of
> normal processes' USPACE by a page, or maybe even two, what with the PCB
> shrinking, but that can wait.
>
> To increase speed and paranoia slightly, I've inlined pmap_activate() and
> pmap_deactivate() into the asm cpu_switchto routine. That let me add a
> 'free' check for the new pmap already being marked as active on the CPU;
> that check earlier caught some sloppy handling of that bitmap.
>
> Garbage collect the hasn't-been-used-in-years GDT update IPI.
>
>
> As before, I would like to hear tests for both real AMD and Intel CPUs;
> thank you to everyone that sent results before!
>
>
> Philip Guenther
>
>
you might also want to remove disabled amd64_{get,set}_ioperm code.
some comments inline, otherwise looks good to me. i'm running it on
my core i5.
> Index: amd64/locore.S
> ===================================================================
> RCS file: /cvs/src/sys/arch/amd64/amd64/locore.S,v
> retrieving revision 1.42
> diff -u -p -r1.42 locore.S
> --- amd64/locore.S 26 Oct 2010 05:49:10 -0000 1.42
> +++ amd64/locore.S 31 Oct 2010 23:04:12 -0000
> @@ -727,6 +727,7 @@ ENTRY(cpu_switchto)
> pushq %r15
>
> movq %rdi, %r13
> + movq %rdi, %r15
why do you need to load 'r15' with 'rdi'?
it looks like a dead store to me
> movq %rsi, %r12
>
> #ifdef DIAGNOSTIC
> @@ -741,6 +742,8 @@ ENTRY(cpu_switchto)
> movb $SONPROC,P_STAT(%r12) # p->p_stat = SONPROC
> SET_CURPROC(%r12,%rcx)
>
> + movl CPUVAR(CPUID),%edi
> +
> /* If old proc exited, don't bother. */
> testq %r13,%r13
> jz switch_exited
> @@ -752,13 +755,16 @@ ENTRY(cpu_switchto)
> * %rax, %rcx - scratch
> * %r13 - old proc, then old pcb
> * %r12 - new proc
> + * %edi - cpuid
> */
>
> - movq %r13,%rdi
> - call pmap_deactivate
> -
> movq P_ADDR(%r13),%r13
>
> + /* clear the old pmap's bit for the cpu */
> + movq PCB_PMAP(%r13),%rcx
> + lock
> + btcl %edi,PM_CPUS(%rcx)
> +
'btr' looks a bit more explicit to me. but that's just nitpicking.
> /* Save stack pointers. */
> movq %rsp,PCB_RSP(%r13)
> movq %rbp,PCB_RBP(%r13)
> @@ -781,30 +787,29 @@ switch_exited:
> movq PCB_RSP(%r13),%rsp
> movq PCB_RBP(%r13),%rbp
>
> -#if 0
> + movq CPUVAR(TSS),%rcx
> + movq PCB_KSTACK(%r13),%rdx
> + movq %rdx,TSS_RSP0(%rcx)
> +
> + movq PCB_CR3(%r13),%rax
> + movq %rax,%cr3
> +
> /* Don't bother with the rest if switching to a system process. */
> testl $P_SYSTEM,P_FLAG(%r12)
> jnz switch_restored
> -#endif
>
> - /* Load TSS info. */
> -#ifdef MULTIPROCESSOR
> - movq CPUVAR(GDT),%rax
> -#else
> - movq _C_LABEL(gdtstore)(%rip),%rax
> + /* set the new pmap's bit for the cpu */
> + movl CPUVAR(CPUID),%edi
> + movq PCB_PMAP(%r13),%rcx
> + movl PM_CPUS(%rcx),%eax
> + movq %rax,%r14
the previous line looks like a dead store to me.
> + lock
> + btsl %edi,PM_CPUS(%rcx)
> +#ifdef DIAGNOSTIC
> + jc _C_LABEL(switch_pmcpu_set)
that's a nice 'free' check, indeed.
> #endif
> - movl P_MD_TSS_SEL(%r12),%edx
> -
> - /* Switch TSS. Reset "task busy" flag before */
> - andl $~0x0200,4(%rax,%rdx, 1)
> - ltr %dx
>
> - movq %r12,%rdi
> - call _C_LABEL(pmap_activate)
> -
> -#if 0
> switch_restored:
> -#endif
> /* Restore cr0 (including FPU state). */
> movl PCB_CR0(%r13),%ecx
> #ifdef MULTIPROCESSOR
> Index: amd64/machdep.c
> ===================================================================
> RCS file: /cvs/src/sys/arch/amd64/amd64/machdep.c,v
> retrieving revision 1.126
> diff -u -p -r1.126 machdep.c
> --- amd64/machdep.c 26 Oct 2010 05:49:10 -0000 1.126
> +++ amd64/machdep.c 31 Oct 2010 23:04:13 -0000
> @@ -1489,9 +1482,14 @@ init_x86_64(paddr_t first_avail)
> set_mem_segment(GDT_ADDR_MEM(gdtstore, GUCODE_SEL), 0,
> atop(VM_MAXUSER_ADDRESS) - 1, SDT_MEMERA, SEL_UPL, 1, 0, 1);
>
> + set_sys_segment(GDT_ADDR_SYS(gdtstore, GPROC0_SEL),
> + cpu_info_primary.ci_tss, sizeof (struct x86_64_tss)-1,
> + SDT_SYS386TSS, SEL_KPL, 0);
> +
> /* exceptions */
> for (x = 0; x < 32; x++) {
> ist = (x == 8) ? 1 : 0;
> + // ist = (x == 8) ? 1 : (x == 2) ? 2 : 0;
why do you need this comment? or is it a reminder to have
a separate stack for the NMI in the future?
> setgate(&idt[x], IDTVEC(exceptions)[x], ist, SDT_SYS386IGT,
> (x == 3 || x == 4) ? SEL_UPL : SEL_KPL,
> GSEL(GCODE_SEL, SEL_KPL));