Module Name: src Committed By: chs Date: Wed Jul 7 01:14:53 UTC 2010
Modified Files: src/sys/arch/amd64/amd64: cpufunc.S gdt.c genassym.cf locore.S machdep.c netbsd32_machdep.c vector.S src/sys/arch/amd64/include: frameasm.h gdt.h pcb.h segments.h src/sys/arch/x86/include: cpufunc.h sysarch.h src/sys/arch/x86/x86: pmap.c Log Message: add the guts of TLS support on amd64. based on joerg's patch, reworked by me to support 32-bit processes as well. we now keep %fs and %gs loaded with the user values while in the kernel, which means we don't need to reload them when returning to user mode. To generate a diff of this commit: cvs rdiff -u -r1.16 -r1.17 src/sys/arch/amd64/amd64/cpufunc.S cvs rdiff -u -r1.23 -r1.24 src/sys/arch/amd64/amd64/gdt.c cvs rdiff -u -r1.44 -r1.45 src/sys/arch/amd64/amd64/genassym.cf cvs rdiff -u -r1.57 -r1.58 src/sys/arch/amd64/amd64/locore.S cvs rdiff -u -r1.145 -r1.146 src/sys/arch/amd64/amd64/machdep.c cvs rdiff -u -r1.63 -r1.64 src/sys/arch/amd64/amd64/netbsd32_machdep.c cvs rdiff -u -r1.33 -r1.34 src/sys/arch/amd64/amd64/vector.S cvs rdiff -u -r1.13 -r1.14 src/sys/arch/amd64/include/frameasm.h cvs rdiff -u -r1.6 -r1.7 src/sys/arch/amd64/include/gdt.h cvs rdiff -u -r1.16 -r1.17 src/sys/arch/amd64/include/pcb.h cvs rdiff -u -r1.19 -r1.20 src/sys/arch/amd64/include/segments.h cvs rdiff -u -r1.11 -r1.12 src/sys/arch/x86/include/cpufunc.h cvs rdiff -u -r1.8 -r1.9 src/sys/arch/x86/include/sysarch.h cvs rdiff -u -r1.110 -r1.111 src/sys/arch/x86/x86/pmap.c Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.
Modified files: Index: src/sys/arch/amd64/amd64/cpufunc.S diff -u src/sys/arch/amd64/amd64/cpufunc.S:1.16 src/sys/arch/amd64/amd64/cpufunc.S:1.17 --- src/sys/arch/amd64/amd64/cpufunc.S:1.16 Thu Oct 1 09:13:54 2009 +++ src/sys/arch/amd64/amd64/cpufunc.S Wed Jul 7 01:14:52 2010 @@ -1,4 +1,4 @@ -/* $NetBSD: cpufunc.S,v 1.16 2009/10/01 09:13:54 skrll Exp $ */ +/* $NetBSD: cpufunc.S,v 1.17 2010/07/07 01:14:52 chs Exp $ */ /*- * Copyright (c) 1998, 2007, 2008 The NetBSD Foundation, Inc. @@ -34,6 +34,7 @@ */ #include <machine/asm.h> +#include <machine/frameasm.h> #include <machine/specialreg.h> #include <machine/segments.h> @@ -513,3 +514,15 @@ rep outsl ret + +ENTRY(setfs) + movw %di, %fs + ret + +ENTRY(setusergs) + CLI(ax) + swapgs + movw %di, %gs + swapgs + STI(ax) + ret Index: src/sys/arch/amd64/amd64/gdt.c diff -u src/sys/arch/amd64/amd64/gdt.c:1.23 src/sys/arch/amd64/amd64/gdt.c:1.24 --- src/sys/arch/amd64/amd64/gdt.c:1.23 Sat Nov 21 03:11:01 2009 +++ src/sys/arch/amd64/amd64/gdt.c Wed Jul 7 01:14:52 2010 @@ -1,4 +1,4 @@ -/* $NetBSD: gdt.c,v 1.23 2009/11/21 03:11:01 rmind Exp $ */ +/* $NetBSD: gdt.c,v 1.24 2010/07/07 01:14:52 chs Exp $ */ /*- * Copyright (c) 1996, 1997, 2009 The NetBSD Foundation, Inc. @@ -37,10 +37,11 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: gdt.c,v 1.23 2009/11/21 03:11:01 rmind Exp $"); +__KERNEL_RCSID(0, "$NetBSD: gdt.c,v 1.24 2010/07/07 01:14:52 chs Exp $"); #include "opt_multiprocessor.h" #include "opt_xen.h" +#include "opt_user_ldt.h" #include <sys/param.h> #include <sys/systm.h> @@ -56,7 +57,6 @@ #include <xen/hypervisor.h> #endif - int gdt_size; /* size of GDT in bytes */ int gdt_dyncount; /* number of dyn. 
allocated GDT entries in use */ int gdt_dynavail; @@ -69,48 +69,45 @@ void gdt_put_slot(int); void -set_mem_gdt(struct mem_segment_descriptor *sd, void *base, size_t limit, - int type, int dpl, int gran, int def32, int is64) +update_descriptor(void *tp, void *ep) { -#if 0 - CPU_INFO_ITERATOR cii; - struct cpu_info *ci; - int off; -#endif + uint64_t *table, *entry; - set_mem_segment(sd, base, limit, type, dpl, gran, def32, is64); -#if 0 - off = (char *)sd - gdtstore; - for (CPU_INFO_FOREACH(cii, ci)) { - if (ci->ci_gdt != NULL) - *(struct mem_segment_descriptor *)(ci->ci_gdt + off) = - *sd; - } + table = tp; + entry = ep; + +#ifndef XEN + *table = *entry; +#else + paddr_t pa; + + if (!pmap_extract_ma(pmap_kernel(), (vaddr_t)table, &pa) || + HYPERVISOR_update_descriptor(pa, *entry)) + panic("HYPERVISOR_update_descriptor failed\n"); #endif } void -set_sys_gdt(struct sys_segment_descriptor *sd, void *base, size_t limit, +set_sys_gdt(int slot, void *base, size_t limit, int type, int dpl, int gran) { -#if 0 + union { + struct sys_segment_descriptor sd; + uint64_t bits[2]; + } d; CPU_INFO_ITERATOR cii; struct cpu_info *ci; - int off; -#endif + int idx; - set_sys_segment(sd, base, limit, type, dpl, gran); -#if 0 - off = (char *)sd - gdtstore; + set_sys_segment(&d.sd, base, limit, type, dpl, gran); + idx = IDXSEL(GDYNSEL(slot, SEL_KPL)); for (CPU_INFO_FOREACH(cii, ci)) { - if (ci->ci_gdt != NULL) - *(struct sys_segment_descriptor *)(ci->ci_gdt + off) = - *sd; + KASSERT(ci->ci_gdt != NULL); + update_descriptor(&ci->ci_gdt[idx + 0], &d.bits[0]); + update_descriptor(&ci->ci_gdt[idx + 1], &d.bits[1]); } -#endif } - /* * Initialize the GDT. 
*/ @@ -157,16 +154,25 @@ void gdt_alloc_cpu(struct cpu_info *ci) { -#if 0 - ci->ci_gdt = (void *)uvm_km_valloc(kernel_map, MAXGDTSIZ); - uvm_map_pageable(kernel_map, (vaddr_t)ci->ci_gdt, - (vaddr_t)ci->ci_gdt + MINGDTSIZ, false, false); - memset(ci->ci_gdt, 0, MINGDTSIZ); - memcpy(ci->ci_gdt, gdtstore, - DYNSEL_START + gdt_dyncount * sizeof(struct sys_segment_descriptor)); -#else - ci->ci_gdt = (void *)gdtstore; -#endif + int max_len = MAXGDTSIZ; + int min_len = MINGDTSIZ; + struct vm_page *pg; + vaddr_t va; + + ci->ci_gdt = (union descriptor *)uvm_km_alloc(kernel_map, max_len, + 0, UVM_KMF_VAONLY); + for (va = (vaddr_t)ci->ci_gdt; va < (vaddr_t)ci->ci_gdt + min_len; + va += PAGE_SIZE) { + while ((pg = uvm_pagealloc(NULL, 0, NULL, UVM_PGA_ZERO)) + == NULL) { + uvm_wait("gdt_alloc_cpu"); + } + pmap_kenter_pa(va, VM_PAGE_TO_PHYS(pg), + VM_PROT_READ | VM_PROT_WRITE, 0); + } + pmap_update(pmap_kernel()); + memset(ci->ci_gdt, 0, min_len); + memcpy(ci->ci_gdt, gdtstore, gdt_size); } @@ -182,14 +188,12 @@ #ifndef XEN setregion(&region, ci->ci_gdt, (uint16_t)(MAXGDTSIZ - 1)); #else - /* Enter only allocated frames */ setregion(&region, ci->ci_gdt, (uint16_t)(gdt_size - 1)); #endif lgdt(&region); } #ifdef MULTIPROCESSOR - void gdt_reload_cpu(struct cpu_info *ci) { @@ -198,7 +202,6 @@ #ifndef XEN setregion(&region, ci->ci_gdt, MAXGDTSIZ - 1); #else - /* Enter only allocated frames */ setregion(&region, ci->ci_gdt, gdt_size - 1); #endif lgdt(&region); @@ -303,28 +306,9 @@ mutex_enter(&cpu_lock); slot = gdt_get_slot(); -#if 0 - printf("tss_alloc: slot %d addr %p\n", slot, &gdt[slot]); -#endif - set_sys_gdt(&gdt[slot], tss, sizeof (struct x86_64_tss)-1, + set_sys_gdt(slot, tss, sizeof (struct x86_64_tss) - 1, SDT_SYS386TSS, SEL_KPL, 0); mutex_exit(&cpu_lock); -#if 0 - printf("lolimit %lx lobase %lx type %lx dpl %lx p %lx hilimit %lx\n" - "xx1 %lx gran %lx hibase %lx xx2 %lx zero %lx xx3 %lx pad %lx\n", - (unsigned long)gdt[slot].sd_lolimit, - (unsigned long)gdt[slot].sd_lobase, - (unsigned 
long)gdt[slot].sd_type, - (unsigned long)gdt[slot].sd_dpl, - (unsigned long)gdt[slot].sd_p, - (unsigned long)gdt[slot].sd_hilimit, - (unsigned long)gdt[slot].sd_xx1, - (unsigned long)gdt[slot].sd_gran, - (unsigned long)gdt[slot].sd_hibase, - (unsigned long)gdt[slot].sd_xx2, - (unsigned long)gdt[slot].sd_zero, - (unsigned long)gdt[slot].sd_xx3); -#endif return GDYNSEL(slot, SEL_KPL); #else /* XEN */ /* TSS, what for? */ @@ -344,6 +328,7 @@ #endif } +#ifdef USER_LDT void ldt_alloc(struct pmap *pmap, char *ldt, size_t len) { @@ -355,7 +340,7 @@ gdt = (struct sys_segment_descriptor *)&gdtstore[DYNSEL_START]; slot = gdt_get_slot(); - set_sys_gdt(&gdt[slot], ldt, len - 1, SDT_SYSLDT, SEL_KPL, 0); + set_sys_gdt(slot, ldt, len - 1, SDT_SYSLDT, SEL_KPL, 0); pmap->pm_ldt_sel = GSEL(slot, SEL_KPL); } @@ -370,6 +355,7 @@ gdt_put_slot(slot); } +#endif #ifdef XEN void @@ -380,22 +366,22 @@ vaddr_t va; /* - * XXX: Xen even checks descriptors AFTER limit. - * Zero out last frame after limit if needed. - */ + * XXX: Xen even checks descriptors AFTER limit. + * Zero out last frame after limit if needed. + */ va = desc->rd_base + desc->rd_limit + 1; __PRINTK(("memset 0x%lx -> 0x%lx\n", va, roundup(va, PAGE_SIZE))); memset((void *) va, 0, roundup(va, PAGE_SIZE) - va); - for (i = 0; i < roundup(desc->rd_limit,PAGE_SIZE) >> PAGE_SHIFT; i++) { + for (i = 0; i < roundup(desc->rd_limit, PAGE_SIZE) >> PAGE_SHIFT; i++) { + /* - * The lgdt instr uses virtual addresses, do some translation fo -r Xen. - * Mark pages R/O too, else Xen will refuse to use them - */ + * The lgdt instruction uses virtual addresses, + * do some translation for Xen. + * Mark pages R/O too, else Xen will refuse to use them. 
+ */ frames[i] = ((paddr_t) xpmap_ptetomach( - (pt_entry_t *) (desc->rd_base + (i << PAGE_SHIFT -)))) + (pt_entry_t *) (desc->rd_base + (i << PAGE_SHIFT)))) >> PAGE_SHIFT; __PRINTK(("frames[%d] = 0x%lx (pa 0x%lx)\n", i, frames[i], xpmap_mtop(frames[i] << PAGE_SHIFT))); Index: src/sys/arch/amd64/amd64/genassym.cf diff -u src/sys/arch/amd64/amd64/genassym.cf:1.44 src/sys/arch/amd64/amd64/genassym.cf:1.45 --- src/sys/arch/amd64/amd64/genassym.cf:1.44 Wed Apr 28 19:17:03 2010 +++ src/sys/arch/amd64/amd64/genassym.cf Wed Jul 7 01:14:52 2010 @@ -1,4 +1,4 @@ -# $NetBSD: genassym.cf,v 1.44 2010/04/28 19:17:03 dyoung Exp $ +# $NetBSD: genassym.cf,v 1.45 2010/07/07 01:14:52 chs Exp $ # # Copyright (c) 1998, 2006, 2007, 2008 The NetBSD Foundation, Inc. @@ -188,6 +188,10 @@ define PCB_CR0 offsetof(struct pcb, pcb_cr0) define PCB_ONFAULT offsetof(struct pcb, pcb_onfault) define PCB_FPCPU offsetof(struct pcb, pcb_fpcpu) +define PCB_FLAGS offsetof(struct pcb, pcb_flags) +define PCB_COMPAT32 PCB_COMPAT32 +define PCB_FS offsetof(struct pcb, pcb_fs) +define PCB_GS offsetof(struct pcb, pcb_gs) define TF_RDI offsetof(struct trapframe, tf_rdi) define TF_RSI offsetof(struct trapframe, tf_rsi) @@ -298,7 +302,7 @@ define LINUX32_RT_SF_UC offsetof(struct linux32_rt_sigframe, sf_uc) define LINUX32_SYS_rt_sigreturn LINUX32_SYS_rt_sigreturn define LINUX32_SYS_sigreturn LINUX32_SYS_sigreturn -define LINUX32_SYS_exit LINUX32_SYS_netbsd32_exit +define LINUX32_SYS_exit LINUX32_SYS_exit endif ifdef XEN Index: src/sys/arch/amd64/amd64/locore.S diff -u src/sys/arch/amd64/amd64/locore.S:1.57 src/sys/arch/amd64/amd64/locore.S:1.58 --- src/sys/arch/amd64/amd64/locore.S:1.57 Sun Apr 18 23:47:50 2010 +++ src/sys/arch/amd64/amd64/locore.S Wed Jul 7 01:14:52 2010 @@ -1,4 +1,4 @@ -/* $NetBSD: locore.S,v 1.57 2010/04/18 23:47:50 jym Exp $ */ +/* $NetBSD: locore.S,v 1.58 2010/07/07 01:14:52 chs Exp $ */ /* * Copyright-o-rama! 
@@ -887,7 +887,7 @@ ret /* - * struct lwp *cpu_switchto(struct lwp *oldlwp, struct newlwp, + * struct lwp *cpu_switchto(struct lwp *oldlwp, struct lwp *newlwp, * bool returning) * * 1. if (oldlwp != NULL), save its context. @@ -976,10 +976,58 @@ /* Reloading CR0 is very expensive - avoid if possible. */ 3: cmpq %rdx,%rcx - je 4f + je 6f movq %rcx,%cr0 -#endif +6: testl $PCB_COMPAT32, PCB_FLAGS(%r14) + jne 32f + + /* Zero out %fs/%gs registers and GDT descriptors. */ + xorq %rax, %rax + movw %ax, %fs + CLI(cx) + swapgs + movw %ax, %gs + swapgs + STI(cx) + + movq CPUVAR(GDT),%rcx + movq %rax, (GUFS_SEL*8)(%rcx) + movq %rax, (GUGS_SEL*8)(%rcx) + + /* Reload 64-bit %fs/%gs MSRs. */ + movl $MSR_FSBASE, %ecx + movl PCB_FS(%r14), %eax + movl 4+PCB_FS(%r14), %edx + wrmsr + movl $MSR_KERNELGSBASE, %ecx + movl PCB_GS(%r14), %eax + movl 4+PCB_GS(%r14), %edx + wrmsr + jmp 4f + +32: + /* Reload %fs/%gs GDT descriptors. */ + movq CPUVAR(GDT),%rcx + movq PCB_FS(%r14), %rax + movq %rax, (GUFS_SEL*8)(%rcx) + movq PCB_GS(%r14), %rax + movq %rax, (GUGS_SEL*8)(%rcx) + + /* Reload %fs and %gs */ + movq L_MD_REGS(%r12), %rbx + movw TF_FS(%rbx), %fs + CLI(ax) + swapgs + movw TF_GS(%rbx), %gs + swapgs + STI(ax) + +#else + movq %r12,%rdi + callq _C_LABEL(x86_64_tls_switch) +#endif + /* Return to the new LWP, returning 'oldlwp' in %rax. */ 4: movq %r13,%rax popq %r15 @@ -1078,9 +1126,6 @@ movl L_MD_ASTPENDING(%r14), %eax orl CPUVAR(WANT_PMAPLOAD), %eax jnz 9f -#ifdef XEN - STI(si) /* wtf is this? 
*/ -#endif testl $MDP_IRET, L_MD_FLAGS(%r14) jne iret_return; syscall_return: @@ -1088,13 +1133,9 @@ cmpl $IPL_NONE,CPUVAR(ILEVEL) jne 3f #endif -#ifndef XEN - swapgs -#endif movw TF_ES(%rsp),%es - movw TF_FS(%rsp),%fs #ifndef XEN - movw TF_GS(%rsp),%gs + swapgs #endif INTR_RESTORE_GPRS movw $(LSEL(LUDATA_SEL, SEL_UPL)),%r11 Index: src/sys/arch/amd64/amd64/machdep.c diff -u src/sys/arch/amd64/amd64/machdep.c:1.145 src/sys/arch/amd64/amd64/machdep.c:1.146 --- src/sys/arch/amd64/amd64/machdep.c:1.145 Tue May 4 23:27:13 2010 +++ src/sys/arch/amd64/amd64/machdep.c Wed Jul 7 01:14:52 2010 @@ -1,4 +1,4 @@ -/* $NetBSD: machdep.c,v 1.145 2010/05/04 23:27:13 jym Exp $ */ +/* $NetBSD: machdep.c,v 1.146 2010/07/07 01:14:52 chs Exp $ */ /*- * Copyright (c) 1996, 1997, 1998, 2000, 2006, 2007, 2008 @@ -107,7 +107,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: machdep.c,v 1.145 2010/05/04 23:27:13 jym Exp $"); +__KERNEL_RCSID(0, "$NetBSD: machdep.c,v 1.146 2010/07/07 01:14:52 chs Exp $"); /* #define XENDEBUG_LOW */ @@ -999,8 +999,6 @@ l->l_md.md_flags &= ~MDP_USEDFPU; pcb->pcb_flags = 0; - pcb->pcb_fs = 0; - pcb->pcb_gs = 0; pcb->pcb_savefpu.fp_fxsave.fx_fcw = __NetBSD_NPXCW__; pcb->pcb_savefpu.fp_fxsave.fx_mxcsr = __INITIAL_MXCSR__; pcb->pcb_savefpu.fp_fxsave.fx_mxcsr_mask = __INITIAL_MXCSR_MASK__; @@ -1010,8 +1008,7 @@ tf = l->l_md.md_regs; tf->tf_ds = LSEL(LUDATA_SEL, SEL_UPL); tf->tf_es = LSEL(LUDATA_SEL, SEL_UPL); - tf->tf_fs = LSEL(LUDATA_SEL, SEL_UPL); - tf->tf_gs = LSEL(LUDATA_SEL, SEL_UPL); + cpu_fsgs_zero(l); tf->tf_rdi = 0; tf->tf_rsi = 0; tf->tf_rbp = 0; @@ -1061,7 +1058,7 @@ } void -unsetgate( struct gate_descriptor *gd) +unsetgate(struct gate_descriptor *gd) { kpreempt_disable(); @@ -1414,6 +1411,12 @@ set_mem_segment(GDT_ADDR_MEM(gdtstore, GUDATA32_SEL), 0, x86_btop(VM_MAXUSER_ADDRESS32) - 1, SDT_MEMRWA, SEL_UPL, 1, 1, 0); + set_mem_segment(GDT_ADDR_MEM(gdtstore, GUFS_SEL), 0, + x86_btop(VM_MAXUSER_ADDRESS32) - 1, SDT_MEMRWA, SEL_UPL, 1, 1, 0); + + 
set_mem_segment(GDT_ADDR_MEM(gdtstore, GUGS_SEL), 0, + x86_btop(VM_MAXUSER_ADDRESS32) - 1, SDT_MEMRWA, SEL_UPL, 1, 1, 0); + /* * 32 bit LDT entries. */ @@ -1800,3 +1803,105 @@ { return memseg_baseaddr(l, seg, ldtp, len, NULL); } + +/* + * Zero out an LWP's TLS context (%fs and %gs and associated stuff). + * Used when exec'ing a new program. + */ + +void +cpu_fsgs_zero(struct lwp *l) +{ + struct trapframe *tf; + struct pcb *pcb; + uint64_t zero = 0; + + pcb = lwp_getpcb(l); + if (l == curlwp) { + tf = l->l_md.md_regs; + kpreempt_disable(); + tf->tf_fs = 0; + tf->tf_gs = 0; + if (l->l_proc->p_flag & PK_32) { + setfs(0); +#ifndef XEN + setusergs(0); +#else + HYPERVISOR_set_segment_base(SEGBASE_GS_USER_SEL, 0); +#endif + } else { +#ifndef XEN + wrmsr(MSR_FSBASE, 0); + wrmsr(MSR_KERNELGSBASE, 0); +#else + HYPERVISOR_set_segment_base(SEGBASE_FS, 0); + HYPERVISOR_set_segment_base(SEGBASE_GS_USER, 0); +#endif + } + pcb->pcb_fs = 0; + pcb->pcb_gs = 0; + update_descriptor(&curcpu()->ci_gdt[GUFS_SEL], &zero); + update_descriptor(&curcpu()->ci_gdt[GUGS_SEL], &zero); + kpreempt_enable(); + } else { + pcb->pcb_fs = 0; + pcb->pcb_gs = 0; + } + +} + +/* + * Load an LWP's TLS context, possibly changing the %fs and %gs selectors. + * Used only for 32-bit processes. 
+ */ + +void +cpu_fsgs_reload(struct lwp *l, int fssel, int gssel) +{ + struct trapframe *tf; + struct pcb *pcb; + + KASSERT(l->l_proc->p_flag & PK_32); + tf = l->l_md.md_regs; + if (l == curlwp) { + pcb = lwp_getpcb(l); + kpreempt_disable(); + update_descriptor(&curcpu()->ci_gdt[GUFS_SEL], &pcb->pcb_fs); + update_descriptor(&curcpu()->ci_gdt[GUGS_SEL], &pcb->pcb_gs); + setfs(fssel); +#ifndef XEN + setusergs(gssel); +#else + HYPERVISOR_set_segment_base(SEGBASE_GS_USER_SEL, gssel); +#endif + tf->tf_fs = fssel; + tf->tf_gs = gssel; + kpreempt_enable(); + } else { + tf->tf_fs = fssel; + tf->tf_gs = gssel; + } +} + +#ifdef XEN +void x86_64_tls_switch(struct lwp *); + +void +x86_64_tls_switch(struct lwp *l) +{ + struct pcb *pcb = lwp_getpcb(l); + struct trapframe *tf = l->l_md.md_regs; + + if (pcb->pcb_flags & PCB_COMPAT32) { + update_descriptor(&curcpu()->ci_gdt[GUFS_SEL], &pcb->pcb_fs); + update_descriptor(&curcpu()->ci_gdt[GUGS_SEL], &pcb->pcb_gs); + setfs(tf->tf_fs); + HYPERVISOR_set_segment_base(SEGBASE_GS_USER_SEL, tf->tf_gs); + } else { + setfs(0); + HYPERVISOR_set_segment_base(SEGBASE_GS_USER_SEL, 0); + HYPERVISOR_set_segment_base(SEGBASE_FS, pcb->pcb_fs); + HYPERVISOR_set_segment_base(SEGBASE_GS_USER, pcb->pcb_gs); + } +} +#endif Index: src/sys/arch/amd64/amd64/netbsd32_machdep.c diff -u src/sys/arch/amd64/amd64/netbsd32_machdep.c:1.63 src/sys/arch/amd64/amd64/netbsd32_machdep.c:1.64 --- src/sys/arch/amd64/amd64/netbsd32_machdep.c:1.63 Tue May 11 02:34:39 2010 +++ src/sys/arch/amd64/amd64/netbsd32_machdep.c Wed Jul 7 01:14:52 2010 @@ -1,4 +1,4 @@ -/* $NetBSD: netbsd32_machdep.c,v 1.63 2010/05/11 02:34:39 joerg Exp $ */ +/* $NetBSD: netbsd32_machdep.c,v 1.64 2010/07/07 01:14:52 chs Exp $ */ /* * Copyright (c) 2001 Wasabi Systems, Inc. 
@@ -36,7 +36,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: netbsd32_machdep.c,v 1.63 2010/05/11 02:34:39 joerg Exp $"); +__KERNEL_RCSID(0, "$NetBSD: netbsd32_machdep.c,v 1.64 2010/07/07 01:14:52 chs Exp $"); #ifdef _KERNEL_OPT #include "opt_compat_netbsd.h" @@ -95,9 +95,8 @@ #define x86_64_set_mtrr32(x, y, z) ENOSYS #endif -static int check_sigcontext32(const struct netbsd32_sigcontext *, - struct trapframe *); -static int check_mcontext32(const mcontext32_t *, struct trapframe *); +static int check_sigcontext32(struct lwp *, const struct netbsd32_sigcontext *); +static int check_mcontext32(struct lwp *, const mcontext32_t *); #ifdef EXEC_AOUT /* @@ -128,11 +127,13 @@ void netbsd32_setregs(struct lwp *l, struct exec_package *pack, vaddr_t stack) { - struct pcb *pcb = lwp_getpcb(l); + struct pcb *pcb; struct trapframe *tf; struct proc *p = l->l_proc; void **retaddr; + pcb = lwp_getpcb(l); + /* If we were using the FPU, forget about it. */ if (pcb->pcb_fpcpu != NULL) { fpusave_lwp(l, false); @@ -145,19 +146,18 @@ netbsd32_adjust_limits(p); l->l_md.md_flags &= ~MDP_USEDFPU; - pcb->pcb_flags = 0; + pcb->pcb_flags = PCB_COMPAT32; pcb->pcb_savefpu.fp_fxsave.fx_fcw = __NetBSD_NPXCW__; pcb->pcb_savefpu.fp_fxsave.fx_mxcsr = __INITIAL_MXCSR__; pcb->pcb_savefpu.fp_fxsave.fx_mxcsr_mask = __INITIAL_MXCSR_MASK__; - p->p_flag |= PK_32; tf = l->l_md.md_regs; tf->tf_ds = LSEL(LUDATA32_SEL, SEL_UPL); tf->tf_es = LSEL(LUDATA32_SEL, SEL_UPL); - tf->tf_fs = LSEL(LUDATA32_SEL, SEL_UPL); - tf->tf_gs = LSEL(LUDATA32_SEL, SEL_UPL); + cpu_fsgs_zero(l); + cpu_fsgs_reload(l, tf->tf_ds, tf->tf_ds); tf->tf_rdi = 0; tf->tf_rsi = 0; tf->tf_rbp = 0; @@ -402,20 +402,18 @@ if (copyin(scp, &context, sizeof(*scp)) != 0) return (EFAULT); - /* Restore register context. */ - tf = l->l_md.md_regs; /* * Check for security violations. */ - error = check_sigcontext32(&context, tf); + error = check_sigcontext32(l, &context); if (error != 0) return error; + /* Restore register context. 
*/ + tf = l->l_md.md_regs; tf->tf_ds = context.sc_ds; tf->tf_es = context.sc_es; - tf->tf_fs = context.sc_fs; - tf->tf_gs = context.sc_gs; - + cpu_fsgs_reload(l, context.sc_fs, context.sc_gs); tf->tf_rflags = context.sc_eflags; tf->tf_rdi = context.sc_edi; tf->tf_rsi = context.sc_esi; @@ -838,11 +836,11 @@ /* * Check for security violations. */ - error = check_mcontext32(mcp, tf); + error = check_mcontext32(l, mcp); if (error != 0) return error; - tf->tf_gs = gr[_REG32_GS]; - tf->tf_fs = gr[_REG32_FS]; + + cpu_fsgs_reload(l, gr[_REG32_FS], gr[_REG32_GS]); tf->tf_es = gr[_REG32_ES]; tf->tf_ds = gr[_REG32_DS]; /* Only change the user-alterable part of eflags */ @@ -955,13 +953,21 @@ * These functions perform the needed checks. */ static int -check_sigcontext32(const struct netbsd32_sigcontext *scp, struct trapframe *tf) +check_sigcontext32(struct lwp *l, const struct netbsd32_sigcontext *scp) { + struct trapframe *tf; + struct pcb *pcb; + + tf = l->l_md.md_regs; + pcb = lwp_getpcb(curlwp); + if (((scp->sc_eflags ^ tf->tf_rflags) & PSL_USERSTATIC) != 0) return EINVAL; - if (scp->sc_fs != 0 && !VALID_USER_DSEL32(scp->sc_fs)) + if (scp->sc_fs != 0 && !VALID_USER_DSEL32(scp->sc_fs) && + !(scp->sc_fs == GSEL(GUFS_SEL, SEL_UPL) && pcb->pcb_fs != 0)) return EINVAL; - if (scp->sc_gs != 0 && !VALID_USER_DSEL32(scp->sc_gs)) + if (scp->sc_gs != 0 && !VALID_USER_DSEL32(scp->sc_gs) && + !(scp->sc_gs == GSEL(GUGS_SEL, SEL_UPL) && pcb->pcb_gs != 0)) return EINVAL; if (scp->sc_es != 0 && !VALID_USER_DSEL32(scp->sc_es)) return EINVAL; @@ -973,17 +979,23 @@ } static int -check_mcontext32(const mcontext32_t *mcp, struct trapframe *tf) +check_mcontext32(struct lwp *l, const mcontext32_t *mcp) { const __greg32_t *gr; + struct trapframe *tf; + struct pcb *pcb; gr = mcp->__gregs; + tf = l->l_md.md_regs; + pcb = lwp_getpcb(l); if (((gr[_REG32_EFL] ^ tf->tf_rflags) & PSL_USERSTATIC) != 0) return EINVAL; - if (gr[_REG32_FS] != 0 && !VALID_USER_DSEL32(gr[_REG32_FS])) + if (gr[_REG32_FS] != 0 
&& !VALID_USER_DSEL32(gr[_REG32_FS]) && + !(gr[_REG32_FS] == GSEL(GUFS_SEL, SEL_UPL) && pcb->pcb_fs != 0)) return EINVAL; - if (gr[_REG32_GS] != 0 && !VALID_USER_DSEL32(gr[_REG32_GS])) + if (gr[_REG32_GS] != 0 && !VALID_USER_DSEL32(gr[_REG32_GS]) && + !(gr[_REG32_GS] == GSEL(GUGS_SEL, SEL_UPL) && pcb->pcb_gs != 0)) return EINVAL; if (gr[_REG32_ES] != 0 && !VALID_USER_DSEL32(gr[_REG32_ES])) return EINVAL; Index: src/sys/arch/amd64/amd64/vector.S diff -u src/sys/arch/amd64/amd64/vector.S:1.33 src/sys/arch/amd64/amd64/vector.S:1.34 --- src/sys/arch/amd64/amd64/vector.S:1.33 Tue Feb 23 06:27:40 2010 +++ src/sys/arch/amd64/amd64/vector.S Wed Jul 7 01:14:52 2010 @@ -1,4 +1,4 @@ -/* $NetBSD: vector.S,v 1.33 2010/02/23 06:27:40 cegger Exp $ */ +/* $NetBSD: vector.S,v 1.34 2010/07/07 01:14:52 chs Exp $ */ /*- * Copyright (c) 1998, 2007, 2008 The NetBSD Foundation, Inc. @@ -154,11 +154,9 @@ movq %rsp,%rdi incl CPUVAR(NTRAP) call _C_LABEL(trap) - swapgs - movw TF_GS(%rsp),%gs - movw TF_FS(%rsp),%fs movw TF_ES(%rsp),%es movw TF_DS(%rsp),%ds + swapgs jmp 2f 1: movq %rsp,%rdi Index: src/sys/arch/amd64/include/frameasm.h diff -u src/sys/arch/amd64/include/frameasm.h:1.13 src/sys/arch/amd64/include/frameasm.h:1.14 --- src/sys/arch/amd64/include/frameasm.h:1.13 Fri Nov 21 10:05:41 2008 +++ src/sys/arch/amd64/include/frameasm.h Wed Jul 7 01:14:52 2010 @@ -1,4 +1,4 @@ -/* $NetBSD: frameasm.h,v 1.13 2008/11/21 10:05:41 ad Exp $ */ +/* $NetBSD: frameasm.h,v 1.14 2010/07/07 01:14:52 chs Exp $ */ #ifndef _AMD64_MACHINE_FRAMEASM_H #define _AMD64_MACHINE_FRAMEASM_H @@ -66,7 +66,7 @@ movw %fs,TF_FS(%rsp) ; \ movw %es,TF_ES(%rsp) ; \ movw %ds,TF_DS(%rsp) ; \ -98: INTR_SAVE_GPRS +98: INTR_SAVE_GPRS #ifndef XEN #define INTRFASTEXIT \ @@ -74,11 +74,9 @@ testq $SEL_UPL,TF_CS(%rsp) /* Interrupted %cs */ ; \ je 99f ; \ cli ; \ - swapgs ; \ - movw TF_GS(%rsp),%gs ; \ - movw TF_FS(%rsp),%fs ; \ movw TF_ES(%rsp),%es ; \ movw TF_DS(%rsp),%ds ; \ + swapgs ; \ 99: addq $TF_REGSIZE+16,%rsp /* + T_xxx and 
error code */ ; \ iretq @@ -101,7 +99,6 @@ INTR_RESTORE_GPRS ; \ testq $SEL_UPL,TF_CS(%rsp) ; \ je 99f ; \ - movw TF_FS(%rsp),%fs ; \ movw TF_ES(%rsp),%es ; \ movw TF_DS(%rsp),%ds ; \ 99: addq $TF_REGSIZE+16,%rsp /* + T_xxx and error code */ ; \ @@ -122,13 +119,13 @@ #endif /* !XEN */ #define DO_DEFERRED_SWITCH \ - cmpq $0, CPUVAR(WANT_PMAPLOAD) ; \ + cmpl $0, CPUVAR(WANT_PMAPLOAD) ; \ jz 1f ; \ call _C_LABEL(do_pmap_load) ; \ 1: #define CHECK_DEFERRED_SWITCH \ - cmpq $0, CPUVAR(WANT_PMAPLOAD) + cmpl $0, CPUVAR(WANT_PMAPLOAD) #define CHECK_ASTPENDING(reg) cmpl $0, L_MD_ASTPENDING(reg) #define CLEAR_ASTPENDING(reg) movl $0, L_MD_ASTPENDING(reg) Index: src/sys/arch/amd64/include/gdt.h diff -u src/sys/arch/amd64/include/gdt.h:1.6 src/sys/arch/amd64/include/gdt.h:1.7 --- src/sys/arch/amd64/include/gdt.h:1.6 Sat Mar 14 14:45:54 2009 +++ src/sys/arch/amd64/include/gdt.h Wed Jul 7 01:14:52 2010 @@ -1,4 +1,4 @@ -/* $NetBSD: gdt.h,v 1.6 2009/03/14 14:45:54 dsl Exp $ */ +/* $NetBSD: gdt.h,v 1.7 2010/07/07 01:14:52 chs Exp $ */ /*- * Copyright (c) 1996, 1997 The NetBSD Foundation, Inc. @@ -45,10 +45,7 @@ void ldt_alloc(struct pmap *, char *, size_t); void ldt_free(struct pmap *); -void set_mem_gdt(struct mem_segment_descriptor *, void *, size_t, - int, int, int, int, int); -void set_sys_gdt(struct sys_segment_descriptor *, void *, size_t, int, int, - int); +void set_sys_gdt(int, void *, size_t, int, int, int); #endif #define MINGDTSIZ PAGE_SIZE Index: src/sys/arch/amd64/include/pcb.h diff -u src/sys/arch/amd64/include/pcb.h:1.16 src/sys/arch/amd64/include/pcb.h:1.17 --- src/sys/arch/amd64/include/pcb.h:1.16 Tue Oct 27 03:05:28 2009 +++ src/sys/arch/amd64/include/pcb.h Wed Jul 7 01:14:52 2010 @@ -1,4 +1,4 @@ -/* $NetBSD: pcb.h,v 1.16 2009/10/27 03:05:28 rmind Exp $ */ +/* $NetBSD: pcb.h,v 1.17 2010/07/07 01:14:52 chs Exp $ */ /*- * Copyright (c) 1998 The NetBSD Foundation, Inc. 
@@ -88,8 +88,7 @@ struct pcb { int pcb_flags; #define PCB_USER_LDT 0x01 /* has user-set LDT */ -#define PCB_GS64 0x02 -#define PCB_FS64 0x04 +#define PCB_COMPAT32 0x02 u_int pcb_cr0; /* saved image of CR0 */ uint64_t pcb_rsp0; uint64_t pcb_cr2; /* page fault address (CR2) */ @@ -101,8 +100,8 @@ struct savefpu pcb_savefpu __aligned(16); /* floating point state */ void *pcb_onfault; /* copyin/out fault recovery */ struct cpu_info *pcb_fpcpu; /* cpu holding our fp state. */ - uint64_t pcb_gs; uint64_t pcb_fs; + uint64_t pcb_gs; int pcb_iopl; }; Index: src/sys/arch/amd64/include/segments.h diff -u src/sys/arch/amd64/include/segments.h:1.19 src/sys/arch/amd64/include/segments.h:1.20 --- src/sys/arch/amd64/include/segments.h:1.19 Sun Oct 26 00:08:15 2008 +++ src/sys/arch/amd64/include/segments.h Wed Jul 7 01:14:52 2010 @@ -1,4 +1,4 @@ -/* $NetBSD: segments.h,v 1.19 2008/10/26 00:08:15 mrg Exp $ */ +/* $NetBSD: segments.h,v 1.20 2010/07/07 01:14:52 chs Exp $ */ /*- * Copyright (c) 1990 The Regents of the University of California. @@ -209,6 +209,15 @@ } __packed; /* + * Generic descriptor + */ +union descriptor { + struct mem_segment_descriptor sd; + uint32_t raw[2]; + uint64_t raw64; +} __packed; + +/* * region descriptors, used to load gdt/idt tables before segments yet exist. 
*/ struct region_descriptor { @@ -236,6 +245,7 @@ void set_mem_segment(struct mem_segment_descriptor *, void *, size_t, int, int, int, int, int); void cpu_init_idt(void); +void update_descriptor(void *, void *); #if !defined(XEN) void idt_init(void); @@ -248,7 +258,8 @@ struct lwp; int memseg_baseaddr(struct lwp *, uint64_t, char *, int, uint64_t *); int valid_user_selector(struct lwp *, uint64_t, char *, int); - +void cpu_fsgs_zero(struct lwp *); +void cpu_fsgs_reload(struct lwp *, int, int); #endif /* _KERNEL */ @@ -348,7 +359,9 @@ #define GPNPBIOSTRAMP_SEL 13 #define GUCODE32_SEL 14 #define GUDATA32_SEL 15 -#define NGDT_MEM 16 +#define GUFS_SEL 16 /* 32-bit Per-thread %fs */ +#define GUGS_SEL 17 /* 32-bit Per-thread %gs */ +#define NGDT_MEM 18 #define GLDT_SEL 0 /* Default LDT descriptor */ #define NGDT_SYS 1 Index: src/sys/arch/x86/include/cpufunc.h diff -u src/sys/arch/x86/include/cpufunc.h:1.11 src/sys/arch/x86/include/cpufunc.h:1.12 --- src/sys/arch/x86/include/cpufunc.h:1.11 Tue Jan 27 21:59:24 2009 +++ src/sys/arch/x86/include/cpufunc.h Wed Jul 7 01:14:53 2010 @@ -1,4 +1,4 @@ -/* $NetBSD: cpufunc.h,v 1.11 2009/01/27 21:59:24 christos Exp $ */ +/* $NetBSD: cpufunc.h,v 1.12 2010/07/07 01:14:53 chs Exp $ */ /*- * Copyright (c) 1998, 2007 The NetBSD Foundation, Inc. @@ -122,6 +122,8 @@ uint64_t rdpmc(u_int); void wrmsr(u_int, uint64_t); void wrmsr_locked(u_int, u_int, uint64_t); +void setfs(int); +void setusergs(int); #endif /* _KERNEL */ Index: src/sys/arch/x86/include/sysarch.h diff -u src/sys/arch/x86/include/sysarch.h:1.8 src/sys/arch/x86/include/sysarch.h:1.9 --- src/sys/arch/x86/include/sysarch.h:1.8 Sat Mar 21 14:41:30 2009 +++ src/sys/arch/x86/include/sysarch.h Wed Jul 7 01:14:53 2010 @@ -1,4 +1,4 @@ -/* $NetBSD: sysarch.h,v 1.8 2009/03/21 14:41:30 ad Exp $ */ +/* $NetBSD: sysarch.h,v 1.9 2010/07/07 01:14:53 chs Exp $ */ /*- * Copyright (c) 2007 The NetBSD Foundation, Inc. 
@@ -174,10 +174,12 @@ int x86_iopl(struct lwp *, void *, register_t *); int x86_get_mtrr(struct lwp *, void *, register_t *); int x86_set_mtrr(struct lwp *, void *, register_t *); -int x86_get_ldt(struct lwp *l, void *, register_t *); -int x86_get_ldt1(struct lwp *l, struct x86_get_ldt_args *, union descriptor *); -int x86_set_ldt(struct lwp *l, void *, register_t *); -int x86_set_ldt1(struct lwp *l, struct x86_set_ldt_args *, union descriptor *); +int x86_get_ldt(struct lwp *, void *, register_t *); +int x86_get_ldt1(struct lwp *, struct x86_get_ldt_args *, union descriptor *); +int x86_set_ldt(struct lwp *, void *, register_t *); +int x86_set_ldt1(struct lwp *, struct x86_set_ldt_args *, union descriptor *); +int x86_set_sdbase(void *, char, lwp_t *, bool); +int x86_get_sdbase(void *, char); #else #include <sys/cdefs.h> __BEGIN_DECLS Index: src/sys/arch/x86/x86/pmap.c diff -u src/sys/arch/x86/x86/pmap.c:1.110 src/sys/arch/x86/x86/pmap.c:1.111 --- src/sys/arch/x86/x86/pmap.c:1.110 Tue Jul 6 20:50:35 2010 +++ src/sys/arch/x86/x86/pmap.c Wed Jul 7 01:14:53 2010 @@ -1,4 +1,4 @@ -/* $NetBSD: pmap.c,v 1.110 2010/07/06 20:50:35 cegger Exp $ */ +/* $NetBSD: pmap.c,v 1.111 2010/07/07 01:14:53 chs Exp $ */ /* * Copyright (c) 2007 Manuel Bouyer. @@ -149,7 +149,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.110 2010/07/06 20:50:35 cegger Exp $"); +__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.111 2010/07/07 01:14:53 chs Exp $"); #include "opt_user_ldt.h" #include "opt_lockdebug.h" @@ -2557,8 +2557,6 @@ ci = curcpu(); if (l == ci->ci_curlwp) { - struct pcb *pcb; - KASSERT(ci->ci_want_pmapload == 0); KASSERT(ci->ci_tlbstate != TLBSTATE_VALID); #ifdef KSTACK_CHECK_DR0 @@ -2581,15 +2579,7 @@ return; } - pcb = lwp_getpcb(l); ci->ci_want_pmapload = 1; - -#if defined(__x86_64__) - if (pcb->pcb_flags & PCB_GS64) - wrmsr(MSR_KERNELGSBASE, pcb->pcb_gs); - if (pcb->pcb_flags & PCB_FS64) - wrmsr(MSR_FSBASE, pcb->pcb_fs); -#endif /* defined(__x86_64__) */ } }