Module Name: src Committed By: riz Date: Tue Jun 12 20:43:59 UTC 2012
Modified Files: src/sys/arch/amd64/amd64 [netbsd-5-0]: trap.c vector.S src/sys/arch/amd64/include [netbsd-5-0]: frameasm.h Log Message: Pull up following revision(s) (requested by spz in ticket #1772): sys/arch/amd64/amd64/trap.c: revision 1.71 via patch sys/arch/amd64/amd64/vector.S: revision 1.41 via patch sys/arch/amd64/include/frameasm.h: patch Treat traps in kernel mode during the 'return to user' iret sequence as user faults. Based heavily on the i386 code with the correct opcode bytes inserted. iret path tested; arranging for segment register errors is harder. User %fs and %gs (32-bit apps) are loaded much earlier and any errors will generate kernel panics - there is probably code to try to stop the invalid values being set. If we get a fault setting the user %gs, or on an iret that is returning to userspace, we must do a 'swapgs' to reload the kernel %gs_base. Also save the %ds, %es, %fs, %gs selector values in the frame so they can be restored if we finally return to user (probably after an application SIGSEGV handler has fixed the error). Without this, any such fault leaves the kernel running with the wrong %gs offset and it will most likely fault again early in trap(). This repeats until the stack tramples on something important. iret change works; invalid %gs is a little harder to arrange. To generate a diff of this commit: cvs rdiff -u -r1.52.6.2 -r1.52.6.3 src/sys/arch/amd64/amd64/trap.c cvs rdiff -u -r1.28.6.1 -r1.28.6.1.2.1 src/sys/arch/amd64/amd64/vector.S cvs rdiff -u -r1.12 -r1.12.14.1 src/sys/arch/amd64/include/frameasm.h Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.
Modified files: Index: src/sys/arch/amd64/amd64/trap.c diff -u src/sys/arch/amd64/amd64/trap.c:1.52.6.2 src/sys/arch/amd64/amd64/trap.c:1.52.6.3 --- src/sys/arch/amd64/amd64/trap.c:1.52.6.2 Fri Aug 14 21:32:18 2009 +++ src/sys/arch/amd64/amd64/trap.c Tue Jun 12 20:43:59 2012 @@ -1,4 +1,4 @@ -/* $NetBSD: trap.c,v 1.52.6.2 2009/08/14 21:32:18 snj Exp $ */ +/* $NetBSD: trap.c,v 1.52.6.3 2012/06/12 20:43:59 riz Exp $ */ /*- * Copyright (c) 1998, 2000 The NetBSD Foundation, Inc. @@ -68,7 +68,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: trap.c,v 1.52.6.2 2009/08/14 21:32:18 snj Exp $"); +__KERNEL_RCSID(0, "$NetBSD: trap.c,v 1.52.6.3 2012/06/12 20:43:59 riz Exp $"); #include "opt_ddb.h" #include "opt_kgdb.h" @@ -124,6 +124,7 @@ __KERNEL_RCSID(0, "$NetBSD: trap.c,v 1.5 #endif void trap(struct trapframe *); +void trap_return_fault_return(struct trapframe *) __dead; const char *trap_type[] = { "privileged instruction fault", /* 0 T_PRIVINFLT */ @@ -178,16 +179,11 @@ trap(struct trapframe *frame) struct proc *p; int type = (int)frame->tf_trapno; struct pcb *pcb; - extern char fusuintrfailure[], kcopy_fault[], - resume_iret[]; + extern char fusuintrfailure[], kcopy_fault[]; #if defined(COMPAT_10) || defined(COMPAT_IBCS2) extern char IDTVEC(oosyscall)[]; #endif -#if 0 - extern char resume_pop_ds[], resume_pop_es[]; -#endif struct trapframe *vframe; - void *resume; void *onfault; int error; uint64_t cr2; @@ -274,50 +270,78 @@ copyfault: /* * Check for failure during return to user mode. + * This can happen loading invalid values into the segment + * registers, or during the 'iret' itself. * - * XXXfvdl check for rex prefix? - * - * We do this by looking at the instruction we faulted on. The - * specific instructions we recognize only happen when + * We do this by looking at the instruction we faulted on. + * The specific instructions we recognize only happen when * returning from a trap, syscall, or interrupt. 
- * - * XXX - * The heuristic used here will currently fail for the case of - * one of the 2 pop instructions faulting when returning from a - * a fast interrupt. This should not be possible. It can be - * fixed by rearranging the trap frame so that the stack format - * at this point is the same as on exit from a `slow' - * interrupt. */ - switch (*(u_char *)frame->tf_rip) { - case 0xcf: /* iret */ - vframe = (void *)((uint64_t)&frame->tf_rsp - 44); - resume = resume_iret; - break; -/* - * XXXfvdl these are illegal in long mode (not in compat mode, though) - * and we do not take back the descriptors from the signal context anyway, - * but may do so later for USER_LDT, in which case we need to intercept - * other instructions (movl %eax, %Xs). - */ -#if 0 - case 0x1f: /* popl %ds */ - vframe = (void *)((uint64_t)&frame->tf_rsp - 4); - resume = resume_pop_ds; - break; - case 0x07: /* popl %es */ - vframe = (void *)((uint64_t)&frame->tf_rsp - 0); - resume = resume_pop_es; + +kernelfault: +#ifdef XEN + /* + * XXX: there has to be an equivalent 'problem' + * but I (dsl) don't know exactly what happens! + * For now panic the kernel. + */ + goto we_re_toast; +#else + KSI_INIT_TRAP(&ksi); + ksi.ksi_signo = SIGSEGV; + ksi.ksi_code = SEGV_ACCERR; + ksi.ksi_trap = type; + + /* Get %rsp value before fault - there may be a pad word + * below the trap frame. */ + vframe = (void *)frame->tf_rsp; + switch (*(uint16_t *)frame->tf_rip) { + case 0xcf48: /* iretq */ + /* + * The 'iretq' instruction faulted, wo we have the + * 'user' registers saved after the kernel + * %rip:%cs:%fl:%rsp:%ss of the iret, and below that + * the user %rip:%cs:%fl:%rsp:%ss the 'iret' was + * processing. + * We must copy the user register back over the + * kernel fault frame to generate a normal stack + * frame (eg for sending a SIGSEGV). 
+ */ + vframe = (void *)((char *)vframe + - offsetof(struct trapframe, tf_rip)); + memmove(vframe, frame, + offsetof(struct trapframe, tf_rip)); + /* Set the faulting address to the user %eip */ + ksi.ksi_addr = (void *)vframe->tf_rip; + break; + case 0xac8e: /* mov 0x98(%rsp),%gs (8e ac 24 98 00 00 00) */ + case 0xa48e: /* mov 0xa0(%rsp),%fs (8e a4 24 a0 00 00 00) */ + case 0x848e: /* mov 0xa8(%rsp),%es (8e 84 24 a8 00 00 00) */ + case 0x9c8e: /* mov 0xb0(%rsp),%ds (8e 9c 24 b0 00 00 00) */ + /* + * We faulted loading one if the user segment registers. + * The stack frame containing the user registers is + * still valid and pointed to by tf_rsp. + * Maybe we should check the iretq follows. + */ + if (KERNELMODE(vframe->tf_cs, vframe->tf_eflags)) + goto we_re_toast; + /* There is no valid address for the fault */ break; -#endif + default: goto we_re_toast; } - if (KERNELMODE(vframe->tf_cs, vframe->tf_rflags)) - goto we_re_toast; - frame->tf_rip = (uint64_t)resume; - return; + /* XXX: worry about on-stack trampolines for nested + * handlers?? 
*/ + /* Save outer frame for any signal return */ + l->l_md.md_regs = vframe; + (*p->p_emul->e_trapsignal)(l, &ksi); + /* Return to user by reloading the user frame */ + trap_return_fault_return(vframe); + /* NOTREACHED */ +#endif case T_PROTFLT|T_USER: /* protection fault */ case T_TSSFLT|T_USER: @@ -570,7 +594,7 @@ faultcommon: goto copyfault; printf("uvm_fault(%p, 0x%lx, %d) -> %x\n", map, va, ftype, error); - goto we_re_toast; + goto kernelfault; } if (error == ENOMEM) { ksi.ksi_signo = SIGKILL; Index: src/sys/arch/amd64/amd64/vector.S diff -u src/sys/arch/amd64/amd64/vector.S:1.28.6.1 src/sys/arch/amd64/amd64/vector.S:1.28.6.1.2.1 --- src/sys/arch/amd64/amd64/vector.S:1.28.6.1 Tue Nov 25 18:24:31 2008 +++ src/sys/arch/amd64/amd64/vector.S Tue Jun 12 20:43:59 2012 @@ -1,4 +1,4 @@ -/* $NetBSD: vector.S,v 1.28.6.1 2008/11/25 18:24:31 snj Exp $ */ +/* $NetBSD: vector.S,v 1.28.6.1.2.1 2012/06/12 20:43:59 riz Exp $ */ /*- * Copyright (c) 1998, 2007, 2008 The NetBSD Foundation, Inc. @@ -98,17 +98,17 @@ /*****************************************************************************/ -#ifndef XEN -#define PRE_TRAP -#define TRAP(a) pushq $(a) ; jmp _C_LABEL(alltraps) -#define ZTRAP(a) pushq $0 ; TRAP(a) -#else +#ifdef XEN #define PRE_TRAP movq (%rsp),%rcx ; movq 8(%rsp),%r11 ; addq $0x10,%rsp -#define POST_TRAP(a) pushq $(a) ; jmp _C_LABEL(alltraps) -#define TRAP(a) PRE_TRAP ; POST_TRAP(a) -#define ZTRAP(a) PRE_TRAP ; pushq $0 ; POST_TRAP(a) +#else +#define PRE_TRAP #endif +#define TRAP_NJ(a) PRE_TRAP ; pushq $(a) +#define ZTRAP_NJ(a) PRE_TRAP ; pushq $0 ; pushq $(a) +#define TRAP(a) TRAP_NJ(a) ; jmp _C_LABEL(alltraps) +#define ZTRAP(a) ZTRAP_NJ(a) ; jmp _C_LABEL(alltraps) + #define BPTTRAP(a) ZTRAP(a) .text @@ -177,12 +177,52 @@ IDTVEC(trap09) ZTRAP(T_FPOPFLT) IDTVEC(trap0a) TRAP(T_TSSFLT) -IDTVEC(trap0b) + +IDTVEC(trap0b) /* #NP() Segment not present */ +#ifdef XEN TRAP(T_SEGNPFLT) -IDTVEC(trap0c) +#else + TRAP_NJ(T_SEGNPFLT) + jmp check_swapgs +#endif +IDTVEC(trap0c) 
/* #SS() Stack exception */ +#ifdef XEN TRAP(T_STKFLT) -IDTVEC(trap0d) +#else + TRAP_NJ(T_STKFLT) + jmp check_swapgs +#endif + +IDTVEC(trap0d) /* #GP() General protection */ +#ifdef XEN TRAP(T_PROTFLT) +#else + TRAP_NJ(T_PROTFLT) + +/* We need to worry about traps before iret after the swapgs. + * Might be loading %gs or the iret itself. */ +check_swapgs: + INTRENTRY_L(3f,1:) +2: sti + jmp calltrap +3: + /* Trap in kernel mode. */ + /* If faulting instruction is 'iret' or mov to %gs + * we may need to do a 'swapgs'. */ + movq TF_RIP(%rsp),%rax + movq TF_RSP(%rsp),%rbx /* Must read %rsp, may be a pad word */ + cmpl $0x9824ac8e,(%rax) /* Fault is mov %gs,0x98(%rsp) ? */ + jne 4f + addw $7+7,%rax /* Advance %rip to iret */ + addq $TF_REGSIZE+8+8,%rbx /* %rsp adjust before iret */ +4: cmpw $0xcf48,(%rax) /* Faulting instruction is iretq ? */ + jne 2b /* normal fault in kernel */ + + testb $SEL_UPL,8(%rbx) /* Check %cs of outer iret frame */ + je 2b /* if iret was to user ... */ + jmp 1b /* ... must restore kernel %gs */ +#endif + IDTVEC(trap0e) TRAP(T_PAGEFLT) IDTVEC(intrspurious) @@ -255,25 +295,20 @@ IDTVEC(exceptions) .quad _C_LABEL(Xtrap1e), _C_LABEL(Xtrap1f) /* - * If an error is detected during trap, syscall, or interrupt exit, trap() will - * change %eip to point to one of these labels. We clean up the stack, if - * necessary, and resume as if we were handling a general protection fault. - * This will cause the process to get a SIGBUS. - * - * XXXfvdl currently unused, as pop %ds and pop %es are illegal in long - * mode. However, if the x86-64 port is going to support USER_LDT, we - * may need something like this after all. + * trap() calls here when it detects a fault in INTRFASTEXIT (loading the + * segment registers or during the iret itself). + * The address of the (possibly reconstructed) user trap frame is + * passed as an argument. + * Typically the code will have raised a SIGSEGV which will be actioned + * by the code below. 
*/ -NENTRY(resume_iret) - ZTRAP(T_PROTFLT) -#if 0 -NENTRY(resume_pop_ds) - movl $GSEL(GDATA_SEL, SEL_KPL),%eax - movl %eax,%es -NENTRY(resume_pop_es) - movl $T_PROTFLT,TF_TRAPNO(%rsp) - jmp calltrap -#endif +_C_LABEL(trap_return_fault_return): .globl trap_return_fault_return + mov %rdi,%rsp /* frame for user return */ +#ifdef DIAGNOSTIC + /* We can't recover the saved %rbx, so suppress warning */ + movl CPUVAR(ILEVEL),%ebx +#endif /* DIAGNOSTIC */ + jmp .Lalltraps_checkusr /* * All traps go through here. Call the generic trap handler, and Index: src/sys/arch/amd64/include/frameasm.h diff -u src/sys/arch/amd64/include/frameasm.h:1.12 src/sys/arch/amd64/include/frameasm.h:1.12.14.1 --- src/sys/arch/amd64/include/frameasm.h:1.12 Mon Apr 21 15:15:33 2008 +++ src/sys/arch/amd64/include/frameasm.h Tue Jun 12 20:43:59 2012 @@ -1,4 +1,4 @@ -/* $NetBSD: frameasm.h,v 1.12 2008/04/21 15:15:33 cegger Exp $ */ +/* $NetBSD: frameasm.h,v 1.12.14.1 2012/06/12 20:43:59 riz Exp $ */ #ifndef _AMD64_MACHINE_FRAMEASM_H #define _AMD64_MACHINE_FRAMEASM_H @@ -54,16 +54,22 @@ movq TF_RBX(%rsp),%rbx ; \ movq TF_RAX(%rsp),%rax -#define INTRENTRY \ + +#define INTRENTRY_L(kernel_trap, usertrap) \ subq $TF_REGSIZE,%rsp ; \ - testq $SEL_UPL,TF_CS(%rsp) ; \ - je 98f ; \ + INTR_SAVE_GPRS ; \ + testb $SEL_UPL,TF_CS(%rsp) ; \ + je kernel_trap ; \ +usertrap ; \ swapgs ; \ movw %gs,TF_GS(%rsp) ; \ movw %fs,TF_FS(%rsp) ; \ movw %es,TF_ES(%rsp) ; \ - movw %ds,TF_DS(%rsp) ; \ -98: INTR_SAVE_GPRS + movw %ds,TF_DS(%rsp) + +#define INTRENTRY \ + INTRENTRY_L(98f,) ; \ +98: #ifndef XEN #define INTRFASTEXIT \ @@ -71,11 +77,11 @@ testq $SEL_UPL,TF_CS(%rsp) /* Interrupted %cs */ ; \ je 99f ; \ cli ; \ - swapgs ; \ - movw TF_GS(%rsp),%gs ; \ movw TF_FS(%rsp),%fs ; \ movw TF_ES(%rsp),%es ; \ movw TF_DS(%rsp),%ds ; \ + swapgs ; \ + movw TF_GS(%rsp),%gs ; /* can fault */ \ 99: addq $TF_REGSIZE+16,%rsp /* + T_xxx and error code */ ; \ iretq