Module Name:    src
Committed By:   riz
Date:           Tue Jun 12 20:43:59 UTC 2012

Modified Files:
        src/sys/arch/amd64/amd64 [netbsd-5-0]: trap.c vector.S
        src/sys/arch/amd64/include [netbsd-5-0]: frameasm.h

Log Message:
Pull up following revision(s) (requested by spz in ticket #1772):
        sys/arch/amd64/amd64/trap.c: revision 1.71 via patch
        sys/arch/amd64/amd64/vector.S: revision 1.41 via patch
        sys/arch/amd64/include/frameasm.h: patch

Treat traps in kernel mode during the 'return to user' iret sequence
as user faults.
Based heavily on the i386 code with the correct opcode bytes inserted.
iret path tested, arranging for segment register errors is harder.
User %fs and %gs (32bit apps) are loaded much earlier and any errors
will generate kernel panics - there is probably code to try to stop
the invalid values being set.
If we get a fault setting the user %gs, or on an iret that is returning
to userspace, we must do a 'swapgs' to reload the kernel %gs_base.
Also save the %ds, %es, %fs, %gs selector values in the frame so
they can be restored if we finally return to user (probably after
an application SIGSEGV handler has fixed the error).
Without this any such fault leaves the kernel running with the wrong
%gs offset and it will most likely fault again early in trap().
Repeats until the stack tramples on something important.
iret change works, invalid %gs is a little harder to arrange.


To generate a diff of this commit:
cvs rdiff -u -r1.52.6.2 -r1.52.6.3 src/sys/arch/amd64/amd64/trap.c
cvs rdiff -u -r1.28.6.1 -r1.28.6.1.2.1 src/sys/arch/amd64/amd64/vector.S
cvs rdiff -u -r1.12 -r1.12.14.1 src/sys/arch/amd64/include/frameasm.h

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/arch/amd64/amd64/trap.c
diff -u src/sys/arch/amd64/amd64/trap.c:1.52.6.2 src/sys/arch/amd64/amd64/trap.c:1.52.6.3
--- src/sys/arch/amd64/amd64/trap.c:1.52.6.2	Fri Aug 14 21:32:18 2009
+++ src/sys/arch/amd64/amd64/trap.c	Tue Jun 12 20:43:59 2012
@@ -1,4 +1,4 @@
-/*	$NetBSD: trap.c,v 1.52.6.2 2009/08/14 21:32:18 snj Exp $	*/
+/*	$NetBSD: trap.c,v 1.52.6.3 2012/06/12 20:43:59 riz Exp $	*/
 
 /*-
  * Copyright (c) 1998, 2000 The NetBSD Foundation, Inc.
@@ -68,7 +68,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: trap.c,v 1.52.6.2 2009/08/14 21:32:18 snj Exp $");
+__KERNEL_RCSID(0, "$NetBSD: trap.c,v 1.52.6.3 2012/06/12 20:43:59 riz Exp $");
 
 #include "opt_ddb.h"
 #include "opt_kgdb.h"
@@ -124,6 +124,7 @@ __KERNEL_RCSID(0, "$NetBSD: trap.c,v 1.5
 #endif
 
 void trap(struct trapframe *);
+void trap_return_fault_return(struct trapframe *) __dead;
 
 const char *trap_type[] = {
 	"privileged instruction fault",		/*  0 T_PRIVINFLT */
@@ -178,16 +179,11 @@ trap(struct trapframe *frame)
 	struct proc *p;
 	int type = (int)frame->tf_trapno;
 	struct pcb *pcb;
-	extern char fusuintrfailure[], kcopy_fault[],
-		    resume_iret[];
+	extern char fusuintrfailure[], kcopy_fault[];
 #if defined(COMPAT_10) || defined(COMPAT_IBCS2)
 	extern char IDTVEC(oosyscall)[];
 #endif
-#if 0
-	extern char resume_pop_ds[], resume_pop_es[];
-#endif
 	struct trapframe *vframe;
-	void *resume;
 	void *onfault;
 	int error;
 	uint64_t cr2;
@@ -274,50 +270,78 @@ copyfault:
 
 		/*
 		 * Check for failure during return to user mode.
+		 * This can happen loading invalid values into the segment
+		 * registers, or during the 'iret' itself.
 		 *
-		 * XXXfvdl check for rex prefix?
-		 *
-		 * We do this by looking at the instruction we faulted on.  The
-		 * specific instructions we recognize only happen when
+		 * We do this by looking at the instruction we faulted on.
+		 * The specific instructions we recognize only happen when
 		 * returning from a trap, syscall, or interrupt.
-		 *
-		 * XXX
-		 * The heuristic used here will currently fail for the case of
-		 * one of the 2 pop instructions faulting when returning from a
-		 * a fast interrupt.  This should not be possible.  It can be
-		 * fixed by rearranging the trap frame so that the stack format
-		 * at this point is the same as on exit from a `slow'
-		 * interrupt.
 		 */
-		switch (*(u_char *)frame->tf_rip) {
-		case 0xcf:	/* iret */
-			vframe = (void *)((uint64_t)&frame->tf_rsp - 44);
-			resume = resume_iret;
-			break;
-/*
- * XXXfvdl these are illegal in long mode (not in compat mode, though)
- * and we do not take back the descriptors from the signal context anyway,
- * but may do so later for USER_LDT, in which case we need to intercept
- * other instructions (movl %eax, %Xs).
- */
-#if 0
-		case 0x1f:	/* popl %ds */
-			vframe = (void *)((uint64_t)&frame->tf_rsp - 4);
-			resume = resume_pop_ds;
-			break;
-		case 0x07:	/* popl %es */
-			vframe = (void *)((uint64_t)&frame->tf_rsp - 0);
-			resume = resume_pop_es;
+
+kernelfault:
+#ifdef XEN
+		/*
+		 * XXX: there has to be an equivalent 'problem'
+		 * but I (dsl) don't know exactly what happens!
+		 * For now panic the kernel.
+		 */
+		goto we_re_toast;
+#else
+		KSI_INIT_TRAP(&ksi);
+		ksi.ksi_signo = SIGSEGV;
+		ksi.ksi_code = SEGV_ACCERR;
+		ksi.ksi_trap = type;
+
+		/* Get %rsp value before fault - there may be a pad word
+		 * below the trap frame. */
+		vframe = (void *)frame->tf_rsp;
+		switch (*(uint16_t *)frame->tf_rip) {
+		case 0xcf48:	/* iretq */
+			/*
+			 * The 'iretq' instruction faulted, so we have the
+			 * 'user' registers saved after the kernel
+			 * %rip:%cs:%fl:%rsp:%ss of the iret, and below that
+			 * the user %rip:%cs:%fl:%rsp:%ss the 'iret' was
+			 * processing.
+			 * We must copy the user register back over the
+			 * kernel fault frame to generate a normal stack
+			 * frame (eg for sending a SIGSEGV).
+			 */
+			vframe = (void *)((char *)vframe
+			    - offsetof(struct trapframe, tf_rip));
+			memmove(vframe, frame,
+			    offsetof(struct trapframe, tf_rip));
+			/* Set the faulting address to the user %eip */
+			ksi.ksi_addr = (void *)vframe->tf_rip;
+			break;
+		case 0xac8e:	/* mov 0x98(%rsp),%gs (8e ac 24 98 00 00 00) */
+		case 0xa48e:	/* mov 0xa0(%rsp),%fs (8e a4 24 a0 00 00 00) */
+		case 0x848e:	/* mov 0xa8(%rsp),%es (8e 84 24 a8 00 00 00) */
+		case 0x9c8e:	/* mov 0xb0(%rsp),%ds (8e 9c 24 b0 00 00 00) */
+			/*
+			 * We faulted loading one of the user segment registers.
+			 * The stack frame containing the user registers is
+			 * still valid and pointed to by tf_rsp.
+			 * Maybe we should check the iretq follows.
+			 */
+			if (KERNELMODE(vframe->tf_cs, vframe->tf_eflags))
+				goto we_re_toast;
+			/* There is no valid address for the fault */
 			break;
-#endif
+
 		default:
 			goto we_re_toast;
 		}
-		if (KERNELMODE(vframe->tf_cs, vframe->tf_rflags))
-			goto we_re_toast;
 
-		frame->tf_rip = (uint64_t)resume;
-		return;
+		/* XXX: worry about on-stack trampolines for nested
+		 * handlers?? */
+		/* Save outer frame for any signal return */
+		l->l_md.md_regs = vframe;
+		(*p->p_emul->e_trapsignal)(l, &ksi);
+		/* Return to user by reloading the user frame */
+		trap_return_fault_return(vframe);
+		/* NOTREACHED */
+#endif
 
 	case T_PROTFLT|T_USER:		/* protection fault */
 	case T_TSSFLT|T_USER:
@@ -570,7 +594,7 @@ faultcommon:
 				goto copyfault;
 			printf("uvm_fault(%p, 0x%lx, %d) -> %x\n",
 			    map, va, ftype, error);
-			goto we_re_toast;
+			goto kernelfault;
 		}
 		if (error == ENOMEM) {
 			ksi.ksi_signo = SIGKILL;

Index: src/sys/arch/amd64/amd64/vector.S
diff -u src/sys/arch/amd64/amd64/vector.S:1.28.6.1 src/sys/arch/amd64/amd64/vector.S:1.28.6.1.2.1
--- src/sys/arch/amd64/amd64/vector.S:1.28.6.1	Tue Nov 25 18:24:31 2008
+++ src/sys/arch/amd64/amd64/vector.S	Tue Jun 12 20:43:59 2012
@@ -1,4 +1,4 @@
-/*	$NetBSD: vector.S,v 1.28.6.1 2008/11/25 18:24:31 snj Exp $	*/
+/*	$NetBSD: vector.S,v 1.28.6.1.2.1 2012/06/12 20:43:59 riz Exp $	*/
 
 /*-
  * Copyright (c) 1998, 2007, 2008 The NetBSD Foundation, Inc.
@@ -98,17 +98,17 @@
 
 /*****************************************************************************/
 
-#ifndef	XEN
-#define PRE_TRAP
-#define	TRAP(a)		pushq $(a) ; jmp _C_LABEL(alltraps)
-#define	ZTRAP(a)	pushq $0 ; TRAP(a)
-#else
+#ifdef	XEN
 #define	PRE_TRAP	movq (%rsp),%rcx ; movq 8(%rsp),%r11 ; addq $0x10,%rsp 
-#define	POST_TRAP(a)	pushq $(a) ; jmp _C_LABEL(alltraps)
-#define	TRAP(a)		PRE_TRAP ; POST_TRAP(a)
-#define ZTRAP(a)	PRE_TRAP ; pushq $0 ; POST_TRAP(a)
+#else
+#define	PRE_TRAP
 #endif
 
+#define	TRAP_NJ(a)	PRE_TRAP ; pushq $(a)
+#define	ZTRAP_NJ(a)	PRE_TRAP ; pushq $0 ; pushq $(a)
+#define	TRAP(a)		TRAP_NJ(a) ; jmp _C_LABEL(alltraps)
+#define	ZTRAP(a)	ZTRAP_NJ(a) ; jmp _C_LABEL(alltraps)
+
 #define	BPTTRAP(a)	ZTRAP(a)
 
 	.text
@@ -177,12 +177,52 @@ IDTVEC(trap09)
 	ZTRAP(T_FPOPFLT)
 IDTVEC(trap0a)
 	TRAP(T_TSSFLT)
-IDTVEC(trap0b)
+
+IDTVEC(trap0b)		/* #NP() Segment not present */
+#ifdef	XEN
 	TRAP(T_SEGNPFLT)
-IDTVEC(trap0c)
+#else
+	TRAP_NJ(T_SEGNPFLT)
+	jmp	check_swapgs
+#endif
+IDTVEC(trap0c)		/* #SS() Stack exception */
+#ifdef	XEN
 	TRAP(T_STKFLT)
-IDTVEC(trap0d)
+#else
+	TRAP_NJ(T_STKFLT)
+	jmp	check_swapgs
+#endif
+
+IDTVEC(trap0d)		/* #GP() General protection */
+#ifdef	XEN
 	TRAP(T_PROTFLT)
+#else
+	TRAP_NJ(T_PROTFLT)
+
+/* We need to worry about traps before iret after the swapgs.
+ * Might be loading %gs or the iret itself. */
+check_swapgs:
+	INTRENTRY_L(3f,1:)
+2:	sti
+	jmp	calltrap
+3:
+	/* Trap in kernel mode. */
+	/* If faulting instruction is 'iret' or mov to %gs
+	 * we may need to do a 'swapgs'. */
+	movq	TF_RIP(%rsp),%rax
+	movq	TF_RSP(%rsp),%rbx	/* Must read %rsp, may be a pad word */
+	cmpl	$0x9824ac8e,(%rax)	/* Fault is mov %gs,0x98(%rsp) ? */
+	jne	4f
+	addw	$7+7,%rax		/* Advance %rip to iret */
+	addq	$TF_REGSIZE+8+8,%rbx	/* %rsp adjust before iret */
+4:	cmpw	$0xcf48,(%rax)		/* Faulting instruction is iretq ? */
+	jne	2b			/* normal fault in kernel */
+
+	testb	$SEL_UPL,8(%rbx)	/* Check %cs of outer iret frame */
+	je	2b			/* if iret was to user ... */
+	jmp	1b			/* ... must restore kernel %gs */
+#endif
+
 IDTVEC(trap0e)
 	TRAP(T_PAGEFLT)
 IDTVEC(intrspurious)
@@ -255,25 +295,20 @@ IDTVEC(exceptions)
 	.quad	_C_LABEL(Xtrap1e), _C_LABEL(Xtrap1f)
 
 /*
- * If an error is detected during trap, syscall, or interrupt exit, trap() will
- * change %eip to point to one of these labels.  We clean up the stack, if
- * necessary, and resume as if we were handling a general protection fault.
- * This will cause the process to get a SIGBUS.
- *
- * XXXfvdl currently unused, as pop %ds and pop %es are illegal in long
- * mode. However, if the x86-64 port is going to support USER_LDT, we
- * may need something like this after all.
+ * trap() calls here when it detects a fault in INTRFASTEXIT (loading the
+ * segment registers or during the iret itself).
+ * The address of the (possibly reconstructed) user trap frame is
+ * passed as an argument.
+ * Typically the code will have raised a SIGSEGV which will be actioned
+ * by the code below.
  */
-NENTRY(resume_iret)
-	ZTRAP(T_PROTFLT)
-#if 0
-NENTRY(resume_pop_ds)
-	movl	$GSEL(GDATA_SEL, SEL_KPL),%eax
-	movl	%eax,%es
-NENTRY(resume_pop_es)
-	movl	$T_PROTFLT,TF_TRAPNO(%rsp)
-	jmp	calltrap
-#endif
+_C_LABEL(trap_return_fault_return):	.globl	trap_return_fault_return
+	mov	%rdi,%rsp		/* frame for user return */
+#ifdef DIAGNOSTIC
+	/* We can't recover the saved %rbx, so suppress warning */
+	movl	CPUVAR(ILEVEL),%ebx
+#endif /* DIAGNOSTIC */
+	jmp	.Lalltraps_checkusr
 
 /*
  * All traps go through here. Call the generic trap handler, and

Index: src/sys/arch/amd64/include/frameasm.h
diff -u src/sys/arch/amd64/include/frameasm.h:1.12 src/sys/arch/amd64/include/frameasm.h:1.12.14.1
--- src/sys/arch/amd64/include/frameasm.h:1.12	Mon Apr 21 15:15:33 2008
+++ src/sys/arch/amd64/include/frameasm.h	Tue Jun 12 20:43:59 2012
@@ -1,4 +1,4 @@
-/*	$NetBSD: frameasm.h,v 1.12 2008/04/21 15:15:33 cegger Exp $	*/
+/*	$NetBSD: frameasm.h,v 1.12.14.1 2012/06/12 20:43:59 riz Exp $	*/
 
 #ifndef _AMD64_MACHINE_FRAMEASM_H
 #define _AMD64_MACHINE_FRAMEASM_H
@@ -54,16 +54,22 @@
 	movq	TF_RBX(%rsp),%rbx	; \
 	movq	TF_RAX(%rsp),%rax
 
-#define	INTRENTRY \
+
+#define	INTRENTRY_L(kernel_trap, usertrap) \
 	subq	$TF_REGSIZE,%rsp	; \
-	testq	$SEL_UPL,TF_CS(%rsp)	; \
-	je	98f			; \
+	INTR_SAVE_GPRS			; \
+	testb	$SEL_UPL,TF_CS(%rsp)	; \
+	je	kernel_trap		; \
+usertrap				; \
 	swapgs				; \
 	movw	%gs,TF_GS(%rsp)		; \
 	movw	%fs,TF_FS(%rsp)		; \
 	movw	%es,TF_ES(%rsp)		; \
-	movw	%ds,TF_DS(%rsp)		; \
-98: 	INTR_SAVE_GPRS
+	movw	%ds,TF_DS(%rsp)	
+
+#define	INTRENTRY \
+	INTRENTRY_L(98f,)		; \
+98:
 
 #ifndef XEN
 #define INTRFASTEXIT \
@@ -71,11 +77,11 @@
 	testq	$SEL_UPL,TF_CS(%rsp)	/* Interrupted %cs */ ; \
 	je	99f			; \
 	cli				; \
-	swapgs				; \
-	movw	TF_GS(%rsp),%gs		; \
 	movw	TF_FS(%rsp),%fs		; \
 	movw	TF_ES(%rsp),%es		; \
 	movw	TF_DS(%rsp),%ds		; \
+	swapgs				; \
+	movw	TF_GS(%rsp),%gs		; /* can fault */ \
 99:	addq	$TF_REGSIZE+16,%rsp	/* + T_xxx and error code */ ; \
 	iretq
 

Reply via email to