Module Name:    src
Committed By:   dsl
Date:           Sun Jul 15 15:17:57 UTC 2012

Modified Files:
        src/sys/arch/amd64/amd64: genassym.cf locore.S machdep.c
            netbsd32_machdep.c trap.c
        src/sys/arch/amd64/include: frameasm.h proc.h
        src/sys/arch/x86/include: cpu.h
        src/sys/arch/x86/x86: vm_machdep.c
        src/sys/compat/linux32/arch/amd64: linux32_machdep.c

Log Message:
Rename MDP_IRET to MDL_IRET since it is an lwp flag, not a proc one.
Add an MDL_COMPAT32 flag to the lwp's md_flags, set it for 32bit lwps
  and use it to force 'return to user' with iret (as is done when
  MDL_IRET is set).
Split the iret/sysret code paths much later.
Remove all the replicated code for 32bit system calls - which was only
  needed so that iret was always used.
frameasm.h for XEN contains '#define swapgs'; since XEN probably never
  needs swapgs, this is likely to be confusing.
Add a SWAPGS which is a nop on XEN and swapgs otherwise.
(I've not yet checked all the swapgs in files that include frameasm.h)
Simple x86 programs still work.
Hijack 6.99.9 kernel bump (needed for compat32 modules)


To generate a diff of this commit:
cvs rdiff -u -r1.51 -r1.52 src/sys/arch/amd64/amd64/genassym.cf
cvs rdiff -u -r1.69 -r1.70 src/sys/arch/amd64/amd64/locore.S
cvs rdiff -u -r1.188 -r1.189 src/sys/arch/amd64/amd64/machdep.c
cvs rdiff -u -r1.78 -r1.79 src/sys/arch/amd64/amd64/netbsd32_machdep.c
cvs rdiff -u -r1.72 -r1.73 src/sys/arch/amd64/amd64/trap.c
cvs rdiff -u -r1.19 -r1.20 src/sys/arch/amd64/include/frameasm.h
cvs rdiff -u -r1.14 -r1.15 src/sys/arch/amd64/include/proc.h
cvs rdiff -u -r1.51 -r1.52 src/sys/arch/x86/include/cpu.h
cvs rdiff -u -r1.15 -r1.16 src/sys/arch/x86/x86/vm_machdep.c
cvs rdiff -u -r1.30 -r1.31 \
    src/sys/compat/linux32/arch/amd64/linux32_machdep.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/arch/amd64/amd64/genassym.cf
diff -u src/sys/arch/amd64/amd64/genassym.cf:1.51 src/sys/arch/amd64/amd64/genassym.cf:1.52
--- src/sys/arch/amd64/amd64/genassym.cf:1.51	Mon Jun 11 15:18:05 2012
+++ src/sys/arch/amd64/amd64/genassym.cf	Sun Jul 15 15:17:56 2012
@@ -1,4 +1,4 @@
-#	$NetBSD: genassym.cf,v 1.51 2012/06/11 15:18:05 chs Exp $
+#	$NetBSD: genassym.cf,v 1.52 2012/07/15 15:17:56 dsl Exp $
 
 #
 # Copyright (c) 1998, 2006, 2007, 2008 The NetBSD Foundation, Inc.
@@ -162,7 +162,8 @@ define	L_MD_ASTPENDING		offsetof(struct 
 
 define	PAGE_SIZE		PAGE_SIZE
 
-define	MDP_IRET		MDP_IRET
+define	MDL_IRET		MDL_IRET
+define	MDL_COMPAT32		MDL_COMPAT32
 
 define	P_FLAG			offsetof(struct proc, p_flag)
 define	P_RASLIST		offsetof(struct proc, p_raslist)

Index: src/sys/arch/amd64/amd64/locore.S
diff -u src/sys/arch/amd64/amd64/locore.S:1.69 src/sys/arch/amd64/amd64/locore.S:1.70
--- src/sys/arch/amd64/amd64/locore.S:1.69	Sat Jun 16 17:30:19 2012
+++ src/sys/arch/amd64/amd64/locore.S	Sun Jul 15 15:17:56 2012
@@ -1,4 +1,4 @@
-/*	$NetBSD: locore.S,v 1.69 2012/06/16 17:30:19 chs Exp $	*/
+/*	$NetBSD: locore.S,v 1.70 2012/07/15 15:17:56 dsl Exp $	*/
 
 /*
  * Copyright-o-rama!
@@ -986,9 +986,9 @@ ENTRY(cpu_switchto)
 	xorq	%rax, %rax
 	movw	%ax, %fs
 	CLI(cx)
-	swapgs
+	SWAPGS
 	movw	%ax, %gs
-	swapgs
+	SWAPGS
 	STI(cx)
 
 	movq	CPUVAR(GDT),%rcx
@@ -1018,9 +1018,9 @@ ENTRY(cpu_switchto)
 	movq	L_MD_REGS(%r12), %rbx
 	movw	TF_FS(%rbx), %fs
 	CLI(ax)
-	swapgs
+	SWAPGS
 	movw	TF_GS(%rbx), %gs
-	swapgs
+	SWAPGS
 	STI(ax)
 
 #else
@@ -1063,40 +1063,50 @@ IDTVEC(syscall32)
 /*
  * syscall()
  *
- * syscall insn entry. This currently isn't much faster, but
- * it can be made faster in the future.
+ * syscall insn entry.
+ * This currently isn't much faster, but it can be made faster in the future.
+ * (Actually we've already saved a few 100 clocks by not loading the trap gate)
  */
 IDTVEC(syscall)
 #ifndef XEN
+	/*
+	 * The user %rip is in %rcx and the user %flags in %r11.
+	 * The kernel %cs and %ss are loaded, but nothing else is.
+	 * The 'swapgs' gives us access to cpu-specific memory where
+	 * we can save a user register and then read the lwps
+	 * kernel stack pointer,
+	 * This code doesn't seem to set %ds, this may not matter since it
+	 * is ignored in 64bit mode, OTOH the syscall instruction sets %ss
+	 * and that is ignored as well.
+	 */
 	swapgs
 	movq	%r15,CPUVAR(SCRATCH)
 	movq	CPUVAR(CURLWP),%r15
 	movq	L_PCB(%r15),%r15
-	movq	PCB_RSP0(%r15),%r15
-	xchgq	%r15,%rsp
+	movq	PCB_RSP0(%r15),%r15	/* LWP's kernel stack pointer */
 
-	/*
-	 * XXX don't need this whole frame, split of the
-	 * syscall frame and trapframe is needed.
-	 * First, leave some room for the trapno, error,
-	 * ss:rsp, etc, so that all GP registers can be
-	 * saved. Then, fill in the rest.
-	 */
-	pushq	$(LSEL(LUDATA_SEL, SEL_UPL))	/* Known to be user ss */
-	pushq	%r15				/* User space rsp */
+	/* Make stack look like an 'int nn' frame */
+#define SP(x)	(x)-(TF_SS+8)(%r15)
+	movq	$(LSEL(LUDATA_SEL, SEL_UPL)), SP(TF_SS) /* user %ss */
+	movq	%rsp, SP(TF_RSP) 	/* User space rsp */
+
+	movq	%r11, SP(TF_RFLAGS)	/* old rflags from syscall insn */
+	movq	$(LSEL(LUCODE_SEL, SEL_UPL)), SP(TF_CS)
+	movq	%rcx, SP(TF_RIP)	/* syscall saves rip in rcx */
+
+	leaq	SP(0),%rsp		/* %rsp now valid after frame */
 	movq	CPUVAR(SCRATCH),%r15
-	subq	$TF_REGSIZE+(TF_RSP-TF_TRAPNO),%rsp
+#undef SP
+
+	movq	$2,TF_ERR(%rsp)		/* syscall instruction size */
+	movq	$T_ASTFLT, TF_TRAPNO(%rsp)
+
 	movw	%es,TF_ES(%rsp)
 	sti
 	INTR_SAVE_GPRS
 	movw	%fs,TF_FS(%rsp)
 	movw	%gs,TF_GS(%rsp)
 	movw	$(LSEL(LUDATA_SEL, SEL_UPL)),TF_DS(%rsp)
-	movq	%r11, TF_RFLAGS(%rsp)	/* old rflags from syscall insn */
-	movq	$(LSEL(LUCODE_SEL, SEL_UPL)), TF_CS(%rsp)
-	movq	%rcx,TF_RIP(%rsp)	/* syscall saves rip in rcx */
-	movq	$2,TF_ERR(%rsp)
-	movq	$T_ASTFLT, TF_TRAPNO(%rsp)
 #else
 	/* Xen already switched to kernel stack */
 	pushq	%rsi
@@ -1113,44 +1123,59 @@ IDTVEC(syscall)
 	movw	$(LSEL(LUDATA_SEL, SEL_UPL)),TF_DS(%rsp)
 #endif
 
+do_syscall:
 	movq	CPUVAR(CURLWP),%r14
 	incq	CPUVAR(NSYSCALL)	# count it atomically
 	movq	%rsp,L_MD_REGS(%r14)	# save pointer to frame
 	movq	L_PROC(%r14),%r15
-	andl	$~MDP_IRET,L_MD_FLAGS(%r14)
+	andl	$~MDL_IRET,L_MD_FLAGS(%r14)   /* Allow sysret return */
 	movq	%rsp,%rdi		/* Pass frame as arg0 */
 	call	*P_MD_SYSCALL(%r15)
 .Lsyscall_checkast:
-	/* Check for ASTs on exit to user mode. */
+	/*
+	 * Disable interrupts to avoid new ASTs (etc) being added and
+	 * to ensure we don't take an interrupt with some of the user
+	 * registers loaded.
+	 */
 	CLI(si)
+	/* Check for ASTs on exit to user mode. */
 	movl	L_MD_ASTPENDING(%r14), %eax
 	orl	CPUVAR(WANT_PMAPLOAD), %eax
 	jnz	9f
-	testl	$MDP_IRET, L_MD_FLAGS(%r14)
-	jne	iret_return;
 #ifdef DIAGNOSTIC
 	cmpl	$IPL_NONE,CPUVAR(ILEVEL)
 	jne	3f
 #endif
-	movw	TF_ES(%rsp),%es
-#ifndef XEN
-	swapgs
-#endif
+	testl	$(MDL_IRET | MDL_COMPAT32), L_MD_FLAGS(%r14)
 	INTR_RESTORE_GPRS
-	movw	$(LSEL(LUDATA_SEL, SEL_UPL)), %r11w
-	movw	%r11w,%ds
-	addq	$TF_REGSIZE+16,%rsp	/* + T_xxx and error code */
+	movw	TF_ES(%rsp),%es
+	SWAPGS
+	jnz	2f
 #ifndef XEN
-	popq	%rcx	/* return rip */
-	addq	$8,%rsp	/* discard cs */
-	popq	%r11	/* flags as set by sysret insn */
-	movq	%ss:(%rsp),%rsp
+	movq	TF_RIP(%rsp), %rcx	/* %rip for sysret */
+	movq	TF_RFLAGS(%rsp), %r11	/* %flags for sysret */
+	movw	TF_DS(%rsp), %ds
+	movq	TF_RSP(%rsp), %rsp
 	sysretq
 #else
+	movw	TF_DS(%rsp), %ds
+	addq	$TF_RIP, %rsp
 	pushq	$256	/* VGCF_IN_SYSCALL */
 	jmp	HYPERVISOR_iret
 #endif
 
+/*
+ * If the syscall might have modified some registers, or we are a 32bit
+ * process we must return to user with an 'iret' instruction.
+ * If the iret faults in kernel (assumed due to illegal register values)
+ * then a SIGSEGV will be signalled.
+ */
+2:
+	movw	TF_DS(%rsp), %ds
+	addq	$TF_RIP, %rsp
+	iretq
+
+/* Report SPL error */
 #ifdef DIAGNOSTIC
 3:	movabsq	$4f, %rdi
 	movl	TF_RAX(%rsp),%esi
@@ -1164,6 +1189,8 @@ IDTVEC(syscall)
 	jmp	.Lsyscall_checkast
 4:	.asciz	"WARNING: SPL NOT LOWERED ON SYSCALL %d %d EXIT %x %x\n"
 #endif
+
+/* AST pending or pmap load needed */
 9:
 	cmpl	$0, CPUVAR(WANT_PMAPLOAD)
 	jz	10f
@@ -1182,27 +1209,10 @@ IDTVEC(syscall)
  * void lwp_trampoline(void);
  *
  * This is a trampoline function pushed run by newly created LWPs
- * in order to do additional setup in their context.  32-bit
- * binaries begin life here.
+ * in order to do additional setup in their context.
  */
 NENTRY(lwp_trampoline)
 	movq	%rbp,%rsi
-	movq	%rbp,%r14	/* for .Losyscall_checkast */
-	movq	%rax,%rdi
-	xorq	%rbp,%rbp
-	call	_C_LABEL(lwp_startup)
-	movq	%r13,%rdi
-	call	*%r12
-	jmp	.Losyscall_checkast
-	/* NOTREACHED */
-
-/*
- * void child_trampoline(void);
- *
- * As per lwp_trampoline(), but 64-bit binaries start here.
- */
-NENTRY(child_trampoline)
-	movq	%rbp,%rsi
 	movq	%rbp,%r14	/* for .Lsyscall_checkast */
 	movq	%rax,%rdi
 	xorq	%rbp,%rbp
@@ -1211,8 +1221,6 @@ NENTRY(child_trampoline)
 	call	*%r12
 	jmp	.Lsyscall_checkast
 
-	.globl  _C_LABEL(osyscall_return)
-
 /*
  * oosyscall()
  *
@@ -1249,50 +1257,7 @@ osyscall1:
 	pushq	$T_ASTFLT	# trap # for doing ASTs
 	INTRENTRY
 	STI(si)
-	movq	CPUVAR(CURLWP),%r14
-	movq	%rsp,L_MD_REGS(%r14)	# save pointer to frame
-	movq	L_PROC(%r14),%rdx
-	movq	%rsp,%rdi
-	call	*P_MD_SYSCALL(%rdx)
-_C_LABEL(osyscall_return):
-.Losyscall_checkast:
-	/* Check for ASTs on exit to user mode. */
-	CLI(si)
-	movl	L_MD_ASTPENDING(%r14), %eax
-	orl	CPUVAR(WANT_PMAPLOAD), %eax
-	jnz	9f
-iret_return:
-#ifdef DIAGNOSTIC
-	cmpl	$IPL_NONE,CPUVAR(ILEVEL)
-	jne	3f
-#endif
-	INTRFASTEXIT
-#ifdef DIAGNOSTIC
-3:	movabsq	$4f, %rdi
-	movl	TF_RAX(%rsp),%esi
-	movl	TF_RDI(%rsp),%edx
-	movl	%ebx,%ecx
-	movl	CPUVAR(ILEVEL),%r8d
-	xorq	%rax,%rax
-	call	_C_LABEL(printf)
-	movl	$IPL_NONE,%edi
-	call	_C_LABEL(spllower)
-	jmp	.Losyscall_checkast
-4:	.asciz	"WARNING: SPL NOT LOWERED ON SYSCALL %d %d EXIT %x %x\n"
-#endif
-9:
-	cmpl	$0, CPUVAR(WANT_PMAPLOAD)
-	jz	10f
-	STI(si)
-	call	_C_LABEL(do_pmap_load)
-	jmp	.Losyscall_checkast	/* re-check ASTs */
-10:
-	CLEAR_ASTPENDING(%r14)
-	STI(si)
-	/* Pushed T_ASTFLT into tf_trapno on entry. */
-	movq	%rsp,%rdi
-	call	_C_LABEL(trap)
-	jmp	.Losyscall_checkast	/* re-check ASTs */
+	jmp	do_syscall
 
 /*
  * bool sse2_idlezero_page(void *pg)

Index: src/sys/arch/amd64/amd64/machdep.c
diff -u src/sys/arch/amd64/amd64/machdep.c:1.188 src/sys/arch/amd64/amd64/machdep.c:1.189
--- src/sys/arch/amd64/amd64/machdep.c:1.188	Sun Jul  8 20:14:11 2012
+++ src/sys/arch/amd64/amd64/machdep.c	Sun Jul 15 15:17:56 2012
@@ -1,4 +1,4 @@
-/*	$NetBSD: machdep.c,v 1.188 2012/07/08 20:14:11 dsl Exp $	*/
+/*	$NetBSD: machdep.c,v 1.189 2012/07/15 15:17:56 dsl Exp $	*/
 
 /*-
  * Copyright (c) 1996, 1997, 1998, 2000, 2006, 2007, 2008, 2011
@@ -111,7 +111,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: machdep.c,v 1.188 2012/07/08 20:14:11 dsl Exp $");
+__KERNEL_RCSID(0, "$NetBSD: machdep.c,v 1.189 2012/07/15 15:17:56 dsl Exp $");
 
 /* #define XENDEBUG_LOW  */
 
@@ -2054,7 +2054,7 @@ cpu_setmcontext(struct lwp *l, const mco
 		tf->tf_err = err;
 		tf->tf_trapno = trapno;
 
-		l->l_md.md_flags |= MDP_IRET;
+		l->l_md.md_flags |= MDL_IRET;
 	}
 
 	if (pcb->pcb_fpcpu != NULL)

Index: src/sys/arch/amd64/amd64/netbsd32_machdep.c
diff -u src/sys/arch/amd64/amd64/netbsd32_machdep.c:1.78 src/sys/arch/amd64/amd64/netbsd32_machdep.c:1.79
--- src/sys/arch/amd64/amd64/netbsd32_machdep.c:1.78	Sun Jul  8 20:14:11 2012
+++ src/sys/arch/amd64/amd64/netbsd32_machdep.c	Sun Jul 15 15:17:56 2012
@@ -1,4 +1,4 @@
-/*	$NetBSD: netbsd32_machdep.c,v 1.78 2012/07/08 20:14:11 dsl Exp $	*/
+/*	$NetBSD: netbsd32_machdep.c,v 1.79 2012/07/15 15:17:56 dsl Exp $	*/
 
 /*
  * Copyright (c) 2001 Wasabi Systems, Inc.
@@ -36,7 +36,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: netbsd32_machdep.c,v 1.78 2012/07/08 20:14:11 dsl Exp $");
+__KERNEL_RCSID(0, "$NetBSD: netbsd32_machdep.c,v 1.79 2012/07/15 15:17:56 dsl Exp $");
 
 #ifdef _KERNEL_OPT
 #include "opt_compat_netbsd.h"
@@ -83,8 +83,6 @@ __KERNEL_RCSID(0, "$NetBSD: netbsd32_mac
 const char	machine32[] = "i386";
 const char	machine_arch32[] = "i386";	
 
-extern void (osyscall_return)(void);
-
 #ifdef MTRR
 static int x86_64_get_mtrr32(struct lwp *, void *, register_t *);
 static int x86_64_set_mtrr32(struct lwp *, void *, register_t *);
@@ -127,7 +125,6 @@ netbsd32_setregs(struct lwp *l, struct e
 	struct pcb *pcb;
 	struct trapframe *tf;
 	struct proc *p = l->l_proc;
-	void **retaddr;
 
 	pcb = lwp_getpcb(l);
 
@@ -143,6 +140,7 @@ netbsd32_setregs(struct lwp *l, struct e
 	netbsd32_adjust_limits(p);
 
 	l->l_md.md_flags &= ~MDL_USEDFPU;
+	l->l_md.md_flags |= MDL_COMPAT32;	/* Force iret not sysret */
 	pcb->pcb_flags = PCB_COMPAT32;
         pcb->pcb_savefpu.fp_fxsave.fx_fcw = __NetBSD_NPXCW__;
         pcb->pcb_savefpu.fp_fxsave.fx_mxcsr = __INITIAL_MXCSR__;  
@@ -167,10 +165,6 @@ netbsd32_setregs(struct lwp *l, struct e
 	tf->tf_rflags = PSL_USERSET;
 	tf->tf_rsp = stack;
 	tf->tf_ss = LSEL(LUDATA32_SEL, SEL_UPL);
-
-	/* XXX frob return address to return via old iret method, not sysret */
-	retaddr = (void **)tf - 1;
-	*retaddr = (void *)osyscall_return;
 }
 
 #ifdef COMPAT_16

Index: src/sys/arch/amd64/amd64/trap.c
diff -u src/sys/arch/amd64/amd64/trap.c:1.72 src/sys/arch/amd64/amd64/trap.c:1.73
--- src/sys/arch/amd64/amd64/trap.c:1.72	Sat Jun 30 23:33:10 2012
+++ src/sys/arch/amd64/amd64/trap.c	Sun Jul 15 15:17:56 2012
@@ -1,4 +1,4 @@
-/*	$NetBSD: trap.c,v 1.72 2012/06/30 23:33:10 rmind Exp $	*/
+/*	$NetBSD: trap.c,v 1.73 2012/07/15 15:17:56 dsl Exp $	*/
 
 /*-
  * Copyright (c) 1998, 2000 The NetBSD Foundation, Inc.
@@ -68,7 +68,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: trap.c,v 1.72 2012/06/30 23:33:10 rmind Exp $");
+__KERNEL_RCSID(0, "$NetBSD: trap.c,v 1.73 2012/07/15 15:17:56 dsl Exp $");
 
 #include "opt_ddb.h"
 #include "opt_kgdb.h"
@@ -343,7 +343,7 @@ kernelfault:
 		switch (*(uint16_t *)frame->tf_rip) {
 		case 0xcf48:	/* iretq */
 			/*
-			 * The 'iretq' instruction faulted, wo we have the
+			 * The 'iretq' instruction faulted, so we have the
 			 * 'user' registers saved after the kernel
 			 * %rip:%cs:%fl:%rsp:%ss of the iret, and below that
 			 * the user %rip:%cs:%fl:%rsp:%ss the 'iret' was

Index: src/sys/arch/amd64/include/frameasm.h
diff -u src/sys/arch/amd64/include/frameasm.h:1.19 src/sys/arch/amd64/include/frameasm.h:1.20
--- src/sys/arch/amd64/include/frameasm.h:1.19	Thu May 17 19:38:53 2012
+++ src/sys/arch/amd64/include/frameasm.h	Sun Jul 15 15:17:56 2012
@@ -1,4 +1,4 @@
-/*	$NetBSD: frameasm.h,v 1.19 2012/05/17 19:38:53 dsl Exp $	*/
+/*	$NetBSD: frameasm.h,v 1.20 2012/07/15 15:17:56 dsl Exp $	*/
 
 #ifndef _AMD64_MACHINE_FRAMEASM_H
 #define _AMD64_MACHINE_FRAMEASM_H
@@ -35,6 +35,8 @@
 #define STI(temp_reg) sti
 #endif	/* XEN */
 
+#define	SWAPGS	NOT_XEN(swapgs)
+
 /*
  * These are used on interrupt or trap entry or exit.
  */
@@ -79,7 +81,7 @@
 	testb	$SEL_UPL,TF_CS(%rsp)	; \
 	je	kernel_trap		; \
 usertrap				; \
-	swapgs				; \
+	SWAPGS				; \
 	movw	%gs,TF_GS(%rsp)		; \
 	movw	%fs,TF_FS(%rsp)		; \
 	movw	%es,TF_ES(%rsp)		; \
@@ -93,11 +95,11 @@ usertrap				; \
 	INTR_RESTORE_GPRS 		; \
 	testq	$SEL_UPL,TF_CS(%rsp)	/* Interrupted %cs */ ; \
 	je	99f			; \
-/* XEN: Disabling events before going to user mode sounds like a BAD idea */ \
+/* Disable interrupts until the 'iret', user registers loaded. */ \
 	NOT_XEN(cli;)			  \
 	movw	TF_ES(%rsp),%es		; \
 	movw	TF_DS(%rsp),%ds		; \
-	swapgs				; \
+	SWAPGS				; \
 99:	addq	$TF_REGSIZE+16,%rsp	/* + T_xxx and error code */ ; \
 	iretq
 

Index: src/sys/arch/amd64/include/proc.h
diff -u src/sys/arch/amd64/include/proc.h:1.14 src/sys/arch/amd64/include/proc.h:1.15
--- src/sys/arch/amd64/include/proc.h:1.14	Sun Jul  8 20:14:11 2012
+++ src/sys/arch/amd64/include/proc.h	Sun Jul 15 15:17:56 2012
@@ -1,4 +1,4 @@
-/*	$NetBSD: proc.h,v 1.14 2012/07/08 20:14:11 dsl Exp $	*/
+/*	$NetBSD: proc.h,v 1.15 2012/07/15 15:17:56 dsl Exp $	*/
 
 /*
  * Copyright (c) 1991 Regents of the University of California.
@@ -54,6 +54,8 @@ struct mdlwp {
 };
 
 #define	MDL_USEDFPU	0x0001	/* has used the FPU */
+#define	MDL_COMPAT32	0x0008	/* i386, always return via iret */
+#define	MDL_IRET	0x0010	/* force return via iret, not sysret */
 
 struct mdproc {
 	int	md_flags;
@@ -62,10 +64,7 @@ struct mdproc {
 };
 
 /* md_flags */
-#define MDP_COMPAT	0x0002	/* x86 compatibility process */
-#define MDP_SYSCALL	0x0004	/* entered kernel via syscall ins */
 #define MDP_USEDMTRR	0x0008	/* has set volatile MTRRs */
-#define MDP_IRET	0x0010	/* return via iret, not sysret */
 
 #define	UAREA_PCB_OFFSET	(USPACE - ALIGN(sizeof(struct pcb)))
 #define	KSTACK_LOWEST_ADDR(l)	\

Index: src/sys/arch/x86/include/cpu.h
diff -u src/sys/arch/x86/include/cpu.h:1.51 src/sys/arch/x86/include/cpu.h:1.52
--- src/sys/arch/x86/include/cpu.h:1.51	Sat Jun 16 17:30:18 2012
+++ src/sys/arch/x86/include/cpu.h	Sun Jul 15 15:17:56 2012
@@ -1,4 +1,4 @@
-/*	$NetBSD: cpu.h,v 1.51 2012/06/16 17:30:18 chs Exp $	*/
+/*	$NetBSD: cpu.h,v 1.52 2012/07/15 15:17:56 dsl Exp $	*/
 
 /*-
  * Copyright (c) 1990 The Regents of the University of California.
@@ -407,7 +407,6 @@ void	lgdt_finish(void);
 struct pcb;
 void	savectx(struct pcb *);
 void	lwp_trampoline(void);
-void	child_trampoline(void);
 #ifdef XEN
 void	startrtclock(void);
 void	xen_delay(unsigned int);

Index: src/sys/arch/x86/x86/vm_machdep.c
diff -u src/sys/arch/x86/x86/vm_machdep.c:1.15 src/sys/arch/x86/x86/vm_machdep.c:1.16
--- src/sys/arch/x86/x86/vm_machdep.c:1.15	Sun Feb 19 21:06:35 2012
+++ src/sys/arch/x86/x86/vm_machdep.c	Sun Jul 15 15:17:56 2012
@@ -1,4 +1,4 @@
-/*	$NetBSD: vm_machdep.c,v 1.15 2012/02/19 21:06:35 rmind Exp $	*/
+/*	$NetBSD: vm_machdep.c,v 1.16 2012/07/15 15:17:56 dsl Exp $	*/
 
 /*-
  * Copyright (c) 1982, 1986 The Regents of the University of California.
@@ -80,7 +80,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: vm_machdep.c,v 1.15 2012/02/19 21:06:35 rmind Exp $");
+__KERNEL_RCSID(0, "$NetBSD: vm_machdep.c,v 1.16 2012/07/15 15:17:56 dsl Exp $");
 
 #include "opt_mtrr.h"
 
@@ -224,10 +224,7 @@ cpu_lwp_fork(struct lwp *l1, struct lwp 
 #ifdef __x86_64__
 	sf->sf_r12 = (uint64_t)func;
 	sf->sf_r13 = (uint64_t)arg;
-	if (func == child_return && !(l2->l_proc->p_flag & PK_32))
-		sf->sf_rip = (uint64_t)child_trampoline;
-	else
-		sf->sf_rip = (uint64_t)lwp_trampoline;
+	sf->sf_rip = (uint64_t)lwp_trampoline;
 	pcb2->pcb_rsp = (uint64_t)sf;
 	pcb2->pcb_rbp = (uint64_t)l2;
 #else

Index: src/sys/compat/linux32/arch/amd64/linux32_machdep.c
diff -u src/sys/compat/linux32/arch/amd64/linux32_machdep.c:1.30 src/sys/compat/linux32/arch/amd64/linux32_machdep.c:1.31
--- src/sys/compat/linux32/arch/amd64/linux32_machdep.c:1.30	Sun Jul  8 20:14:12 2012
+++ src/sys/compat/linux32/arch/amd64/linux32_machdep.c	Sun Jul 15 15:17:56 2012
@@ -1,4 +1,4 @@
-/*	$NetBSD: linux32_machdep.c,v 1.30 2012/07/08 20:14:12 dsl Exp $ */
+/*	$NetBSD: linux32_machdep.c,v 1.31 2012/07/15 15:17:56 dsl Exp $ */
 
 /*-
  * Copyright (c) 2006 Emmanuel Dreyfus, all rights reserved.
@@ -31,7 +31,7 @@
  * POSSIBILITY OF SUCH DAMAGE.
  */
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: linux32_machdep.c,v 1.30 2012/07/08 20:14:12 dsl Exp $");
+__KERNEL_RCSID(0, "$NetBSD: linux32_machdep.c,v 1.31 2012/07/15 15:17:56 dsl Exp $");
 
 #include <sys/param.h>
 #include <sys/proc.h>
@@ -72,8 +72,6 @@ extern char linux32_sigcode[];
 extern char linux32_rt_sigcode[];
 extern char linux32_esigcode[];
 
-extern void (osyscall_return)(void);
-
 static void linux32_save_ucontext(struct lwp *, struct trapframe *,
     const sigset_t *, struct sigaltstack *, struct linux32_ucontext *);
 static void linux32_save_sigcontext(struct lwp *, struct trapframe *,
@@ -274,7 +272,6 @@ linux32_setregs(struct lwp *l, struct ex
 	struct pcb *pcb = lwp_getpcb(l);
 	struct trapframe *tf;
 	struct proc *p = l->l_proc;
-	void **retaddr;
 
 	/* If we were using the FPU, forget about it. */
 	if (pcb->pcb_fpcpu != NULL)
@@ -287,6 +284,7 @@ linux32_setregs(struct lwp *l, struct ex
 	netbsd32_adjust_limits(p);
 
 	l->l_md.md_flags &= ~MDL_USEDFPU;
+	l->l_md.md_flags |= MDL_COMPAT32;	/* Forces iret not sysret */
 	pcb->pcb_flags = PCB_COMPAT32;
 	pcb->pcb_savefpu.fp_fxsave.fx_fcw = __Linux_NPXCW__;
 	pcb->pcb_savefpu.fp_fxsave.fx_mxcsr = __INITIAL_MXCSR__;
@@ -319,11 +317,6 @@ linux32_setregs(struct lwp *l, struct ex
 	tf->tf_es = GSEL(GUDATA32_SEL, SEL_UPL);
 	cpu_fsgs_zero(l);
 	cpu_fsgs_reload(l, GSEL(GUDATA32_SEL, SEL_UPL), GSEL(GUDATA32_SEL, SEL_UPL));
-
-	/* XXX frob return address to return via old iret method, not sysret */
-	retaddr = (void **)tf - 1;
-	*retaddr = (void *)osyscall_return;
-	return;
 }
 
 static void

Reply via email to