Module Name:    src
Committed By:   maxv
Date:           Tue Oct 17 06:58:15 UTC 2017

Modified Files:
        src/sys/arch/amd64/amd64: copy.S trap.c
        src/sys/arch/amd64/include: frameasm.h
        src/sys/arch/x86/x86: cpu.c patch.c

Log Message:
Add support for SMAP on amd64.

PSL_AC is cleared from %rflags in each kernel entry point. In the copy
sections, a copy window is opened and the kernel can touch userland
pages. This window is closed when the kernel is done, either at the end
of the copy sections or in the fault-recover functions.

This implementation is not optimized yet, due to the fact that INTRENTRY
is a macro, and we can't hotpatch macros.

Sent on tech-kern@ a month or two ago, tested on a Kaby Lake.


To generate a diff of this commit:
cvs rdiff -u -r1.24 -r1.25 src/sys/arch/amd64/amd64/copy.S
cvs rdiff -u -r1.101 -r1.102 src/sys/arch/amd64/amd64/trap.c
cvs rdiff -u -r1.21 -r1.22 src/sys/arch/amd64/include/frameasm.h
cvs rdiff -u -r1.136 -r1.137 src/sys/arch/x86/x86/cpu.c
cvs rdiff -u -r1.22 -r1.23 src/sys/arch/x86/x86/patch.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/arch/amd64/amd64/copy.S
diff -u src/sys/arch/amd64/amd64/copy.S:1.24 src/sys/arch/amd64/amd64/copy.S:1.25
--- src/sys/arch/amd64/amd64/copy.S:1.24	Fri Aug 25 11:35:03 2017
+++ src/sys/arch/amd64/amd64/copy.S	Tue Oct 17 06:58:15 2017
@@ -1,4 +1,4 @@
-/*	$NetBSD: copy.S,v 1.24 2017/08/25 11:35:03 maxv Exp $	*/
+/*	$NetBSD: copy.S,v 1.25 2017/10/17 06:58:15 maxv Exp $	*/
 
 /*
  * Copyright (c) 2001 Wasabi Systems, Inc.
@@ -107,6 +107,24 @@ ENTRY(do_pmap_load)
 	ret
 
 /*
+ * SMAP functions. ret+int3+int3 is patched dynamically to STAC/CLAC.
+ */
+
+ENTRY(smap_enable)
+.Lclacpatch:			/* patch site, listed in x86_clacpatch */
+	ret			/* unpatched: return at once, nothing to do */
+	int3			/* ret+int3+int3 form the 3-byte hole for clac */
+	int3
+	ret			/* reached only once the hole holds clac */
+
+ENTRY(smap_disable)
+.Lstacpatch:			/* patch site, listed in x86_stacpatch */
+	ret			/* unpatched: return at once, nothing to do */
+	int3			/* ret+int3+int3 form the 3-byte hole for stac */
+	int3
+	ret			/* reached only once the hole holds stac */
+
+/*
  * Copy routines from and to userland, plus a few more. See the
  * section 9 manpages for info. Some cases can be optimized more.
  *
@@ -185,6 +203,7 @@ ENTRY(copyout)
 	cmpq	%r8,%rdx
 	ja	_C_LABEL(copy_efault)	/* jump if end in kernel space */
 
+	callq	smap_disable
 .Lcopyout_start:
 	movq	%rax,%rcx		/* length */
 	shrq	$3,%rcx			/* count of 8-byte words */
@@ -195,6 +214,7 @@ ENTRY(copyout)
 	rep
 	movsb				/* copy remaining bytes */
 .Lcopyout_end:
+	callq	smap_enable
 
 	xorl	%eax,%eax
 	ret
@@ -212,6 +232,7 @@ ENTRY(copyin)
 	cmpq	%r8,%rdx
 	ja	_C_LABEL(copy_efault)	/* j if end in kernel space */
 
+	callq	smap_disable
 .Lcopyin_start:
 3:	/* bcopy(%rsi, %rdi, %rax); */
 	movq	%rax,%rcx
@@ -223,6 +244,7 @@ ENTRY(copyin)
 	rep
 	movsb
 .Lcopyin_end:
+	callq	smap_enable
 
 	xorl	%eax,%eax
 	ret
@@ -241,6 +263,7 @@ NENTRY(kcopy_fault)
 	ret
 
 NENTRY(copy_fault)
+	callq	smap_enable	/* fault recovery: close the copy window */
 	ret
 
 ENTRY(copyoutstr)
@@ -261,6 +284,7 @@ ENTRY(copyoutstr)
 	movq	%rax,%r8
 1:	incq	%rdx
 
+	callq	smap_disable
 .Lcopyoutstr_start:
 1:	decq	%rdx
 	jz	2f
@@ -269,6 +293,7 @@ ENTRY(copyoutstr)
 	testb	%al,%al
 	jnz	1b
 .Lcopyoutstr_end:
+	callq	smap_enable
 
 	/* Success -- 0 byte reached. */
 	decq	%rdx
@@ -276,6 +301,7 @@ ENTRY(copyoutstr)
 	jmp	copystr_return
 
 2:	/* rdx is zero -- return EFAULT or ENAMETOOLONG. */
+	callq	smap_enable
 	movq	$VM_MAXUSER_ADDRESS,%r11
 	cmpq	%r11,%rdi
 	jae	_C_LABEL(copystr_efault)
@@ -301,6 +327,7 @@ ENTRY(copyinstr)
 	movq	%rax,%r8
 1:	incq	%rdx
 
+	callq	smap_disable
 .Lcopyinstr_start:
 1:	decq	%rdx
 	jz	2f
@@ -309,6 +336,7 @@ ENTRY(copyinstr)
 	testb	%al,%al
 	jnz	1b
 .Lcopyinstr_end:
+	callq	smap_enable
 
 	/* Success -- 0 byte reached. */
 	decq	%rdx
@@ -316,6 +344,7 @@ ENTRY(copyinstr)
 	jmp	copystr_return
 
 2:	/* edx is zero -- return EFAULT or ENAMETOOLONG. */
+	callq	smap_enable
 	movq	$VM_MAXUSER_ADDRESS,%r11
 	cmpq	%r11,%rsi
 	jae	_C_LABEL(copystr_efault)
@@ -327,6 +356,7 @@ ENTRY(copystr_efault)
 	movl	$EFAULT,%eax
 
 ENTRY(copystr_fault)
+	callq	smap_enable
 copystr_return:
 	/* Set *lencopied and return %eax. */
 	testq	%r9,%r9
@@ -376,7 +406,9 @@ ENTRY(fuswintr)
 	leaq	_C_LABEL(fusuintrfailure)(%rip),%r11
 	movq	%r11,PCB_ONFAULT(%rcx)
 
+	callq	smap_disable
 	movzwl	(%rdi),%eax
+	callq	smap_enable
 
 	movq	$0,PCB_ONFAULT(%rcx)
 	ret
@@ -390,7 +422,9 @@ ENTRY(fubyte)
 	leaq	_C_LABEL(fusufailure)(%rip),%r11
 	movq	%r11,PCB_ONFAULT(%rcx)
 
+	callq	smap_disable
 	movzbl	(%rdi),%eax
+	callq	smap_enable
 
 	movq	$0,PCB_ONFAULT(%rcx)
 	ret
@@ -406,7 +440,9 @@ ENTRY(suswintr)
 	leaq	_C_LABEL(fusuintrfailure)(%rip),%r11
 	movq	%r11,PCB_ONFAULT(%rcx)
 
+	callq	smap_disable
 	movw	%si,(%rdi)
+	callq	smap_enable
 
 	xorq	%rax,%rax
 	movq	%rax,PCB_ONFAULT(%rcx)
@@ -422,7 +458,9 @@ ENTRY(subyte)
 	leaq	_C_LABEL(fusufailure)(%rip),%r11
 	movq	%r11,PCB_ONFAULT(%rcx)
 
+	callq	smap_disable
 	movb	%sil,(%rdi)
+	callq	smap_enable
 
 	xorq	%rax,%rax
 	movq	%rax,PCB_ONFAULT(%rcx)
@@ -434,11 +472,13 @@ ENTRY(subyte)
  * because trap.c checks for them.
  */
 ENTRY(fusuintrfailure)
+	callq	smap_enable	/* fault recovery: close the copy window */
 	movq	$0,PCB_ONFAULT(%rcx)
 	movl	$-1,%eax
 	ret
 
 ENTRY(fusufailure)
+	callq	smap_enable	/* fault recovery: close the copy window */
 	movq	$0,PCB_ONFAULT(%rcx)
 	movl	$-1,%eax
 	ret
@@ -460,11 +500,13 @@ ENTRY(ucas_64)
 	ja	_C_LABEL(ucas_efault)
 	movq	%rsi,%rax
 
+	callq	smap_disable
 .Lucas64_start:
 	/* Perform the CAS */
 	lock
 	cmpxchgq %rdx,(%rdi)
 .Lucas64_end:
+	callq	smap_enable
 
 	/*
 	 * Note: %rax is "old" value.
@@ -486,11 +528,13 @@ ENTRY(ucas_32)
 	ja	_C_LABEL(ucas_efault)
 	movl	%esi,%eax
 
+	callq	smap_disable
 .Lucas32_start:
 	/* Perform the CAS */
 	lock
 	cmpxchgl %edx,(%rdi)
 .Lucas32_end:
+	callq	smap_enable
 
 	/*
 	 * Note: %eax is "old" value.
@@ -505,6 +549,7 @@ ENTRY(ucas_efault)
 	movq	$EFAULT,%rax
 
 NENTRY(ucas_fault)
+	callq	smap_enable	/* fault recovery: close the copy window */
 	ret
 
 /*
@@ -524,6 +569,19 @@ x86_copyfunc_end:	.globl	x86_copyfunc_en
  */
 	.section ".rodata"
 	.globl _C_LABEL(onfault_table)
+	.type	_C_LABEL(x86_clacpatch),@object
+	.type	_C_LABEL(x86_stacpatch),@object
+
+LABEL(x86_clacpatch)
+	.quad	.Lclacpatch	/* hole in smap_enable, rewritten to clac by x86_patch() */
+	.quad	0 /* terminate: x86_patch() stops at the first NULL entry */
+END(x86_clacpatch)
+
+LABEL(x86_stacpatch)
+	.quad	.Lstacpatch	/* hole in smap_disable, rewritten to stac by x86_patch() */
+	.quad	0 /* terminate: x86_patch() stops at the first NULL entry */
+END(x86_stacpatch)
+
 _C_LABEL(onfault_table):
 	.quad .Lcopyin_start
 	.quad .Lcopyin_end

Index: src/sys/arch/amd64/amd64/trap.c
diff -u src/sys/arch/amd64/amd64/trap.c:1.101 src/sys/arch/amd64/amd64/trap.c:1.102
--- src/sys/arch/amd64/amd64/trap.c:1.101	Sun Sep 17 09:41:35 2017
+++ src/sys/arch/amd64/amd64/trap.c	Tue Oct 17 06:58:15 2017
@@ -1,4 +1,4 @@
-/*	$NetBSD: trap.c,v 1.101 2017/09/17 09:41:35 maxv Exp $	*/
+/*	$NetBSD: trap.c,v 1.102 2017/10/17 06:58:15 maxv Exp $	*/
 
 /*
  * Copyright (c) 1998, 2000, 2017 The NetBSD Foundation, Inc.
@@ -64,7 +64,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: trap.c,v 1.101 2017/09/17 09:41:35 maxv Exp $");
+__KERNEL_RCSID(0, "$NetBSD: trap.c,v 1.102 2017/10/17 06:58:15 maxv Exp $");
 
 #include "opt_ddb.h"
 #include "opt_kgdb.h"
@@ -556,6 +556,13 @@ trap(struct trapframe *frame)
 				    (void *)cr2);
 		}
 
+		if (cr2 < VM_MAXUSER_ADDRESS) {
+			/* SMAP might have brought us here */
+			if (onfault_handler(pcb, frame) == NULL)
+				panic("prevented access to %p (SMAP)",
+				    (void *)cr2);
+		}
+
 		goto faultcommon;
 
 	case T_PAGEFLT|T_USER: {

Index: src/sys/arch/amd64/include/frameasm.h
diff -u src/sys/arch/amd64/include/frameasm.h:1.21 src/sys/arch/amd64/include/frameasm.h:1.22
--- src/sys/arch/amd64/include/frameasm.h:1.21	Fri Sep 15 17:32:12 2017
+++ src/sys/arch/amd64/include/frameasm.h	Tue Oct 17 06:58:15 2017
@@ -1,4 +1,4 @@
-/*	$NetBSD: frameasm.h,v 1.21 2017/09/15 17:32:12 maxv Exp $	*/
+/*	$NetBSD: frameasm.h,v 1.22 2017/10/17 06:58:15 maxv Exp $	*/
 
 #ifndef _AMD64_MACHINE_FRAMEASM_H
 #define _AMD64_MACHINE_FRAMEASM_H
@@ -56,7 +56,8 @@
 	movq	%rbp,TF_RBP(%rsp)	; \
 	movq	%rbx,TF_RBX(%rsp)	; \
 	movq	%rax,TF_RAX(%rsp)	; \
-	cld
+	cld				; \
+	callq	smap_enable
 
 #define	INTR_RESTORE_GPRS \
 	movq	TF_RDI(%rsp),%rdi	; \

Index: src/sys/arch/x86/x86/cpu.c
diff -u src/sys/arch/x86/x86/cpu.c:1.136 src/sys/arch/x86/x86/cpu.c:1.137
--- src/sys/arch/x86/x86/cpu.c:1.136	Thu Sep 28 17:48:20 2017
+++ src/sys/arch/x86/x86/cpu.c	Tue Oct 17 06:58:15 2017
@@ -1,4 +1,4 @@
-/*	$NetBSD: cpu.c,v 1.136 2017/09/28 17:48:20 maxv Exp $	*/
+/*	$NetBSD: cpu.c,v 1.137 2017/10/17 06:58:15 maxv Exp $	*/
 
 /*
  * Copyright (c) 2000-2012 NetBSD Foundation, Inc.
@@ -62,7 +62,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: cpu.c,v 1.136 2017/09/28 17:48:20 maxv Exp $");
+__KERNEL_RCSID(0, "$NetBSD: cpu.c,v 1.137 2017/10/17 06:58:15 maxv Exp $");
 
 #include "opt_ddb.h"
 #include "opt_mpbios.h"		/* for MPDEBUG */
@@ -573,6 +573,12 @@ cpu_init(struct cpu_info *ci)
 	if (cpu_feature[5] & CPUID_SEF_SMEP)
 		cr4 |= CR4_SMEP;
 
+#ifdef amd64
+	/* If SMAP is supported, enable it */
+	if (cpu_feature[5] & CPUID_SEF_SMAP)
+		cr4 |= CR4_SMAP;
+#endif
+
 	if (cr4) {
 		cr4 |= rcr4();
 		lcr4(cr4);
@@ -1042,7 +1048,7 @@ cpu_init_msrs(struct cpu_info *ci, bool 
 	    ((uint64_t)LSEL(LSYSRETBASE_SEL, SEL_UPL) << 48));
 	wrmsr(MSR_LSTAR, (uint64_t)Xsyscall);
 	wrmsr(MSR_CSTAR, (uint64_t)Xsyscall32);
-	wrmsr(MSR_SFMASK, PSL_NT|PSL_T|PSL_I|PSL_C);
+	wrmsr(MSR_SFMASK, PSL_NT|PSL_T|PSL_I|PSL_C|PSL_AC);
 
 	if (full) {
 		wrmsr(MSR_FSBASE, 0);

Index: src/sys/arch/x86/x86/patch.c
diff -u src/sys/arch/x86/x86/patch.c:1.22 src/sys/arch/x86/x86/patch.c:1.23
--- src/sys/arch/x86/x86/patch.c:1.22	Fri Nov 15 08:47:55 2013
+++ src/sys/arch/x86/x86/patch.c	Tue Oct 17 06:58:15 2017
@@ -1,4 +1,4 @@
-/*	$NetBSD: patch.c,v 1.22 2013/11/15 08:47:55 msaitoh Exp $	*/
+/*	$NetBSD: patch.c,v 1.23 2017/10/17 06:58:15 maxv Exp $	*/
 
 /*-
  * Copyright (c) 2007, 2008, 2009 The NetBSD Foundation, Inc.
@@ -34,7 +34,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: patch.c,v 1.22 2013/11/15 08:47:55 msaitoh Exp $");
+__KERNEL_RCSID(0, "$NetBSD: patch.c,v 1.23 2017/10/17 06:58:15 maxv Exp $");
 
 #include "opt_lockdebug.h"
 #ifdef i386
@@ -77,6 +77,8 @@ void	_atomic_cas_cx8(void);
 void	_atomic_cas_cx8_end(void);
 
 extern void	*x86_lockpatch[];
+extern void	*x86_clacpatch[];
+extern void	*x86_stacpatch[];
 extern void	*x86_retpatch[];
 extern void	*atomic_lockpatch[];
 
@@ -236,6 +238,29 @@ x86_patch(bool early)
 		}
 	}
 
+#ifdef amd64
+	/*
+	 * If SMAP is present then patch the prepared holes with clac/stac
+	 * instructions.
+	 *
+	 * clac = 0x0f, 0x01, 0xca
+	 * stac = 0x0f, 0x01, 0xcb
+	 */
+	if (!early && cpu_feature[5] & CPUID_SEF_SMAP) {
+		KASSERT(rcr4() & CR4_SMAP);
+		for (i = 0; x86_clacpatch[i] != NULL; i++) {
+			/* ret,int3,int3 -> clac */
+			patchbytes(x86_clacpatch[i],
+			    0x0f, 0x01, 0xca);
+		}
+		for (i = 0; x86_stacpatch[i] != NULL; i++) {
+			/* ret,int3,int3 -> stac */
+			patchbytes(x86_stacpatch[i],
+			    0x0f, 0x01, 0xcb);
+		}
+	}
+#endif
+
 	/* Write back and invalidate cache, flush pipelines. */
 	wbinvd();
 	x86_flush();

Reply via email to