Module Name:    src
Committed By:   riastradh
Date:           Sat Feb 25 18:04:42 UTC 2023

Modified Files:
        src/sys/arch/amd64/amd64: genassym.cf locore.S
        src/sys/arch/x86/x86: fpu.c

Log Message:
x86: Add kthread_fpu_enter/exit support, take two.

This time, make sure to restore the FPU state when switching to a
kthread in the middle of kthread_fpu_enter/exit.
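
For context, a minimal sketch of how a kthread is expected to bracket
its FPU use; the worker body and the cookie-style signatures of
kthread_fpu_enter()/kthread_fpu_exit() are assumptions for
illustration, not part of this commit:

/*
 * Hypothetical cgd-style worker.  Between enter and exit the kthread
 * may be context-switched while LW_SYSTEM_FPU is set, which is the
 * case the switch-time restore handles.
 */
static void
example_worker(void *arg)
{
	int s;

	for (;;) {
		/* Allow FPU use in this kthread (assumed to return a cookie). */
		s = kthread_fpu_enter();

		/* FPU-accelerated work here, e.g. AES-NI on a cgd block. */

		/* Leave FPU mode, restoring the previous state. */
		kthread_fpu_exit(s);
	}
}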

This adds a single predicted-taken branch for kthreads that are not
in kthread_fpu_enter/exit, so only threads that actually use the
facility pay any real cost.  Since it avoids FPU state switching in
kthreads that do use the FPU, namely cgd worker threads, this should
be a net performance win on systems that use it and have negligible
impact otherwise.
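
For illustration only, the new cpu_switchto() logic for system LWPs is
roughly equivalent to the following C (newlwp is a stand-in name for
the LWP being switched to; the real code is the assembly in locore.S
below):

	/* Don't bother with the rest if switching to a system process. */
	if (newlwp->l_flag & LW_SYSTEM) {
		/*
		 * Kthreads inside kthread_fpu_enter/exit have private
		 * FPU state to reload; everyone else pays only the
		 * single predicted branch mentioned above.
		 */
		if (__predict_false(newlwp->l_flag & LW_SYSTEM_FPU))
			fpu_handle_deferred();
		goto switch_return;
	}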

XXX pullup-10


To generate a diff of this commit:
cvs rdiff -u -r1.93 -r1.94 src/sys/arch/amd64/amd64/genassym.cf
cvs rdiff -u -r1.215 -r1.216 src/sys/arch/amd64/amd64/locore.S
cvs rdiff -u -r1.81 -r1.82 src/sys/arch/x86/x86/fpu.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/arch/amd64/amd64/genassym.cf
diff -u src/sys/arch/amd64/amd64/genassym.cf:1.93 src/sys/arch/amd64/amd64/genassym.cf:1.94
--- src/sys/arch/amd64/amd64/genassym.cf:1.93	Tue Dec 27 08:40:40 2022
+++ src/sys/arch/amd64/amd64/genassym.cf	Sat Feb 25 18:04:42 2023
@@ -1,4 +1,4 @@
-#	$NetBSD: genassym.cf,v 1.93 2022/12/27 08:40:40 msaitoh Exp $
+#	$NetBSD: genassym.cf,v 1.94 2023/02/25 18:04:42 riastradh Exp $
 
 #
 # Copyright (c) 1998, 2006, 2007, 2008 The NetBSD Foundation, Inc.
@@ -166,6 +166,7 @@ define	L_MD_FLAGS		offsetof(struct lwp, 
 define	L_MD_ASTPENDING		offsetof(struct lwp, l_md.md_astpending)
 
 define	LW_SYSTEM		LW_SYSTEM
+define	LW_SYSTEM_FPU		LW_SYSTEM_FPU
 define	MDL_IRET		MDL_IRET
 define	MDL_COMPAT32		MDL_COMPAT32
 define	MDL_FPU_IN_CPU		MDL_FPU_IN_CPU

Index: src/sys/arch/amd64/amd64/locore.S
diff -u src/sys/arch/amd64/amd64/locore.S:1.215 src/sys/arch/amd64/amd64/locore.S:1.216
--- src/sys/arch/amd64/amd64/locore.S:1.215	Mon Dec 26 17:46:00 2022
+++ src/sys/arch/amd64/amd64/locore.S	Sat Feb 25 18:04:42 2023
@@ -1,4 +1,4 @@
-/*	$NetBSD: locore.S,v 1.215 2022/12/26 17:46:00 christos Exp $	*/
+/*	$NetBSD: locore.S,v 1.216 2023/02/25 18:04:42 riastradh Exp $	*/
 
 /*
  * Copyright-o-rama!
@@ -1247,7 +1247,7 @@ ENTRY(cpu_switchto)
 
 	/* Don't bother with the rest if switching to a system process. */
 	testl	$LW_SYSTEM,L_FLAG(%r12)
-	jnz	.Lswitch_return
+	jnz	.Lswitch_system
 
 	/* Is this process using RAS (restartable atomic sequences)? */
 	movq	L_PROC(%r12),%rdi
@@ -1336,6 +1336,21 @@ ENTRY(cpu_switchto)
 	popq	%r12
 	popq	%rbx
 	ret
+
+.Lswitch_system:
+	/*
+	 * If it has LW_SYSTEM_FPU set, meaning it's running in
+	 * kthread_fpu_enter/exit, we need to restore the FPU state
+	 * and enable FPU instructions with fpu_handle_deferred.
+	 *
+	 * No need to test MDL_FPU_IN_CPU via HANDLE_DEFERRED_FPU --
+	 * fpu_switch guarantees it is clear, so we can just call
+	 * fpu_handle_deferred unconditionally.
+	 */
+	testl	$LW_SYSTEM_FPU,L_FLAG(%r12)
+	jz	.Lswitch_return
+	callq	_C_LABEL(fpu_handle_deferred)
+	jmp	.Lswitch_return
 END(cpu_switchto)
 
 /*

Index: src/sys/arch/x86/x86/fpu.c
diff -u src/sys/arch/x86/x86/fpu.c:1.81 src/sys/arch/x86/x86/fpu.c:1.82
--- src/sys/arch/x86/x86/fpu.c:1.81	Sat Feb 25 18:04:25 2023
+++ src/sys/arch/x86/x86/fpu.c	Sat Feb 25 18:04:42 2023
@@ -1,4 +1,4 @@
-/*	$NetBSD: fpu.c,v 1.81 2023/02/25 18:04:25 riastradh Exp $	*/
+/*	$NetBSD: fpu.c,v 1.82 2023/02/25 18:04:42 riastradh Exp $	*/
 
 /*
  * Copyright (c) 2008, 2019 The NetBSD Foundation, Inc.  All
@@ -96,7 +96,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: fpu.c,v 1.81 2023/02/25 18:04:25 riastradh Exp $");
+__KERNEL_RCSID(0, "$NetBSD: fpu.c,v 1.82 2023/02/25 18:04:42 riastradh Exp $");
 
 #include "opt_multiprocessor.h"
 
@@ -107,6 +107,7 @@ __KERNEL_RCSID(0, "$NetBSD: fpu.c,v 1.81
 #include <sys/file.h>
 #include <sys/proc.h>
 #include <sys/kernel.h>
+#include <sys/kthread.h>
 #include <sys/sysctl.h>
 #include <sys/xcall.h>
 
@@ -131,13 +132,35 @@ void fpu_switch(struct lwp *, struct lwp
 
 uint32_t x86_fpu_mxcsr_mask __read_mostly = 0;
 
+/*
+ * True if this a thread that is allowed to use the FPU -- either a
+ * user thread, or a system thread with LW_SYSTEM_FPU enabled.
+ */
+static inline bool
+lwp_can_haz_fpu(struct lwp *l)
+{
+
+	return (l->l_flag & (LW_SYSTEM|LW_SYSTEM_FPU)) != LW_SYSTEM;
+}
+
+/*
+ * True if this is a system thread with its own private FPU state.
+ */
+static inline bool
+lwp_system_fpu_p(struct lwp *l)
+{
+
+	return (l->l_flag & (LW_SYSTEM|LW_SYSTEM_FPU)) ==
+	    (LW_SYSTEM|LW_SYSTEM_FPU);
+}
+
 static inline union savefpu *
 fpu_lwp_area(struct lwp *l)
 {
 	struct pcb *pcb = lwp_getpcb(l);
 	union savefpu *area = &pcb->pcb_savefpu;
 
-	KASSERT((l->l_flag & LW_SYSTEM) == 0);
+	KASSERT(lwp_can_haz_fpu(l));
 	if (l == curlwp) {
 		fpu_save();
 	}
@@ -155,7 +178,7 @@ fpu_save_lwp(struct lwp *l)
 
 	s = splvm();
 	if (l->l_md.md_flags & MDL_FPU_IN_CPU) {
-		KASSERT((l->l_flag & LW_SYSTEM) == 0);
+		KASSERT(lwp_can_haz_fpu(l));
 		fpu_area_save(area, x86_xsave_features, !(l->l_proc->p_flag & PK_32));
 		l->l_md.md_flags &= ~MDL_FPU_IN_CPU;
 	}
@@ -314,7 +337,7 @@ fpu_switch(struct lwp *oldlwp, struct lw
 	    cpu_index(ci), ci->ci_ilevel);
 
 	if (oldlwp->l_md.md_flags & MDL_FPU_IN_CPU) {
-		KASSERT(!(oldlwp->l_flag & LW_SYSTEM));
+		KASSERT(lwp_can_haz_fpu(oldlwp));
 		pcb = lwp_getpcb(oldlwp);
 		fpu_area_save(&pcb->pcb_savefpu, x86_xsave_features,
 		    !(oldlwp->l_proc->p_flag & PK_32));
@@ -330,11 +353,11 @@ fpu_lwp_fork(struct lwp *l1, struct lwp 
 	union savefpu *fpu_save;
 
 	/* Kernel threads have no FPU. */
-	if (__predict_false(l2->l_flag & LW_SYSTEM)) {
+	if (__predict_false(!lwp_can_haz_fpu(l2))) {
 		return;
 	}
 	/* For init(8). */
-	if (__predict_false(l1->l_flag & LW_SYSTEM)) {
+	if (__predict_false(!lwp_can_haz_fpu(l1))) {
 		memset(&pcb2->pcb_savefpu, 0, x86_fpu_save_size);
 		return;
 	}
@@ -358,6 +381,13 @@ fpu_lwp_abandon(struct lwp *l)
 
 /* -------------------------------------------------------------------------- */
 
+static const union savefpu safe_fpu __aligned(64) = {
+	.sv_xmm = {
+		.fx_mxcsr = __SAFE_MXCSR__,
+	},
+};
+static const union savefpu zero_fpu __aligned(64);
+
 /*
  * fpu_kern_enter()
  *
@@ -373,15 +403,15 @@ fpu_lwp_abandon(struct lwp *l)
 void
 fpu_kern_enter(void)
 {
-	static const union savefpu safe_fpu __aligned(64) = {
-		.sv_xmm = {
-			.fx_mxcsr = __SAFE_MXCSR__,
-		},
-	};
 	struct lwp *l = curlwp;
 	struct cpu_info *ci;
 	int s;
 
+	if (lwp_system_fpu_p(l) && !cpu_intr_p()) {
+		KASSERT(!cpu_softintr_p());
+		return;
+	}
+
 	s = splvm();
 
 	ci = curcpu();
@@ -427,10 +457,16 @@ fpu_kern_enter(void)
 void
 fpu_kern_leave(void)
 {
-	static const union savefpu zero_fpu __aligned(64);
-	struct cpu_info *ci = curcpu();
+	struct cpu_info *ci;
 	int s;
 
+	if (lwp_system_fpu_p(curlwp) && !cpu_intr_p()) {
+		KASSERT(!cpu_softintr_p());
+		return;
+	}
+
+	ci = curcpu();
+
 #if 0
 	/*
 	 * Can't assert this because if the caller holds a spin lock at
@@ -459,6 +495,24 @@ fpu_kern_leave(void)
 	splx(s);
 }
 
+void
+kthread_fpu_enter_md(void)
+{
+
+	/* Enable the FPU by clearing CR0_TS, and enter a safe FPU state.  */
+	clts();
+	fpu_area_restore(&safe_fpu, x86_xsave_features, /*is_64bit*/false);
+}
+
+void
+kthread_fpu_exit_md(void)
+{
+
+	/* Zero the FPU state and disable the FPU by setting CR0_TS.  */
+	fpu_area_restore(&zero_fpu, x86_xsave_features, /*is_64bit*/false);
+	stts();
+}
+
 /* -------------------------------------------------------------------------- */
 
 /*
