Module Name: src Committed By: riastradh Date: Sat Feb 25 18:04:42 UTC 2023
Modified Files: src/sys/arch/amd64/amd64: genassym.cf locore.S src/sys/arch/x86/x86: fpu.c Log Message: x86: Add kthread_fpu_enter/exit support, take two. This time, make sure to restore the FPU state when switching to a kthread in the middle of kthread_fpu_enter/exit. This adds a single predicted-taken branch for the case of kthreads that are not in kthread_fpu_enter/exit, so it incurs a penalty only for threads that actually use it. Since it avoids FPU state switching in kthreads that do use the FPU, namely cgd worker threads, this should be a net performance win on systems using it and have negligible impact otherwise. XXX pullup-10 To generate a diff of this commit: cvs rdiff -u -r1.93 -r1.94 src/sys/arch/amd64/amd64/genassym.cf cvs rdiff -u -r1.215 -r1.216 src/sys/arch/amd64/amd64/locore.S cvs rdiff -u -r1.81 -r1.82 src/sys/arch/x86/x86/fpu.c Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.
Modified files: Index: src/sys/arch/amd64/amd64/genassym.cf diff -u src/sys/arch/amd64/amd64/genassym.cf:1.93 src/sys/arch/amd64/amd64/genassym.cf:1.94 --- src/sys/arch/amd64/amd64/genassym.cf:1.93 Tue Dec 27 08:40:40 2022 +++ src/sys/arch/amd64/amd64/genassym.cf Sat Feb 25 18:04:42 2023 @@ -1,4 +1,4 @@ -# $NetBSD: genassym.cf,v 1.93 2022/12/27 08:40:40 msaitoh Exp $ +# $NetBSD: genassym.cf,v 1.94 2023/02/25 18:04:42 riastradh Exp $ # # Copyright (c) 1998, 2006, 2007, 2008 The NetBSD Foundation, Inc. @@ -166,6 +166,7 @@ define L_MD_FLAGS offsetof(struct lwp, define L_MD_ASTPENDING offsetof(struct lwp, l_md.md_astpending) define LW_SYSTEM LW_SYSTEM +define LW_SYSTEM_FPU LW_SYSTEM_FPU define MDL_IRET MDL_IRET define MDL_COMPAT32 MDL_COMPAT32 define MDL_FPU_IN_CPU MDL_FPU_IN_CPU Index: src/sys/arch/amd64/amd64/locore.S diff -u src/sys/arch/amd64/amd64/locore.S:1.215 src/sys/arch/amd64/amd64/locore.S:1.216 --- src/sys/arch/amd64/amd64/locore.S:1.215 Mon Dec 26 17:46:00 2022 +++ src/sys/arch/amd64/amd64/locore.S Sat Feb 25 18:04:42 2023 @@ -1,4 +1,4 @@ -/* $NetBSD: locore.S,v 1.215 2022/12/26 17:46:00 christos Exp $ */ +/* $NetBSD: locore.S,v 1.216 2023/02/25 18:04:42 riastradh Exp $ */ /* * Copyright-o-rama! @@ -1247,7 +1247,7 @@ ENTRY(cpu_switchto) /* Don't bother with the rest if switching to a system process. */ testl $LW_SYSTEM,L_FLAG(%r12) - jnz .Lswitch_return + jnz .Lswitch_system /* Is this process using RAS (restartable atomic sequences)? */ movq L_PROC(%r12),%rdi @@ -1336,6 +1336,21 @@ ENTRY(cpu_switchto) popq %r12 popq %rbx ret + +.Lswitch_system: + /* + * If it has LWP_SYSTEM_FPU set, meaning it's running in + * kthread_fpu_enter/exit, we need to restore the FPU state + * and enable FPU instructions with fpu_handle_deferred. + * + * No need to test MDL_FPU_IN_CPU via HANDLE_DEFERRED_FPU -- + * fpu_switch guarantees it is clear, so we can just call + * fpu_handle_deferred unconditionally. + */ + testl $LW_SYSTEM_FPU,L_FLAG(%r12) + jz .Lswitch_return + callq _C_LABEL(fpu_handle_deferred) + jmp .Lswitch_return END(cpu_switchto) /* Index: src/sys/arch/x86/x86/fpu.c diff -u src/sys/arch/x86/x86/fpu.c:1.81 src/sys/arch/x86/x86/fpu.c:1.82 --- src/sys/arch/x86/x86/fpu.c:1.81 Sat Feb 25 18:04:25 2023 +++ src/sys/arch/x86/x86/fpu.c Sat Feb 25 18:04:42 2023 @@ -1,4 +1,4 @@ -/* $NetBSD: fpu.c,v 1.81 2023/02/25 18:04:25 riastradh Exp $ */ +/* $NetBSD: fpu.c,v 1.82 2023/02/25 18:04:42 riastradh Exp $ */ /* * Copyright (c) 2008, 2019 The NetBSD Foundation, Inc. All @@ -96,7 +96,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: fpu.c,v 1.81 2023/02/25 18:04:25 riastradh Exp $"); +__KERNEL_RCSID(0, "$NetBSD: fpu.c,v 1.82 2023/02/25 18:04:42 riastradh Exp $"); #include "opt_multiprocessor.h" @@ -107,6 +107,7 @@ __KERNEL_RCSID(0, "$NetBSD: fpu.c,v 1.81 #include <sys/file.h> #include <sys/proc.h> #include <sys/kernel.h> +#include <sys/kthread.h> #include <sys/sysctl.h> #include <sys/xcall.h> @@ -131,13 +132,35 @@ void fpu_switch(struct lwp *, struct lwp uint32_t x86_fpu_mxcsr_mask __read_mostly = 0; +/* + * True if this a thread that is allowed to use the FPU -- either a + * user thread, or a system thread with LW_SYSTEM_FPU enabled. + */ +static inline bool +lwp_can_haz_fpu(struct lwp *l) +{ + + return (l->l_flag & (LW_SYSTEM|LW_SYSTEM_FPU)) != LW_SYSTEM; +} + +/* + * True if this is a system thread with its own private FPU state. + */ +static inline bool +lwp_system_fpu_p(struct lwp *l) +{ + + return (l->l_flag & (LW_SYSTEM|LW_SYSTEM_FPU)) == + (LW_SYSTEM|LW_SYSTEM_FPU); +} + static inline union savefpu * fpu_lwp_area(struct lwp *l) { struct pcb *pcb = lwp_getpcb(l); union savefpu *area = &pcb->pcb_savefpu; - KASSERT((l->l_flag & LW_SYSTEM) == 0); + KASSERT(lwp_can_haz_fpu(l)); if (l == curlwp) { fpu_save(); } @@ -155,7 +178,7 @@ fpu_save_lwp(struct lwp *l) s = splvm(); if (l->l_md.md_flags & MDL_FPU_IN_CPU) { - KASSERT((l->l_flag & LW_SYSTEM) == 0); + KASSERT(lwp_can_haz_fpu(l)); fpu_area_save(area, x86_xsave_features, !(l->l_proc->p_flag & PK_32)); l->l_md.md_flags &= ~MDL_FPU_IN_CPU; } @@ -314,7 +337,7 @@ fpu_switch(struct lwp *oldlwp, struct lw cpu_index(ci), ci->ci_ilevel); if (oldlwp->l_md.md_flags & MDL_FPU_IN_CPU) { - KASSERT(!(oldlwp->l_flag & LW_SYSTEM)); + KASSERT(lwp_can_haz_fpu(oldlwp)); pcb = lwp_getpcb(oldlwp); fpu_area_save(&pcb->pcb_savefpu, x86_xsave_features, !(oldlwp->l_proc->p_flag & PK_32)); @@ -330,11 +353,11 @@ fpu_lwp_fork(struct lwp *l1, struct lwp union savefpu *fpu_save; /* Kernel threads have no FPU. */ - if (__predict_false(l2->l_flag & LW_SYSTEM)) { + if (__predict_false(!lwp_can_haz_fpu(l2))) { return; } /* For init(8). */ - if (__predict_false(l1->l_flag & LW_SYSTEM)) { + if (__predict_false(!lwp_can_haz_fpu(l1))) { memset(&pcb2->pcb_savefpu, 0, x86_fpu_save_size); return; } @@ -358,6 +381,13 @@ fpu_lwp_abandon(struct lwp *l) /* -------------------------------------------------------------------------- */ +static const union savefpu safe_fpu __aligned(64) = { + .sv_xmm = { + .fx_mxcsr = __SAFE_MXCSR__, + }, +}; +static const union savefpu zero_fpu __aligned(64); + /* * fpu_kern_enter() * @@ -373,15 +403,15 @@ fpu_lwp_abandon(struct lwp *l) void fpu_kern_enter(void) { - static const union savefpu safe_fpu __aligned(64) = { - .sv_xmm = { - .fx_mxcsr = __SAFE_MXCSR__, - }, - }; struct lwp *l = curlwp; struct cpu_info *ci; int s; + if (lwp_system_fpu_p(l) && !cpu_intr_p()) { + KASSERT(!cpu_softintr_p()); + return; + } + s = splvm(); ci = curcpu(); @@ -427,10 +457,16 @@ fpu_kern_enter(void) void fpu_kern_leave(void) { - static const union savefpu zero_fpu __aligned(64); - struct cpu_info *ci = curcpu(); + struct cpu_info *ci; int s; + if (lwp_system_fpu_p(curlwp) && !cpu_intr_p()) { + KASSERT(!cpu_softintr_p()); + return; + } + + ci = curcpu(); + #if 0 /* * Can't assert this because if the caller holds a spin lock at @@ -459,6 +495,24 @@ fpu_kern_leave(void) splx(s); } +void +kthread_fpu_enter_md(void) +{ + + /* Enable the FPU by clearing CR0_TS, and enter a safe FPU state. */ + clts(); + fpu_area_restore(&safe_fpu, x86_xsave_features, /*is_64bit*/false); +} + +void +kthread_fpu_exit_md(void) +{ + + /* Zero the FPU state and disable the FPU by setting CR0_TS. */ + fpu_area_restore(&zero_fpu, x86_xsave_features, /*is_64bit*/false); + stts(); +} + /* -------------------------------------------------------------------------- */ /*