Module Name: src Committed By: maxv Date: Mon Sep 17 15:53:06 UTC 2018
Modified Files: src/sys/arch/x86/x86: fpu.c Log Message: Reduce the noise, reorder and rename some things for clarity. To generate a diff of this commit: cvs rdiff -u -r1.46 -r1.47 src/sys/arch/x86/x86/fpu.c Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.
Modified files: Index: src/sys/arch/x86/x86/fpu.c diff -u src/sys/arch/x86/x86/fpu.c:1.46 src/sys/arch/x86/x86/fpu.c:1.47 --- src/sys/arch/x86/x86/fpu.c:1.46 Sun Jul 1 08:32:41 2018 +++ src/sys/arch/x86/x86/fpu.c Mon Sep 17 15:53:06 2018 @@ -1,4 +1,4 @@ -/* $NetBSD: fpu.c,v 1.46 2018/07/01 08:32:41 maxv Exp $ */ +/* $NetBSD: fpu.c,v 1.47 2018/09/17 15:53:06 maxv Exp $ */ /* * Copyright (c) 2008 The NetBSD Foundation, Inc. All @@ -96,7 +96,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: fpu.c,v 1.46 2018/07/01 08:32:41 maxv Exp $"); +__KERNEL_RCSID(0, "$NetBSD: fpu.c,v 1.47 2018/09/17 15:53:06 maxv Exp $"); #include "opt_multiprocessor.h" @@ -121,9 +121,6 @@ __KERNEL_RCSID(0, "$NetBSD: fpu.c,v 1.46 #include <x86/cpu.h> #include <x86/fpu.h> -/* Check some duplicate definitions match */ -#include <machine/fenv.h> - #ifdef XEN #define clts() HYPERVISOR_fpu_taskswitch(0) #define stts() HYPERVISOR_fpu_taskswitch(1) @@ -134,108 +131,25 @@ bool x86_fpu_eager __read_mostly = false static uint32_t x86_fpu_mxcsr_mask __read_mostly = 0; static inline union savefpu * -process_fpframe(struct lwp *lwp) +lwp_fpuarea(struct lwp *l) { - struct pcb *pcb = lwp_getpcb(lwp); + struct pcb *pcb = lwp_getpcb(l); return &pcb->pcb_savefpu; } -/* - * The following table is used to ensure that the FPE_... value - * that is passed as a trapcode to the signal handler of the user - * process does not have more than one bit set. - * - * Multiple bits may be set if SSE simd instructions generate errors - * on more than one value or if the user process modifies the control - * word while a status word bit is already set (which this is a sign - * of bad coding). - * We have no choise than to narrow them down to one bit, since we must - * not send a trapcode that is not exactly one of the FPE_ macros. - * - * The mechanism has a static table with 127 entries. Each combination - * of the 7 FPU status word exception bits directly translates to a - * position in this table, where a single FPE_... value is stored. - * This FPE_... value stored there is considered the "most important" - * of the exception bits and will be sent as the signal code. The - * precedence of the bits is based upon Intel Document "Numerical - * Applications", Chapter "Special Computational Situations". - * - * The code to choose one of these values does these steps: - * 1) Throw away status word bits that cannot be masked. - * 2) Throw away the bits currently masked in the control word, - * assuming the user isn't interested in them anymore. - * 3) Reinsert status word bit 7 (stack fault) if it is set, which - * cannot be masked but must be preserved. - * 'Stack fault' is a sub-class of 'invalid operation'. - * 4) Use the remaining bits to point into the trapcode table. - * - * The 6 maskable bits in order of their preference, as stated in the - * above referenced Intel manual: - * 1 Invalid operation (FP_X_INV) - * 1a Stack underflow - * 1b Stack overflow - * 1c Operand of unsupported format - * 1d SNaN operand. - * 2 QNaN operand (not an exception, irrelevant here) - * 3 Any other invalid-operation not mentioned above or zero divide - * (FP_X_INV, FP_X_DZ) - * 4 Denormal operand (FP_X_DNML) - * 5 Numeric over/underflow (FP_X_OFL, FP_X_UFL) - * 6 Inexact result (FP_X_IMP) - * - * NB: the above seems to mix up the mxscr error bits and the x87 ones. - * They are in the same order, but there is no EN_SW_STACK_FAULT in the mmx - * status. - * - * The table is nearly, but not quite, in bit order (ZERODIV and DENORM - * are swapped). - * - * This table assumes that any stack fault is cleared - so that an INVOP - * fault will only be reported as FLTSUB once. - * This might not happen if the mask is being changed. - */ -#define FPE_xxx1(f) (f & EN_SW_INVOP \ - ? (f & EN_SW_STACK_FAULT ? FPE_FLTSUB : FPE_FLTINV) \ - : f & EN_SW_ZERODIV ? FPE_FLTDIV \ - : f & EN_SW_DENORM ? FPE_FLTUND \ - : f & EN_SW_OVERFLOW ? FPE_FLTOVF \ - : f & EN_SW_UNDERFLOW ? FPE_FLTUND \ - : f & EN_SW_PRECLOSS ? FPE_FLTRES \ - : f & EN_SW_STACK_FAULT ? FPE_FLTSUB : 0) -#define FPE_xxx2(f) FPE_xxx1(f), FPE_xxx1((f + 1)) -#define FPE_xxx4(f) FPE_xxx2(f), FPE_xxx2((f + 2)) -#define FPE_xxx8(f) FPE_xxx4(f), FPE_xxx4((f + 4)) -#define FPE_xxx16(f) FPE_xxx8(f), FPE_xxx8((f + 8)) -#define FPE_xxx32(f) FPE_xxx16(f), FPE_xxx16((f + 16)) -static const uint8_t fpetable[128] = { - FPE_xxx32(0), FPE_xxx32(32), FPE_xxx32(64), FPE_xxx32(96) -}; -#undef FPE_xxx1 -#undef FPE_xxx2 -#undef FPE_xxx4 -#undef FPE_xxx8 -#undef FPE_xxx16 -#undef FPE_xxx32 - -/* - * Init the FPU. - * - * This might not be strictly necessary since it will be initialised - * for each process. However it does no harm. - */ void fpuinit(struct cpu_info *ci) { - + /* + * This might not be strictly necessary since it will be initialized + * for each process. However it does no harm. + */ clts(); fninit(); stts(); } -/* - * Get the value of MXCSR_MASK supported by the CPU. - */ void fpuinit_mxcsr_mask(void) { @@ -296,61 +210,60 @@ fpu_clear_amd(void) } static void -fpu_save(struct lwp *l) +fpu_area_save(void *area) { - struct pcb *pcb = lwp_getpcb(l); + clts(); switch (x86_fpu_save) { case FPU_SAVE_FSAVE: - fnsave(&pcb->pcb_savefpu); + fnsave(area); break; case FPU_SAVE_FXSAVE: - fxsave(&pcb->pcb_savefpu); + fxsave(area); break; case FPU_SAVE_XSAVE: - xsave(&pcb->pcb_savefpu, x86_xsave_features); + xsave(area, x86_xsave_features); break; case FPU_SAVE_XSAVEOPT: - xsaveopt(&pcb->pcb_savefpu, x86_xsave_features); + xsaveopt(area, x86_xsave_features); break; } } static void -fpu_restore(struct lwp *l) +fpu_area_restore(void *area) { - struct pcb *pcb = lwp_getpcb(l); + clts(); switch (x86_fpu_save) { case FPU_SAVE_FSAVE: - frstor(&pcb->pcb_savefpu); + frstor(area); break; case FPU_SAVE_FXSAVE: if (cpu_vendor == CPUVENDOR_AMD) fpu_clear_amd(); - fxrstor(&pcb->pcb_savefpu); + fxrstor(area); break; case FPU_SAVE_XSAVE: case FPU_SAVE_XSAVEOPT: if (cpu_vendor == CPUVENDOR_AMD) fpu_clear_amd(); - xrstor(&pcb->pcb_savefpu, x86_xsave_features); + xrstor(area, x86_xsave_features); break; } } static void -fpu_eagerrestore(struct lwp *l) +fpu_lwp_install(struct lwp *l) { struct pcb *pcb = lwp_getpcb(l); struct cpu_info *ci = curcpu(); - clts(); KASSERT(ci->ci_fpcurlwp == NULL); KASSERT(pcb->pcb_fpcpu == NULL); ci->ci_fpcurlwp = l; pcb->pcb_fpcpu = ci; - fpu_restore(l); + fpu_area_restore(&pcb->pcb_savefpu); } void @@ -375,46 +288,113 @@ fpu_eagerswitch(struct lwp *oldlwp, stru #endif fpusave_cpu(true); if (!(newlwp->l_flag & LW_SYSTEM)) - fpu_eagerrestore(newlwp); + fpu_lwp_install(newlwp); splx(s); } /* -------------------------------------------------------------------------- */ /* - * This is a synchronous trap on either an x87 instruction (due to an - * unmasked error on the previous x87 instruction) or on an SSE/SSE2 etc - * instruction due to an error on the instruction itself. - * - * If trap actually generates a signal, then the fpu state is saved - * and then copied onto the process's user-stack, and then recovered - * from there when the signal returns (or from the jmp_buf if the - * signal handler exits with a longjmp()). - * - * All this code need to do is save the reason for the trap. - * For x87 interrupts the status word bits need clearing to stop the - * trap re-occurring. - * - * The mxcsr bits are 'sticky' and need clearing to not confuse a later trap. - * - * Since this is a synchronous trap, the fpu registers must still belong - * to the correct process (we trap through an interrupt gate so that - * interrupts are disabled on entry). - * Interrupts (these better include IPIs) are left disabled until we've - * finished looking at fpu registers. + * The following table is used to ensure that the FPE_... value + * that is passed as a trapcode to the signal handler of the user + * process does not have more than one bit set. + * + * Multiple bits may be set if SSE simd instructions generate errors + * on more than one value or if the user process modifies the control + * word while a status word bit is already set (which this is a sign + * of bad coding). + * We have no choise than to narrow them down to one bit, since we must + * not send a trapcode that is not exactly one of the FPE_ macros. + * + * The mechanism has a static table with 127 entries. Each combination + * of the 7 FPU status word exception bits directly translates to a + * position in this table, where a single FPE_... value is stored. + * This FPE_... value stored there is considered the "most important" + * of the exception bits and will be sent as the signal code. The + * precedence of the bits is based upon Intel Document "Numerical + * Applications", Chapter "Special Computational Situations". + * + * The code to choose one of these values does these steps: + * 1) Throw away status word bits that cannot be masked. + * 2) Throw away the bits currently masked in the control word, + * assuming the user isn't interested in them anymore. + * 3) Reinsert status word bit 7 (stack fault) if it is set, which + * cannot be masked but must be preserved. + * 'Stack fault' is a sub-class of 'invalid operation'. + * 4) Use the remaining bits to point into the trapcode table. + * + * The 6 maskable bits in order of their preference, as stated in the + * above referenced Intel manual: + * 1 Invalid operation (FP_X_INV) + * 1a Stack underflow + * 1b Stack overflow + * 1c Operand of unsupported format + * 1d SNaN operand. + * 2 QNaN operand (not an exception, irrelevant here) + * 3 Any other invalid-operation not mentioned above or zero divide + * (FP_X_INV, FP_X_DZ) + * 4 Denormal operand (FP_X_DNML) + * 5 Numeric over/underflow (FP_X_OFL, FP_X_UFL) + * 6 Inexact result (FP_X_IMP) + * + * NB: the above seems to mix up the mxscr error bits and the x87 ones. + * They are in the same order, but there is no EN_SW_STACK_FAULT in the mmx + * status. * - * For amd64 the calling code (in amd64_trap.S) has already checked - * that we trapped from usermode. + * The table is nearly, but not quite, in bit order (ZERODIV and DENORM + * are swapped). + * + * This table assumes that any stack fault is cleared - so that an INVOP + * fault will only be reported as FLTSUB once. + * This might not happen if the mask is being changed. */ +#define FPE_xxx1(f) (f & EN_SW_INVOP \ + ? (f & EN_SW_STACK_FAULT ? FPE_FLTSUB : FPE_FLTINV) \ + : f & EN_SW_ZERODIV ? FPE_FLTDIV \ + : f & EN_SW_DENORM ? FPE_FLTUND \ + : f & EN_SW_OVERFLOW ? FPE_FLTOVF \ + : f & EN_SW_UNDERFLOW ? FPE_FLTUND \ + : f & EN_SW_PRECLOSS ? FPE_FLTRES \ + : f & EN_SW_STACK_FAULT ? FPE_FLTSUB : 0) +#define FPE_xxx2(f) FPE_xxx1(f), FPE_xxx1((f + 1)) +#define FPE_xxx4(f) FPE_xxx2(f), FPE_xxx2((f + 2)) +#define FPE_xxx8(f) FPE_xxx4(f), FPE_xxx4((f + 4)) +#define FPE_xxx16(f) FPE_xxx8(f), FPE_xxx8((f + 8)) +#define FPE_xxx32(f) FPE_xxx16(f), FPE_xxx16((f + 16)) +static const uint8_t fpetable[128] = { + FPE_xxx32(0), FPE_xxx32(32), FPE_xxx32(64), FPE_xxx32(96) +}; +#undef FPE_xxx1 +#undef FPE_xxx2 +#undef FPE_xxx4 +#undef FPE_xxx8 +#undef FPE_xxx16 +#undef FPE_xxx32 +/* + * This is a synchronous trap on either an x87 instruction (due to an unmasked + * error on the previous x87 instruction) or on an SSE/SSE2/etc instruction due + * to an error on the instruction itself. + * + * If trap actually generates a signal, then the fpu state is saved and then + * copied onto the lwp's user-stack, and then recovered from there when the + * signal returns. + * + * All this code needs to do is save the reason for the trap. For x87 traps the + * status word bits need clearing to stop the trap re-occurring. For SSE traps + * the mxcsr bits are 'sticky' and need clearing to not confuse a later trap. + * + * We come here with interrupts disabled. + */ void fputrap(struct trapframe *frame) { uint32_t statbits; ksiginfo_t ksi; - if (!USERMODE(frame->tf_cs)) + if (__predict_false(!USERMODE(frame->tf_cs))) { panic("fpu trap from kernel, trapframe %p\n", frame); + } /* * At this point, fpcurlwp should be curlwp. If it wasn't, the TS bit @@ -442,7 +422,7 @@ fputrap(struct trapframe *frame) /* Clear any pending exceptions from status word */ fnclex(); - /* Removed masked interrupts */ + /* Remove masked interrupts */ statbits = sw & ~(cw & 0x3f); } @@ -458,7 +438,7 @@ fputrap(struct trapframe *frame) } /* - * Implement device not available (DNA) exception + * Implement device not available (DNA) exception. * * If we were the last lwp to use the FPU, we can simply return. * Otherwise, we save the previous state, if necessary, and restore @@ -469,18 +449,16 @@ fputrap(struct trapframe *frame) void fpudna(struct trapframe *frame) { - struct cpu_info *ci; + struct cpu_info *ci = curcpu(); struct lwp *l, *fl; struct pcb *pcb; int s; - if (!USERMODE(frame->tf_cs)) + if (!USERMODE(frame->tf_cs)) { panic("fpudna from kernel, ip %p, trapframe %p\n", (void *)X86_TF_RIP(frame), frame); + } - ci = curcpu(); - - /* Save soft spl level - interrupts are hard disabled */ s = splhigh(); /* Save state on current CPU. */ @@ -526,20 +504,15 @@ fpudna(struct trapframe *frame) kpreempt_enable(); } - /* - * Restore state on this CPU, or initialize. Ensure that - * the entire update is atomic with respect to FPU-sync IPIs. - */ - clts(); - ci->ci_fpcurlwp = l; - pcb->pcb_fpcpu = ci; - - fpu_restore(l); + /* Install the LWP's FPU state. */ + fpu_lwp_install(l); KASSERT(ci == curcpu()); splx(s); } +/* -------------------------------------------------------------------------- */ + /* * Save current CPU's FPU state. Must be called at IPL_HIGH. */ @@ -560,8 +533,7 @@ fpusave_cpu(bool save) pcb = lwp_getpcb(l); if (save) { - clts(); - fpu_save(l); + fpu_area_save(&pcb->pcb_savefpu); } stts(); @@ -619,7 +591,7 @@ fpusave_lwp(struct lwp *l, bool save) void fpu_set_default_cw(struct lwp *l, unsigned int x87_cw) { - union savefpu *fpu_save = process_fpframe(l); + union savefpu *fpu_save = lwp_fpuarea(l); struct pcb *pcb = lwp_getpcb(l); if (i386_use_fxsave) { @@ -647,7 +619,7 @@ fpu_save_area_clear(struct lwp *l, unsig KASSERT(l == curlwp); KASSERT((l->l_flag & LW_SYSTEM) == 0); - fpu_save = process_fpframe(l); + fpu_save = lwp_fpuarea(l); pcb = lwp_getpcb(l); s = splhigh(); @@ -694,7 +666,7 @@ fpu_save_area_clear(struct lwp *l, unsig pcb->pcb_fpu_dflt_cw = x87_cw; if (x86_fpu_eager) { - fpu_eagerrestore(l); + fpu_lwp_install(l); splx(s); } } @@ -702,7 +674,7 @@ fpu_save_area_clear(struct lwp *l, unsig void fpu_save_area_reset(struct lwp *l) { - union savefpu *fpu_save = process_fpframe(l); + union savefpu *fpu_save = lwp_fpuarea(l); struct pcb *pcb = lwp_getpcb(l); /* @@ -857,7 +829,7 @@ process_write_fpregs_xmm(struct lwp *l, union savefpu *fpu_save; fpusave_lwp(l, false); - fpu_save = process_fpframe(l); + fpu_save = lwp_fpuarea(l); if (i386_use_fxsave) { memcpy(&fpu_save->sv_xmm, fpregs, sizeof(fpu_save->sv_xmm)); @@ -890,7 +862,7 @@ process_write_fpregs_s87(struct lwp *l, if (i386_use_fxsave) { /* Save so we don't lose the xmm registers */ fpusave_lwp(l, true); - fpu_save = process_fpframe(l); + fpu_save = lwp_fpuarea(l); process_s87_to_xmm(fpregs, &fpu_save->sv_xmm); /* @@ -904,7 +876,7 @@ process_write_fpregs_s87(struct lwp *l, } } else { fpusave_lwp(l, false); - fpu_save = process_fpframe(l); + fpu_save = lwp_fpuarea(l); memcpy(&fpu_save->sv_87, fpregs, sizeof(fpu_save->sv_87)); } } @@ -915,7 +887,7 @@ process_read_fpregs_xmm(struct lwp *l, s union savefpu *fpu_save; fpusave_lwp(l, true); - fpu_save = process_fpframe(l); + fpu_save = lwp_fpuarea(l); if (i386_use_fxsave) { memcpy(fpregs, &fpu_save->sv_xmm, sizeof(fpu_save->sv_xmm)); @@ -931,7 +903,7 @@ process_read_fpregs_s87(struct lwp *l, s union savefpu *fpu_save; fpusave_lwp(l, true); - fpu_save = process_fpframe(l); + fpu_save = lwp_fpuarea(l); if (i386_use_fxsave) { memset(fpregs, 0, sizeof(*fpregs));