Module Name:    src
Committed By:   maxv
Date:           Mon Sep 17 15:53:06 UTC 2018

Modified Files:
        src/sys/arch/x86/x86: fpu.c

Log Message:
Reduce the noise, reorder and rename some things for clarity.


To generate a diff of this commit:
cvs rdiff -u -r1.46 -r1.47 src/sys/arch/x86/x86/fpu.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/arch/x86/x86/fpu.c
diff -u src/sys/arch/x86/x86/fpu.c:1.46 src/sys/arch/x86/x86/fpu.c:1.47
--- src/sys/arch/x86/x86/fpu.c:1.46	Sun Jul  1 08:32:41 2018
+++ src/sys/arch/x86/x86/fpu.c	Mon Sep 17 15:53:06 2018
@@ -1,4 +1,4 @@
-/*	$NetBSD: fpu.c,v 1.46 2018/07/01 08:32:41 maxv Exp $	*/
+/*	$NetBSD: fpu.c,v 1.47 2018/09/17 15:53:06 maxv Exp $	*/
 
 /*
  * Copyright (c) 2008 The NetBSD Foundation, Inc.  All
@@ -96,7 +96,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: fpu.c,v 1.46 2018/07/01 08:32:41 maxv Exp $");
+__KERNEL_RCSID(0, "$NetBSD: fpu.c,v 1.47 2018/09/17 15:53:06 maxv Exp $");
 
 #include "opt_multiprocessor.h"
 
@@ -121,9 +121,6 @@ __KERNEL_RCSID(0, "$NetBSD: fpu.c,v 1.46
 #include <x86/cpu.h>
 #include <x86/fpu.h>
 
-/* Check some duplicate definitions match */
-#include <machine/fenv.h>
-
 #ifdef XEN
 #define clts() HYPERVISOR_fpu_taskswitch(0)
 #define stts() HYPERVISOR_fpu_taskswitch(1)
@@ -134,108 +131,25 @@ bool x86_fpu_eager __read_mostly = false
 static uint32_t x86_fpu_mxcsr_mask __read_mostly = 0;
 
 static inline union savefpu *
-process_fpframe(struct lwp *lwp)
+lwp_fpuarea(struct lwp *l)
 {
-	struct pcb *pcb = lwp_getpcb(lwp);
+	struct pcb *pcb = lwp_getpcb(l);
 
 	return &pcb->pcb_savefpu;
 }
 
-/*
- * The following table is used to ensure that the FPE_... value
- * that is passed as a trapcode to the signal handler of the user
- * process does not have more than one bit set.
- *
- * Multiple bits may be set if SSE simd instructions generate errors
- * on more than one value or if the user process modifies the control
- * word while a status word bit is already set (which this is a sign
- * of bad coding).
- * We have no choise than to narrow them down to one bit, since we must
- * not send a trapcode that is not exactly one of the FPE_ macros.
- *
- * The mechanism has a static table with 127 entries.  Each combination
- * of the 7 FPU status word exception bits directly translates to a
- * position in this table, where a single FPE_... value is stored.
- * This FPE_... value stored there is considered the "most important"
- * of the exception bits and will be sent as the signal code.  The
- * precedence of the bits is based upon Intel Document "Numerical
- * Applications", Chapter "Special Computational Situations".
- *
- * The code to choose one of these values does these steps:
- * 1) Throw away status word bits that cannot be masked.
- * 2) Throw away the bits currently masked in the control word,
- *    assuming the user isn't interested in them anymore.
- * 3) Reinsert status word bit 7 (stack fault) if it is set, which
- *    cannot be masked but must be preserved.
- *    'Stack fault' is a sub-class of 'invalid operation'.
- * 4) Use the remaining bits to point into the trapcode table.
- *
- * The 6 maskable bits in order of their preference, as stated in the
- * above referenced Intel manual:
- * 1  Invalid operation (FP_X_INV)
- * 1a   Stack underflow
- * 1b   Stack overflow
- * 1c   Operand of unsupported format
- * 1d   SNaN operand.
- * 2  QNaN operand (not an exception, irrelevant here)
- * 3  Any other invalid-operation not mentioned above or zero divide
- *      (FP_X_INV, FP_X_DZ)
- * 4  Denormal operand (FP_X_DNML)
- * 5  Numeric over/underflow (FP_X_OFL, FP_X_UFL)
- * 6  Inexact result (FP_X_IMP)
- *
- * NB: the above seems to mix up the mxscr error bits and the x87 ones.
- * They are in the same order, but there is no EN_SW_STACK_FAULT in the mmx
- * status.
- *
- * The table is nearly, but not quite, in bit order (ZERODIV and DENORM
- * are swapped).
- *
- * This table assumes that any stack fault is cleared - so that an INVOP
- * fault will only be reported as FLTSUB once.
- * This might not happen if the mask is being changed.
- */
-#define FPE_xxx1(f) (f & EN_SW_INVOP \
-		? (f & EN_SW_STACK_FAULT ? FPE_FLTSUB : FPE_FLTINV) \
-	: f & EN_SW_ZERODIV ? FPE_FLTDIV \
-	: f & EN_SW_DENORM ? FPE_FLTUND \
-	: f & EN_SW_OVERFLOW ? FPE_FLTOVF \
-	: f & EN_SW_UNDERFLOW ? FPE_FLTUND \
-	: f & EN_SW_PRECLOSS ? FPE_FLTRES \
-	: f & EN_SW_STACK_FAULT ? FPE_FLTSUB : 0)
-#define	FPE_xxx2(f)	FPE_xxx1(f),	FPE_xxx1((f + 1))
-#define	FPE_xxx4(f)	FPE_xxx2(f),	FPE_xxx2((f + 2))
-#define	FPE_xxx8(f)	FPE_xxx4(f),	FPE_xxx4((f + 4))
-#define	FPE_xxx16(f)	FPE_xxx8(f),	FPE_xxx8((f + 8))
-#define	FPE_xxx32(f)	FPE_xxx16(f),	FPE_xxx16((f + 16))
-static const uint8_t fpetable[128] = {
-	FPE_xxx32(0), FPE_xxx32(32), FPE_xxx32(64), FPE_xxx32(96)
-};
-#undef FPE_xxx1
-#undef FPE_xxx2
-#undef FPE_xxx4
-#undef FPE_xxx8
-#undef FPE_xxx16
-#undef FPE_xxx32
-
-/*
- * Init the FPU.
- *
- * This might not be strictly necessary since it will be initialised
- * for each process.  However it does no harm.
- */
 void
 fpuinit(struct cpu_info *ci)
 {
-
+	/*
+	 * This might not be strictly necessary since it will be initialized
+	 * for each process. However it does no harm.
+	 */
 	clts();
 	fninit();
 	stts();
 }
 
-/*
- * Get the value of MXCSR_MASK supported by the CPU.
- */
 void
 fpuinit_mxcsr_mask(void)
 {
@@ -296,61 +210,60 @@ fpu_clear_amd(void)
 }
 
 static void
-fpu_save(struct lwp *l)
+fpu_area_save(void *area)
 {
-	struct pcb *pcb = lwp_getpcb(l);
+	clts();
 
 	switch (x86_fpu_save) {
 	case FPU_SAVE_FSAVE:
-		fnsave(&pcb->pcb_savefpu);
+		fnsave(area);
 		break;
 	case FPU_SAVE_FXSAVE:
-		fxsave(&pcb->pcb_savefpu);
+		fxsave(area);
 		break;
 	case FPU_SAVE_XSAVE:
-		xsave(&pcb->pcb_savefpu, x86_xsave_features);
+		xsave(area, x86_xsave_features);
 		break;
 	case FPU_SAVE_XSAVEOPT:
-		xsaveopt(&pcb->pcb_savefpu, x86_xsave_features);
+		xsaveopt(area, x86_xsave_features);
 		break;
 	}
 }
 
 static void
-fpu_restore(struct lwp *l)
+fpu_area_restore(void *area)
 {
-	struct pcb *pcb = lwp_getpcb(l);
+	clts();
 
 	switch (x86_fpu_save) {
 	case FPU_SAVE_FSAVE:
-		frstor(&pcb->pcb_savefpu);
+		frstor(area);
 		break;
 	case FPU_SAVE_FXSAVE:
 		if (cpu_vendor == CPUVENDOR_AMD)
 			fpu_clear_amd();
-		fxrstor(&pcb->pcb_savefpu);
+		fxrstor(area);
 		break;
 	case FPU_SAVE_XSAVE:
 	case FPU_SAVE_XSAVEOPT:
 		if (cpu_vendor == CPUVENDOR_AMD)
 			fpu_clear_amd();
-		xrstor(&pcb->pcb_savefpu, x86_xsave_features);
+		xrstor(area, x86_xsave_features);
 		break;
 	}
 }
 
 static void
-fpu_eagerrestore(struct lwp *l)
+fpu_lwp_install(struct lwp *l)
 {
 	struct pcb *pcb = lwp_getpcb(l);
 	struct cpu_info *ci = curcpu();
 
-	clts();
 	KASSERT(ci->ci_fpcurlwp == NULL);
 	KASSERT(pcb->pcb_fpcpu == NULL);
 	ci->ci_fpcurlwp = l;
 	pcb->pcb_fpcpu = ci;
-	fpu_restore(l);
+	fpu_area_restore(&pcb->pcb_savefpu);
 }
 
 void
@@ -375,46 +288,113 @@ fpu_eagerswitch(struct lwp *oldlwp, stru
 #endif
 	fpusave_cpu(true);
 	if (!(newlwp->l_flag & LW_SYSTEM))
-		fpu_eagerrestore(newlwp);
+		fpu_lwp_install(newlwp);
 	splx(s);
 }
 
 /* -------------------------------------------------------------------------- */
 
 /*
- * This is a synchronous trap on either an x87 instruction (due to an
- * unmasked error on the previous x87 instruction) or on an SSE/SSE2 etc
- * instruction due to an error on the instruction itself.
- *
- * If trap actually generates a signal, then the fpu state is saved
- * and then copied onto the process's user-stack, and then recovered
- * from there when the signal returns (or from the jmp_buf if the
- * signal handler exits with a longjmp()).
- *
- * All this code need to do is save the reason for the trap.
- * For x87 interrupts the status word bits need clearing to stop the
- * trap re-occurring.
- *
- * The mxcsr bits are 'sticky' and need clearing to not confuse a later trap.
- *
- * Since this is a synchronous trap, the fpu registers must still belong
- * to the correct process (we trap through an interrupt gate so that
- * interrupts are disabled on entry).
- * Interrupts (these better include IPIs) are left disabled until we've
- * finished looking at fpu registers.
+ * The following table is used to ensure that the FPE_... value
+ * that is passed as a trapcode to the signal handler of the user
+ * process does not have more than one bit set.
+ *
+ * Multiple bits may be set if SSE simd instructions generate errors
+ * on more than one value or if the user process modifies the control
+ * word while a status word bit is already set (which is a sign
+ * of bad coding).
+ * We have no choice but to narrow them down to one bit, since we must
+ * not send a trapcode that is not exactly one of the FPE_ macros.
+ *
+ * The mechanism has a static table with 127 entries.  Each combination
+ * of the 7 FPU status word exception bits directly translates to a
+ * position in this table, where a single FPE_... value is stored.
+ * This FPE_... value stored there is considered the "most important"
+ * of the exception bits and will be sent as the signal code.  The
+ * precedence of the bits is based upon Intel Document "Numerical
+ * Applications", Chapter "Special Computational Situations".
+ *
+ * The code to choose one of these values does these steps:
+ * 1) Throw away status word bits that cannot be masked.
+ * 2) Throw away the bits currently masked in the control word,
+ *    assuming the user isn't interested in them anymore.
+ * 3) Reinsert status word bit 7 (stack fault) if it is set, which
+ *    cannot be masked but must be preserved.
+ *    'Stack fault' is a sub-class of 'invalid operation'.
+ * 4) Use the remaining bits to point into the trapcode table.
+ *
+ * The 6 maskable bits in order of their preference, as stated in the
+ * above referenced Intel manual:
+ * 1  Invalid operation (FP_X_INV)
+ * 1a   Stack underflow
+ * 1b   Stack overflow
+ * 1c   Operand of unsupported format
+ * 1d   SNaN operand.
+ * 2  QNaN operand (not an exception, irrelevant here)
+ * 3  Any other invalid-operation not mentioned above or zero divide
+ *      (FP_X_INV, FP_X_DZ)
+ * 4  Denormal operand (FP_X_DNML)
+ * 5  Numeric over/underflow (FP_X_OFL, FP_X_UFL)
+ * 6  Inexact result (FP_X_IMP)
+ *
+ * NB: the above seems to mix up the mxcsr error bits and the x87 ones.
+ * They are in the same order, but there is no EN_SW_STACK_FAULT in the mmx
+ * status.
  *
- * For amd64 the calling code (in amd64_trap.S) has already checked
- * that we trapped from usermode.
+ * The table is nearly, but not quite, in bit order (ZERODIV and DENORM
+ * are swapped).
+ *
+ * This table assumes that any stack fault is cleared - so that an INVOP
+ * fault will only be reported as FLTSUB once.
+ * This might not happen if the mask is being changed.
  */
+#define FPE_xxx1(f) (f & EN_SW_INVOP \
+		? (f & EN_SW_STACK_FAULT ? FPE_FLTSUB : FPE_FLTINV) \
+	: f & EN_SW_ZERODIV ? FPE_FLTDIV \
+	: f & EN_SW_DENORM ? FPE_FLTUND \
+	: f & EN_SW_OVERFLOW ? FPE_FLTOVF \
+	: f & EN_SW_UNDERFLOW ? FPE_FLTUND \
+	: f & EN_SW_PRECLOSS ? FPE_FLTRES \
+	: f & EN_SW_STACK_FAULT ? FPE_FLTSUB : 0)
+#define	FPE_xxx2(f)	FPE_xxx1(f),	FPE_xxx1((f + 1))
+#define	FPE_xxx4(f)	FPE_xxx2(f),	FPE_xxx2((f + 2))
+#define	FPE_xxx8(f)	FPE_xxx4(f),	FPE_xxx4((f + 4))
+#define	FPE_xxx16(f)	FPE_xxx8(f),	FPE_xxx8((f + 8))
+#define	FPE_xxx32(f)	FPE_xxx16(f),	FPE_xxx16((f + 16))
+static const uint8_t fpetable[128] = {
+	FPE_xxx32(0), FPE_xxx32(32), FPE_xxx32(64), FPE_xxx32(96)
+};
+#undef FPE_xxx1
+#undef FPE_xxx2
+#undef FPE_xxx4
+#undef FPE_xxx8
+#undef FPE_xxx16
+#undef FPE_xxx32
 
+/*
+ * This is a synchronous trap on either an x87 instruction (due to an unmasked
+ * error on the previous x87 instruction) or on an SSE/SSE2/etc instruction due
+ * to an error on the instruction itself.
+ *
+ * If trap actually generates a signal, then the fpu state is saved and then
+ * copied onto the lwp's user-stack, and then recovered from there when the
+ * signal returns.
+ *
+ * All this code needs to do is save the reason for the trap. For x87 traps the
+ * status word bits need clearing to stop the trap re-occurring. For SSE traps
+ * the mxcsr bits are 'sticky' and need clearing to not confuse a later trap.
+ *
+ * We come here with interrupts disabled.
+ */
 void
 fputrap(struct trapframe *frame)
 {
 	uint32_t statbits;
 	ksiginfo_t ksi;
 
-	if (!USERMODE(frame->tf_cs))
+	if (__predict_false(!USERMODE(frame->tf_cs))) {
 		panic("fpu trap from kernel, trapframe %p\n", frame);
+	}
 
 	/*
 	 * At this point, fpcurlwp should be curlwp.  If it wasn't, the TS bit
@@ -442,7 +422,7 @@ fputrap(struct trapframe *frame)
 		/* Clear any pending exceptions from status word */
 		fnclex();
 
-		/* Removed masked interrupts */
+		/* Remove masked interrupts */
 		statbits = sw & ~(cw & 0x3f);
 	}
 
@@ -458,7 +438,7 @@ fputrap(struct trapframe *frame)
 }
 
 /*
- * Implement device not available (DNA) exception
+ * Implement device not available (DNA) exception.
  *
  * If we were the last lwp to use the FPU, we can simply return.
  * Otherwise, we save the previous state, if necessary, and restore
@@ -469,18 +449,16 @@ fputrap(struct trapframe *frame)
 void
 fpudna(struct trapframe *frame)
 {
-	struct cpu_info *ci;
+	struct cpu_info *ci = curcpu();
 	struct lwp *l, *fl;
 	struct pcb *pcb;
 	int s;
 
-	if (!USERMODE(frame->tf_cs))
+	if (!USERMODE(frame->tf_cs)) {
 		panic("fpudna from kernel, ip %p, trapframe %p\n",
 		    (void *)X86_TF_RIP(frame), frame);
+	}
 
-	ci = curcpu();
-
-	/* Save soft spl level - interrupts are hard disabled */
 	s = splhigh();
 
 	/* Save state on current CPU. */
@@ -526,20 +504,15 @@ fpudna(struct trapframe *frame)
 		kpreempt_enable();
 	}
 
-	/*
-	 * Restore state on this CPU, or initialize.  Ensure that
-	 * the entire update is atomic with respect to FPU-sync IPIs.
-	 */
-	clts();
-	ci->ci_fpcurlwp = l;
-	pcb->pcb_fpcpu = ci;
-
-	fpu_restore(l);
+	/* Install the LWP's FPU state. */
+	fpu_lwp_install(l);
 
 	KASSERT(ci == curcpu());
 	splx(s);
 }
 
+/* -------------------------------------------------------------------------- */
+
 /*
  * Save current CPU's FPU state.  Must be called at IPL_HIGH.
  */
@@ -560,8 +533,7 @@ fpusave_cpu(bool save)
 	pcb = lwp_getpcb(l);
 
 	if (save) {
-		clts();
-		fpu_save(l);
+		fpu_area_save(&pcb->pcb_savefpu);
 	}
 
 	stts();
@@ -619,7 +591,7 @@ fpusave_lwp(struct lwp *l, bool save)
 void
 fpu_set_default_cw(struct lwp *l, unsigned int x87_cw)
 {
-	union savefpu *fpu_save = process_fpframe(l);
+	union savefpu *fpu_save = lwp_fpuarea(l);
 	struct pcb *pcb = lwp_getpcb(l);
 
 	if (i386_use_fxsave) {
@@ -647,7 +619,7 @@ fpu_save_area_clear(struct lwp *l, unsig
 
 	KASSERT(l == curlwp);
 	KASSERT((l->l_flag & LW_SYSTEM) == 0);
-	fpu_save = process_fpframe(l);
+	fpu_save = lwp_fpuarea(l);
 	pcb = lwp_getpcb(l);
 
 	s = splhigh();
@@ -694,7 +666,7 @@ fpu_save_area_clear(struct lwp *l, unsig
 	pcb->pcb_fpu_dflt_cw = x87_cw;
 
 	if (x86_fpu_eager) {
-		fpu_eagerrestore(l);
+		fpu_lwp_install(l);
 		splx(s);
 	}
 }
@@ -702,7 +674,7 @@ fpu_save_area_clear(struct lwp *l, unsig
 void
 fpu_save_area_reset(struct lwp *l)
 {
-	union savefpu *fpu_save = process_fpframe(l);
+	union savefpu *fpu_save = lwp_fpuarea(l);
 	struct pcb *pcb = lwp_getpcb(l);
 
 	/*
@@ -857,7 +829,7 @@ process_write_fpregs_xmm(struct lwp *l, 
 	union savefpu *fpu_save;
 
 	fpusave_lwp(l, false);
-	fpu_save = process_fpframe(l);
+	fpu_save = lwp_fpuarea(l);
 
 	if (i386_use_fxsave) {
 		memcpy(&fpu_save->sv_xmm, fpregs, sizeof(fpu_save->sv_xmm));
@@ -890,7 +862,7 @@ process_write_fpregs_s87(struct lwp *l, 
 	if (i386_use_fxsave) {
 		/* Save so we don't lose the xmm registers */
 		fpusave_lwp(l, true);
-		fpu_save = process_fpframe(l);
+		fpu_save = lwp_fpuarea(l);
 		process_s87_to_xmm(fpregs, &fpu_save->sv_xmm);
 
 		/*
@@ -904,7 +876,7 @@ process_write_fpregs_s87(struct lwp *l, 
 		}
 	} else {
 		fpusave_lwp(l, false);
-		fpu_save = process_fpframe(l);
+		fpu_save = lwp_fpuarea(l);
 		memcpy(&fpu_save->sv_87, fpregs, sizeof(fpu_save->sv_87));
 	}
 }
@@ -915,7 +887,7 @@ process_read_fpregs_xmm(struct lwp *l, s
 	union savefpu *fpu_save;
 
 	fpusave_lwp(l, true);
-	fpu_save = process_fpframe(l);
+	fpu_save = lwp_fpuarea(l);
 
 	if (i386_use_fxsave) {
 		memcpy(fpregs, &fpu_save->sv_xmm, sizeof(fpu_save->sv_xmm));
@@ -931,7 +903,7 @@ process_read_fpregs_s87(struct lwp *l, s
 	union savefpu *fpu_save;
 
 	fpusave_lwp(l, true);
-	fpu_save = process_fpframe(l);
+	fpu_save = lwp_fpuarea(l);
 
 	if (i386_use_fxsave) {
 		memset(fpregs, 0, sizeof(*fpregs));

Reply via email to