Author: kib
Date: Sat Dec 30 11:33:04 2017
New Revision: 327380
URL: https://svnweb.freebsd.org/changeset/base/327380

Log:
  Move i386/isa/npx.c to i386/i386/npx.c.
  
  The i386 FPU (AKA npx) code does not depend on ISA devices at all,
  after the support for IRQ13 FPU exceptions was removed.  Put the file
  into the expected place in the kernel source tree.
  
  Discussed with:       jhb
  Sponsored by: The FreeBSD Foundation

Added:
  head/sys/i386/i386/npx.c
     - copied unchanged from r327379, head/sys/i386/isa/npx.c
Deleted:
  head/sys/i386/isa/npx.c
Modified:
  head/sys/conf/files.i386

Modified: head/sys/conf/files.i386
==============================================================================
--- head/sys/conf/files.i386    Sat Dec 30 08:16:31 2017        (r327379)
+++ head/sys/conf/files.i386    Sat Dec 30 11:33:04 2017        (r327380)
@@ -489,6 +489,7 @@ i386/i386/minidump_machdep.c        standard
 i386/i386/mp_clock.c           optional smp
 i386/i386/mp_machdep.c         optional smp
 i386/i386/mpboot.s             optional smp
+i386/i386/npx.c                        standard
 i386/i386/perfmon.c            optional perfmon
 i386/i386/pmap.c               standard
 i386/i386/ptrace_machdep.c     standard
@@ -519,7 +520,6 @@ i386/ibcs2/ibcs2_xenix.c    optional ibcs2
 i386/ibcs2/ibcs2_xenix_sysent.c        optional ibcs2
 i386/ibcs2/imgact_coff.c       optional ibcs2
 i386/isa/elink.c               optional ep
-i386/isa/npx.c                 standard
 i386/isa/pmtimer.c             optional pmtimer
 i386/isa/prof_machdep.c                optional profiling-routine
 i386/linux/imgact_linux.c      optional compat_linux

Copied: head/sys/i386/i386/npx.c (from r327379, head/sys/i386/isa/npx.c)
==============================================================================
--- /dev/null   00:00:00 1970   (empty, because file is newly added)
+++ head/sys/i386/i386/npx.c    Sat Dec 30 11:33:04 2017        (r327380, copy 
of r327379, head/sys/i386/isa/npx.c)
@@ -0,0 +1,1430 @@
+/*-
+ * Copyright (c) 1990 William Jolitz.
+ * Copyright (c) 1991 The Regents of the University of California.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *     from: @(#)npx.c 7.2 (Berkeley) 5/12/91
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include "opt_cpu.h"
+#include "opt_isa.h"
+#include "opt_npx.h"
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/bus.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/module.h>
+#include <sys/mutex.h>
+#include <sys/mutex.h>
+#include <sys/proc.h>
+#include <sys/smp.h>
+#include <sys/sysctl.h>
+#include <machine/bus.h>
+#include <sys/rman.h>
+#ifdef NPX_DEBUG
+#include <sys/syslog.h>
+#endif
+#include <sys/signalvar.h>
+#include <vm/uma.h>
+
+#include <machine/asmacros.h>
+#include <machine/cputypes.h>
+#include <machine/frame.h>
+#include <machine/md_var.h>
+#include <machine/pcb.h>
+#include <machine/psl.h>
+#include <machine/resource.h>
+#include <machine/specialreg.h>
+#include <machine/segments.h>
+#include <machine/ucontext.h>
+
+#include <machine/intr_machdep.h>
+
+#ifdef DEV_ISA
+#include <isa/isavar.h>
+#endif
+
+/*
+ * 387 and 287 Numeric Coprocessor Extension (NPX) Driver.
+ */
+
+#if defined(__GNUCLIKE_ASM) && !defined(lint)
+
+#define        fldcw(cw)               __asm __volatile("fldcw %0" : : "m" 
(cw))
+#define        fnclex()                __asm __volatile("fnclex")
+#define        fninit()                __asm __volatile("fninit")
+#define        fnsave(addr)            __asm __volatile("fnsave %0" : "=m" 
(*(addr)))
+#define        fnstcw(addr)            __asm __volatile("fnstcw %0" : "=m" 
(*(addr)))
+#define        fnstsw(addr)            __asm __volatile("fnstsw %0" : "=am" 
(*(addr)))
+#define        fp_divide_by_0()        __asm __volatile( \
+                                   "fldz; fld1; fdiv %st,%st(1); fnop")
+#define        frstor(addr)            __asm __volatile("frstor %0" : : "m" 
(*(addr)))
+#define        fxrstor(addr)           __asm __volatile("fxrstor %0" : : "m" 
(*(addr)))
+#define        fxsave(addr)            __asm __volatile("fxsave %0" : "=m" 
(*(addr)))
+#define        ldmxcsr(csr)            __asm __volatile("ldmxcsr %0" : : "m" 
(csr))
+#define        stmxcsr(addr)           __asm __volatile("stmxcsr %0" : "=m" 
(*(addr))) /* stores MXCSR into *addr, hence an output operand */
+
+static __inline void
+xrstor(char *addr, uint64_t mask)
+{
+       uint32_t low, hi;
+
+       low = mask;
+       hi = mask >> 32;
+       __asm __volatile("xrstor %0" : : "m" (*addr), "a" (low), "d" (hi));
+}
+
+static __inline void
+xsave(char *addr, uint64_t mask)
+{
+       uint32_t low, hi;
+
+       low = mask;
+       hi = mask >> 32;
+       __asm __volatile("xsave %0" : "=m" (*addr) : "a" (low), "d" (hi) :
+           "memory");
+}
+
+static __inline void
+xsaveopt(char *addr, uint64_t mask)
+{
+       uint32_t low, hi;
+
+       low = mask;
+       hi = mask >> 32;
+       __asm __volatile("xsaveopt %0" : "=m" (*addr) : "a" (low), "d" (hi) :
+           "memory");
+}
+#else  /* !(__GNUCLIKE_ASM && !lint) */
+
+void   fldcw(u_short cw);
+void   fnclex(void);
+void   fninit(void);
+void   fnsave(caddr_t addr);
+void   fnstcw(caddr_t addr);
+void   fnstsw(caddr_t addr);
+void   fp_divide_by_0(void);
+void   frstor(caddr_t addr);
+void   fxsave(caddr_t addr);
+void   fxrstor(caddr_t addr);
+void   ldmxcsr(u_int csr);
+void   stmxcsr(u_int *csr);
+void   xrstor(char *addr, uint64_t mask);
+void   xsave(char *addr, uint64_t mask);
+void   xsaveopt(char *addr, uint64_t mask);
+
+#endif /* __GNUCLIKE_ASM && !lint */
+
+#define        start_emulating()       load_cr0(rcr0() | CR0_TS)
+#define        stop_emulating()        clts()
+
+#define GET_FPU_CW(thread) \
+       (cpu_fxsr ? \
+               (thread)->td_pcb->pcb_save->sv_xmm.sv_env.en_cw : \
+               (thread)->td_pcb->pcb_save->sv_87.sv_env.en_cw)
+#define GET_FPU_SW(thread) \
+       (cpu_fxsr ? \
+               (thread)->td_pcb->pcb_save->sv_xmm.sv_env.en_sw : \
+               (thread)->td_pcb->pcb_save->sv_87.sv_env.en_sw)
+#define SET_FPU_CW(savefpu, value) do { \
+       if (cpu_fxsr) \
+               (savefpu)->sv_xmm.sv_env.en_cw = (value); \
+       else \
+               (savefpu)->sv_87.sv_env.en_cw = (value); \
+} while (0)
+
+CTASSERT(sizeof(union savefpu) == 512);
+CTASSERT(sizeof(struct xstate_hdr) == 64);
+CTASSERT(sizeof(struct savefpu_ymm) == 832);
+
+/*
+ * This requirement is to make it easier for asm code to calculate
+ * offset of the fpu save area from the pcb address. FPU save area
+ * must be 64-byte aligned.
+ */
+CTASSERT(sizeof(struct pcb) % XSAVE_AREA_ALIGN == 0);
+
+/*
+ * Ensure the copy of XCR0 saved in a core is contained in the padding
+ * area.
+ */
+CTASSERT(X86_XSTATE_XCR0_OFFSET >= offsetof(struct savexmm, sv_pad) &&
+    X86_XSTATE_XCR0_OFFSET + sizeof(uint64_t) <= sizeof(struct savexmm));
+
+static void    fpu_clean_state(void);
+
+static void    fpusave(union savefpu *);
+static void    fpurstor(union savefpu *);
+
+int    hw_float;
+
+SYSCTL_INT(_hw, HW_FLOATINGPT, floatingpoint, CTLFLAG_RD,
+    &hw_float, 0, "Floating point instructions executed in hardware");
+
+int use_xsave;
+uint64_t xsave_mask;
+static uma_zone_t fpu_save_area_zone;
+static union savefpu *npx_initialstate;
+
+struct xsave_area_elm_descr {
+       u_int   offset;
+       u_int   size;
+} *xsave_area_desc;
+
+static int use_xsaveopt;
+
+static volatile u_int          npx_traps_while_probing;
+
+alias_for_inthand_t probetrap;
+__asm("                                                                \n\
+       .text                                                   \n\
+       .p2align 2,0x90                                         \n\
+       .type   " __XSTRING(CNAME(probetrap)) ",@function       \n\
+" __XSTRING(CNAME(probetrap)) ":                               \n\
+       ss                                                      \n\
+       incl    " __XSTRING(CNAME(npx_traps_while_probing)) "   \n\
+       fnclex                                                  \n\
+       iret                                                    \n\
+");
+
+/*
+ * Determine if an FPU is present and how to use it.
+ */
+static int
+npx_probe(void)
+{
+       struct gate_descriptor save_idt_npxtrap;
+       u_short control, status;
+
+       /*
+        * Modern CPUs all have an FPU that uses the INT16 interface
+        * and provide a simple way to verify that, so handle the
+        * common case right away.
+        */
+       if (cpu_feature & CPUID_FPU) {
+               hw_float = 1;
+               return (1);
+       }
+
+       save_idt_npxtrap = idt[IDT_MF];
+       setidt(IDT_MF, probetrap, SDT_SYS386TGT, SEL_KPL,
+           GSEL(GCODE_SEL, SEL_KPL));
+
+       /*
+        * Don't trap while we're probing.
+        */
+       stop_emulating();
+
+       /*
+        * Finish resetting the coprocessor, if any.  If there is an error
+        * pending, then we may get a bogus IRQ13, but npx_intr() will handle
+        * it OK.  Bogus halts have never been observed, but we enabled
+        * IRQ13 and cleared the BUSY# latch early to handle them anyway.
+        */
+       fninit();
+
+       /*
+        * Don't use fwait here because it might hang.
+        * Don't use fnop here because it usually hangs if there is no FPU.
+        */
+       DELAY(1000);            /* wait for any IRQ13 */
+#ifdef DIAGNOSTIC
+       if (npx_traps_while_probing != 0)
+               printf("fninit caused %u bogus npx trap(s)\n",
+                      npx_traps_while_probing);
+#endif
+       /*
+        * Check for a status of mostly zero.
+        */
+       status = 0x5a5a;
+       fnstsw(&status);
+       if ((status & 0xb8ff) == 0) {
+               /*
+                * Good, now check for a proper control word.
+                */
+               control = 0x5a5a;
+               fnstcw(&control);
+               if ((control & 0x1f3f) == 0x033f) {
+                       /*
+                        * We have an npx, now divide by 0 to see if exception
+                        * 16 works.
+                        */
+                       control &= ~(1 << 2);   /* enable divide by 0 trap */
+                       fldcw(control);
+                       npx_traps_while_probing = 0;
+                       fp_divide_by_0();
+                       if (npx_traps_while_probing != 0) {
+                               /*
+                                * Good, exception 16 works.
+                                */
+                               hw_float = 1;
+                               goto cleanup;
+                       }
+                       printf(
+       "FPU does not use exception 16 for error reporting\n");
+                       goto cleanup;
+               }
+       }
+
+       /*
+        * Probe failed.  Floating point simply won't work.
+        * Notify user and disable FPU/MMX/SSE instruction execution.
+        */
+       printf("WARNING: no FPU!\n");
+       __asm __volatile("smsw %%ax; orb %0,%%al; lmsw %%ax" : :
+           "n" (CR0_EM | CR0_MP) : "ax");
+
+cleanup:
+       idt[IDT_MF] = save_idt_npxtrap;
+       return (hw_float);
+}
+
+/*
+ * Enable XSAVE if supported and allowed by user.
+ * Calculate the xsave_mask.
+ */
+static void
+npxinit_bsp1(void)
+{
+       u_int cp[4];
+       uint64_t xsave_mask_user;
+
+       if (cpu_fxsr && (cpu_feature2 & CPUID2_XSAVE) != 0) {
+               use_xsave = 1;
+               TUNABLE_INT_FETCH("hw.use_xsave", &use_xsave);
+       }
+       if (!use_xsave)
+               return;
+
+       cpuid_count(0xd, 0x0, cp);
+       xsave_mask = XFEATURE_ENABLED_X87 | XFEATURE_ENABLED_SSE;
+       if ((cp[0] & xsave_mask) != xsave_mask)
+               panic("CPU0 does not support X87 or SSE: %x", cp[0]);
+       xsave_mask = ((uint64_t)cp[3] << 32) | cp[0];
+       xsave_mask_user = xsave_mask;
+       TUNABLE_QUAD_FETCH("hw.xsave_mask", &xsave_mask_user);
+       xsave_mask_user |= XFEATURE_ENABLED_X87 | XFEATURE_ENABLED_SSE;
+       xsave_mask &= xsave_mask_user;
+       if ((xsave_mask & XFEATURE_AVX512) != XFEATURE_AVX512)
+               xsave_mask &= ~XFEATURE_AVX512;
+       if ((xsave_mask & XFEATURE_MPX) != XFEATURE_MPX)
+               xsave_mask &= ~XFEATURE_MPX;
+
+       cpuid_count(0xd, 0x1, cp);
+       if ((cp[0] & CPUID_EXTSTATE_XSAVEOPT) != 0)
+               use_xsaveopt = 1;
+}
+
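+/*
+ * Calculate the fpu save area size.  When XSAVE is in use this must run
+ * after XCR0 has been loaded, because CPUID leaf 0xd reports the size.
+ */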
+static void
+npxinit_bsp2(void)
+{
+       u_int cp[4];
+
+       if (use_xsave) {
+               cpuid_count(0xd, 0x0, cp);
+               cpu_max_ext_state_size = cp[1];
+
+               /*
+                * Reload the cpu_feature2, since we enabled OSXSAVE.
+                */
+               do_cpuid(1, cp);
+               cpu_feature2 = cp[2];
+       } else
+               cpu_max_ext_state_size = sizeof(union savefpu);
+}
+
+/*
+ * Initialize floating point unit.
+ */
+void
+npxinit(bool bsp)
+{
+       static union savefpu dummy;
+       register_t saveintr;
+       u_int mxcsr;
+       u_short control;
+
+       if (bsp) {
+               if (!npx_probe())
+                       return;
+               npxinit_bsp1();
+       }
+
+       if (use_xsave) {
+               load_cr4(rcr4() | CR4_XSAVE);
+               load_xcr(XCR0, xsave_mask);
+       }
+
+       /*
+        * XCR0 shall be set up before CPU can report the save area size.
+        */
+       if (bsp)
+               npxinit_bsp2();
+       
+       /*
+        * fninit has the same h/w bugs as fnsave.  Use the detoxified
+        * fnsave to throw away any junk in the fpu.  fpusave() initializes
+        * the fpu.
+        *
+        * It is too early for critical_enter() to work on AP.
+        */
+       saveintr = intr_disable();
+       stop_emulating();
+       if (cpu_fxsr)
+               fninit();
+       else
+               fnsave(&dummy);
+       control = __INITIAL_NPXCW__;
+       fldcw(control);
+       if (cpu_fxsr) {
+               mxcsr = __INITIAL_MXCSR__;
+               ldmxcsr(mxcsr);
+       }
+       start_emulating();
+       intr_restore(saveintr);
+}
+
+/*
+ * On the boot CPU we generate a clean state that is used to initialize the
+ * floating point unit when it is first used by a process.  The sleepable
+ * allocations are done first, before interrupts are disabled.
+ */
+static void
+npxinitstate(void *arg __unused)
+{
+       register_t saveintr;
+       u_int cp[4];
+       int i, max_ext_n;
+
+       if (!hw_float)
+               return;
+
+       npx_initialstate = malloc(cpu_max_ext_state_size, M_DEVBUF,
+           M_WAITOK | M_ZERO);
+       fpu_save_area_zone = uma_zcreate("FPU_save_area",
+           cpu_max_ext_state_size, NULL, NULL, NULL, NULL,
+           XSAVE_AREA_ALIGN - 1, 0);
+
+       /*
+        * Create a table describing the layout of the CPU Extended
+        * Save Area.
+        */
+       if (use_xsave) {
+               if (xsave_mask >> 32 != 0)
+                       max_ext_n = fls(xsave_mask >> 32) + 32;
+               else
+                       max_ext_n = fls(xsave_mask);
+               xsave_area_desc = malloc(max_ext_n * sizeof(struct
+                   xsave_area_elm_descr), M_DEVBUF, M_WAITOK | M_ZERO);
+               /* x87 state */
+               xsave_area_desc[0].offset = 0;
+               xsave_area_desc[0].size = 160;
+               /* XMM */
+               xsave_area_desc[1].offset = 160;
+               xsave_area_desc[1].size = 288 - 160;
+
+               for (i = 2; i < max_ext_n; i++) {
+                       cpuid_count(0xd, i, cp);
+                       xsave_area_desc[i].offset = cp[1];
+                       xsave_area_desc[i].size = cp[0];
+               }
+       }
+
+       saveintr = intr_disable();
+       stop_emulating();
+
+       fpusave(npx_initialstate);
+       if (cpu_fxsr) {
+               if (npx_initialstate->sv_xmm.sv_env.en_mxcsr_mask)
+                       cpu_mxcsr_mask =
+                           npx_initialstate->sv_xmm.sv_env.en_mxcsr_mask;
+               else
+                       cpu_mxcsr_mask = 0xFFBF;
+
+               /*
+                * The fninit instruction does not modify XMM registers or x87
+                * registers (MM/ST).  The fpusave call dumped the garbage
+                * contained in the registers after reset to the initial state
+                * saved.  Clear XMM and x87 registers file image to make the
+                * startup program state and signal handler XMM/x87 register
+                * content predictable.
+                */
+               bzero(npx_initialstate->sv_xmm.sv_fp,
+                   sizeof(npx_initialstate->sv_xmm.sv_fp));
+               bzero(npx_initialstate->sv_xmm.sv_xmm,
+                   sizeof(npx_initialstate->sv_xmm.sv_xmm));
+       } else
+               bzero(npx_initialstate->sv_87.sv_ac,
+                   sizeof(npx_initialstate->sv_87.sv_ac));
+
+       start_emulating();
+       intr_restore(saveintr);
+}
+SYSINIT(npxinitstate, SI_SUB_DRIVERS, SI_ORDER_ANY, npxinitstate, NULL);
+
+/*
+ * Free coprocessor (if we have it).
+ */
+void
+npxexit(struct thread *td)
+{
+
+       critical_enter();
+       if (curthread == PCPU_GET(fpcurthread)) {
+               stop_emulating();
+               fpusave(curpcb->pcb_save);
+               start_emulating();
+               PCPU_SET(fpcurthread, NULL);
+       }
+       critical_exit();
+#ifdef NPX_DEBUG
+       if (hw_float) {
+               u_int   masked_exceptions;
+
+               masked_exceptions = GET_FPU_CW(td) & GET_FPU_SW(td) & 0x7f;
+               /*
+                * Log exceptions that would have trapped with the old
+                * control word (overflow, divide by 0, and invalid operand).
+                */
+               if (masked_exceptions & 0x0d)
+                       log(LOG_ERR,
+       "pid %d (%s) exited with masked floating point exceptions 0x%02x\n",
+                           td->td_proc->p_pid, td->td_proc->p_comm,
+                           masked_exceptions);
+       }
+#endif
+}
+
+/* Return the _MC_FPFMT_* code selected by hw_float and cpu_fxsr. */
+int
+npxformat(void)
+{
+
+       if (hw_float)
+               return (cpu_fxsr ? _MC_FPFMT_XMM : _MC_FPFMT_387);
+       else
+               return (_MC_FPFMT_NODEV);
+}
+
+/* 
+ * The following mechanism is used to ensure that the FPE_... value
+ * that is passed as a trapcode to the signal handler of the user
+ * process does not have more than one bit set.
+ * 
+ * Multiple bits may be set if the user process modifies the control
+ * word while a status word bit is already set.  While this is a sign
+ * of bad coding, we have no choice but to narrow them down to one
+ * bit, since we must not send a trapcode that is not exactly one of
+ * the FPE_ macros.
+ *
+ * The mechanism has a static table with 128 entries.  Each combination
+ * of the 7 FPU status word exception bits directly translates to a
+ * position in this table, where a single FPE_... value is stored.
+ * This FPE_... value stored there is considered the "most important"
+ * of the exception bits and will be sent as the signal code.  The
+ * precedence of the bits is based upon Intel Document "Numerical
+ * Applications", Chapter "Special Computational Situations".
+ *
+ * The macro to choose one of these values does these steps: 1) Throw
+ * away status word bits that cannot be masked.  2) Throw away the bits
+ * currently masked in the control word, assuming the user isn't
+ * interested in them anymore.  3) Reinsert status word bit 7 (stack
+ * fault) if it is set, which cannot be masked but must be preserved.
+ * 4) Use the remaining bits to point into the trapcode table.
+ *
+ * The 6 maskable bits in order of their preference, as stated in the
+ * above referenced Intel manual:
+ * 1  Invalid operation (FP_X_INV)
+ * 1a   Stack underflow
+ * 1b   Stack overflow
+ * 1c   Operand of unsupported format
+ * 1d   SNaN operand.
+ * 2  QNaN operand (not an exception, irrelevant here)
+ * 3  Any other invalid-operation not mentioned above or zero divide
+ *      (FP_X_INV, FP_X_DZ)
+ * 4  Denormal operand (FP_X_DNML)
+ * 5  Numeric over/underflow (FP_X_OFL, FP_X_UFL)
+ * 6  Inexact result (FP_X_IMP) 
+ */
+static char fpetable[128] = {
+       0,
+       FPE_FLTINV,     /*  1 - INV */
+       FPE_FLTUND,     /*  2 - DNML */
+       FPE_FLTINV,     /*  3 - INV | DNML */
+       FPE_FLTDIV,     /*  4 - DZ */
+       FPE_FLTINV,     /*  5 - INV | DZ */
+       FPE_FLTDIV,     /*  6 - DNML | DZ */
+       FPE_FLTINV,     /*  7 - INV | DNML | DZ */
+       FPE_FLTOVF,     /*  8 - OFL */
+       FPE_FLTINV,     /*  9 - INV | OFL */
+       FPE_FLTUND,     /*  A - DNML | OFL */
+       FPE_FLTINV,     /*  B - INV | DNML | OFL */
+       FPE_FLTDIV,     /*  C - DZ | OFL */
+       FPE_FLTINV,     /*  D - INV | DZ | OFL */
+       FPE_FLTDIV,     /*  E - DNML | DZ | OFL */
+       FPE_FLTINV,     /*  F - INV | DNML | DZ | OFL */
+       FPE_FLTUND,     /* 10 - UFL */
+       FPE_FLTINV,     /* 11 - INV | UFL */
+       FPE_FLTUND,     /* 12 - DNML | UFL */
+       FPE_FLTINV,     /* 13 - INV | DNML | UFL */
+       FPE_FLTDIV,     /* 14 - DZ | UFL */
+       FPE_FLTINV,     /* 15 - INV | DZ | UFL */
+       FPE_FLTDIV,     /* 16 - DNML | DZ | UFL */
+       FPE_FLTINV,     /* 17 - INV | DNML | DZ | UFL */
+       FPE_FLTOVF,     /* 18 - OFL | UFL */
+       FPE_FLTINV,     /* 19 - INV | OFL | UFL */
+       FPE_FLTUND,     /* 1A - DNML | OFL | UFL */
+       FPE_FLTINV,     /* 1B - INV | DNML | OFL | UFL */
+       FPE_FLTDIV,     /* 1C - DZ | OFL | UFL */
+       FPE_FLTINV,     /* 1D - INV | DZ | OFL | UFL */
+       FPE_FLTDIV,     /* 1E - DNML | DZ | OFL | UFL */
+       FPE_FLTINV,     /* 1F - INV | DNML | DZ | OFL | UFL */
+       FPE_FLTRES,     /* 20 - IMP */
+       FPE_FLTINV,     /* 21 - INV | IMP */
+       FPE_FLTUND,     /* 22 - DNML | IMP */
+       FPE_FLTINV,     /* 23 - INV | DNML | IMP */
+       FPE_FLTDIV,     /* 24 - DZ | IMP */
+       FPE_FLTINV,     /* 25 - INV | DZ | IMP */
+       FPE_FLTDIV,     /* 26 - DNML | DZ | IMP */
+       FPE_FLTINV,     /* 27 - INV | DNML | DZ | IMP */
+       FPE_FLTOVF,     /* 28 - OFL | IMP */
+       FPE_FLTINV,     /* 29 - INV | OFL | IMP */
+       FPE_FLTUND,     /* 2A - DNML | OFL | IMP */
+       FPE_FLTINV,     /* 2B - INV | DNML | OFL | IMP */
+       FPE_FLTDIV,     /* 2C - DZ | OFL | IMP */
+       FPE_FLTINV,     /* 2D - INV | DZ | OFL | IMP */
+       FPE_FLTDIV,     /* 2E - DNML | DZ | OFL | IMP */
+       FPE_FLTINV,     /* 2F - INV | DNML | DZ | OFL | IMP */
+       FPE_FLTUND,     /* 30 - UFL | IMP */
+       FPE_FLTINV,     /* 31 - INV | UFL | IMP */
+       FPE_FLTUND,     /* 32 - DNML | UFL | IMP */
+       FPE_FLTINV,     /* 33 - INV | DNML | UFL | IMP */
+       FPE_FLTDIV,     /* 34 - DZ | UFL | IMP */
+       FPE_FLTINV,     /* 35 - INV | DZ | UFL | IMP */
+       FPE_FLTDIV,     /* 36 - DNML | DZ | UFL | IMP */
+       FPE_FLTINV,     /* 37 - INV | DNML | DZ | UFL | IMP */
+       FPE_FLTOVF,     /* 38 - OFL | UFL | IMP */
+       FPE_FLTINV,     /* 39 - INV | OFL | UFL | IMP */
+       FPE_FLTUND,     /* 3A - DNML | OFL | UFL | IMP */
+       FPE_FLTINV,     /* 3B - INV | DNML | OFL | UFL | IMP */
+       FPE_FLTDIV,     /* 3C - DZ | OFL | UFL | IMP */
+       FPE_FLTINV,     /* 3D - INV | DZ | OFL | UFL | IMP */
+       FPE_FLTDIV,     /* 3E - DNML | DZ | OFL | UFL | IMP */
+       FPE_FLTINV,     /* 3F - INV | DNML | DZ | OFL | UFL | IMP */
+       FPE_FLTSUB,     /* 40 - STK */
+       FPE_FLTSUB,     /* 41 - INV | STK */
+       FPE_FLTUND,     /* 42 - DNML | STK */
+       FPE_FLTSUB,     /* 43 - INV | DNML | STK */
+       FPE_FLTDIV,     /* 44 - DZ | STK */
+       FPE_FLTSUB,     /* 45 - INV | DZ | STK */
+       FPE_FLTDIV,     /* 46 - DNML | DZ | STK */
+       FPE_FLTSUB,     /* 47 - INV | DNML | DZ | STK */
+       FPE_FLTOVF,     /* 48 - OFL | STK */
+       FPE_FLTSUB,     /* 49 - INV | OFL | STK */
+       FPE_FLTUND,     /* 4A - DNML | OFL | STK */
+       FPE_FLTSUB,     /* 4B - INV | DNML | OFL | STK */
+       FPE_FLTDIV,     /* 4C - DZ | OFL | STK */
+       FPE_FLTSUB,     /* 4D - INV | DZ | OFL | STK */
+       FPE_FLTDIV,     /* 4E - DNML | DZ | OFL | STK */
+       FPE_FLTSUB,     /* 4F - INV | DNML | DZ | OFL | STK */
+       FPE_FLTUND,     /* 50 - UFL | STK */
+       FPE_FLTSUB,     /* 51 - INV | UFL | STK */
+       FPE_FLTUND,     /* 52 - DNML | UFL | STK */
+       FPE_FLTSUB,     /* 53 - INV | DNML | UFL | STK */
+       FPE_FLTDIV,     /* 54 - DZ | UFL | STK */
+       FPE_FLTSUB,     /* 55 - INV | DZ | UFL | STK */
+       FPE_FLTDIV,     /* 56 - DNML | DZ | UFL | STK */
+       FPE_FLTSUB,     /* 57 - INV | DNML | DZ | UFL | STK */
+       FPE_FLTOVF,     /* 58 - OFL | UFL | STK */
+       FPE_FLTSUB,     /* 59 - INV | OFL | UFL | STK */
+       FPE_FLTUND,     /* 5A - DNML | OFL | UFL | STK */
+       FPE_FLTSUB,     /* 5B - INV | DNML | OFL | UFL | STK */
+       FPE_FLTDIV,     /* 5C - DZ | OFL | UFL | STK */
+       FPE_FLTSUB,     /* 5D - INV | DZ | OFL | UFL | STK */
+       FPE_FLTDIV,     /* 5E - DNML | DZ | OFL | UFL | STK */
+       FPE_FLTSUB,     /* 5F - INV | DNML | DZ | OFL | UFL | STK */
+       FPE_FLTRES,     /* 60 - IMP | STK */
+       FPE_FLTSUB,     /* 61 - INV | IMP | STK */
+       FPE_FLTUND,     /* 62 - DNML | IMP | STK */
+       FPE_FLTSUB,     /* 63 - INV | DNML | IMP | STK */
+       FPE_FLTDIV,     /* 64 - DZ | IMP | STK */
+       FPE_FLTSUB,     /* 65 - INV | DZ | IMP | STK */
+       FPE_FLTDIV,     /* 66 - DNML | DZ | IMP | STK */
+       FPE_FLTSUB,     /* 67 - INV | DNML | DZ | IMP | STK */
+       FPE_FLTOVF,     /* 68 - OFL | IMP | STK */
+       FPE_FLTSUB,     /* 69 - INV | OFL | IMP | STK */
+       FPE_FLTUND,     /* 6A - DNML | OFL | IMP | STK */
+       FPE_FLTSUB,     /* 6B - INV | DNML | OFL | IMP | STK */
+       FPE_FLTDIV,     /* 6C - DZ | OFL | IMP | STK */
+       FPE_FLTSUB,     /* 6D - INV | DZ | OFL | IMP | STK */
+       FPE_FLTDIV,     /* 6E - DNML | DZ | OFL | IMP | STK */
+       FPE_FLTSUB,     /* 6F - INV | DNML | DZ | OFL | IMP | STK */
+       FPE_FLTUND,     /* 70 - UFL | IMP | STK */
+       FPE_FLTSUB,     /* 71 - INV | UFL | IMP | STK */
+       FPE_FLTUND,     /* 72 - DNML | UFL | IMP | STK */
+       FPE_FLTSUB,     /* 73 - INV | DNML | UFL | IMP | STK */
+       FPE_FLTDIV,     /* 74 - DZ | UFL | IMP | STK */
+       FPE_FLTSUB,     /* 75 - INV | DZ | UFL | IMP | STK */
+       FPE_FLTDIV,     /* 76 - DNML | DZ | UFL | IMP | STK */
+       FPE_FLTSUB,     /* 77 - INV | DNML | DZ | UFL | IMP | STK */
+       FPE_FLTOVF,     /* 78 - OFL | UFL | IMP | STK */
+       FPE_FLTSUB,     /* 79 - INV | OFL | UFL | IMP | STK */
+       FPE_FLTUND,     /* 7A - DNML | OFL | UFL | IMP | STK */
+       FPE_FLTSUB,     /* 7B - INV | DNML | OFL | UFL | IMP | STK */
+       FPE_FLTDIV,     /* 7C - DZ | OFL | UFL | IMP | STK */
+       FPE_FLTSUB,     /* 7D - INV | DZ | OFL | UFL | IMP | STK */
+       FPE_FLTDIV,     /* 7E - DNML | DZ | OFL | UFL | IMP | STK */
+       FPE_FLTSUB,     /* 7F - INV | DNML | DZ | OFL | UFL | IMP | STK */
+};
+
+/*
+ * Read the FP status and control words, then generate si_code value
+ * for SIGFPE.  The error code chosen will be one of the
+ * FPE_... macros.  It will be sent as the second argument to old
+ * BSD-style signal handlers and as "siginfo_t->si_code" (second
+ * argument) to SA_SIGINFO signal handlers.
+ *
+ * Some time ago, we cleared the x87 exceptions with FNCLEX there.
+ * Clearing exceptions was necessary mainly to avoid IRQ13 bugs.  The
+ * usermode code which understands the FPU hardware enough to enable
+ * the exceptions, can also handle clearing the exception state in the
+ * handler.  The only consequence of not clearing the exception is the
+ * rethrow of the SIGFPE on return from the signal handler and
+ * reexecution of the corresponding instruction.
+ *
+ * For XMM traps, the exceptions were never cleared.
+ */
+int
+npxtrap_x87(void)
+{
+       u_short control, status;
+
+       if (!hw_float) {
+               printf(
+       "npxtrap_x87: fpcurthread = %p, curthread = %p, hw_float = %d\n",
+                      PCPU_GET(fpcurthread), curthread, hw_float);
+               panic("npxtrap from nowhere");
+       }
+       critical_enter();
+
+       /*
+        * Interrupt handling (for another interrupt) may have pushed the
+        * state to memory.  Fetch the relevant parts of the state from
+        * wherever they are.
+        */
+       if (PCPU_GET(fpcurthread) != curthread) {
+               control = GET_FPU_CW(curthread);
+               status = GET_FPU_SW(curthread);
+       } else {
+               fnstcw(&control);
+               fnstsw(&status);
+       }
+       critical_exit();
+       return (fpetable[status & ((~control & 0x3f) | 0x40)]);
+}
+
+int
+npxtrap_sse(void)
+{
+       u_int mxcsr;
+
+       if (!hw_float) {
+               printf(
+       "npxtrap_sse: fpcurthread = %p, curthread = %p, hw_float = %d\n",
+                      PCPU_GET(fpcurthread), curthread, hw_float);
+               panic("npxtrap from nowhere");
+       }
+       critical_enter();
+       if (PCPU_GET(fpcurthread) != curthread)
+               mxcsr = curthread->td_pcb->pcb_save->sv_xmm.sv_env.en_mxcsr;
+       else
+               stmxcsr(&mxcsr);
+       critical_exit();
+       return (fpetable[(mxcsr & (~mxcsr >> 7)) & 0x3f]);
+}
+
+/*
+ * Implement device not available (DNA) exception
+ *
+ * It would be better to switch FP context here (if curthread != fpcurthread)
+ * and not necessarily for every context switch, but it is too hard to
+ * access foreign pcb's.
+ *
+ * Returns 0 without touching any state when hw_float is zero.  Otherwise
+ * returns 1 after curthread has been recorded as fpcurthread and its FPU
+ * state has been loaded from curpcb->pcb_save.  Panics if a different
+ * thread is still recorded as fpcurthread.
+ */
+
+/* Number of times npxdna() was entered with fpcurthread == curthread. */
+static int err_count = 0;
+
+int
+npxdna(void)
+{
+
+       if (!hw_float)
+               return (0);
+       critical_enter();
+       if (PCPU_GET(fpcurthread) == curthread) {
+               /*
+                * curthread is already recorded as the FPU owner: log the
+                * occurrence, call stop_emulating() and report success.
+                */
+               printf("npxdna: fpcurthread == curthread %d times\n",
+                   ++err_count);
+               stop_emulating();
+               critical_exit();
+               return (1);
+       }
+       if (PCPU_GET(fpcurthread) != NULL) {
+               /* Another thread is still recorded as the owner. */
+               printf("npxdna: fpcurthread = %p (%d), curthread = %p (%d)\n",
+                      PCPU_GET(fpcurthread),
+                      PCPU_GET(fpcurthread)->td_proc->p_pid,
+                      curthread, curthread->td_proc->p_pid);
+               panic("npxdna");
+       }
+       stop_emulating();
+       /*
+        * Record new context early in case frstor causes a trap.
+        */
+       PCPU_SET(fpcurthread, curthread);
+
+       if (cpu_fxsr)
+               fpu_clean_state();
+
+       if ((curpcb->pcb_flags & PCB_NPXINITDONE) == 0) {
+               /*
+                * This is the first time this thread has used the FPU or
+                * the PCB doesn't contain a clean FPU state.  Explicitly
+                * load an initial state.
+                *
+                * We prefer to restore the state from the actual save
+                * area in PCB instead of directly loading from
+                * npx_initialstate, to ignite the XSAVEOPT
+                * tracking engine.
+                */
+               bcopy(npx_initialstate, curpcb->pcb_save,
+                   cpu_max_ext_state_size);
+               fpurstor(curpcb->pcb_save);
+               /* Apply the per-PCB control word when it is not the default. */
+               if (curpcb->pcb_initial_npxcw != __INITIAL_NPXCW__)
+                       fldcw(curpcb->pcb_initial_npxcw);
+               curpcb->pcb_flags |= PCB_NPXINITDONE;
+               if (PCB_USER_FPU(curpcb))
+                       curpcb->pcb_flags |= PCB_NPXUSERINITDONE;
+       } else {
+               fpurstor(curpcb->pcb_save);
+       }
+       critical_exit();
+
+       return (1);
+}
+
+/*
+ * Wrapper for fpusave() called from context switch routines.
+ *
+ * Stores the FPU state into *addr, using xsaveopt with xsave_mask when
+ * use_xsaveopt is set and fpusave() otherwise, then calls
+ * start_emulating() and clears fpcurthread.
+ *
+ * npxsave() must be called with interrupts disabled, so that it clears
+ * fpcurthread atomically with saving the state.  We require callers to do the
+ * disabling, since most callers need to disable interrupts anyway to call
+ * npxsave() atomically with checking fpcurthread.
+ */
+void
+npxsave(union savefpu *addr)
+{
+
+       stop_emulating();
+       if (use_xsaveopt)
+               xsaveopt((char *)addr, xsave_mask);
+       else
+               fpusave(addr);
+       start_emulating();
+       PCPU_SET(fpcurthread, NULL);
+}
+
+/*
+ * Capture the co-processor state into *addr for suspend/resume.
+ *
+ * Does nothing when hw_float is zero.  When no thread is recorded as
+ * fpcurthread, *addr receives a copy of npx_initialstate; otherwise the
+ * live state is stored with fpusave() and %cr0 is put back to the value
+ * it had on entry.
+ */
+void
+npxsuspend(union savefpu *addr)
+{
+       register_t saved_cr0;
+
+       if (!hw_float)
+               return;
+
+       if (PCPU_GET(fpcurthread) != NULL) {
+               saved_cr0 = rcr0();
+               stop_emulating();
+               fpusave(addr);
+               load_cr0(saved_cr0);
+       } else {
+               bcopy(npx_initialstate, addr, cpu_max_ext_state_size);
+       }
+}
+
+/*
+ * Reload the co-processor state from *addr.
+ *
+ * Does nothing when hw_float is zero.  Otherwise calls npxinit(false),
+ * loads *addr with fpurstor() and puts %cr0 back to the value it had on
+ * entry.
+ */
+void
+npxresume(union savefpu *addr)
+{
+       register_t saved_cr0;
+
+       if (hw_float) {
+               saved_cr0 = rcr0();
+               npxinit(false);
+               stop_emulating();
+               fpurstor(addr);
+               load_cr0(saved_cr0);
+       }
+}
+
+/*
+ * Give up curthread's ownership of the FPU without saving its contents.
+ *
+ * curthread must be fpcurthread and the caller must be in a critical
+ * section; both conditions are asserted.  fpcurthread is cleared,
+ * PCB_NPXINITDONE is removed from the thread's pcb_flags, and
+ * start_emulating() is called.
+ */
+void
+npxdrop(void)
+{
+       struct thread *td;
+
+       /*
+        * Discard pending exceptions in the !cpu_fxsr case so that unmasked
+        * ones don't cause a panic on the next frstor.
+        */
+       if (!cpu_fxsr)
+               fnclex();
+
+       td = PCPU_GET(fpcurthread);
+       KASSERT(td == curthread, ("npxdrop: fpcurthread != curthread"));
+       CRITICAL_ASSERT(td);
+       PCPU_SET(fpcurthread, NULL);
+       td->td_pcb->pcb_flags &= ~PCB_NPXINITDONE;
+       start_emulating();
+}
+
+/*
+ * Get the user state of the FPU into pcb->pcb_user_save without
+ * dropping ownership (if possible).  It returns the FPU ownership
+ * status.
+ */
+int
+npxgetregs(struct thread *td)
+{
+       struct pcb *pcb;
+       uint64_t *xstate_bv, bit;
+       char *sa;
+       int max_ext_n, i;
+       int owned;
+
+       if (!hw_float)
+               return (_MC_FPOWNED_NONE);
+
+       pcb = td->td_pcb;
+       if ((pcb->pcb_flags & PCB_NPXINITDONE) == 0) {
+               bcopy(npx_initialstate, get_pcb_user_save_pcb(pcb),
+                   cpu_max_ext_state_size);
+               SET_FPU_CW(get_pcb_user_save_pcb(pcb), pcb->pcb_initial_npxcw);
+               npxuserinited(td);
+               return (_MC_FPOWNED_PCB);
+       }
+       critical_enter();
+       if (td == PCPU_GET(fpcurthread)) {
+               fpusave(get_pcb_user_save_pcb(pcb));
+               if (!cpu_fxsr)
+                       /*
+                        * fnsave initializes the FPU and destroys whatever
+                        * context it contains.  Make sure the FPU owner
+                        * starts with a clean state next time.
+                        */
+                       npxdrop();
+               owned = _MC_FPOWNED_FPU;
+       } else {
+               owned = _MC_FPOWNED_PCB;
+       }
+       critical_exit();
+       if (use_xsave) {
+               /*
+                * Handle partially saved state.
+                */
+               sa = (char *)get_pcb_user_save_pcb(pcb);
+               xstate_bv = (uint64_t *)(sa + sizeof(union savefpu) +
+                   offsetof(struct xstate_hdr, xstate_bv));
+               if (xsave_mask >> 32 != 0)
+                       max_ext_n = fls(xsave_mask >> 32) + 32;
+               else
+                       max_ext_n = fls(xsave_mask);
+               for (i = 0; i < max_ext_n; i++) {
+                       bit = 1ULL << i;
+                       if ((xsave_mask & bit) == 0 || (*xstate_bv & bit) != 0)
+                               continue;
+                       bcopy((char *)npx_initialstate +

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
_______________________________________________
svn-src-head@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"

Reply via email to