Author: kib
Date: Wed Jun 20 18:51:38 2018
New Revision: 335455
URL: https://svnweb.freebsd.org/changeset/base/335455

Log:
  MFC r335072, r335089, r335131, r335132:
  Enable eager FPU context switch on i386 and amd64.
  
  CVE:  CVE-2018-3665
  Tested by:    emaste (smoke boot)

Modified:
  stable/10/sys/amd64/amd64/cpu_switch.S
  stable/10/sys/amd64/amd64/fpu.c
  stable/10/sys/i386/i386/swtch.s
  stable/10/sys/i386/isa/npx.c
Directory Properties:
  stable/10/   (props changed)

Modified: stable/10/sys/amd64/amd64/cpu_switch.S
==============================================================================
--- stable/10/sys/amd64/amd64/cpu_switch.S      Wed Jun 20 18:43:17 2018        (r335454)
+++ stable/10/sys/amd64/amd64/cpu_switch.S      Wed Jun 20 18:51:38 2018        (r335455)
@@ -111,10 +111,10 @@ done_store_dr:
 
        /* have we used fp, and need a save? */
        cmpq    %rdi,PCPU(FPCURTHREAD)
-       jne     3f
+       jne     2f
        movq    PCB_SAVEFPU(%r8),%r8
        clts
-       cmpl    $0,use_xsave
+       cmpl    $0,use_xsave(%rip)
        jne     1f
        fxsave  (%r8)
        jmp     2f
@@ -126,12 +126,7 @@ ctx_switch_xsave:
        /* This is patched to xsaveopt if supported, see fpuinit_bsp1() */
        xsave   (%r8)
        movq    %rcx,%rdx
-2:     smsw    %ax
-       orb     $CR0_TS,%al
-       lmsw    %ax
-       xorl    %eax,%eax
-       movq    %rax,PCPU(FPCURTHREAD)
-3:
+2:
 
        /* Save is done.  Now fire up new thread. Leave old vmspace. */
        movq    TD_PCB(%rsi),%r8
@@ -255,6 +250,8 @@ done_load_dr:
        movq    PCB_RBX(%r8),%rbx
        movq    PCB_RIP(%r8),%rax
        movq    %rax,(%rsp)
+       movq    PCPU(CURTHREAD),%rdi
+       call    fpu_activate_sw
        ret
 
        /*

Modified: stable/10/sys/amd64/amd64/fpu.c
==============================================================================
--- stable/10/sys/amd64/amd64/fpu.c     Wed Jun 20 18:43:17 2018        (r335454)
+++ stable/10/sys/amd64/amd64/fpu.c     Wed Jun 20 18:51:38 2018        (r335455)
@@ -139,6 +139,11 @@ static     void    fpu_clean_state(void);
 SYSCTL_INT(_hw, HW_FLOATINGPT, floatingpoint, CTLFLAG_RD,
     SYSCTL_NULL_INT_PTR, 1, "Floating point instructions executed in hardware");
 
+int lazy_fpu_switch = 0;
+SYSCTL_INT(_hw, OID_AUTO, lazy_fpu_switch, CTLFLAG_RWTUN | CTLFLAG_NOFETCH,
+    &lazy_fpu_switch, 0,
+    "Lazily load FPU context after context switch");
+
 int use_xsave;                 /* non-static for cpu_switch.S */
 uint64_t xsave_mask;           /* the same */
 static uma_zone_t fpu_save_area_zone;
@@ -204,6 +209,7 @@ fpuinit_bsp1(void)
        u_int cp[4];
        uint64_t xsave_mask_user;
 
+       TUNABLE_INT_FETCH("hw.lazy_fpu_switch", &lazy_fpu_switch);
        if ((cpu_feature2 & CPUID2_XSAVE) != 0) {
                use_xsave = 1;
                TUNABLE_INT_FETCH("hw.use_xsave", &use_xsave);
@@ -611,6 +617,45 @@ fputrap_sse(void)
        return (fpetable[(mxcsr & (~mxcsr >> 7)) & 0x3f]);
 }
 
+static void
+restore_fpu_curthread(struct thread *td)
+{
+       struct pcb *pcb;
+
+       /*
+        * Record new context early in case frstor causes a trap.
+        */
+       PCPU_SET(fpcurthread, td);
+
+       stop_emulating();
+       fpu_clean_state();
+       pcb = td->td_pcb;
+
+       if ((pcb->pcb_flags & PCB_FPUINITDONE) == 0) {
+               /*
+                * This is the first time this thread has used the FPU or
+                * the PCB doesn't contain a clean FPU state.  Explicitly
+                * load an initial state.
+                *
+                * We prefer to restore the state from the actual save
+                * area in PCB instead of directly loading from
+                * fpu_initialstate, to ignite the XSAVEOPT
+                * tracking engine.
+                */
+               bcopy(fpu_initialstate, pcb->pcb_save,
+                   cpu_max_ext_state_size);
+               fpurestore(pcb->pcb_save);
+               if (pcb->pcb_initial_fpucw != __INITIAL_FPUCW__)
+                       fldcw(pcb->pcb_initial_fpucw);
+               if (PCB_USER_FPU(pcb))
+                       set_pcb_flags(pcb, PCB_FPUINITDONE |
+                           PCB_USERFPUINITDONE);
+               else
+                       set_pcb_flags(pcb, PCB_FPUINITDONE);
+       } else
+               fpurestore(pcb->pcb_save);
+}
+
 /*
  * Device Not Available (DNA, #NM) exception handler.
  *
@@ -621,7 +666,9 @@ fputrap_sse(void)
 void
 fpudna(void)
 {
+       struct thread *td;
 
+       td = curthread;
        /*
         * This handler is entered with interrupts enabled, so context
         * switches may occur before critical_enter() is executed.  If
@@ -633,49 +680,38 @@ fpudna(void)
         */
        critical_enter();
 
-       if (PCPU_GET(fpcurthread) == curthread) {
-               printf("fpudna: fpcurthread == curthread\n");
+       if (__predict_false(PCPU_GET(fpcurthread) == td)) {
+               /*
+                * Some virtual machines seems to set %cr0.TS at
+                * arbitrary moments.  Silently clear the TS bit
+                * regardless of the eager/lazy FPU context switch
+                * mode.
+                */
                stop_emulating();
-               critical_exit();
-               return;
+       } else {
+               if (__predict_false(PCPU_GET(fpcurthread) != NULL)) {
+                       panic(
+                   "fpudna: fpcurthread = %p (%d), curthread = %p (%d)\n",
+                           PCPU_GET(fpcurthread),
+                           PCPU_GET(fpcurthread)->td_tid, td, td->td_tid);
+               }
+               restore_fpu_curthread(td);
        }
-       if (PCPU_GET(fpcurthread) != NULL) {
-               panic("fpudna: fpcurthread = %p (%d), curthread = %p (%d)\n",
-                   PCPU_GET(fpcurthread), PCPU_GET(fpcurthread)->td_tid,
-                   curthread, curthread->td_tid);
-       }
-       stop_emulating();
-       /*
-        * Record new context early in case frstor causes a trap.
-        */
-       PCPU_SET(fpcurthread, curthread);
+       critical_exit();
+}
 
-       fpu_clean_state();
+void fpu_activate_sw(struct thread *td); /* Called from the context switch */
+void
+fpu_activate_sw(struct thread *td)
+{
 
-       if ((curpcb->pcb_flags & PCB_FPUINITDONE) == 0) {
-               /*
-                * This is the first time this thread has used the FPU or
-                * the PCB doesn't contain a clean FPU state.  Explicitly
-                * load an initial state.
-                *
-                * We prefer to restore the state from the actual save
-                * area in PCB instead of directly loading from
-                * fpu_initialstate, to ignite the XSAVEOPT
-                * tracking engine.
-                */
-               bcopy(fpu_initialstate, curpcb->pcb_save,
-                   cpu_max_ext_state_size);
-               fpurestore(curpcb->pcb_save);
-               if (curpcb->pcb_initial_fpucw != __INITIAL_FPUCW__)
-                       fldcw(curpcb->pcb_initial_fpucw);
-               if (PCB_USER_FPU(curpcb))
-                       set_pcb_flags(curpcb,
-                           PCB_FPUINITDONE | PCB_USERFPUINITDONE);
-               else
-                       set_pcb_flags(curpcb, PCB_FPUINITDONE);
-       } else
-               fpurestore(curpcb->pcb_save);
-       critical_exit();
+       if (lazy_fpu_switch || (td->td_pflags & TDP_KTHREAD) != 0 ||
+           !PCB_USER_FPU(td->td_pcb)) {
+               PCPU_SET(fpcurthread, NULL);
+               start_emulating();
+       } else if (PCPU_GET(fpcurthread) != td) {
+               restore_fpu_curthread(td);
+       }
 }
 
 void

Modified: stable/10/sys/i386/i386/swtch.s
==============================================================================
--- stable/10/sys/i386/i386/swtch.s     Wed Jun 20 18:43:17 2018        (r335454)
+++ stable/10/sys/i386/i386/swtch.s     Wed Jun 20 18:51:38 2018        (r335455)
@@ -314,6 +314,12 @@ sw1:
 cpu_switch_load_gs:
        mov     PCB_GS(%edx),%gs
 
+       pushl   %edx
+       pushl   PCPU(CURTHREAD)
+       call    npxswitch
+       popl    %edx
+       popl    %edx
+
        /* Test if debug registers should be restored. */
        testl   $PCB_DBREGS,PCB_FLAGS(%edx)
        jz      1f

Modified: stable/10/sys/i386/isa/npx.c
==============================================================================
--- stable/10/sys/i386/isa/npx.c        Wed Jun 20 18:43:17 2018        (r335454)
+++ stable/10/sys/i386/isa/npx.c        Wed Jun 20 18:51:38 2018        (r335455)
@@ -219,6 +219,11 @@ int        hw_float;
 SYSCTL_INT(_hw, HW_FLOATINGPT, floatingpoint, CTLFLAG_RD,
     &hw_float, 0, "Floating point instructions executed in hardware");
 
+int lazy_fpu_switch = 0;
+SYSCTL_INT(_hw, OID_AUTO, lazy_fpu_switch, CTLFLAG_RWTUN | CTLFLAG_NOFETCH,
+    &lazy_fpu_switch, 0,
+    "Lazily load FPU context after context switch");
+
 #ifdef CPU_ENABLE_SSE
 int use_xsave;
 uint64_t xsave_mask;
@@ -360,6 +365,7 @@ npxinit_bsp1(void)
        u_int cp[4];
        uint64_t xsave_mask_user;
 
+       TUNABLE_INT_FETCH("hw.lazy_fpu_switch", &lazy_fpu_switch);
        if (cpu_fxsr && (cpu_feature2 & CPUID2_XSAVE) != 0) {
                use_xsave = 1;
                TUNABLE_INT_FETCH("hw.use_xsave", &use_xsave);
@@ -840,49 +846,22 @@ npxtrap_sse(void)
 }
 #endif
 
-/*
- * Implement device not available (DNA) exception
- *
- * It would be better to switch FP context here (if curthread != fpcurthread)
- * and not necessarily for every context switch, but it is too hard to
- * access foreign pcb's.
- */
-
-static int err_count = 0;
-
-int
-npxdna(void)
+static void
+restore_npx_curthread(struct thread *td, struct pcb *pcb)
 {
 
-       if (!hw_float)
-               return (0);
-       critical_enter();
-       if (PCPU_GET(fpcurthread) == curthread) {
-               printf("npxdna: fpcurthread == curthread %d times\n",
-                   ++err_count);
-               stop_emulating();
-               critical_exit();
-               return (1);
-       }
-       if (PCPU_GET(fpcurthread) != NULL) {
-               printf("npxdna: fpcurthread = %p (%d), curthread = %p (%d)\n",
-                      PCPU_GET(fpcurthread),
-                      PCPU_GET(fpcurthread)->td_proc->p_pid,
-                      curthread, curthread->td_proc->p_pid);
-               panic("npxdna");
-       }
-       stop_emulating();
        /*
         * Record new context early in case frstor causes a trap.
         */
-       PCPU_SET(fpcurthread, curthread);
+       PCPU_SET(fpcurthread, td);
 
+       stop_emulating();
 #ifdef CPU_ENABLE_SSE
        if (cpu_fxsr)
                fpu_clean_state();
 #endif
 
-       if ((curpcb->pcb_flags & PCB_NPXINITDONE) == 0) {
+       if ((pcb->pcb_flags & PCB_NPXINITDONE) == 0) {
                /*
                 * This is the first time this thread has used the FPU or
                 * the PCB doesn't contain a clean FPU state.  Explicitly
@@ -893,18 +872,54 @@ npxdna(void)
                 * npx_initialstate, to ignite the XSAVEOPT
                 * tracking engine.
                 */
-               bcopy(npx_initialstate, curpcb->pcb_save, cpu_max_ext_state_size);
-               fpurstor(curpcb->pcb_save);
-               if (curpcb->pcb_initial_npxcw != __INITIAL_NPXCW__)
-                       fldcw(curpcb->pcb_initial_npxcw);
-               curpcb->pcb_flags |= PCB_NPXINITDONE;
-               if (PCB_USER_FPU(curpcb))
-                       curpcb->pcb_flags |= PCB_NPXUSERINITDONE;
+               bcopy(npx_initialstate, pcb->pcb_save, cpu_max_ext_state_size);
+               fpurstor(pcb->pcb_save);
+               if (pcb->pcb_initial_npxcw != __INITIAL_NPXCW__)
+                       fldcw(pcb->pcb_initial_npxcw);
+               pcb->pcb_flags |= PCB_NPXINITDONE;
+               if (PCB_USER_FPU(pcb))
+                       pcb->pcb_flags |= PCB_NPXUSERINITDONE;
        } else {
-               fpurstor(curpcb->pcb_save);
+               fpurstor(pcb->pcb_save);
        }
-       critical_exit();
+}
 
+/*
+ * Implement device not available (DNA) exception
+ *
+ * It would be better to switch FP context here (if curthread != fpcurthread)
+ * and not necessarily for every context switch, but it is too hard to
+ * access foreign pcb's.
+ */
+int
+npxdna(void)
+{
+       struct thread *td;
+
+       if (!hw_float)
+               return (0);
+       td = curthread;
+       critical_enter();
+       if (__predict_false(PCPU_GET(fpcurthread) == td)) {
+               /*
+                * Some virtual machines seems to set %cr0.TS at
+                * arbitrary moments.  Silently clear the TS bit
+                * regardless of the eager/lazy FPU context switch
+                * mode.
+                */
+               stop_emulating();
+       } else {
+               if (__predict_false(PCPU_GET(fpcurthread) != NULL)) {
+                       printf(
+                   "npxdna: fpcurthread = %p (%d), curthread = %p (%d)\n",
+                           PCPU_GET(fpcurthread),
+                           PCPU_GET(fpcurthread)->td_proc->p_pid,
+                           td, td->td_proc->p_pid);
+                       panic("npxdna");
+               }
+               restore_npx_curthread(td, td->td_pcb);
+       }
+       critical_exit();
        return (1);
 }
 
@@ -928,8 +943,20 @@ npxsave(addr)
        else
 #endif
                fpusave(addr);
-       start_emulating();
-       PCPU_SET(fpcurthread, NULL);
+}
+
+void npxswitch(struct thread *td, struct pcb *pcb);
+void
+npxswitch(struct thread *td, struct pcb *pcb)
+{
+
+       if (lazy_fpu_switch || (td->td_pflags & TDP_KTHREAD) != 0 ||
+           !PCB_USER_FPU(pcb)) {
+               start_emulating();
+               PCPU_SET(fpcurthread, NULL);
+       } else if (PCPU_GET(fpcurthread) != td) {
+               restore_npx_curthread(td, pcb);
+       }
 }
 
 /*
_______________________________________________
svn-src-all@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"

Reply via email to