Author: kib
Date: Sun Sep  2 10:51:31 2018
New Revision: 338427
URL: https://svnweb.freebsd.org/changeset/base/338427

Log:
  MFC r338068, r338113:
  Update L1TF workaround to sustain L1D pollution from NMI.

Modified:
  stable/11/sys/amd64/amd64/exception.S
  stable/11/sys/amd64/amd64/support.S
  stable/11/sys/amd64/amd64/trap.c
  stable/11/sys/amd64/include/md_var.h
  stable/11/sys/amd64/vmm/intel/vmx.c
  stable/11/sys/amd64/vmm/intel/vmx_support.S
Directory Properties:
  stable/11/   (props changed)

Modified: stable/11/sys/amd64/amd64/exception.S
==============================================================================
--- stable/11/sys/amd64/amd64/exception.S       Sat Sep  1 16:16:40 2018        (r338426)
+++ stable/11/sys/amd64/amd64/exception.S       Sun Sep  2 10:51:31 2018        (r338427)
@@ -848,7 +848,10 @@ nocallchain:
        movl    %edx,%eax
        shrq    $32,%rdx
        wrmsr
-       movq    %r13,%cr3
+       cmpb    $0, nmi_flush_l1d_sw(%rip)
+       je      2f
+       call    flush_l1d_sw            /* bhyve L1TF assist */
+2:     movq    %r13,%cr3
        RESTORE_REGS
        addq    $TF_RIP,%rsp
        jmp     doreti_iret

Modified: stable/11/sys/amd64/amd64/support.S
==============================================================================
--- stable/11/sys/amd64/amd64/support.S Sat Sep  1 16:16:40 2018        (r338426)
+++ stable/11/sys/amd64/amd64/support.S Sun Sep  2 10:51:31 2018        (r338427)
@@ -892,3 +892,36 @@ ENTRY(handle_ibrs_exit_rs)
 END(handle_ibrs_exit_rs)
 
        .noaltmacro
+
+/*
+ * Flush L1D cache.  Load enough of the data from the kernel text
+ * to flush existing L1D content.
+ *
+ * N.B. The function does not follow ABI calling conventions, it corrupts %rbx.
+ * The vmm.ko caller expects that only %rax, %rdx, %rbx, %rcx, %r9, and %rflags
+ * registers are clobbered.  The NMI handler caller only needs %r13 preserved.
+ */
+ENTRY(flush_l1d_sw)
+#define        L1D_FLUSH_SIZE  (64 * 1024)
+       movq    $KERNBASE, %r9
+       movq    $-L1D_FLUSH_SIZE, %rcx
+       /*
+        * pass 1: Preload TLB.
+        * Kernel text is mapped using superpages.  TLB preload is
+        * done for the benefit of older CPUs which split 2M page
+        * into 4k TLB entries.
+        */
+1:     movb    L1D_FLUSH_SIZE(%r9, %rcx), %al
+       addq    $PAGE_SIZE, %rcx
+       jne     1b
+       xorl    %eax, %eax
+       cpuid
+       movq    $-L1D_FLUSH_SIZE, %rcx
+       /* pass 2: Read each cache line. */
+2:     movb    L1D_FLUSH_SIZE(%r9, %rcx), %al
+       addq    $64, %rcx
+       jne     2b
+       lfence
+       ret
+#undef L1D_FLUSH_SIZE
+END(flush_l1d_sw)

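For readers who prefer C to assembly, the following is a rough, self-contained sketch of
what flush_l1d_sw does.  The flush_area buffer and the explicit compiler barrier are
illustrative stand-ins; the real routine walks 64 KB of kernel text starting at KERNBASE
and uses CPUID as the serializing instruction between the two passes.

    #include <stddef.h>
    #include <stdint.h>

    #define L1D_FLUSH_SIZE  (64 * 1024)     /* enough data to displace the L1D */
    #define SW_PAGE_SIZE    4096
    #define CACHE_LINE      64

    /* Stand-in for the 64 KB of kernel text read by the real routine. */
    static volatile uint8_t flush_area[L1D_FLUSH_SIZE];

    static void
    flush_l1d_sw_sketch(void)
    {
            volatile uint8_t sink;
            size_t i;

            /*
             * Pass 1: touch one byte per page so the TLB is populated
             * before the cache-displacing pass; this helps CPUs that
             * split the kernel's 2M superpages into 4K TLB entries.
             */
            for (i = 0; i < L1D_FLUSH_SIZE; i += SW_PAGE_SIZE)
                    sink = flush_area[i];

            /* The assembly serializes with CPUID here; a barrier stands in. */
            __asm__ __volatile__("" ::: "memory");

            /* Pass 2: read one byte per cache line to displace L1D content. */
            for (i = 0; i < L1D_FLUSH_SIZE; i += CACHE_LINE)
                    sink = flush_area[i];

            (void)sink;
    }
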
Modified: stable/11/sys/amd64/amd64/trap.c
==============================================================================
--- stable/11/sys/amd64/amd64/trap.c    Sat Sep  1 16:16:40 2018        (r338426)
+++ stable/11/sys/amd64/amd64/trap.c    Sun Sep  2 10:51:31 2018        (r338427)
@@ -158,6 +158,20 @@ SYSCTL_INT(_machdep, OID_AUTO, uprintf_signal, CTLFLAG
     "Print debugging information on trap signal to ctty");
 
 /*
+ * Control L1D flush on return from NMI.
+ *
+ * Tunable can be set to the following values:
+ * 0 - only enable flush on return from NMI if required by vmm.ko (default)
+ * >1 - always flush on return from NMI.
+ *
+ * Post-boot, the sysctl indicates if flushing is currently enabled.
+ */
+int nmi_flush_l1d_sw;
+SYSCTL_INT(_machdep, OID_AUTO, nmi_flush_l1d_sw, CTLFLAG_RWTUN,
+    &nmi_flush_l1d_sw, 0,
+    "Flush L1 Data Cache on NMI exit, software bhyve L1TF mitigation assist");
+
+/*
  * Exception, fault, and trap interface to the FreeBSD kernel.
  * This common code is called from assembly language IDT gate entry
  * routines that prepare a suitable stack frame, and restore this

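Usage note (not part of the commit): because the sysctl is declared CTLFLAG_RWTUN,
machdep.nmi_flush_l1d_sw can be set both as a boot-time tunable and at runtime, e.g.:

    # /boot/loader.conf: always flush L1D on NMI return, independent of vmm.ko
    machdep.nmi_flush_l1d_sw="2"

    # or at runtime
    sysctl machdep.nmi_flush_l1d_sw=2
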
Modified: stable/11/sys/amd64/include/md_var.h
==============================================================================
--- stable/11/sys/amd64/include/md_var.h        Sat Sep  1 16:16:40 2018        (r338426)
+++ stable/11/sys/amd64/include/md_var.h        Sun Sep  2 10:51:31 2018        (r338427)
@@ -38,6 +38,7 @@ extern uint64_t       *vm_page_dump;
 extern int     hw_lower_amd64_sharedpage;
 extern int     hw_ibrs_disable;
 extern int     hw_ssb_disable;
+extern int     nmi_flush_l1d_sw;
 
 /*
  * The file "conf/ldscript.amd64" defines the symbol "kernphys".  Its

Modified: stable/11/sys/amd64/vmm/intel/vmx.c
==============================================================================
--- stable/11/sys/amd64/vmm/intel/vmx.c Sat Sep  1 16:16:40 2018        (r338426)
+++ stable/11/sys/amd64/vmm/intel/vmx.c Sun Sep  2 10:51:31 2018        (r338427)
@@ -188,8 +188,11 @@ SYSCTL_UINT(_hw_vmm_vmx, OID_AUTO, vpid_alloc_failed, 
 static int guest_l1d_flush;
 SYSCTL_INT(_hw_vmm_vmx, OID_AUTO, l1d_flush, CTLFLAG_RD,
     &guest_l1d_flush, 0, NULL);
+static int guest_l1d_flush_sw;
+SYSCTL_INT(_hw_vmm_vmx, OID_AUTO, l1d_flush_sw, CTLFLAG_RD,
+    &guest_l1d_flush_sw, 0, NULL);
 
-uint64_t vmx_msr_flush_cmd;
+static struct msr_entry msr_load_list[1] __aligned(16);
 
 /*
  * Use the last page below 4GB as the APIC access address. This address is
@@ -500,6 +503,9 @@ vmx_cleanup(void)
                vpid_unr = NULL;
        }
 
+       if (nmi_flush_l1d_sw == 1)
+               nmi_flush_l1d_sw = 0;
+
        smp_rendezvous(NULL, vmx_disable, NULL, NULL);
 
        return (0);
@@ -728,11 +734,30 @@ vmx_init(int ipinum)
 
        guest_l1d_flush = (cpu_ia32_arch_caps & IA32_ARCH_CAP_RDCL_NO) == 0;
        TUNABLE_INT_FETCH("hw.vmm.l1d_flush", &guest_l1d_flush);
-       if (guest_l1d_flush &&
-           (cpu_stdext_feature3 & CPUID_STDEXT3_L1D_FLUSH) != 0)
-               vmx_msr_flush_cmd = IA32_FLUSH_CMD_L1D;
 
        /*
+        * L1D cache flush is enabled.  Use IA32_FLUSH_CMD MSR when
+        * available.  Otherwise fall back to the software flush
+        * method which loads enough data from the kernel text to
+        * flush existing L1D content, both on VMX entry and on NMI
+        * return.
+        */
+       if (guest_l1d_flush) {
+               if ((cpu_stdext_feature3 & CPUID_STDEXT3_L1D_FLUSH) == 0) {
+                       guest_l1d_flush_sw = 1;
+                       TUNABLE_INT_FETCH("hw.vmm.l1d_flush_sw",
+                           &guest_l1d_flush_sw);
+               }
+               if (guest_l1d_flush_sw) {
+                       if (nmi_flush_l1d_sw <= 1)
+                               nmi_flush_l1d_sw = 1;
+               } else {
+                       msr_load_list[0].index = MSR_IA32_FLUSH_CMD;
+                       msr_load_list[0].val = IA32_FLUSH_CMD_L1D;
+               }
+       }
+
+       /*
         * Stash the cr0 and cr4 bits that must be fixed to 0 or 1
         */
        fixed0 = rdmsr(MSR_VMX_CR0_FIXED0);
@@ -920,6 +945,15 @@ vmx_vminit(struct vm *vm, pmap_t pmap)
                error += vmwrite(VMCS_ENTRY_CTLS, entry_ctls);
                error += vmwrite(VMCS_MSR_BITMAP, vtophys(vmx->msr_bitmap));
                error += vmwrite(VMCS_VPID, vpid[i]);
+
+               if (guest_l1d_flush && !guest_l1d_flush_sw) {
+                       vmcs_write(VMCS_ENTRY_MSR_LOAD, pmap_kextract(
+                           (vm_offset_t)&msr_load_list[0]));
+                       vmcs_write(VMCS_ENTRY_MSR_LOAD_COUNT,
+                           nitems(msr_load_list));
+                       vmcs_write(VMCS_EXIT_MSR_STORE, 0);
+                       vmcs_write(VMCS_EXIT_MSR_STORE_COUNT, 0);
+               }
 
                /* exception bitmap */
                if (vcpu_trace_exceptions(vm, i))

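To summarize the selection logic introduced above, here is a condensed paraphrase that
folds together the vmx_init() and vmx_vminit() pieces from the diff (error handling,
tunable plumbing, and the per-vCPU loop are omitted; names are as in the diff):

    /* vmx_init(): pick a flush method when the CPU is affected by L1TF. */
    guest_l1d_flush = (cpu_ia32_arch_caps & IA32_ARCH_CAP_RDCL_NO) == 0;
    if (guest_l1d_flush) {
            if ((cpu_stdext_feature3 & CPUID_STDEXT3_L1D_FLUSH) == 0)
                    guest_l1d_flush_sw = 1;     /* no IA32_FLUSH_CMD MSR */
            if (guest_l1d_flush_sw) {
                    /* Software flush; also flush on NMI return (see trap.c). */
                    if (nmi_flush_l1d_sw <= 1)
                            nmi_flush_l1d_sw = 1;
            } else {
                    /* Hardware flush: CPU writes this MSR on each VM entry. */
                    msr_load_list[0].index = MSR_IA32_FLUSH_CMD;
                    msr_load_list[0].val = IA32_FLUSH_CMD_L1D;
            }
    }

    /* vmx_vminit(), per vCPU, hardware-flush case only: */
    if (guest_l1d_flush && !guest_l1d_flush_sw) {
            vmcs_write(VMCS_ENTRY_MSR_LOAD,
                pmap_kextract((vm_offset_t)&msr_load_list[0]));
            vmcs_write(VMCS_ENTRY_MSR_LOAD_COUNT, nitems(msr_load_list));
            vmcs_write(VMCS_EXIT_MSR_STORE, 0);
            vmcs_write(VMCS_EXIT_MSR_STORE_COUNT, 0);
    }
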
Modified: stable/11/sys/amd64/vmm/intel/vmx_support.S
==============================================================================
--- stable/11/sys/amd64/vmm/intel/vmx_support.S Sat Sep  1 16:16:40 2018        (r338426)
+++ stable/11/sys/amd64/vmm/intel/vmx_support.S Sun Sep  2 10:51:31 2018        (r338427)
@@ -176,44 +176,10 @@ ENTRY(vmx_enter_guest)
        jbe     invept_error            /* Check invept instruction error */
 
 guest_restore:
-
-       /*
-        * Flush L1D cache if requested.  Use IA32_FLUSH_CMD MSR if available,
-        * otherwise load enough of the data from the zero_region to flush
-        * existing L1D content.
-        */
-#define        L1D_FLUSH_SIZE  (64 * 1024)
        movl    %edx, %r8d
-       cmpb    $0, guest_l1d_flush(%rip)
+       cmpb    $0, guest_l1d_flush_sw(%rip)
        je      after_l1d
-       movq    vmx_msr_flush_cmd(%rip), %rax
-       testq   %rax, %rax
-       jz      1f
-       movq    %rax, %rdx
-       shrq    $32, %rdx
-       movl    $MSR_IA32_FLUSH_CMD, %ecx
-       wrmsr
-       jmp     after_l1d
-1:     movq    $KERNBASE, %r9
-       movq    $-L1D_FLUSH_SIZE, %rcx
-       /*
-        * pass 1: Preload TLB.
-        * Kernel text is mapped using superpages.  TLB preload is
-        * done for the benefit of older CPUs which split 2M page
-        * into 4k TLB entries.
-        */
-2:     movb    L1D_FLUSH_SIZE(%r9, %rcx), %al
-       addq    $PAGE_SIZE, %rcx
-       jne     2b
-       xorl    %eax, %eax
-       cpuid
-       movq    $-L1D_FLUSH_SIZE, %rcx
-       /* pass 2: Read each cache line */
-3:     movb    L1D_FLUSH_SIZE(%r9, %rcx), %al
-       addq    $64, %rcx
-       jne     3b
-       lfence
-#undef L1D_FLUSH_SIZE
+       call    flush_l1d_sw
 after_l1d:
        cmpl    $0, %r8d
        je      do_launch