Author: kib
Date: Fri Feb  3 12:20:44 2017
New Revision: 313150
URL: https://svnweb.freebsd.org/changeset/base/313150

Log:
  MFC r289894:
  CLFLUSH does not need barriers; the instruction is ordered with respect to other writes on Intel CPUs.
  Use CLFLUSHOPT when available.
  
  MFC r312555:
  Use SFENCE for ordering CLFLUSHOPT.
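
  Taken together, the two changes select the flush instruction and the
  barriers around it roughly as follows.  This is only an illustrative
  sketch (the helper name flush_range is hypothetical); the real pmap
  code additionally honors the PMAP_CLFLUSH_THRESHOLD cutoff and skips
  the local APIC range:

        static void
        flush_range(vm_offset_t sva, vm_offset_t eva)
        {

                if ((cpu_stdext_feature & CPUID_STDEXT_CLFLUSHOPT) != 0) {
                        /* CLFLUSHOPT is weakly ordered; bracket it with SFENCE. */
                        sfence();
                        for (; sva < eva; sva += cpu_clflush_line_size)
                                clflushopt(sva);
                        sfence();
                } else if ((cpu_feature & CPUID_CLFSH) != 0) {
                        /*
                         * CLFLUSH is ordered WRT writes on Intel CPUs; other
                         * vendors get MFENCE on both sides of the loop.
                         */
                        if (cpu_vendor_id != CPU_VENDOR_INTEL)
                                mfence();
                        for (; sva < eva; sva += cpu_clflush_line_size)
                                clflush(sva);
                        if (cpu_vendor_id != CPU_VENDOR_INTEL)
                                mfence();
                } else {
                        /* No usable per-line flush; fall back to a full cache flush. */
                        pmap_invalidate_cache();
                }
        }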

Modified:
  stable/10/sys/amd64/amd64/initcpu.c
  stable/10/sys/amd64/amd64/pmap.c
  stable/10/sys/amd64/include/cpufunc.h
  stable/10/sys/i386/i386/initcpu.c
  stable/10/sys/i386/i386/pmap.c
  stable/10/sys/i386/include/cpufunc.h
Directory Properties:
  stable/10/   (props changed)

Modified: stable/10/sys/amd64/amd64/initcpu.c
==============================================================================
--- stable/10/sys/amd64/amd64/initcpu.c Fri Feb  3 12:13:55 2017        (r313149)
+++ stable/10/sys/amd64/amd64/initcpu.c Fri Feb  3 12:20:44 2017        (r313150)
@@ -253,12 +253,17 @@ initializecpucache(void)
         * CPUID_SS feature even though the native CPU supports it.
         */
        TUNABLE_INT_FETCH("hw.clflush_disable", &hw_clflush_disable);
-       if (vm_guest != VM_GUEST_NO && hw_clflush_disable == -1)
+       if (vm_guest != VM_GUEST_NO && hw_clflush_disable == -1) {
                cpu_feature &= ~CPUID_CLFSH;
+               cpu_stdext_feature &= ~CPUID_STDEXT_CLFLUSHOPT;
+       }
+
        /*
-        * Allow to disable CLFLUSH feature manually by
-        * hw.clflush_disable tunable.
+        * The kernel's use of CLFLUSH{,OPT} can be disabled manually
+        * by setting the hw.clflush_disable tunable.
         */
-       if (hw_clflush_disable == 1)
+       if (hw_clflush_disable == 1) {
                cpu_feature &= ~CPUID_CLFSH;
+               cpu_stdext_feature &= ~CPUID_STDEXT_CLFLUSHOPT;
+       }
 }
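
The hw.clflush_disable tunable is fetched with TUNABLE_INT_FETCH, so it can
be set from the loader; for example (shown here only for illustration):

        # /boot/loader.conf
        hw.clflush_disable=1    # kernel stops using both CLFLUSH and CLFLUSHOPT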

Modified: stable/10/sys/amd64/amd64/pmap.c
==============================================================================
--- stable/10/sys/amd64/amd64/pmap.c    Fri Feb  3 12:13:55 2017        (r313149)
+++ stable/10/sys/amd64/amd64/pmap.c    Fri Feb  3 12:20:44 2017        (r313150)
@@ -1789,9 +1789,8 @@ pmap_invalidate_cache_range(vm_offset_t 
 
        if ((cpu_feature & CPUID_SS) != 0 && !force)
                ; /* If "Self Snoop" is supported and allowed, do nothing. */
-       else if ((cpu_feature & CPUID_CLFSH) != 0 &&
+       else if ((cpu_stdext_feature & CPUID_STDEXT_CLFLUSHOPT) != 0 &&
            eva - sva < PMAP_CLFLUSH_THRESHOLD) {
-
                /*
                 * XXX: Some CPUs fault, hang, or trash the local APIC
                 * registers if we use CLFLUSH on the local APIC
@@ -1802,16 +1801,29 @@ pmap_invalidate_cache_range(vm_offset_t 
                        return;
 
                /*
-                * Otherwise, do per-cache line flush.  Use the mfence
+                * Otherwise, do per-cache line flush.  Use the sfence
                 * instruction to insure that previous stores are
                 * included in the write-back.  The processor
                 * propagates flush to other processors in the cache
                 * coherence domain.
                 */
-               mfence();
+               sfence();
+               for (; sva < eva; sva += cpu_clflush_line_size)
+                       clflushopt(sva);
+               sfence();
+       } else if ((cpu_feature & CPUID_CLFSH) != 0 &&
+           eva - sva < PMAP_CLFLUSH_THRESHOLD) {
+               if (pmap_kextract(sva) == lapic_paddr)
+                       return;
+               /*
+                * Writes are ordered by CLFLUSH on Intel CPUs.
+                */
+               if (cpu_vendor_id != CPU_VENDOR_INTEL)
+                       mfence();
                for (; sva < eva; sva += cpu_clflush_line_size)
                        clflush(sva);
-               mfence();
+               if (cpu_vendor_id != CPU_VENDOR_INTEL)
+                       mfence();
        } else {
 
                /*
@@ -1835,19 +1847,31 @@ pmap_invalidate_cache_pages(vm_page_t *p
 {
        vm_offset_t daddr, eva;
        int i;
+       bool useclflushopt;
 
+       useclflushopt = (cpu_stdext_feature & CPUID_STDEXT_CLFLUSHOPT) != 0;
        if (count >= PMAP_CLFLUSH_THRESHOLD / PAGE_SIZE ||
-           (cpu_feature & CPUID_CLFSH) == 0)
+           ((cpu_feature & CPUID_CLFSH) == 0 && !useclflushopt))
                pmap_invalidate_cache();
        else {
-               mfence();
+               if (useclflushopt)
+                       sfence();
+               else if (cpu_vendor_id != CPU_VENDOR_INTEL)
+                       mfence();
                for (i = 0; i < count; i++) {
                        daddr = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(pages[i]));
                        eva = daddr + PAGE_SIZE;
-                       for (; daddr < eva; daddr += cpu_clflush_line_size)
-                               clflush(daddr);
+                       for (; daddr < eva; daddr += cpu_clflush_line_size) {
+                               if (useclflushopt)
+                                       clflushopt(daddr);
+                               else
+                                       clflush(daddr);
+                       }
                }
-               mfence();
+               if (useclflushopt)
+                       sfence();
+               else if (cpu_vendor_id != CPU_VENDOR_INTEL)
+                       mfence();
        }
 }
 

Modified: stable/10/sys/amd64/include/cpufunc.h
==============================================================================
--- stable/10/sys/amd64/include/cpufunc.h       Fri Feb  3 12:13:55 2017        (r313149)
+++ stable/10/sys/amd64/include/cpufunc.h       Fri Feb  3 12:20:44 2017        (r313150)
@@ -327,6 +327,13 @@ mfence(void)
 }
 
 static __inline void
+sfence(void)
+{
+
+       __asm __volatile("sfence" : : : "memory");
+}
+
+static __inline void
 ia32_pause(void)
 {
        __asm __volatile("pause");
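
The new sfence() inline mirrors the existing mfence() wrapper.  For
comparison, the same CLFLUSHOPT/SFENCE pairing can be written in userland
with compiler intrinsics; a minimal sketch (assumes a CPU with CLFLUSHOPT,
compilation with -mclflushopt, and a hypothetical flush_buffer helper):

        #include <immintrin.h>
        #include <stddef.h>

        /* Write back every cache line of [buf, buf + len). */
        static void
        flush_buffer(char *buf, size_t len)
        {
                size_t off;

                _mm_sfence();           /* order earlier stores before the flushes */
                for (off = 0; off < len; off += 64)     /* 64 = typical line size */
                        _mm_clflushopt(buf + off);
                _mm_sfence();           /* order the flushes before later stores */
        }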

Modified: stable/10/sys/i386/i386/initcpu.c
==============================================================================
--- stable/10/sys/i386/i386/initcpu.c   Fri Feb  3 12:13:55 2017        (r313149)
+++ stable/10/sys/i386/i386/initcpu.c   Fri Feb  3 12:20:44 2017        (r313150)
@@ -826,14 +826,18 @@ initializecpucache(void)
         * CPUID_SS feature even though the native CPU supports it.
         */
        TUNABLE_INT_FETCH("hw.clflush_disable", &hw_clflush_disable);
-       if (vm_guest != VM_GUEST_NO && hw_clflush_disable == -1)
+       if (vm_guest != VM_GUEST_NO && hw_clflush_disable == -1) {
                cpu_feature &= ~CPUID_CLFSH;
+               cpu_stdext_feature &= ~CPUID_STDEXT_CLFLUSHOPT;
+       }
        /*
-        * Allow to disable CLFLUSH feature manually by
-        * hw.clflush_disable tunable.
+        * The kernel's use of CLFLUSH{,OPT} can be disabled manually
+        * by setting the hw.clflush_disable tunable.
         */
-       if (hw_clflush_disable == 1)
+       if (hw_clflush_disable == 1) {
                cpu_feature &= ~CPUID_CLFSH;
+               cpu_stdext_feature &= ~CPUID_STDEXT_CLFLUSHOPT;
+       }
 
 #if defined(PC98) && !defined(CPU_UPGRADE_HW_CACHE)
        /*

Modified: stable/10/sys/i386/i386/pmap.c
==============================================================================
--- stable/10/sys/i386/i386/pmap.c      Fri Feb  3 12:13:55 2017        (r313149)
+++ stable/10/sys/i386/i386/pmap.c      Fri Feb  3 12:20:44 2017        (r313150)
@@ -1222,9 +1222,8 @@ pmap_invalidate_cache_range(vm_offset_t 
 
        if ((cpu_feature & CPUID_SS) != 0 && !force)
                ; /* If "Self Snoop" is supported and allowed, do nothing. */
-       else if ((cpu_feature & CPUID_CLFSH) != 0 &&
+       else if ((cpu_stdext_feature & CPUID_STDEXT_CLFLUSHOPT) != 0 &&
            eva - sva < PMAP_CLFLUSH_THRESHOLD) {
-
 #ifdef DEV_APIC
                /*
                 * XXX: Some CPUs fault, hang, or trash the local APIC
@@ -1236,16 +1235,29 @@ pmap_invalidate_cache_range(vm_offset_t 
                        return;
 #endif
                /*
-                * Otherwise, do per-cache line flush.  Use the mfence
+                * Otherwise, do per-cache line flush.  Use the sfence
                 * instruction to insure that previous stores are
                 * included in the write-back.  The processor
                 * propagates flush to other processors in the cache
                 * coherence domain.
                 */
-               mfence();
+               sfence();
+               for (; sva < eva; sva += cpu_clflush_line_size)
+                       clflushopt(sva);
+               sfence();
+       } else if ((cpu_feature & CPUID_CLFSH) != 0 &&
+           eva - sva < PMAP_CLFLUSH_THRESHOLD) {
+               if (pmap_kextract(sva) == lapic_paddr)
+                       return;
+               /*
+                * Writes are ordered by CLFLUSH on Intel CPUs.
+                */
+               if (cpu_vendor_id != CPU_VENDOR_INTEL)
+                       mfence();
                for (; sva < eva; sva += cpu_clflush_line_size)
                        clflush(sva);
-               mfence();
+               if (cpu_vendor_id != CPU_VENDOR_INTEL)
+                       mfence();
        } else {
 
                /*
@@ -5316,8 +5328,10 @@ pmap_flush_page(vm_page_t m)
 {
        struct sysmaps *sysmaps;
        vm_offset_t sva, eva;
+       bool useclflushopt;
 
-       if ((cpu_feature & CPUID_CLFSH) != 0) {
+       useclflushopt = (cpu_stdext_feature & CPUID_STDEXT_CLFLUSHOPT) != 0;
+       if (useclflushopt || (cpu_feature & CPUID_CLFSH) != 0) {
                sysmaps = &sysmaps_pcpu[PCPU_GET(cpuid)];
                mtx_lock(&sysmaps->lock);
                if (*sysmaps->CMAP2)
@@ -5330,14 +5344,25 @@ pmap_flush_page(vm_page_t m)
                eva = sva + PAGE_SIZE;
 
                /*
-                * Use mfence despite the ordering implied by
-                * mtx_{un,}lock() because clflush is not guaranteed
-                * to be ordered by any other instruction.
+                * Use mfence or sfence despite the ordering implied by
+                * mtx_{un,}lock() because clflush on non-Intel CPUs
+                * and clflushopt are not guaranteed to be ordered by
+                * any other instruction.
                 */
-               mfence();
-               for (; sva < eva; sva += cpu_clflush_line_size)
-                       clflush(sva);
-               mfence();
+               if (useclflushopt)
+                       sfence();
+               else if (cpu_vendor_id != CPU_VENDOR_INTEL)
+                       mfence();
+               for (; sva < eva; sva += cpu_clflush_line_size) {
+                       if (useclflushopt)
+                               clflushopt(sva);
+                       else
+                               clflush(sva);
+               }
+               if (useclflushopt)
+                       sfence();
+               else if (cpu_vendor_id != CPU_VENDOR_INTEL)
+                       mfence();
                *sysmaps->CMAP2 = 0;
                sched_unpin();
                mtx_unlock(&sysmaps->lock);

Modified: stable/10/sys/i386/include/cpufunc.h
==============================================================================
--- stable/10/sys/i386/include/cpufunc.h        Fri Feb  3 12:13:55 2017        (r313149)
+++ stable/10/sys/i386/include/cpufunc.h        Fri Feb  3 12:20:44 2017        (r313150)
@@ -175,6 +175,13 @@ mfence(void)
        __asm __volatile("mfence" : : : "memory");
 }
 
+static __inline void
+sfence(void)
+{
+
+       __asm __volatile("sfence" : : : "memory");
+}
+
 #ifdef _KERNEL
 
 #define        HAVE_INLINE_FFS