Author: kib
Date: Fri Feb 3 12:20:44 2017
New Revision: 313150
URL: https://svnweb.freebsd.org/changeset/base/313150

Log:
  MFC r289894:
  CLFLUSH does not need barriers, the instruction is ordered WRT other
  writes.  Use CLFLUSHOPT when available.

  MFC r312555:
  Use SFENCE for ordering CLFLUSHOPT.

Modified:
  stable/10/sys/amd64/amd64/initcpu.c
  stable/10/sys/amd64/amd64/pmap.c
  stable/10/sys/amd64/include/cpufunc.h
  stable/10/sys/i386/i386/initcpu.c
  stable/10/sys/i386/i386/pmap.c
  stable/10/sys/i386/include/cpufunc.h
Directory Properties:
  stable/10/   (props changed)

Modified: stable/10/sys/amd64/amd64/initcpu.c
==============================================================================
--- stable/10/sys/amd64/amd64/initcpu.c	Fri Feb 3 12:13:55 2017	(r313149)
+++ stable/10/sys/amd64/amd64/initcpu.c	Fri Feb 3 12:20:44 2017	(r313150)
@@ -253,12 +253,17 @@ initializecpucache(void)
 	 * CPUID_SS feature even though the native CPU supports it.
 	 */
 	TUNABLE_INT_FETCH("hw.clflush_disable", &hw_clflush_disable);
-	if (vm_guest != VM_GUEST_NO && hw_clflush_disable == -1)
+	if (vm_guest != VM_GUEST_NO && hw_clflush_disable == -1) {
 		cpu_feature &= ~CPUID_CLFSH;
+		cpu_stdext_feature &= ~CPUID_STDEXT_CLFLUSHOPT;
+	}
+
 	/*
-	 * Allow to disable CLFLUSH feature manually by
-	 * hw.clflush_disable tunable.
+	 * The kernel's use of CLFLUSH{,OPT} can be disabled manually
+	 * by setting the hw.clflush_disable tunable.
 	 */
-	if (hw_clflush_disable == 1)
+	if (hw_clflush_disable == 1) {
 		cpu_feature &= ~CPUID_CLFSH;
+		cpu_stdext_feature &= ~CPUID_STDEXT_CLFLUSHOPT;
+	}
 }

Modified: stable/10/sys/amd64/amd64/pmap.c
==============================================================================
--- stable/10/sys/amd64/amd64/pmap.c	Fri Feb 3 12:13:55 2017	(r313149)
+++ stable/10/sys/amd64/amd64/pmap.c	Fri Feb 3 12:20:44 2017	(r313150)
@@ -1789,9 +1789,8 @@ pmap_invalidate_cache_range(vm_offset_t

 	if ((cpu_feature & CPUID_SS) != 0 && !force)
 		; /* If "Self Snoop" is supported and allowed, do nothing. */
-	else if ((cpu_feature & CPUID_CLFSH) != 0 &&
+	else if ((cpu_stdext_feature & CPUID_STDEXT_CLFLUSHOPT) != 0 &&
 	    eva - sva < PMAP_CLFLUSH_THRESHOLD) {
-
 		/*
 		 * XXX: Some CPUs fault, hang, or trash the local APIC
 		 * registers if we use CLFLUSH on the local APIC
@@ -1802,16 +1801,29 @@ pmap_invalidate_cache_range(vm_offset_t
 			return;

 		/*
-		 * Otherwise, do per-cache line flush.  Use the mfence
+		 * Otherwise, do per-cache line flush.  Use the sfence
 		 * instruction to insure that previous stores are
 		 * included in the write-back.  The processor
 		 * propagates flush to other processors in the cache
 		 * coherence domain.
 		 */
-		mfence();
+		sfence();
+		for (; sva < eva; sva += cpu_clflush_line_size)
+			clflushopt(sva);
+		sfence();
+	} else if ((cpu_feature & CPUID_CLFSH) != 0 &&
+	    eva - sva < PMAP_CLFLUSH_THRESHOLD) {
+		if (pmap_kextract(sva) == lapic_paddr)
+			return;
+		/*
+		 * Writes are ordered by CLFLUSH on Intel CPUs.
+		 */
+		if (cpu_vendor_id != CPU_VENDOR_INTEL)
+			mfence();
 		for (; sva < eva; sva += cpu_clflush_line_size)
 			clflush(sva);
-		mfence();
+		if (cpu_vendor_id != CPU_VENDOR_INTEL)
+			mfence();
 	} else {

 		/*
@@ -1835,19 +1847,31 @@ pmap_invalidate_cache_pages(vm_page_t *p
 {
 	vm_offset_t daddr, eva;
 	int i;
+	bool useclflushopt;

+	useclflushopt = (cpu_stdext_feature & CPUID_STDEXT_CLFLUSHOPT) != 0;
 	if (count >= PMAP_CLFLUSH_THRESHOLD / PAGE_SIZE ||
-	    (cpu_feature & CPUID_CLFSH) == 0)
+	    ((cpu_feature & CPUID_CLFSH) == 0 && !useclflushopt))
 		pmap_invalidate_cache();
 	else {
-		mfence();
+		if (useclflushopt)
+			sfence();
+		else if (cpu_vendor_id != CPU_VENDOR_INTEL)
+			mfence();
 		for (i = 0; i < count; i++) {
 			daddr = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(pages[i]));
 			eva = daddr + PAGE_SIZE;
-			for (; daddr < eva; daddr += cpu_clflush_line_size)
-				clflush(daddr);
+			for (; daddr < eva; daddr += cpu_clflush_line_size) {
+				if (useclflushopt)
+					clflushopt(daddr);
+				else
+					clflush(daddr);
+			}
 		}
-		mfence();
+		if (useclflushopt)
+			sfence();
+		else if (cpu_vendor_id != CPU_VENDOR_INTEL)
+			mfence();
 	}
 }

Modified: stable/10/sys/amd64/include/cpufunc.h
==============================================================================
--- stable/10/sys/amd64/include/cpufunc.h	Fri Feb 3 12:13:55 2017	(r313149)
+++ stable/10/sys/amd64/include/cpufunc.h	Fri Feb 3 12:20:44 2017	(r313150)
@@ -327,6 +327,13 @@ mfence(void)
 }

 static __inline void
+sfence(void)
+{
+
+	__asm __volatile("sfence" : : : "memory");
+}
+
+static __inline void
 ia32_pause(void)
 {
 	__asm __volatile("pause");

Modified: stable/10/sys/i386/i386/initcpu.c
==============================================================================
--- stable/10/sys/i386/i386/initcpu.c	Fri Feb 3 12:13:55 2017	(r313149)
+++ stable/10/sys/i386/i386/initcpu.c	Fri Feb 3 12:20:44 2017	(r313150)
@@ -826,14 +826,18 @@ initializecpucache(void)
 	 * CPUID_SS feature even though the native CPU supports it.
 	 */
 	TUNABLE_INT_FETCH("hw.clflush_disable", &hw_clflush_disable);
-	if (vm_guest != VM_GUEST_NO && hw_clflush_disable == -1)
+	if (vm_guest != VM_GUEST_NO && hw_clflush_disable == -1) {
 		cpu_feature &= ~CPUID_CLFSH;
+		cpu_stdext_feature &= ~CPUID_STDEXT_CLFLUSHOPT;
+	}
 	/*
-	 * Allow to disable CLFLUSH feature manually by
-	 * hw.clflush_disable tunable.
+	 * The kernel's use of CLFLUSH{,OPT} can be disabled manually
+	 * by setting the hw.clflush_disable tunable.
 	 */
-	if (hw_clflush_disable == 1)
+	if (hw_clflush_disable == 1) {
 		cpu_feature &= ~CPUID_CLFSH;
+		cpu_stdext_feature &= ~CPUID_STDEXT_CLFLUSHOPT;
+	}

 #if defined(PC98) && !defined(CPU_UPGRADE_HW_CACHE)
 	/*

Modified: stable/10/sys/i386/i386/pmap.c
==============================================================================
--- stable/10/sys/i386/i386/pmap.c	Fri Feb 3 12:13:55 2017	(r313149)
+++ stable/10/sys/i386/i386/pmap.c	Fri Feb 3 12:20:44 2017	(r313150)
@@ -1222,9 +1222,8 @@ pmap_invalidate_cache_range(vm_offset_t

 	if ((cpu_feature & CPUID_SS) != 0 && !force)
 		; /* If "Self Snoop" is supported and allowed, do nothing. */
-	else if ((cpu_feature & CPUID_CLFSH) != 0 &&
+	else if ((cpu_stdext_feature & CPUID_STDEXT_CLFLUSHOPT) != 0 &&
 	    eva - sva < PMAP_CLFLUSH_THRESHOLD) {
-
 #ifdef DEV_APIC
 		/*
 		 * XXX: Some CPUs fault, hang, or trash the local APIC
@@ -1236,16 +1235,29 @@ pmap_invalidate_cache_range(vm_offset_t
 			return;
 #endif
 		/*
-		 * Otherwise, do per-cache line flush.  Use the mfence
+		 * Otherwise, do per-cache line flush.  Use the sfence
 		 * instruction to insure that previous stores are
 		 * included in the write-back.  The processor
 		 * propagates flush to other processors in the cache
 		 * coherence domain.
 		 */
-		mfence();
+		sfence();
+		for (; sva < eva; sva += cpu_clflush_line_size)
+			clflushopt(sva);
+		sfence();
+	} else if ((cpu_feature & CPUID_CLFSH) != 0 &&
+	    eva - sva < PMAP_CLFLUSH_THRESHOLD) {
+		if (pmap_kextract(sva) == lapic_paddr)
+			return;
+		/*
+		 * Writes are ordered by CLFLUSH on Intel CPUs.
+		 */
+		if (cpu_vendor_id != CPU_VENDOR_INTEL)
+			mfence();
 		for (; sva < eva; sva += cpu_clflush_line_size)
 			clflush(sva);
-		mfence();
+		if (cpu_vendor_id != CPU_VENDOR_INTEL)
+			mfence();
 	} else {

 		/*
@@ -5316,8 +5328,10 @@ pmap_flush_page(vm_page_t m)
 {
 	struct sysmaps *sysmaps;
 	vm_offset_t sva, eva;
+	bool useclflushopt;

-	if ((cpu_feature & CPUID_CLFSH) != 0) {
+	useclflushopt = (cpu_stdext_feature & CPUID_STDEXT_CLFLUSHOPT) != 0;
+	if (useclflushopt || (cpu_feature & CPUID_CLFSH) != 0) {
 		sysmaps = &sysmaps_pcpu[PCPU_GET(cpuid)];
 		mtx_lock(&sysmaps->lock);
 		if (*sysmaps->CMAP2)
@@ -5330,14 +5344,25 @@ pmap_flush_page(vm_page_t m)
 		eva = sva + PAGE_SIZE;

 		/*
-		 * Use mfence despite the ordering implied by
-		 * mtx_{un,}lock() because clflush is not guaranteed
-		 * to be ordered by any other instruction.
+		 * Use mfence or sfence despite the ordering implied by
+		 * mtx_{un,}lock() because clflush on non-Intel CPUs
+		 * and clflushopt are not guaranteed to be ordered by
+		 * any other instruction.
 		 */
-		mfence();
-		for (; sva < eva; sva += cpu_clflush_line_size)
-			clflush(sva);
-		mfence();
+		if (useclflushopt)
+			sfence();
+		else if (cpu_vendor_id != CPU_VENDOR_INTEL)
+			mfence();
+		for (; sva < eva; sva += cpu_clflush_line_size) {
+			if (useclflushopt)
+				clflushopt(sva);
+			else
+				clflush(sva);
+		}
+		if (useclflushopt)
+			sfence();
+		else if (cpu_vendor_id != CPU_VENDOR_INTEL)
+			mfence();
 		*sysmaps->CMAP2 = 0;
 		sched_unpin();
 		mtx_unlock(&sysmaps->lock);

Modified: stable/10/sys/i386/include/cpufunc.h
==============================================================================
--- stable/10/sys/i386/include/cpufunc.h	Fri Feb 3 12:13:55 2017	(r313149)
+++ stable/10/sys/i386/include/cpufunc.h	Fri Feb 3 12:20:44 2017	(r313150)
@@ -175,6 +175,13 @@ mfence(void)
 	__asm __volatile("mfence" : : : "memory");
 }

+static __inline void
+sfence(void)
+{
+
+	__asm __volatile("sfence" : : : "memory");
+}
+
 #ifdef _KERNEL

 #define	HAVE_INLINE_FFS
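
For readers who want the resulting flush policy in one place, here is a
minimal standalone C sketch of the pattern this commit encodes.  It is
not FreeBSD code: flush_range(), the have_clflushopt and is_intel flags,
and the CACHE_LINE constant are hypothetical stand-ins for the kernel's
cpu_stdext_feature and cpu_vendor_id tests and for cpu_clflush_line_size.

#include <stddef.h>
#include <stdint.h>
#include <stdbool.h>

#define CACHE_LINE	64	/* assumed; the kernel reads cpu_clflush_line_size */

static inline void sfence(void) { __asm__ __volatile__("sfence" : : : "memory"); }
static inline void mfence(void) { __asm__ __volatile__("mfence" : : : "memory"); }

static inline void clflush(const volatile void *p)
{
	__asm__ __volatile__("clflush %0" : : "m" (*(const volatile char *)p));
}

/* CLFLUSHOPT is 66 0F AE /7; spell out the prefix as the kernel does. */
static inline void clflushopt(const volatile void *p)
{
	__asm__ __volatile__(".byte 0x66; clflush %0"
	    : : "m" (*(const volatile char *)p));
}

/*
 * Write every dirty cache line of [buf, buf + len) back to memory,
 * following the ordering rules from the commit:
 *  - CLFLUSHOPT is weakly ordered, so bracket the loop with SFENCE;
 *  - CLFLUSH is ordered WRT other writes on Intel CPUs, so no fence
 *    is needed there; other vendors get the MFENCE fallback.
 */
static void
flush_range(const void *buf, size_t len, bool have_clflushopt, bool is_intel)
{
	const char *p, *end;

	/* Round down so a misaligned start still covers its whole line. */
	p = (const char *)((uintptr_t)buf & ~(uintptr_t)(CACHE_LINE - 1));
	end = (const char *)buf + len;

	if (have_clflushopt) {
		sfence();
		for (; p < end; p += CACHE_LINE)
			clflushopt(p);
		sfence();
	} else {
		if (!is_intel)
			mfence();
		for (; p < end; p += CACHE_LINE)
			clflush(p);
		if (!is_intel)
			mfence();
	}
}

Dropping the fences entirely for CLFLUSH on Intel CPUs is the point of
r289894; r312555 then relaxes the barriers around CLFLUSHOPT from MFENCE
to the cheaper SFENCE, since only store ordering is needed to make prior
writes visible to the write-back.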