All 64-bit CPUs have CLFLUSH. AMD introduced it in the K8, Intel in the P4
Willamette core prior to 64-bit support, and VIA/Centaur in the Isaiah.
Furthermore, the reported cacheline size is 64 on all CPUs to date.
Arguably changeset 19435c10abf7 ("x86: consolidate/enhance TLB flushing
interface", 2007) should have initialised c->x86_clflush_size earlier, but
even at the time of changeset 3330013e6739 ("VT-d / x86: re-arrange cache
syncing", 2022), early_cpu_init() already had CLFLUSH-parsing logic, yet
simply failed to record the size.
With get_cache_line_size() removed and 16 bytes assumed instead, the practical
consequence for early IOMMU initialisation on SandyBridge era systems is that
every cacheline is flushed 4 times (a pipeline stall too, as those CPUs could
only have one flush in flight at a time).
Record c->x86_clflush_size in early_cpu_init(), and panic() if CLFLUSH isn't
found. Drop the redundant initialisation of c->x86_cache_alignment.
Remove the fallback to 16 bytes in cache_{flush,writeback}(), opting instead
for an ASSERT() to confirm that the logic hasn't been re-arranged too early.
Fixes: 3330013e6739 ("VT-d / x86: re-arrange cache syncing")
Signed-off-by: Andrew Cooper <[email protected]>
---
CC: Jan Beulich <[email protected]>
CC: Roger Pau Monné <[email protected]>
CC: Julian Vetter <[email protected]>
CC: Teddy Astie <[email protected]>
---
xen/arch/x86/cpu/common.c | 7 ++++---
xen/arch/x86/flushtlb.c | 19 +++++++------------
2 files changed, 11 insertions(+), 15 deletions(-)
diff --git a/xen/arch/x86/cpu/common.c b/xen/arch/x86/cpu/common.c
index ebe2baf8b98a..f8c80db6eb1d 100644
--- a/xen/arch/x86/cpu/common.c
+++ b/xen/arch/x86/cpu/common.c
@@ -319,8 +319,6 @@ void __init early_cpu_init(bool verbose)
uint64_t val;
u32 eax, ebx, ecx, edx;
- c->x86_cache_alignment = 32;
-
/* Get vendor name */
cpuid(0x00000000, &c->cpuid_level, &ebx, &ecx, &edx);
*(u32 *)&c->x86_vendor_id[0] = ebx;
@@ -352,6 +350,7 @@ void __init early_cpu_init(bool verbose)
if (edx & cpufeat_mask(X86_FEATURE_CLFLUSH)) {
unsigned int size = ((ebx >> 8) & 0xff) * 8;
+ c->x86_clflush_size = size;
c->x86_cache_alignment = size;
/*
@@ -380,7 +379,9 @@ void __init early_cpu_init(bool verbose)
}
else
setup_clear_cpu_cap(X86_FEATURE_CLZERO);
- }
+ } else
+ panic("CLFLUSH information not available\n");
+
/* Leaf 0x1 capabilities filled in early for Xen. */
c->x86_capability[FEATURESET_1d] = edx;
c->x86_capability[FEATURESET_1c] = ecx;
diff --git a/xen/arch/x86/flushtlb.c b/xen/arch/x86/flushtlb.c
index 23721bb52c90..1f8877dcab23 100644
--- a/xen/arch/x86/flushtlb.c
+++ b/xen/arch/x86/flushtlb.c
@@ -234,8 +234,7 @@ unsigned int flush_area_local(const void *va, unsigned int flags)
if ( (!(flags & (FLUSH_TLB|FLUSH_TLB_GLOBAL)) ||
(flags & FLUSH_VA_VALID)) &&
- c->x86_clflush_size && c->x86_cache_size && sz &&
- ((sz >> 10) < c->x86_cache_size) )
+ c->x86_cache_size && sz && ((sz >> 10) < c->x86_cache_size) )
{
if ( flags & FLUSH_CACHE_EVICT )
cache_flush(va, sz);
@@ -264,13 +263,11 @@ unsigned int flush_area_local(const void *va, unsigned int flags)
*/
void cache_flush(const void *addr, unsigned int size)
{
- /*
- * This function may be called before current_cpu_data is established.
- * Hence a fallback is needed to prevent the loop below becoming infinite.
- */
- unsigned int clflush_size = current_cpu_data.x86_clflush_size ?: 16;
+ unsigned int clflush_size = current_cpu_data.x86_clflush_size;
const void *end = addr + size;
+ ASSERT(clflush_size);
+
alternative("", "mfence", X86_BUG_CLFLUSH_MFENCE);
addr -= (unsigned long)addr & (clflush_size - 1);
@@ -301,11 +298,9 @@ void cache_writeback(const void *addr, unsigned int size)
if ( !boot_cpu_has(X86_FEATURE_CLWB) )
return cache_flush(addr, size);
- /*
- * This function may be called before current_cpu_data is established.
- * Hence a fallback is needed to prevent the loop below becoming infinite.
- */
- clflush_size = current_cpu_data.x86_clflush_size ?: 16;
+ clflush_size = current_cpu_data.x86_clflush_size;
+ ASSERT(clflush_size);
+
addr -= (unsigned long)addr & (clflush_size - 1);
for ( ; addr < end; addr += clflush_size )
clwb(addr);
--
2.39.5