I noticed high latencies caused by a daemon periodically reading
various MSRs on all CPUs. KASAN kernels would see ~10ms latencies
simply reading one MSR. Even without KASAN, sending an IPI to a CPU
that is in a deep sleep state, or that is blocking hard IRQs in a long
section, then waiting for the answer, can consume hundreds of usec.

Convert rdmsr_safe_on_cpu() to use a completion instead of busy
polling, so that the caller sleeps while waiting for the remote CPU
to answer.

Overall daemon CPU usage was reduced by 35%,
and latencies caused by msr_read() disappeared.

Signed-off-by: Eric Dumazet <eduma...@google.com>
Cc: "H. Peter Anvin" <h...@zytor.com>
Cc: Thomas Gleixner <t...@linutronix.de>
Cc: Ingo Molnar <mi...@redhat.com>
Cc: Borislav Petkov <b...@alien8.de>
Cc: Hugh Dickins <hu...@google.com>
---
 arch/x86/lib/msr-smp.c | 32 ++++++++++++++++++++++++--------
 1 file changed, 24 insertions(+), 8 deletions(-)

diff --git a/arch/x86/lib/msr-smp.c b/arch/x86/lib/msr-smp.c
index 693cce0be82dffb822cecd0c7e38d2821aff896c..761ba062afdaf7f7d0603ed94ed6cc3e46b37f76 100644
--- a/arch/x86/lib/msr-smp.c
+++ b/arch/x86/lib/msr-smp.c
@@ -2,6 +2,7 @@
 #include <linux/export.h>
 #include <linux/preempt.h>
 #include <linux/smp.h>
+#include <linux/completion.h>
 #include <asm/msr.h>
 
 static void __rdmsr_on_cpu(void *info)
@@ -143,13 +144,19 @@ void wrmsr_on_cpus(const struct cpumask *mask, u32 msr_no, struct msr *msrs)
 }
 EXPORT_SYMBOL(wrmsr_on_cpus);
 
+struct msr_info_completion {
+       struct msr_info         msr;
+       struct completion       done;
+};
+
 /* These "safe" variants are slower and should be used when the target MSR
    may not actually exist. */
 static void __rdmsr_safe_on_cpu(void *info)
 {
-       struct msr_info *rv = info;
+       struct msr_info_completion *rv = info;
 
-       rv->err = rdmsr_safe(rv->msr_no, &rv->reg.l, &rv->reg.h);
+       rv->msr.err = rdmsr_safe(rv->msr.msr_no, &rv->msr.reg.l, 
&rv->msr.reg.h);
+       complete(&rv->done);
 }
 
 static void __wrmsr_safe_on_cpu(void *info)
@@ -161,17 +168,26 @@ static void __wrmsr_safe_on_cpu(void *info)
 
 int rdmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h)
 {
+       struct msr_info_completion rv;
+       call_single_data_t csd = {
+               .func   = __rdmsr_safe_on_cpu,
+               .info   = &rv,
+       };
        int err;
-       struct msr_info rv;
 
        memset(&rv, 0, sizeof(rv));
+       init_completion(&rv.done);
+       rv.msr.msr_no = msr_no;
 
-       rv.msr_no = msr_no;
-       err = smp_call_function_single(cpu, __rdmsr_safe_on_cpu, &rv, 1);
-       *l = rv.reg.l;
-       *h = rv.reg.h;
+       err = smp_call_function_single_async(cpu, &csd);
+       if (!err) {
+               wait_for_completion(&rv.done);
+               err = rv.msr.err;
+       }
+       *l = rv.msr.reg.l;
+       *h = rv.msr.reg.h;
 
-       return err ? err : rv.err;
+       return err;
 }
 EXPORT_SYMBOL(rdmsr_safe_on_cpu);
 
-- 
2.17.0.rc0.231.g781580f067-goog

Reply via email to