Add a member fpu->cpu to struct fpu which records which cpu currently has
this fpu register set loaded (or -1 if the registers were flushed to memory
in fpu->state).

The various fpu accessors are modified to IPI the loaded cpu if it
happens to be different from the current cpu.

Signed-off-by: Avi Kivity <a...@redhat.com>
---
 arch/x86/include/asm/i387.h      |  115 +++++++++++++++++++++++++++++++++++--
 arch/x86/include/asm/processor.h |    4 +
 arch/x86/kernel/i387.c           |    3 +
 arch/x86/kernel/process.c        |    1 +
 4 files changed, 116 insertions(+), 7 deletions(-)

diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h
index df5badf..124c89d 100644
--- a/arch/x86/include/asm/i387.h
+++ b/arch/x86/include/asm/i387.h
@@ -174,7 +174,7 @@ static inline void fpu_fxsave(struct fpu *fpu)
 #endif
 }
 
-static inline void fpu_save_init(struct fpu *fpu)
+static inline void __fpu_save_init(struct fpu *fpu)
 {
        if (use_xsave())
                fpu_xsave(fpu);
@@ -222,10 +222,7 @@ static inline int fxrstor_checking(struct i387_fxsave_struct *fx)
 #define safe_address (kstat_cpu(0).cpustat.user)
 #endif
 
-/*
- * These must be called with preempt disabled
- */
-static inline void fpu_save_init(struct fpu *fpu)
+static inline void __fpu_save_init(struct fpu *fpu)
 {
        if (use_xsave()) {
                struct xsave_struct *xstate = &fpu->state->xsave;
@@ -273,6 +270,33 @@ end:
 
 #endif /* CONFIG_X86_64 */
 
+static inline bool fpu_loaded(struct fpu *fpu)
+{
+       return fpu->cpu == smp_processor_id();
+}
+
+static inline bool fpu_remote(struct fpu *fpu)
+{
+       return fpu->cpu != -1 && fpu->cpu != smp_processor_id();
+}
+
+/*
+ * These must be called with preempt disabled
+ */
+static inline void fpu_save_init(struct fpu *fpu)
+{
+       ulong flags;
+
+       if (__get_cpu_var(current_fpu) != fpu
+           || fpu->cpu != smp_processor_id())
+               return;
+       local_irq_save(flags);
+       __fpu_save_init(fpu);
+       fpu->cpu = -1;
+       __get_cpu_var(current_fpu) = NULL;
+       local_irq_restore(flags);
+}
+
 static inline void __save_init_fpu(struct task_struct *tsk)
 {
        fpu_save_init(&tsk->thread.fpu);
@@ -284,7 +308,7 @@ static inline int fpu_fxrstor_checking(struct fpu *fpu)
        return fxrstor_checking(&fpu->state->fxsave);
 }
 
-static inline int fpu_restore_checking(struct fpu *fpu)
+static inline int __fpu_restore_checking(struct fpu *fpu)
 {
        if (use_xsave())
                return fpu_xrstor_checking(fpu);
@@ -292,6 +316,47 @@ static inline int fpu_restore_checking(struct fpu *fpu)
                return fpu_fxrstor_checking(fpu);
 }
 
+static inline void __fpu_unload(void *_fpu)
+{
+       struct fpu *fpu = _fpu;
+       unsigned cr0 = read_cr0();
+
+       if (cr0 & X86_CR0_TS)
+               clts();
+       if (__get_cpu_var(current_fpu) == fpu)
+               fpu_save_init(fpu);
+       if (cr0 & X86_CR0_TS)
+               write_cr0(cr0);
+}
+
+static inline void fpu_unload(struct fpu *fpu)
+{
+       int cpu = ACCESS_ONCE(fpu->cpu);
+
+       if (cpu != -1)
+               smp_call_function_single(cpu, __fpu_unload, fpu, 1);
+}
+
+static inline int fpu_restore_checking(struct fpu *fpu)
+{
+       ulong flags;
+       struct fpu *oldfpu;
+       int ret;
+
+       if (fpu->cpu == smp_processor_id())
+               return 0;
+       fpu_unload(fpu);
+       local_irq_save(flags);
+       oldfpu = __get_cpu_var(current_fpu);
+       if (oldfpu)
+               fpu_save_init(oldfpu);
+       ret = __fpu_restore_checking(fpu);
+       fpu->cpu = smp_processor_id();
+       __get_cpu_var(current_fpu) = fpu;
+       local_irq_restore(flags);
+       return ret;
+}
+
 static inline int restore_fpu_checking(struct task_struct *tsk)
 {
        return fpu_restore_checking(&tsk->thread.fpu);
@@ -451,18 +516,46 @@ static bool fpu_allocated(struct fpu *fpu)
        return fpu->state != NULL;
 }
 
+static inline void fpu_init_empty(struct fpu *fpu)
+{
+       fpu->state = NULL;
+       fpu->cpu = -1;
+}
+
 static inline int fpu_alloc(struct fpu *fpu)
 {
        if (fpu_allocated(fpu))
                return 0;
        fpu->state = kmem_cache_alloc(task_xstate_cachep, GFP_KERNEL);
+       fpu->cpu = -1;
        if (!fpu->state)
                return -ENOMEM;
        WARN_ON((unsigned long)fpu->state & 15);
        return 0;
 }
 
-static inline void fpu_free(struct fpu *fpu)
+static inline void __fpu_forget(void *_fpu)
+{
+       struct fpu *fpu = _fpu;
+
+       if (fpu->cpu == smp_processor_id()) {
+               fpu->cpu = -1;
+               __get_cpu_var(current_fpu) = NULL;
+       }
+}
+
+static inline void fpu_forget(struct fpu *fpu)
+{
+       int cpu;
+
+       preempt_disable();
+       cpu = ACCESS_ONCE(fpu->cpu);
+       if (cpu != -1)
+               smp_call_function_single(cpu, __fpu_forget, fpu, 1);
+       preempt_enable();
+}
+
+static inline void __fpu_free(struct fpu *fpu)
 {
        if (fpu->state) {
                kmem_cache_free(task_xstate_cachep, fpu->state);
@@ -470,8 +563,16 @@ static inline void fpu_free(struct fpu *fpu)
        }
 }
 
+static inline void fpu_free(struct fpu *fpu)
+{
+       fpu_forget(fpu);
+       __fpu_free(fpu);
+}
+
 static inline void fpu_copy(struct fpu *dst, struct fpu *src)
 {
+       fpu_unload(src);
+       fpu_unload(dst);
        memcpy(dst->state, src->state, xstate_size);
 }
 
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 7e5c6a6..98996fe 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -378,8 +378,11 @@ union thread_xstate {
 
 struct fpu {
        union thread_xstate *state;
+       int cpu;            /* -1 = unloaded */
 };
 
+DECLARE_PER_CPU(struct fpu *, current_fpu);
+
 #ifdef CONFIG_X86_64
 DECLARE_PER_CPU(struct orig_ist, orig_ist);
 
@@ -892,6 +895,7 @@ static inline void spin_lock_prefetch(const void *x)
        .vm86_info              = NULL,                                   \
        .sysenter_cs            = __KERNEL_CS,                            \
        .io_bitmap_ptr          = NULL,                                   \
+       .fpu                    = { .cpu = -1, },                         \
 }
 
 /*
diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c
index c4444bc..e56f486 100644
--- a/arch/x86/kernel/i387.c
+++ b/arch/x86/kernel/i387.c
@@ -38,6 +38,9 @@
 # define HAVE_HWFP             1
 #endif
 
+DEFINE_PER_CPU(struct fpu *, current_fpu);
+EXPORT_PER_CPU_SYMBOL_GPL(current_fpu);
+
 static unsigned int            mxcsr_feature_mask __read_mostly = 0xffffffffu;
 unsigned int xstate_size;
 unsigned int sig_xstate_ia32_size = sizeof(struct _fpstate_ia32);
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index ebcfcce..16a7a9b 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -35,6 +35,7 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
        int ret;
 
        *dst = *src;
+       fpu_init_empty(&dst->thread.fpu);
        if (fpu_allocated(&src->thread.fpu)) {
                memset(&dst->thread.fpu, 0, sizeof(dst->thread.fpu));
                ret = fpu_alloc(&dst->thread.fpu);
-- 
1.7.1

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to