> -----Original Message-----
> From: Vitaly Kuznetsov [mailto:vkuzn...@redhat.com]
> Sent: Friday, April 7, 2017 4:27 AM
> To: de...@linuxdriverproject.org; x...@kernel.org
> Cc: linux-ker...@vger.kernel.org; KY Srinivasan <k...@microsoft.com>;
> Haiyang Zhang <haiya...@microsoft.com>; Stephen Hemminger
> <sthem...@microsoft.com>; Thomas Gleixner <t...@linutronix.de>; Ingo
> Molnar <mi...@redhat.com>; H. Peter Anvin <h...@zytor.com>; Steven
> Rostedt <rost...@goodmis.org>; Jork Loeser <jork.loe...@microsoft.com>
> Subject: [PATCH 6/7] x86/hyper-v: use hypercall for remote TLB flush
> 
> The Hyper-V host can suggest that we use a hypercall for remote TLB flushes;
> this is supposed to be faster than IPIs.
> 
> Implementation details: to do HvFlushVirtualAddress{Space,List} hypercalls
> we need to put the input somewhere in memory and we don't really want to
> have memory allocation on each call, so we pre-allocate per-cpu memory
> areas on boot. These areas are of fixed size; limit them to an arbitrary
> number of 16 GVAs (16 GVAs are able to specify 16 * 4096 pages).
> 
> pv_ops patching is happening very early so we need to separate
> hyperv_setup_mmu_ops() and hyper_alloc_mmu().
> 
> It is possible and easy to implement local TLB flushing too, and there is
> even a hint for that. However, I don't see any room for optimization on the
> host side, as both the hypercall and a native TLB flush will result in a
> vmexit. The hint is also not set on modern Hyper-V versions.
> 
> Signed-off-by: Vitaly Kuznetsov <vkuzn...@redhat.com>
> ---
>  arch/x86/hyperv/Makefile           |   2 +-
>  arch/x86/hyperv/hv_init.c          |   2 +
>  arch/x86/hyperv/mmu.c              | 128
> +++++++++++++++++++++++++++++++++++++
>  arch/x86/include/asm/mshyperv.h    |   2 +
>  arch/x86/include/uapi/asm/hyperv.h |   7 ++
>  arch/x86/kernel/cpu/mshyperv.c     |   1 +
>  6 files changed, 141 insertions(+), 1 deletion(-)
>  create mode 100644 arch/x86/hyperv/mmu.c
> 
> diff --git a/arch/x86/hyperv/Makefile b/arch/x86/hyperv/Makefile
> index 171ae09..367a820 100644
> --- a/arch/x86/hyperv/Makefile
> +++ b/arch/x86/hyperv/Makefile
> @@ -1 +1 @@
> -obj-y                := hv_init.o
> +obj-y                := hv_init.o mmu.o
> diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
> index 1c14088..2cf8a98 100644
> --- a/arch/x86/hyperv/hv_init.c
> +++ b/arch/x86/hyperv/hv_init.c
> @@ -163,6 +163,8 @@ void hyperv_init(void)
>       hypercall_msr.guest_physical_address =
> vmalloc_to_pfn(hv_hypercall_pg);
>       wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);
> 
> +     hyper_alloc_mmu();
> +
>       /*
>        * Register Hyper-V specific clocksource.
>        */
> diff --git a/arch/x86/hyperv/mmu.c b/arch/x86/hyperv/mmu.c
> new file mode 100644
> index 0000000..fb487cb
> --- /dev/null
> +++ b/arch/x86/hyperv/mmu.c
> @@ -0,0 +1,128 @@
> +#include <linux/types.h>
> +#include <linux/hyperv.h>
> +#include <linux/slab.h>
> +#include <asm/mshyperv.h>
> +#include <asm/tlbflush.h>
> +#include <asm/msr.h>
> +#include <asm/fpu/api.h>
> +
> +/*
> + * Arbitrary number; we need to pre-allocate per-cpu struct for doing TLB
> + * flush hypercalls and we need to pick a size. '16' means we'll be able
> + * to flush 16 * 4096 pages (256MB) with one hypercall.
> + */
> +#define HV_MMU_MAX_GVAS 16

Did you experiment with different sizes here?
> +
> +/* HvFlushVirtualAddressSpace*, HvFlushVirtualAddressList hypercalls */
> +struct hv_flush_pcpu {
> +     struct {
> +             __u64 address_space;
> +             __u64 flags;
> +             __u64 processor_mask;
> +             __u64 gva_list[HV_MMU_MAX_GVAS];
> +     } flush;
> +
> +     spinlock_t lock;
> +};
> +
We may be supporting more than 64 CPUs in this hypercall. I am going to inquire
with the Windows folks and get back to you.

> +static struct hv_flush_pcpu __percpu *pcpu_flush;
> +
> +static void hyperv_flush_tlb_others(const struct cpumask *cpus,
> +                                 struct mm_struct *mm, unsigned long
> start,
> +                                 unsigned long end)
> +{
> +     struct hv_flush_pcpu *flush;
> +     unsigned long cur, flags;
> +     u64 status = -1ULL;
> +     int cpu, vcpu, gva_n;
> +
> +     if (!pcpu_flush || !hv_hypercall_pg)
> +             goto do_native;
> +
> +     if (cpumask_empty(cpus))
> +             return;
> +
> +     flush = this_cpu_ptr(pcpu_flush);
> +     spin_lock_irqsave(&flush->lock, flags);
> +
> +     flush->flush.address_space = virt_to_phys(mm->pgd);
> +     flush->flush.processor_mask = 0;
> +     if (cpumask_equal(cpus, cpu_present_mask)) {
> +             flush->flush.flags = HV_FLUSH_ALL_PROCESSORS;
> +     } else {
> +             flush->flush.flags = 0;
> +             for_each_cpu(cpu, cpus) {
> +                     vcpu = vmbus_cpu_number_to_vp_number(cpu);
> +                     if (vcpu != -1 && vcpu < 64)
> +                             flush->flush.processor_mask |= 1 << vcpu;
> +                     else
> +                             goto unlock_do_native;
> +             }
> +     }
> +
> +     if (end == TLB_FLUSH_ALL) {
> +             flush->flush.flags =
> HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY;
> +             status =
> hv_do_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE,
> +                                      &flush->flush, NULL);
> +     } else {
> +             cur = start;
> +more_gvas:
> +             gva_n = 0;
> +
> +             do {
> +                     flush->flush.gva_list[gva_n] = cur & PAGE_MASK;
> +                     /*
> +                      * Lower 12 bits encode the number of additional
> +                      * pages to flush (in addition to the 'cur' page).
> +                      */
> +                     if (end >= cur + PAGE_SIZE * PAGE_SIZE)
> +                             flush->flush.gva_list[gva_n] |=
> ~PAGE_MASK;
> +                     else if (end > cur)
> +                             flush->flush.gva_list[gva_n] |=
> +                                     (end - cur - 1) >> PAGE_SHIFT;
> +
> +                     cur += PAGE_SIZE * PAGE_SIZE;
> +                     ++gva_n;
> +
> +             } while (cur < end && gva_n < HV_MMU_MAX_GVAS);
> +
> +             status =
> hv_do_rep_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST,
> +                                          gva_n, &flush->flush, NULL);
> +
> +             if (!(status & 0xffff) && cur < end)
> +                     goto more_gvas;
> +     }
> +
> +unlock_do_native:
> +     spin_unlock_irqrestore(&flush->lock, flags);
> +
> +     if (!(status & 0xffff))
> +             return;
> +do_native:
> +     native_flush_tlb_others(cpus, mm, start, end);
> +}
> +
> +void hyperv_setup_mmu_ops(void)
> +{
> +     if (ms_hyperv.hints &
> HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED) {
> +             pr_info("Hyper-V: Using hypercall for remote TLB flush\n");
> +             pv_mmu_ops.flush_tlb_others = hyperv_flush_tlb_others;
> +     }
> +}
> +
> +void hyper_alloc_mmu(void)
> +{
> +     int cpu;
> +     struct hv_flush_pcpu *flush;
> +
> +     if (ms_hyperv.hints &
> HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED) {
> +             pcpu_flush = alloc_percpu(struct hv_flush_pcpu);
> +             if (!pcpu_flush)
> +                     return;
> +
> +             for_each_possible_cpu(cpu) {
> +                     flush = per_cpu_ptr(pcpu_flush, cpu);
> +                     spin_lock_init(&flush->lock);
> +             }
> +     }
> +}
> diff --git a/arch/x86/include/asm/mshyperv.h
> b/arch/x86/include/asm/mshyperv.h
> index 1293c84..a5041c3 100644
> --- a/arch/x86/include/asm/mshyperv.h
> +++ b/arch/x86/include/asm/mshyperv.h
> @@ -301,6 +301,8 @@ static inline int
> vmbus_cpu_number_to_vp_number(int cpu_number)
>  }
> 
>  void hyperv_init(void);
> +void hyperv_setup_mmu_ops(void);
> +void hyper_alloc_mmu(void);
>  void hyperv_report_panic(struct pt_regs *regs);
>  bool hv_is_hypercall_page_setup(void);
>  void hyperv_cleanup(void);
> diff --git a/arch/x86/include/uapi/asm/hyperv.h
> b/arch/x86/include/uapi/asm/hyperv.h
> index c87e900..3d44036 100644
> --- a/arch/x86/include/uapi/asm/hyperv.h
> +++ b/arch/x86/include/uapi/asm/hyperv.h
> @@ -239,6 +239,8 @@
>               (~((1ull <<
> HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_SHIFT) - 1))
> 
>  /* Declare the various hypercall operations. */
> +#define HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE   0x0002
> +#define HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST    0x0003
>  #define HVCALL_NOTIFY_LONG_SPIN_WAIT         0x0008
>  #define HVCALL_POST_MESSAGE                  0x005c
>  #define HVCALL_SIGNAL_EVENT                  0x005d
> @@ -256,6 +258,11 @@
>  #define HV_PROCESSOR_POWER_STATE_C2          2
>  #define HV_PROCESSOR_POWER_STATE_C3          3
> 
> +#define HV_FLUSH_ALL_PROCESSORS                      0x00000001
> +#define HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES  0x00000002
> +#define HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY    0x00000004
> +#define HV_FLUSH_USE_EXTENDED_RANGE_FORMAT   0x00000008
> +
>  /* Hypercall interface */
>  union hv_hypercall_input {
>       u64 as_uint64;
> diff --git a/arch/x86/kernel/cpu/mshyperv.c
> b/arch/x86/kernel/cpu/mshyperv.c
> index 04cb8d3..fc228d8 100644
> --- a/arch/x86/kernel/cpu/mshyperv.c
> +++ b/arch/x86/kernel/cpu/mshyperv.c
> @@ -233,6 +233,7 @@ static void __init ms_hyperv_init_platform(void)
>        * Setup the hook to get control post apic initialization.
>        */
>       x86_platform.apic_post_init = hyperv_init;
> +     hyperv_setup_mmu_ops();
>  #endif
>  }
> 
> --
> 2.9.3

_______________________________________________
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel

Reply via email to