Hi Andrea,

> path for kvm so I guess not worth optimizing in the short/mid term,
> but by optimizing the invalidate_page case we may halve the number of
> tlb flushes for some common case. I leave it for later, the swapping
> is heavily I/O bound anyway so a some more ipi in smp host shouldn't
> be very measurable (on UP host it makes no difference to flush
> multiple times in practice).
> 
> Signed-off-by: Andrea Arcangeli <[EMAIL PROTECTED]>
> 
> diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
> index 4086080..c527d7d 100644
> --- a/arch/x86/kvm/Kconfig
> +++ b/arch/x86/kvm/Kconfig
> @@ -18,6 +18,7 @@ config KVM
>       tristate "Kernel-based Virtual Machine (KVM) support"
>       depends on ARCH_SUPPORTS_KVM && EXPERIMENTAL
>       select PREEMPT_NOTIFIERS
> +     select MMU_NOTIFIER
>       select ANON_INODES
>       ---help---
>         Support hosting fully virtualized guest machines using hardware
> diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
> index 324ff9a..103c270 100644
> --- a/arch/x86/kvm/mmu.c
> +++ b/arch/x86/kvm/mmu.c
> @@ -532,6 +532,36 @@ static void rmap_write_protect(struct kvm *kvm, u64 gfn)
>               kvm_flush_remote_tlbs(kvm);
>  }
>  
> +static void unmap_spte(struct kvm *kvm, u64 *spte)
> +{
> +     struct page *page = pfn_to_page((*spte & PT64_BASE_ADDR_MASK) >> 
> PAGE_SHIFT);
> +     get_page(page);
> +     rmap_remove(kvm, spte);
> +     set_shadow_pte(spte, shadow_trap_nonpresent_pte);
> +     kvm_flush_remote_tlbs(kvm);
> +     __free_page(page);
> +}
> +
> +void kvm_rmap_unmap_gfn(struct kvm *kvm, gfn_t gfn)
> +{
> +     unsigned long *rmapp;
> +     u64 *spte, *curr_spte;
> +
> +     spin_lock(&kvm->mmu_lock);
> +     gfn = unalias_gfn(kvm, gfn);
> +     rmapp = gfn_to_rmap(kvm, gfn);

The alias and memslot maps are protected only by mmap_sem, so you
should make kvm_set_memory_region/set_memory_alias take the mmu
spinlock in addition to holding mmap_sem in write mode.

kvm_mmu_zap_all() already grabs the mmu lock — that locking should
probably move up into its caller.

> +
> +     spte = rmap_next(kvm, rmapp, NULL);
> +     while (spte) {
> +             BUG_ON(!(*spte & PT_PRESENT_MASK));
> +             rmap_printk("rmap_swap_page: spte %p %llx\n", spte, *spte);
> +             curr_spte = spte;
> +             spte = rmap_next(kvm, rmapp, spte);
> +             unmap_spte(kvm, curr_spte);
> +     }
> +     spin_unlock(&kvm->mmu_lock);
> +}
> +
>  #ifdef MMU_DEBUG
>  static int is_empty_shadow_page(u64 *spt)
>  {
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 8a90403..e9a3f6e 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -3159,6 +3159,36 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
>       free_page((unsigned long)vcpu->arch.pio_data);
>  }
>  
> +static inline struct kvm *mmu_notifier_to_kvm(struct mmu_notifier *mn)
> +{
> +     return container_of(mn, struct kvm, mmu_notifier);
> +}
> +
> +void kvm_mmu_notifier_invalidate_page(struct mmu_notifier *mn,
> +                                   struct mm_struct *mm,
> +                                   unsigned long address)
> +{
> +     struct kvm *kvm = mmu_notifier_to_kvm(mn);
> +     gfn_t gfn = hva_to_gfn(kvm, address);
> +     BUG_ON(mm != kvm->mm);
> +     if (gfn == -1UL)
> +             return;
> +     kvm_rmap_unmap_gfn(kvm, gfn);

And then you also need to make the hva_to_gfn() lookup happen under the lock.

> +}
> +
> +void kvm_mmu_notifier_invalidate_range(struct mmu_notifier *mn,
> +                                    struct mm_struct *mm,
> +                                    unsigned long start, unsigned long end)
> +{
> +     for (; start < end; start += PAGE_SIZE)
> +             kvm_mmu_notifier_invalidate_page(mn, mm, start);
> +}
> +
> +static const struct mmu_notifier_ops kvm_mmu_notifier_ops = {
> +     .invalidate_range       = kvm_mmu_notifier_invalidate_range,
> +     .invalidate_page        = kvm_mmu_notifier_invalidate_page,
> +};
> +
>  struct  kvm *kvm_arch_create_vm(void)
>  {
>       struct kvm *kvm = kzalloc(sizeof(struct kvm), GFP_KERNEL);
> @@ -3167,6 +3197,7 @@ struct  kvm *kvm_arch_create_vm(void)
>               return ERR_PTR(-ENOMEM);
>  
>       INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
> +     kvm->mmu_notifier.ops = &kvm_mmu_notifier_ops;
>  
>       return kvm;
>  }
> diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h
> index d6db0de..feacd77 100644
> --- a/include/asm-x86/kvm_host.h
> +++ b/include/asm-x86/kvm_host.h
> @@ -404,6 +404,7 @@ int kvm_mmu_create(struct kvm_vcpu *vcpu);
>  int kvm_mmu_setup(struct kvm_vcpu *vcpu);
>  void kvm_mmu_set_nonpresent_ptes(u64 trap_pte, u64 notrap_pte);
>  
> +void kvm_rmap_unmap_gfn(struct kvm *kvm, gfn_t gfn);
>  int kvm_mmu_reset_context(struct kvm_vcpu *vcpu);
>  void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot);
>  void kvm_mmu_zap_all(struct kvm *kvm);
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index 2714068..85da7fa 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -117,6 +117,7 @@ struct kvm {
>       struct kvm_io_bus pio_bus;
>       struct kvm_vm_stat stat;
>       struct kvm_arch arch;
> +     struct mmu_notifier mmu_notifier;
>  };
>  
>  /* The guest did something we don't support. */
> @@ -163,6 +164,7 @@ int kvm_arch_set_memory_region(struct kvm *kvm,
>                               struct kvm_memory_slot old,
>                               int user_alloc);
>  gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn);
> +gfn_t hva_to_gfn(struct kvm *kvm, unsigned long addr);
>  struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn);
>  void kvm_release_page_clean(struct page *page);
>  void kvm_release_page_dirty(struct page *page);
> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> index 4295623..8f1dd86 100644
> --- a/virt/kvm/kvm_main.c
> +++ b/virt/kvm/kvm_main.c
> @@ -165,6 +165,7 @@ static struct kvm *kvm_create_vm(void)
>  
>       kvm->mm = current->mm;
>       atomic_inc(&kvm->mm->mm_count);
> +     mmu_notifier_register(&kvm->mmu_notifier, kvm->mm);
>       spin_lock_init(&kvm->mmu_lock);
>       kvm_io_bus_init(&kvm->pio_bus);
>       mutex_init(&kvm->lock);
> @@ -454,6 +455,23 @@ static unsigned long gfn_to_hva(struct kvm *kvm, gfn_t 
> gfn)
>       return (slot->userspace_addr + (gfn - slot->base_gfn) * PAGE_SIZE);
>  }
>  
> +gfn_t hva_to_gfn(struct kvm *kvm, unsigned long addr)
> +{
> +     int i;
> +
> +     for (i = 0; i < kvm->nmemslots; i++) {
> +             struct kvm_memory_slot *memslot = &kvm->memslots[i];
> +             unsigned long start = memslot->userspace_addr;
> +             unsigned long end = start + (memslot->npages << PAGE_SHIFT);
> +
> +             if (addr >= start && addr < end) {
> +                     gfn_t gfn_offset = (addr - start) >> PAGE_SHIFT;
> +                     return memslot->base_gfn + gfn_offset;
> +             }
> +     }
> +     return -1UL;
> +}
> +
>  /*
>   * Requires current->mm->mmap_sem to be held
>   */
> 
> 
> 
> And here a compatibility patch to kvm-userland so the external module
> still compile and runs with older kernels w/o MMU_NOTIFIER patch applied.
> 
> Signed-off-by: Andrea Arcangeli <[EMAIL PROTECTED]>
> 
> diff --git a/kernel/external-module-compat.h b/kernel/external-module-compat.h
> index 67b9cc4..34ef0a5 100644
> --- a/kernel/external-module-compat.h
> +++ b/kernel/external-module-compat.h
> @@ -17,6 +17,28 @@
>  #include <linux/hrtimer.h>
>  #include <asm/bitops.h>
>  
> +#ifndef CONFIG_MMU_NOTIFIER
> +struct mmu_notifier;
> +
> +struct mmu_notifier_ops {
> +     void (*release)(struct mmu_notifier * mn,
> +                     struct mm_struct *mm);
> +     void (*invalidate_page)(struct mmu_notifier * mn,
> +                             struct mm_struct *mm,
> +                             unsigned long address);
> +     void (*invalidate_range)(struct mmu_notifier * mn,
> +                              struct mm_struct *mm,
> +                              unsigned long start, unsigned long end);
> +};
> +
> +struct mmu_notifier {
> +     const struct mmu_notifier_ops *ops;
> +};
> +#define mmu_notifier_register(mn, mm) do {} while(0)
> +#define mmu_notifier_unregister(mn) do {} while (0)
> +#define mmu_notifier_release(mm) do {} while (0)
> +#endif
> +
>  /*
>   * 2.6.16 does not have GFP_NOWAIT
>   */
> 
> 
> Here another patch for kvm-userland where I can't see symmetry between
> the lack of atomic_inc despite mmdrop is still run. I can't possibly
> see how this is supposedly not required when compiled into the kernel
> vs external module. Either atomic_inc is needed in both or none. Even
> if I'm right still this bug wasn't destabilizing because
> atomic_inc_and_dec only fires once in the overflow check, so it
> shouldn't matter to run one mmdrop more than needed, but it's good for
> correctness.
> 
> Signed-off-by: Andrea Arcangeli <[EMAIL PROTECTED]>
> 
> diff --git a/kernel/hack-module.awk b/kernel/hack-module.awk
> index 5187c96..884bc50 100644
> --- a/kernel/hack-module.awk
> +++ b/kernel/hack-module.awk
> @@ -33,8 +33,6 @@
>      vmx_load_host_state = 0
>  }
>  
> -/atomic_inc\(&kvm->mm->mm_count\);/ { $0 = "//" $0 }
> -
>  /^\t\.fault = / {
>      fcn = gensub(/,/, "", "g", $3)
>      $0 = "\t.VMA_OPS_FAULT(fault) = VMA_OPS_FAULT_FUNC(" fcn "),"
> 
> 
> I'll post the mmu-notifiers patch (required in the host kernel to run
> the above) separately in CC with more mailing lists because that's not
> KVM code at all and we hope to get it merged in the mainline kernel
> soon after getting feedback on the interface from the other users of
> the mmu notifiers.
> 
> Thanks!
> Andrea
> 
> -------------------------------------------------------------------------
> Check out the new SourceForge.net Marketplace.
> It's the best place to buy or sell services for
> just about anything Open Source.
> http://ad.doubleclick.net/clk;164216239;13503038;w?http://sf.net/marketplace
> _______________________________________________
> kvm-devel mailing list
> kvm-devel@lists.sourceforge.net
> https://lists.sourceforge.net/lists/listinfo/kvm-devel

-------------------------------------------------------------------------
Check out the new SourceForge.net Marketplace.
It's the best place to buy or sell services for
just about anything Open Source.
http://ad.doubleclick.net/clk;164216239;13503038;w?http://sf.net/marketplace
_______________________________________________
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel

Reply via email to