Re: [kvm-devel] KVM swapping with mmu notifiers

Andrea Arcangeli Tue, 22 Jan 2008 09:18:17 -0800

On Tue, Jan 22, 2008 at 06:17:38PM +0200, Avi Kivity wrote:
> There can be more than one rmapp per hva.  Real world example:
>
>  memslot 1: gfn range 0xe0000000 - 0xe0800000 @ hva 0x10000000 (8MB
> framebuffer)
>  memslot 2: gfn range 0xa0000 - 0xa8000 @ hva 0x10000000 (32KB VGA window)
>
> If the guest accesses gfn 0xa0000 through one gva, and gfn 0xe0000000 
> through a second gva, then you will have two rmap chains for hva 
> 0x10000000.
>
> This doesn't happen today because we use the alias mechanism in qemu, but 
> we don't forbid it either.


Ok, then it's better to be sure there will be no problems if aliases are
removed. This should work fine with multiple rmap chains per hva too.

diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index 4086080..c527d7d 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -18,6 +18,7 @@ config KVM
        tristate "Kernel-based Virtual Machine (KVM) support"
        depends on ARCH_SUPPORTS_KVM && EXPERIMENTAL
        select PREEMPT_NOTIFIERS
+       select MMU_NOTIFIER
        select ANON_INODES
        ---help---
          Support hosting fully virtualized guest machines using hardware
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index cb62ef6..a025fde 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -532,6 +532,57 @@ static void rmap_write_protect(struct kvm *kvm, u64 gfn)
                kvm_flush_remote_tlbs(kvm);
 }
 
+static void unmap_spte(struct kvm *kvm, u64 *spte)
+{
+       struct page *page = pfn_to_page((*spte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT);
+       get_page(page);
+       rmap_remove(kvm, spte);
+       set_shadow_pte(spte, shadow_trap_nonpresent_pte);
+       kvm_flush_remote_tlbs(kvm);
+       __free_page(page);
+}
+
+static void kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp)
+{
+       u64 *spte, *curr_spte;
+
+       spte = rmap_next(kvm, rmapp, NULL);
+       while (spte) {
+               BUG_ON(!(*spte & PT_PRESENT_MASK));
+               rmap_printk("kvm_rmap_unmap_hva: spte %p %llx\n", spte, *spte);
+               curr_spte = spte;
+               spte = rmap_next(kvm, rmapp, spte);
+               unmap_spte(kvm, curr_spte);
+       }
+}
+
+void kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
+{
+       int i;
+
+       /*
+        * If mmap_sem isn't taken, we can look up the memslots with only
+        * the mmu_lock by skipping over the slots with userspace_addr == 0.
+        */
+       spin_lock(&kvm->mmu_lock);
+       for (i = 0; i < kvm->nmemslots; i++) {
+               struct kvm_memory_slot *memslot = &kvm->memslots[i];
+               unsigned long start = memslot->userspace_addr;
+               unsigned long end;
+
+               /* mmu_lock protects userspace_addr */
+               if (!start)
+                       continue;
+
+               end = start + (memslot->npages << PAGE_SHIFT);
+               if (hva >= start && hva < end) {
+                       gfn_t gfn_offset = (hva - start) >> PAGE_SHIFT;
+                       kvm_unmap_rmapp(kvm, &memslot->rmap[gfn_offset]);
+               }
+       }
+       spin_unlock(&kvm->mmu_lock);
+}
+
 #ifdef MMU_DEBUG
 static int is_empty_shadow_page(u64 *spt)
 {
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 8f94a0b..5c445dd 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3167,6 +3167,35 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
        free_page((unsigned long)vcpu->arch.pio_data);
 }
 
+static inline struct kvm *mmu_notifier_to_kvm(struct mmu_notifier *mn)
+{
+       return container_of(mn, struct kvm, mmu_notifier);
+}
+
+void kvm_mmu_notifier_invalidate_page(struct mmu_notifier *mn,
+                                     struct mm_struct *mm,
+                                     unsigned long address)
+{
+       struct kvm *kvm = mmu_notifier_to_kvm(mn);
+       BUG_ON(mm != kvm->mm);
+       kvm_unmap_hva(kvm, address);
+}
+
+void kvm_mmu_notifier_invalidate_range(struct mmu_notifier *mn,
+                                      struct mm_struct *mm,
+                                      unsigned long start, unsigned long end)
+{
+       for (; start < end; start += PAGE_SIZE)
+               kvm_mmu_notifier_invalidate_page(mn, mm, start);
+}
+
+static const struct mmu_notifier_ops kvm_mmu_notifier_ops = {
+       .invalidate_range       = kvm_mmu_notifier_invalidate_range,
+       .invalidate_page        = kvm_mmu_notifier_invalidate_page,
+       /* age page will drop the spte so follow_page will set the young bit */
+       .age_page               = kvm_mmu_notifier_invalidate_page,
+};
+
 struct  kvm *kvm_arch_create_vm(void)
 {
        struct kvm *kvm = kzalloc(sizeof(struct kvm), GFP_KERNEL);
@@ -3175,6 +3204,7 @@ struct  kvm *kvm_arch_create_vm(void)
                return ERR_PTR(-ENOMEM);
 
        INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
+       kvm->mmu_notifier.ops = &kvm_mmu_notifier_ops;
 
        return kvm;
 }
diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h
index d6db0de..f13d1c3 100644
--- a/include/asm-x86/kvm_host.h
+++ b/include/asm-x86/kvm_host.h
@@ -404,6 +404,7 @@ int kvm_mmu_create(struct kvm_vcpu *vcpu);
 int kvm_mmu_setup(struct kvm_vcpu *vcpu);
 void kvm_mmu_set_nonpresent_ptes(u64 trap_pte, u64 notrap_pte);
 
+void kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
 int kvm_mmu_reset_context(struct kvm_vcpu *vcpu);
 void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot);
 void kvm_mmu_zap_all(struct kvm *kvm);
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index ea4764b..51c9bb8 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -118,6 +118,7 @@ struct kvm {
        struct kvm_io_bus pio_bus;
        struct kvm_vm_stat stat;
        struct kvm_arch arch;
+       struct mmu_notifier mmu_notifier;
 };
 
 /* The guest did something we don't support. */
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 4295623..9e229ac 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -165,6 +165,7 @@ static struct kvm *kvm_create_vm(void)
 
        kvm->mm = current->mm;
        atomic_inc(&kvm->mm->mm_count);
+       mmu_notifier_register(&kvm->mmu_notifier, kvm->mm);
        spin_lock_init(&kvm->mmu_lock);
        kvm_io_bus_init(&kvm->pio_bus);
        mutex_init(&kvm->lock);

-------------------------------------------------------------------------
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse0120000070mrt/direct/01/
_______________________________________________
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel

Re: [kvm-devel] KVM swapping with mmu notifiers

Reply via email to