Le 31/05/2020 à 04:27, Ram Pai a écrit :
During the life of SVM, its GFNs can transition from secure to shared
state and vice-versa. Since the kernel does not track GFNs that are
shared, it is not possible to disambiguate a shared GFN from a GFN whose
PFN has not yet been migrated to a device-PFN.

The ability to identify a shared GFN is needed to skip migrating its PFN
to device PFN. This functionality is leveraged in a subsequent patch.

Add the ability to identify the state of a GFN.

Cc: Paul Mackerras <pau...@ozlabs.org>
Cc: Benjamin Herrenschmidt <b...@kernel.crashing.org>
Cc: Michael Ellerman <m...@ellerman.id.au>
Cc: Bharata B Rao <bhar...@linux.ibm.com>
Cc: Aneesh Kumar K.V <aneesh.ku...@linux.ibm.com>
Cc: Sukadev Bhattiprolu <suka...@linux.vnet.ibm.com>
Cc: Laurent Dufour <lduf...@linux.ibm.com>
Cc: Thiago Jung Bauermann <bauer...@linux.ibm.com>
Cc: David Gibson <da...@gibson.dropbear.id.au>
Cc: Claudio Carvalho <cclau...@linux.ibm.com>
Cc: kvm-...@vger.kernel.org
Cc: linuxppc-dev@lists.ozlabs.org
Reviewed-by: Thiago Jung Bauermann <bauer...@linux.ibm.com>
Signed-off-by: Ram Pai <linux...@us.ibm.com>
---
  arch/powerpc/include/asm/kvm_book3s_uvmem.h |   6 +-
  arch/powerpc/kvm/book3s_64_mmu_radix.c      |   2 +-
  arch/powerpc/kvm/book3s_hv.c                |   2 +-
  arch/powerpc/kvm/book3s_hv_uvmem.c          | 115 ++++++++++++++++++++++++++--
  4 files changed, 113 insertions(+), 12 deletions(-)

diff --git a/arch/powerpc/include/asm/kvm_book3s_uvmem.h 
b/arch/powerpc/include/asm/kvm_book3s_uvmem.h
index 5a9834e..f0c5708 100644
--- a/arch/powerpc/include/asm/kvm_book3s_uvmem.h
+++ b/arch/powerpc/include/asm/kvm_book3s_uvmem.h
@@ -21,7 +21,8 @@ unsigned long kvmppc_h_svm_page_out(struct kvm *kvm,
  int kvmppc_send_page_to_uv(struct kvm *kvm, unsigned long gfn);
  unsigned long kvmppc_h_svm_init_abort(struct kvm *kvm);
  void kvmppc_uvmem_drop_pages(const struct kvm_memory_slot *free,
-                            struct kvm *kvm, bool skip_page_out);
+                            struct kvm *kvm, bool skip_page_out,
+                            bool purge_gfn);
  #else
  static inline int kvmppc_uvmem_init(void)
  {
@@ -75,6 +76,7 @@ static inline int kvmppc_send_page_to_uv(struct kvm *kvm, 
unsigned long gfn)
static inline void
  kvmppc_uvmem_drop_pages(const struct kvm_memory_slot *free,
-                       struct kvm *kvm, bool skip_page_out) { }
+                       struct kvm *kvm, bool skip_page_out,
+                       bool purge_gfn) { }
  #endif /* CONFIG_PPC_UV */
  #endif /* __ASM_KVM_BOOK3S_UVMEM_H__ */
diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c 
b/arch/powerpc/kvm/book3s_64_mmu_radix.c
index 803940d..3448459 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_radix.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c
@@ -1100,7 +1100,7 @@ void kvmppc_radix_flush_memslot(struct kvm *kvm,
        unsigned int shift;
if (kvm->arch.secure_guest & KVMPPC_SECURE_INIT_START)
-               kvmppc_uvmem_drop_pages(memslot, kvm, true);
+               kvmppc_uvmem_drop_pages(memslot, kvm, true, false);

Why is purge_gfn false here?
This function is called when dropping a hot-plugged memslot.
That being said, when called by kvmppc_core_commit_memory_region_hv(), the memslot is then freed by kvmppc_uvmem_slot_free(), so the shared state will not remain for long, but there is a window...

if (kvm->arch.secure_guest & KVMPPC_SECURE_INIT_DONE)
                return;
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 103d13e..4c62bfe 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -5467,7 +5467,7 @@ static int kvmhv_svm_off(struct kvm *kvm)
                        continue;
kvm_for_each_memslot(memslot, slots) {
-                       kvmppc_uvmem_drop_pages(memslot, kvm, true);
+                       kvmppc_uvmem_drop_pages(memslot, kvm, true, true);
                        uv_unregister_mem_slot(kvm->arch.lpid, memslot->id);
                }
        }
diff --git a/arch/powerpc/kvm/book3s_hv_uvmem.c 
b/arch/powerpc/kvm/book3s_hv_uvmem.c
index ea4a1f1..2ef1e03 100644
--- a/arch/powerpc/kvm/book3s_hv_uvmem.c
+++ b/arch/powerpc/kvm/book3s_hv_uvmem.c
@@ -99,14 +99,56 @@
  static DEFINE_SPINLOCK(kvmppc_uvmem_bitmap_lock);
#define KVMPPC_UVMEM_PFN (1UL << 63)
+#define KVMPPC_UVMEM_SHARED    (1UL << 62)
+#define KVMPPC_UVMEM_FLAG_MASK (KVMPPC_UVMEM_PFN | KVMPPC_UVMEM_SHARED)
+#define KVMPPC_UVMEM_PFN_MASK  (~KVMPPC_UVMEM_FLAG_MASK)
struct kvmppc_uvmem_slot {
        struct list_head list;
        unsigned long nr_pfns;
        unsigned long base_pfn;
+       /*
+        * pfns array has an entry for each GFN of the memory slot.
+        *
+        * The GFN can be in one of the following states.
+        *
+        * (a) Secure - The GFN is secure. Only Ultravisor can access it.
+        * (b) Shared - The GFN is shared. Both Hypervisor and Ultravisor
+        *              can access it.
+        * (c) Normal - The GFN is normal.  Only Hypervisor can access it.
+        *
+        * Secure GFN is associated with a devicePFN. Its pfn[] has
+        * KVMPPC_UVMEM_PFN flag set, KVMPPC_UVMEM_SHARED flag unset,
+        * and has the value of the device PFN.
+        *
+        * Shared GFN is associated with a memoryPFN. Its pfn[] has
+        * KVMPPC_UVMEM_SHARED flag set. But its KVMPPC_UVMEM_PFN is not set,
+        * and there is no PFN value stored.
+        *
+        * Normal GFN is not associated with memoryPFN. Its pfn[] has
+        * KVMPPC_UVMEM_SHARED and KVMPPC_UVMEM_PFN flag unset, and no PFN
+        * value is stored.
+        *
+        * Any other combination of values in pfn[] leads to undefined
+        * behavior.
+        *
+        * Life cycle of a GFN --
+        *
+        * ---------------------------------------------------------
+        * |        |     Share  |  Unshare | SVM       |slot      |
+        * |        |            |          | abort/    |flush     |
+        * |        |            |          | terminate |          |
+        * ---------------------------------------------------------
+        * |        |            |          |           |          |
+        * | Secure |     Shared | Secure   |Normal     |Secure    |
+        * |        |            |          |           |          |
+        * | Shared |     Shared | Secure   |Normal     |Shared    |
+        * |        |            |          |           |          |
+        * | Normal |     Shared | Secure   |Normal     |Normal    |
+        * ---------------------------------------------------------
+        */
        unsigned long *pfns;
  };
-
  struct kvmppc_uvmem_page_pvt {
        struct kvm *kvm;
        unsigned long gpa;
@@ -175,7 +217,12 @@ static void kvmppc_uvmem_pfn_remove(unsigned long gfn, 
struct kvm *kvm)
list_for_each_entry(p, &kvm->arch.uvmem_pfns, list) {
                if (gfn >= p->base_pfn && gfn < p->base_pfn + p->nr_pfns) {
-                       p->pfns[gfn - p->base_pfn] = 0;
+                       /*
+                        * Reset everything, but keep the KVMPPC_UVMEM_SHARED
+                        * flag intact.  A gfn continues to be shared or
+                        * unshared, with or without an associated device pfn.
+                        */
+                       p->pfns[gfn - p->base_pfn] &= KVMPPC_UVMEM_SHARED;
                        return;
                }
        }
@@ -193,7 +240,7 @@ static bool kvmppc_gfn_is_uvmem_pfn(unsigned long gfn, 
struct kvm *kvm,
                        if (p->pfns[index] & KVMPPC_UVMEM_PFN) {
                                if (uvmem_pfn)
                                        *uvmem_pfn = p->pfns[index] &
-                                                    ~KVMPPC_UVMEM_PFN;
+                                                    KVMPPC_UVMEM_PFN_MASK;
                                return true;
                        } else
                                return false;
@@ -202,6 +249,38 @@ static bool kvmppc_gfn_is_uvmem_pfn(unsigned long gfn, 
struct kvm *kvm,
        return false;
  }
+static void kvmppc_gfn_uvmem_shared(unsigned long gfn, struct kvm *kvm,
+               bool set)
+{
+       struct kvmppc_uvmem_slot *p;
+
+       list_for_each_entry(p, &kvm->arch.uvmem_pfns, list) {
+               if (gfn >= p->base_pfn && gfn < p->base_pfn + p->nr_pfns) {
+                       unsigned long index = gfn - p->base_pfn;
+
+                       if (set)
+                               p->pfns[index] |= KVMPPC_UVMEM_SHARED;
+                       else
+                               p->pfns[index] &= ~KVMPPC_UVMEM_SHARED;
+                       return;
+               }
+       }
+}
+
+bool kvmppc_gfn_is_uvmem_shared(unsigned long gfn, struct kvm *kvm)
+{
+       struct kvmppc_uvmem_slot *p;
+
+       list_for_each_entry(p, &kvm->arch.uvmem_pfns, list) {
+               if (gfn >= p->base_pfn && gfn < p->base_pfn + p->nr_pfns) {
+                       unsigned long index = gfn - p->base_pfn;
+
+                       return (p->pfns[index] & KVMPPC_UVMEM_SHARED);
+               }
+       }
+       return false;
+}
+
  unsigned long kvmppc_h_svm_init_start(struct kvm *kvm)
  {
        struct kvm_memslots *slots;
@@ -256,9 +335,13 @@ unsigned long kvmppc_h_svm_init_done(struct kvm *kvm)
   * is HV side fault on these pages. Next we *get* these pages, forcing
   * fault on them, do fault time migration to replace the device PTEs in
   * QEMU page table with normal PTEs from newly allocated pages.
+ *
+ * if @purge_gfn is set, cleanup any information related to each of
+ * the GFNs associated with this memory slot.
   */
  void kvmppc_uvmem_drop_pages(const struct kvm_memory_slot *free,
-                            struct kvm *kvm, bool skip_page_out)
+                            struct kvm *kvm, bool skip_page_out,
+                            bool purge_gfn)
  {
        int i;
        struct kvmppc_uvmem_page_pvt *pvt;
@@ -269,11 +352,22 @@ void kvmppc_uvmem_drop_pages(const struct kvm_memory_slot 
*free,
                struct page *uvmem_page;
mutex_lock(&kvm->arch.uvmem_lock);
+
+               if (purge_gfn) {
+                       /*
+                        * cleanup the shared status of the GFN here.
+                        * Any device PFN associated with the GFN shall
+                        * be cleaned up later, in kvmppc_uvmem_page_free()
+                        * when the device PFN is actually disassociated
+                        * from the GFN.
+                        */
+                       kvmppc_gfn_uvmem_shared(gfn, kvm, false);
+               }
+
                if (!kvmppc_gfn_is_uvmem_pfn(gfn, kvm, &uvmem_pfn)) {
                        mutex_unlock(&kvm->arch.uvmem_lock);
                        continue;
                }
-
                uvmem_page = pfn_to_page(uvmem_pfn);
                pvt = uvmem_page->zone_device_data;
                pvt->skip_page_out = skip_page_out;
@@ -304,7 +398,7 @@ unsigned long kvmppc_h_svm_init_abort(struct kvm *kvm)
        srcu_idx = srcu_read_lock(&kvm->srcu);
kvm_for_each_memslot(memslot, kvm_memslots(kvm))
-               kvmppc_uvmem_drop_pages(memslot, kvm, false);
+               kvmppc_uvmem_drop_pages(memslot, kvm, false, true);
srcu_read_unlock(&kvm->srcu, srcu_idx);
@@ -470,8 +564,11 @@ static unsigned long kvmppc_share_page(struct kvm *kvm, unsigned long gpa,
                goto retry;
        }
- if (!uv_page_in(kvm->arch.lpid, pfn << page_shift, gpa, 0, page_shift))
+       if (!uv_page_in(kvm->arch.lpid, pfn << page_shift, gpa, 0,
+                               page_shift)) {
+               kvmppc_gfn_uvmem_shared(gfn, kvm, true);
                ret = H_SUCCESS;
+       }
        kvm_release_pfn_clean(pfn);
        mutex_unlock(&kvm->arch.uvmem_lock);
  out:
@@ -527,8 +624,10 @@ unsigned long kvmppc_h_svm_page_in(struct kvm *kvm, 
unsigned long gpa,
                goto out_unlock;
if (!kvmppc_svm_page_in(vma, start, end, gpa, kvm, page_shift,
-                               &downgrade))
+                               &downgrade)) {
+               kvmppc_gfn_uvmem_shared(gfn, kvm, false);
                ret = H_SUCCESS;
+       }
  out_unlock:
        mutex_unlock(&kvm->arch.uvmem_lock);
  out:


Reply via email to