Page-merging of pages in memslots associated with a Secure VM is
currently disabled in the H_SVM_PAGE_IN handler.

This operation should be done much earlier, at the moment the VM
initiates its transition to secure mode. Delaying it increases the
probability that those pages acquire new references, making it
impossible to migrate them.

Hence, disable page-merging in H_SVM_INIT_START handling instead.

Signed-off-by: Ram Pai <linux...@us.ibm.com>
---
 Documentation/powerpc/ultravisor.rst |  1 +
 arch/powerpc/kvm/book3s_hv_uvmem.c   | 98 +++++++++++++++++++++++++++---------
 2 files changed, 76 insertions(+), 23 deletions(-)
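
Note (not part of the patch): the per-VMA ksm_madvise(...,
MADV_UNMERGEABLE, ...) call issued by kvmppc_memslot_page_merge() has
the same semantics as a userspace madvise(MADV_UNMERGEABLE) over the
memslot's HVA range. The minimal userspace sketch below only
illustrates that semantic; the 16-page mapping size is arbitrary and
purely for illustration.

/* Userspace illustration only. Marking a range MADV_UNMERGEABLE tells
 * KSM to leave its pages alone, which is the effect
 * kvmppc_memslot_page_merge() achieves per VMA via ksm_madvise() for
 * every memslot of a secure VM. */
#include <stdio.h>
#include <sys/mman.h>

int main(void)
{
        size_t len = 16 * 4096;         /* arbitrary example size */
        void *mem = mmap(NULL, len, PROT_READ | PROT_WRITE,
                         MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

        if (mem == MAP_FAILED) {
                perror("mmap");
                return 1;
        }

        /* Exclude the range from KSM page-merging so its pages do not
         * acquire extra references from page-sharing. */
        if (madvise(mem, len, MADV_UNMERGEABLE))
                perror("madvise(MADV_UNMERGEABLE)");

        munmap(mem, len);
        return 0;
}

On kernels built without CONFIG_KSM, the madvise() call above simply
fails with EINVAL.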

diff --git a/Documentation/powerpc/ultravisor.rst b/Documentation/powerpc/ultravisor.rst
index df136c8..a1c8c37 100644
--- a/Documentation/powerpc/ultravisor.rst
+++ b/Documentation/powerpc/ultravisor.rst
@@ -895,6 +895,7 @@ Return values
     One of the following values:
 
        * H_SUCCESS      on success.
+        * H_STATE        if the VM is not in a position to switch to secure.
 
 Description
 ~~~~~~~~~~~
diff --git a/arch/powerpc/kvm/book3s_hv_uvmem.c b/arch/powerpc/kvm/book3s_hv_uvmem.c
index e6f76bc..0baa293 100644
--- a/arch/powerpc/kvm/book3s_hv_uvmem.c
+++ b/arch/powerpc/kvm/book3s_hv_uvmem.c
@@ -211,6 +211,65 @@ static bool kvmppc_gfn_is_uvmem_pfn(unsigned long gfn, struct kvm *kvm,
        return false;
 }
 
+static int kvmppc_memslot_page_merge(struct kvm *kvm,
+               struct kvm_memory_slot *memslot, bool merge)
+{
+       unsigned long gfn = memslot->base_gfn;
+       unsigned long end, start = gfn_to_hva(kvm, gfn);
+       int ret = 0;
+       struct vm_area_struct *vma;
+       int merge_flag = (merge) ? MADV_MERGEABLE : MADV_UNMERGEABLE;
+
+       if (kvm_is_error_hva(start))
+               return H_STATE;
+
+       end = start + (memslot->npages << PAGE_SHIFT);
+
+       mmap_write_lock(kvm->mm);
+       do {
+               vma = find_vma_intersection(kvm->mm, start, end);
+               if (!vma) {
+                       ret = H_STATE;
+                       break;
+               }
+               ret = ksm_madvise(vma, vma->vm_start, vma->vm_end,
+                         merge_flag, &vma->vm_flags);
+               if (ret) {
+                       ret = H_STATE;
+                       break;
+               }
+               start = vma->vm_end + 1;
+       } while (end > vma->vm_end);
+
+       mmap_write_unlock(kvm->mm);
+       return ret;
+}
+
+static int __kvmppc_page_merge(struct kvm *kvm, bool merge)
+{
+       struct kvm_memslots *slots;
+       struct kvm_memory_slot *memslot;
+       int ret = 0;
+
+       slots = kvm_memslots(kvm);
+       kvm_for_each_memslot(memslot, slots) {
+               ret = kvmppc_memslot_page_merge(kvm, memslot, merge);
+               if (ret)
+                       break;
+       }
+       return ret;
+}
+
+static inline int kvmppc_disable_page_merge(struct kvm *kvm)
+{
+       return __kvmppc_page_merge(kvm, false);
+}
+
+static inline int kvmppc_enable_page_merge(struct kvm *kvm)
+{
+       return __kvmppc_page_merge(kvm, true);
+}
+
 unsigned long kvmppc_h_svm_init_start(struct kvm *kvm)
 {
        struct kvm_memslots *slots;
@@ -232,11 +291,18 @@ unsigned long kvmppc_h_svm_init_start(struct kvm *kvm)
                return H_AUTHORITY;
 
        srcu_idx = srcu_read_lock(&kvm->srcu);
+
+       /* disable page-merging for all memslots */
+       ret = kvmppc_disable_page_merge(kvm);
+       if (ret)
+               goto out;
+
+       /* register the memslot */
        slots = kvm_memslots(kvm);
        kvm_for_each_memslot(memslot, slots) {
                if (kvmppc_uvmem_slot_init(kvm, memslot)) {
                        ret = H_PARAMETER;
-                       goto out;
+                       break;
                }
                ret = uv_register_mem_slot(kvm->arch.lpid,
                                           memslot->base_gfn << PAGE_SHIFT,
@@ -245,9 +311,12 @@ unsigned long kvmppc_h_svm_init_start(struct kvm *kvm)
                if (ret < 0) {
                        kvmppc_uvmem_slot_free(kvm, memslot);
                        ret = H_PARAMETER;
-                       goto out;
+                       break;
                }
        }
+
+       if (ret)
+               kvmppc_enable_page_merge(kvm);
 out:
        srcu_read_unlock(&kvm->srcu, srcu_idx);
        return ret;
@@ -384,7 +453,7 @@ static struct page *kvmppc_uvmem_get_page(unsigned long gpa, struct kvm *kvm)
  */
 static int kvmppc_svm_page_in(struct vm_area_struct *vma, unsigned long start,
                   unsigned long end, unsigned long gpa, struct kvm *kvm,
-                  unsigned long page_shift, bool *downgrade)
+                  unsigned long page_shift)
 {
        unsigned long src_pfn, dst_pfn = 0;
        struct migrate_vma mig;
@@ -400,18 +469,6 @@ static int kvmppc_svm_page_in(struct vm_area_struct *vma, unsigned long start,
        mig.src = &src_pfn;
        mig.dst = &dst_pfn;
 
-       /*
-        * We come here with mmap_lock write lock held just for
-        * ksm_madvise(), otherwise we only need read mmap_lock.
-        * Hence downgrade to read lock once ksm_madvise() is done.
-        */
-       ret = ksm_madvise(vma, vma->vm_start, vma->vm_end,
-                         MADV_UNMERGEABLE, &vma->vm_flags);
-       mmap_write_downgrade(kvm->mm);
-       *downgrade = true;
-       if (ret)
-               return ret;
-
        ret = migrate_vma_setup(&mig);
        if (ret)
                return ret;
@@ -503,7 +560,6 @@ unsigned long kvmppc_h_svm_page_in(struct kvm *kvm, unsigned long gpa,
                unsigned long flags,
                unsigned long page_shift)
 {
-       bool downgrade = false;
        unsigned long start, end;
        struct vm_area_struct *vma;
        int srcu_idx;
@@ -524,7 +580,7 @@ unsigned long kvmppc_h_svm_page_in(struct kvm *kvm, unsigned long gpa,
 
        ret = H_PARAMETER;
        srcu_idx = srcu_read_lock(&kvm->srcu);
-       mmap_write_lock(kvm->mm);
+       mmap_read_lock(kvm->mm);
 
        start = gfn_to_hva(kvm, gfn);
        if (kvm_is_error_hva(start))
@@ -540,16 +596,12 @@ unsigned long kvmppc_h_svm_page_in(struct kvm *kvm, unsigned long gpa,
        if (!vma || vma->vm_start > start || vma->vm_end < end)
                goto out_unlock;
 
-       if (!kvmppc_svm_page_in(vma, start, end, gpa, kvm, page_shift,
-                               &downgrade))
+       if (!kvmppc_svm_page_in(vma, start, end, gpa, kvm, page_shift))
                ret = H_SUCCESS;
 out_unlock:
        mutex_unlock(&kvm->arch.uvmem_lock);
 out:
-       if (downgrade)
-               mmap_read_unlock(kvm->mm);
-       else
-               mmap_write_unlock(kvm->mm);
+       mmap_read_unlock(kvm->mm);
        srcu_read_unlock(&kvm->srcu, srcu_idx);
        return ret;
 }
-- 
1.8.3.1
