The current reexecute_instruction cannot properly detect failed instruction
emulation: it allows the guest to retry all instructions except those that
access an error pfn.

For example, some cases involve nested write protection - the page we want to
write is itself used as a PDE within its own mapping chain. In this case, we
should stop the emulation and report the situation to userspace.

Signed-off-by: Xiao Guangrong <xiaoguangr...@linux.vnet.ibm.com>
---
 arch/x86/include/asm/kvm_host.h |    7 +++++
 arch/x86/kvm/paging_tmpl.h      |   24 +++++++++++++-----
 arch/x86/kvm/x86.c              |   50 ++++++++++++++++++++++++--------------
 3 files changed, 55 insertions(+), 26 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index c431b33..de229e6 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -502,6 +502,13 @@ struct kvm_vcpu_arch {
                u64 msr_val;
                struct gfn_to_hva_cache data;
        } pv_eoi;
+
+       /*
+        * Indicates whether the gfn is used as a page table in the guest.
+        * It is set when fixing a page fault and is used to detect an
+        * unhandleable instruction.
+        */
+       bool target_gfn_is_pt;
 };

 struct kvm_lpage_info {
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 0453fa0..ca1be75 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -507,20 +507,27 @@ out_gpte_changed:
  */
 static bool
 FNAME(is_self_change_mapping)(struct kvm_vcpu *vcpu,
-                             struct guest_walker *walker, int user_fault)
+                             struct guest_walker *walker, int user_fault,
+                             bool *target_gfn_is_pt)
 {
        int level;
        gfn_t mask = ~(KVM_PAGES_PER_HPAGE(walker->level) - 1);
+       bool self_changed = false;
+
+       *target_gfn_is_pt = false;

        if (!(walker->pte_access & ACC_WRITE_MASK ||
              (!is_write_protection(vcpu) && !user_fault)))
                return false;

-       for (level = walker->level; level <= walker->max_level; level++)
-               if (!((walker->gfn ^ walker->table_gfn[level - 1]) & mask))
-                       return true;
+       for (level = walker->level; level <= walker->max_level; level++) {
+               gfn_t gfn = walker->gfn ^ walker->table_gfn[level - 1];
+
+               self_changed |= !(gfn & mask);
+               *target_gfn_is_pt |= !gfn;
+       }

-       return false;
+       return self_changed;
 }

 /*
@@ -548,7 +555,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t 
addr, u32 error_code,
        int level = PT_PAGE_TABLE_LEVEL;
        int force_pt_level;
        unsigned long mmu_seq;
-       bool map_writable;
+       bool map_writable, is_self_change_mapping;

        pgprintk("%s: addr %lx err %x\n", __func__, addr, error_code);

@@ -576,9 +583,12 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t 
addr, u32 error_code,
                return 0;
        }

+       is_self_change_mapping = FNAME(is_self_change_mapping)(vcpu,
+                     &walker, user_fault, &vcpu->arch.target_gfn_is_pt);
+
        if (walker.level >= PT_DIRECTORY_LEVEL)
                force_pt_level = mapping_level_dirty_bitmap(vcpu, walker.gfn)
-                  || FNAME(is_self_change_mapping)(vcpu, &walker, user_fault);
+                  || is_self_change_mapping;
        else
                force_pt_level = 1;
        if (!force_pt_level) {
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index b0a3678..44c6992 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4756,15 +4756,8 @@ static int handle_emulation_failure(struct kvm_vcpu 
*vcpu)
 static bool reexecute_instruction(struct kvm_vcpu *vcpu, unsigned long cr2)
 {
        gpa_t gpa = cr2;
+       gfn_t gfn;
        pfn_t pfn;
-       unsigned int indirect_shadow_pages;
-
-       spin_lock(&vcpu->kvm->mmu_lock);
-       indirect_shadow_pages = vcpu->kvm->arch.indirect_shadow_pages;
-       spin_unlock(&vcpu->kvm->mmu_lock);
-
-       if (!indirect_shadow_pages)
-               return false;

        if (!vcpu->arch.mmu.direct_map) {
                /*
@@ -4781,13 +4774,7 @@ static bool reexecute_instruction(struct kvm_vcpu *vcpu, 
unsigned long cr2)
                        return true;
        }

-       /*
-        * if emulation was due to access to shadowed page table
-        * and it failed try to unshadow page and re-enter the
-        * guest to let CPU execute the instruction.
-        */
-       if (kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa)))
-               return true;
+       gfn = gpa_to_gfn(gpa);

        /*
         * Do not retry the unhandleable instruction if it faults on the
@@ -4795,13 +4782,38 @@ static bool reexecute_instruction(struct kvm_vcpu 
*vcpu, unsigned long cr2)
         * retry instruction -> write #PF -> emulation fail -> retry
         * instruction -> ...
         */
-       pfn = gfn_to_pfn(vcpu->kvm, gpa_to_gfn(gpa));
-       if (!is_error_noslot_pfn(pfn)) {
-               kvm_release_pfn_clean(pfn);
+       pfn = gfn_to_pfn(vcpu->kvm, gfn);
+
+       /*
+        * If the instruction failed on the error pfn, it cannot be fixed;
+        * report the error to userspace.
+        */
+       if (is_error_noslot_pfn(pfn))
+               return false;
+
+       kvm_release_pfn_clean(pfn);
+
+       /* The instructions are well-emulated on direct mmu. */
+       if (vcpu->arch.mmu.direct_map) {
+               unsigned int indirect_shadow_pages;
+
+               spin_lock(&vcpu->kvm->mmu_lock);
+               indirect_shadow_pages = vcpu->kvm->arch.indirect_shadow_pages;
+               spin_unlock(&vcpu->kvm->mmu_lock);
+
+               if (indirect_shadow_pages)
+                       kvm_mmu_unprotect_page(vcpu->kvm, gfn);
+
                return true;
        }

-       return false;
+       kvm_mmu_unprotect_page(vcpu->kvm, gfn);
+
+       /*
+        * If the target gfn is used as a page table, the fault cannot be
+        * avoided by unprotecting the shadow page; report it to userspace.
+        */
+       return !vcpu->arch.target_gfn_is_pt;
 }

 static bool retry_instruction(struct x86_emulate_ctxt *ctxt,
-- 
1.7.7.6

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to