On 14/11/2016 23:15, Brijesh Singh wrote:
> From: Tom Lendacky <thomas.lenda...@amd.com>
> 
> AMD hardware adds two additional bits to aid in nested page fault handling.
> 
> Bit 32 - NPF occurred while translating the guest's final physical address
> Bit 33 - NPF occurred while translating the guest page tables

I have two questions out of curiosity, and to better understand the
differences between Intel and AMD:

1) are the two bits mutually exclusive, and is one bit always set?

2) what bit is set if the processor is reading the PDPTEs of a 32-bit
PAE guest?

Thanks,

Paolo

> The guest page tables fault indicator can be used as an aid for nested
> virtualization. Using V0 for the host, V1 for the first level guest and
> V2 for the second level guest, when both V1 and V2 are using nested paging
> there are currently a number of unnecessary instruction emulations. When
> V2 is launched shadow paging is used in V1 for the nested tables of V2. As
> a result, KVM marks these pages as RO in the host nested page tables. When
> V2 exits and we resume V1, these pages are still marked RO.
> 
> Every nested walk for a guest page table is treated as a user-level write
> access and this causes a lot of NPFs because the V1 page tables are marked
> RO in the V0 nested tables. While executing V1, when these NPFs occur KVM
> sees a write to a read-only page, emulates the V1 instruction and unprotects
> the page (marking it RW). This patch looks for cases where we get a NPF due
> to a guest page table walk where the page was marked RO. It immediately
> unprotects the page and resumes the guest, leading to far fewer instruction
> emulations when nested virtualization is used.
> 
> Signed-off-by: Tom Lendacky <thomas.lenda...@amd.com>
> Reviewed-by: Borislav Petkov <b...@suse.de>
> Signed-off-by: Brijesh Singh <brijesh.si...@amd.com>
> ---
>  arch/x86/include/asm/kvm_host.h |   11 ++++++++++-
>  arch/x86/kvm/mmu.c              |   20 ++++++++++++++++++--
>  arch/x86/kvm/svm.c              |    2 +-
>  3 files changed, 29 insertions(+), 4 deletions(-)
> 
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index bdde807..da07e17 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -191,6 +191,8 @@ enum {
>  #define PFERR_RSVD_BIT 3
>  #define PFERR_FETCH_BIT 4
>  #define PFERR_PK_BIT 5
> +#define PFERR_GUEST_FINAL_BIT 32
> +#define PFERR_GUEST_PAGE_BIT 33
>  
>  #define PFERR_PRESENT_MASK (1U << PFERR_PRESENT_BIT)
>  #define PFERR_WRITE_MASK (1U << PFERR_WRITE_BIT)
> @@ -198,6 +200,13 @@ enum {
>  #define PFERR_RSVD_MASK (1U << PFERR_RSVD_BIT)
>  #define PFERR_FETCH_MASK (1U << PFERR_FETCH_BIT)
>  #define PFERR_PK_MASK (1U << PFERR_PK_BIT)
> +#define PFERR_GUEST_FINAL_MASK (1ULL << PFERR_GUEST_FINAL_BIT)
> +#define PFERR_GUEST_PAGE_MASK (1ULL << PFERR_GUEST_PAGE_BIT)
> +
> +#define PFERR_NESTED_GUEST_PAGE (PFERR_GUEST_PAGE_MASK |     \
> +                              PFERR_USER_MASK |              \
> +                              PFERR_WRITE_MASK |             \
> +                              PFERR_PRESENT_MASK)
>  
>  /* apic attention bits */
>  #define KVM_APIC_CHECK_VAPIC 0
> @@ -1203,7 +1212,7 @@ void kvm_vcpu_deactivate_apicv(struct kvm_vcpu *vcpu);
>  
>  int kvm_emulate_hypercall(struct kvm_vcpu *vcpu);
>  
> -int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t gva, u32 error_code,
> +int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t gva, u64 error_code,
>                      void *insn, int insn_len);
>  void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva);
>  void kvm_mmu_new_cr3(struct kvm_vcpu *vcpu);
> diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
> index d9c7e98..f633d29 100644
> --- a/arch/x86/kvm/mmu.c
> +++ b/arch/x86/kvm/mmu.c
> @@ -4508,7 +4508,7 @@ static void make_mmu_pages_available(struct kvm_vcpu *vcpu)
>       kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list);
>  }
>  
> -int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code,
> +int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code,
>                      void *insn, int insn_len)
>  {
>       int r, emulation_type = EMULTYPE_RETRY;
> @@ -4527,12 +4527,28 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code,
>                       return r;
>       }
>  
> -     r = vcpu->arch.mmu.page_fault(vcpu, cr2, error_code, false);
> +     r = vcpu->arch.mmu.page_fault(vcpu, cr2, lower_32_bits(error_code),
> +                                   false);
>       if (r < 0)
>               return r;
>       if (!r)
>               return 1;
>  
> +     /*
> +      * Before emulating the instruction, check if the error code
> +      * was due to a RO violation while translating the guest page.
> +      * This can occur when using nested virtualization with nested
> +      * paging in both guests. If true, we simply unprotect the page
> +      * and resume the guest.
> +      *
> +      * Note: AMD only (since it supports the PFERR_GUEST_PAGE_MASK used
> +      *       in PFERR_NESTED_GUEST_PAGE)
> +      */
> +     if (error_code == PFERR_NESTED_GUEST_PAGE) {
> +             kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(cr2));
> +             return 1;
> +     }
> +
>       if (mmio_info_in_cache(vcpu, cr2, direct))
>               emulation_type = 0;
>  emulate:
> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
> index 8ca1eca..4e462bb 100644
> --- a/arch/x86/kvm/svm.c
> +++ b/arch/x86/kvm/svm.c
> @@ -2074,7 +2074,7 @@ static void svm_set_dr7(struct kvm_vcpu *vcpu, unsigned long value)
>  static int pf_interception(struct vcpu_svm *svm)
>  {
>       u64 fault_address = svm->vmcb->control.exit_info_2;
> -     u32 error_code;
> +     u64 error_code;
>       int r = 1;
>  
>       switch (svm->apf_reason) {
> 

Reply via email to