Re: [PATCH v9 7/8] KVM: Handle page fault for private memory

2022-11-17 Thread Chao Peng
On Wed, Nov 16, 2022 at 10:13:07PM +0000, Sean Christopherson wrote:
> On Wed, Nov 16, 2022, Ackerley Tng wrote:
> > >@@ -4173,6 +4203,22 @@ static int kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
> > >   return RET_PF_EMULATE;
> > >   }
> > >
> > >+  if (kvm_slot_can_be_private(slot) &&
> > >+  fault->is_private != kvm_mem_is_private(vcpu->kvm, fault->gfn)) {
> > >+  vcpu->run->exit_reason = KVM_EXIT_MEMORY_FAULT;
> > >+  if (fault->is_private)
> > >+  vcpu->run->memory.flags = KVM_MEMORY_EXIT_FLAG_PRIVATE;
> > >+  else
> > >+  vcpu->run->memory.flags = 0;
> > >+  vcpu->run->memory.padding = 0;
> > >+  vcpu->run->memory.gpa = fault->gfn << PAGE_SHIFT;
> > >+  vcpu->run->memory.size = PAGE_SIZE;
> > >+  return RET_PF_USER;
> > >+  }
> > >+
> > >+  if (fault->is_private)
> > >+  return kvm_faultin_pfn_private(fault);
> > >+
> > 
> > Since a memslot may not be backed by restricted memory at all, we should
> > also check that the memslot has been set up for private memory, with
> > 
> > if (fault->is_private && kvm_slot_can_be_private(slot))
> > return kvm_faultin_pfn_private(fault);
> > 
> > Without this check, restrictedmem_get_page will get called with NULL
> > in slot->restricted_file, which causes a NULL pointer dereference.
> 
> Hmm, silently skipping the faultin would result in KVM faulting in the shared
> portion of the memslot, and I believe would end up mapping that pfn as private,
> i.e. would map a non-UPM PFN as a private mapping.  For TDX and SNP, that would
> be double ungood as it would let the host access memory that is mapped private,
> i.e. lead to #MC or #PF(RMP) in the host.

That's correct.

> 
> I believe the correct solution is to drop the "can be private" check from the
> above check, and instead handle that in kvm_faultin_pfn_private().  That would
> fix another bug, e.g. if the fault is shared, the slot can't be private, but
> for whatever reason userspace marked the gfn as private.  Even though KVM
> might be able to service the fault, the correct thing to do in that case is
> to exit to userspace.

It makes sense to me.

Chao
> 
> E.g.
> 
> ---
>  arch/x86/kvm/mmu/mmu.c | 36 ++--
>  1 file changed, 22 insertions(+), 14 deletions(-)
> 
> diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
> index 10017a9f26ee..e2ac8873938e 100644
> --- a/arch/x86/kvm/mmu/mmu.c
> +++ b/arch/x86/kvm/mmu/mmu.c
> @@ -4158,11 +4158,29 @@ static inline u8 order_to_level(int order)
>   return PG_LEVEL_4K;
>  }
>  
> -static int kvm_faultin_pfn_private(struct kvm_page_fault *fault)
> +static int kvm_do_memory_fault_exit(struct kvm_vcpu *vcpu,
> + struct kvm_page_fault *fault)
> +{
> + vcpu->run->exit_reason = KVM_EXIT_MEMORY_FAULT;
> + if (fault->is_private)
> + vcpu->run->memory.flags = KVM_MEMORY_EXIT_FLAG_PRIVATE;
> + else
> + vcpu->run->memory.flags = 0;
> + vcpu->run->memory.padding = 0;
> + vcpu->run->memory.gpa = fault->gfn << PAGE_SHIFT;
> + vcpu->run->memory.size = PAGE_SIZE;
> + return RET_PF_USER;
> +}
> +
> +static int kvm_faultin_pfn_private(struct kvm_vcpu *vcpu,
> +struct kvm_page_fault *fault)
>  {
>   int order;
>   struct kvm_memory_slot *slot = fault->slot;
>  
> + if (!kvm_slot_can_be_private(slot))
> + return kvm_do_memory_fault_exit(vcpu, fault);
> +
>   if (kvm_restricted_mem_get_pfn(slot, fault->gfn, &fault->pfn, &order))
>   return RET_PF_RETRY;
>  
> @@ -4203,21 +4221,11 @@ static int kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
>   return RET_PF_EMULATE;
>   }
>  
> - if (kvm_slot_can_be_private(slot) &&
> - fault->is_private != kvm_mem_is_private(vcpu->kvm, fault->gfn)) {
> - vcpu->run->exit_reason = KVM_EXIT_MEMORY_FAULT;
> - if (fault->is_private)
> - vcpu->run->memory.flags = KVM_MEMORY_EXIT_FLAG_PRIVATE;
> - else
> - vcpu->run->memory.flags = 0;
> - vcpu->run->memory.padding = 0;
> - vcpu->run->memory.gpa = fault->gfn << PAGE_SHIFT;
> - vcpu->run->memory.size = PAGE_SIZE;
> - return RET_PF_USER;
> - }
> + if (fault->is_private != kvm_mem_is_private(vcpu->kvm, fault->gfn))
> + return kvm_do_memory_fault_exit(vcpu, fault);
>  
>   if (fault->is_private)
> - return kvm_faultin_pfn_private(fault);
> + return kvm_faultin_pfn_private(vcpu, fault);
>  
>   async = false;
>   fault->pfn = __gfn_to_pfn_memslot(slot, fault->gfn, false, &async,
> 
> base-commit: 969d761bb7b8654605937f31ae76123dcb7f15a3
> -- 
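
For context, the userspace half of the KVM_EXIT_MEMORY_FAULT contract discussed
above looks roughly like the sketch below.  Field names follow the uAPI in this
series (kvm_run::memory and KVM_MEMORY_EXIT_FLAG_PRIVATE); convert_memory() is
a hypothetical helper, since the exact conversion interface varies across
revisions of the series.

	/* VMM run-loop fragment; "run" is the mmap'ed kvm_run of the vCPU fd
	 * (assumed), and convert_memory() is hypothetical. */
	switch (run->exit_reason) {
	case KVM_EXIT_MEMORY_FAULT: {
		bool to_private = run->memory.flags & KVM_MEMORY_EXIT_FLAG_PRIVATE;

		/* Flip the reported gpa range to the access type the guest
		 * used, then re-enter the guest so KVM can retry the fault. */
		convert_memory(run->memory.gpa, run->memory.size, to_private);
		break;
	}
	default:
		break;	/* other exit reasons elided */
	}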



Re: [PATCH v9 7/8] KVM: Handle page fault for private memory

2022-11-16 Thread Sean Christopherson
On Wed, Nov 16, 2022, Ackerley Tng wrote:
> >@@ -4173,6 +4203,22 @@ static int kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
> > return RET_PF_EMULATE;
> > }
> >
> >+if (kvm_slot_can_be_private(slot) &&
> >+fault->is_private != kvm_mem_is_private(vcpu->kvm, fault->gfn)) {
> >+vcpu->run->exit_reason = KVM_EXIT_MEMORY_FAULT;
> >+if (fault->is_private)
> >+vcpu->run->memory.flags = KVM_MEMORY_EXIT_FLAG_PRIVATE;
> >+else
> >+vcpu->run->memory.flags = 0;
> >+vcpu->run->memory.padding = 0;
> >+vcpu->run->memory.gpa = fault->gfn << PAGE_SHIFT;
> >+vcpu->run->memory.size = PAGE_SIZE;
> >+return RET_PF_USER;
> >+}
> >+
> >+if (fault->is_private)
> >+return kvm_faultin_pfn_private(fault);
> >+
> 
> Since a memslot may not be backed by restricted memory at all, we should
> also check that the memslot has been set up for private memory, with
> 
>   if (fault->is_private && kvm_slot_can_be_private(slot))
>   return kvm_faultin_pfn_private(fault);
> 
> Without this check, restrictedmem_get_page will get called with NULL
> in slot->restricted_file, which causes a NULL pointer dereference.

Hmm, silently skipping the faultin would result in KVM faulting in the shared
portion of the memslot, and I believe would end up mapping that pfn as private,
i.e. would map a non-UPM PFN as a private mapping.  For TDX and SNP, that would
be double ungood as it would let the host access memory that is mapped private,
i.e. lead to #MC or #PF(RMP) in the host.

I believe the correct solution is to drop the "can be private" check from the
above check, and instead handle that in kvm_faultin_pfn_private().  That would
fix another bug, e.g. if the fault is shared, the slot can't be private, but
for whatever reason userspace marked the gfn as private.  Even though KVM
might be able to service the fault, the correct thing to do in that case is
to exit to userspace.

E.g.

---
 arch/x86/kvm/mmu/mmu.c | 36 ++--
 1 file changed, 22 insertions(+), 14 deletions(-)

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 10017a9f26ee..e2ac8873938e 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -4158,11 +4158,29 @@ static inline u8 order_to_level(int order)
return PG_LEVEL_4K;
 }
 
-static int kvm_faultin_pfn_private(struct kvm_page_fault *fault)
+static int kvm_do_memory_fault_exit(struct kvm_vcpu *vcpu,
+   struct kvm_page_fault *fault)
+{
+   vcpu->run->exit_reason = KVM_EXIT_MEMORY_FAULT;
+   if (fault->is_private)
+   vcpu->run->memory.flags = KVM_MEMORY_EXIT_FLAG_PRIVATE;
+   else
+   vcpu->run->memory.flags = 0;
+   vcpu->run->memory.padding = 0;
+   vcpu->run->memory.gpa = fault->gfn << PAGE_SHIFT;
+   vcpu->run->memory.size = PAGE_SIZE;
+   return RET_PF_USER;
+}
+
+static int kvm_faultin_pfn_private(struct kvm_vcpu *vcpu,
+  struct kvm_page_fault *fault)
 {
int order;
struct kvm_memory_slot *slot = fault->slot;
 
+   if (!kvm_slot_can_be_private(slot))
+   return kvm_do_memory_fault_exit(vcpu, fault);
+
if (kvm_restricted_mem_get_pfn(slot, fault->gfn, &fault->pfn, &order))
return RET_PF_RETRY;
 
@@ -4203,21 +4221,11 @@ static int kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
return RET_PF_EMULATE;
}
 
-   if (kvm_slot_can_be_private(slot) &&
-   fault->is_private != kvm_mem_is_private(vcpu->kvm, fault->gfn)) {
-   vcpu->run->exit_reason = KVM_EXIT_MEMORY_FAULT;
-   if (fault->is_private)
-   vcpu->run->memory.flags = KVM_MEMORY_EXIT_FLAG_PRIVATE;
-   else
-   vcpu->run->memory.flags = 0;
-   vcpu->run->memory.padding = 0;
-   vcpu->run->memory.gpa = fault->gfn << PAGE_SHIFT;
-   vcpu->run->memory.size = PAGE_SIZE;
-   return RET_PF_USER;
-   }
+   if (fault->is_private != kvm_mem_is_private(vcpu->kvm, fault->gfn))
+   return kvm_do_memory_fault_exit(vcpu, fault);
 
if (fault->is_private)
-   return kvm_faultin_pfn_private(fault);
+   return kvm_faultin_pfn_private(vcpu, fault);
 
async = false;
fault->pfn = __gfn_to_pfn_memslot(slot, fault->gfn, false, &async,

base-commit: 969d761bb7b8654605937f31ae76123dcb7f15a3
-- 
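
To make the reworked dispatch easy to audit, the decision table below is what
the diff above implements; it is derived purely from that diff, not from the
final upstream code.

	/*
	 * fault->is_private | gfn attr (kvm_mem_is_private) | slot can be private | result
	 * ------------------+-------------------------------+---------------------+--------------------------
	 * shared            | shared                        | either              | shared path (GUP)
	 * private           | private                       | yes                 | kvm_faultin_pfn_private()
	 * private           | private                       | no                  | KVM_EXIT_MEMORY_FAULT (*)
	 * mismatch          | mismatch                      | either              | KVM_EXIT_MEMORY_FAULT
	 *
	 * (*) via the !kvm_slot_can_be_private() check at the top of
	 *     kvm_faultin_pfn_private().
	 */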




Re: [PATCH v9 7/8] KVM: Handle page fault for private memory

2022-11-16 Thread Ackerley Tng
> A memslot with KVM_MEM_PRIVATE set can include both fd-based private
> memory and hva-based shared memory. Architecture code (like TDX code)
> can tell whether the on-going fault is private or not. This patch adds
> an 'is_private' field to kvm_page_fault to indicate this, and
> architecture code is expected to set it.
>
> To handle page faults for such a memslot, the handling logic differs
> depending on whether the fault is private or shared. KVM checks whether
> 'is_private' matches the host's view of the page (maintained in
> mem_attr_array).
>   - For a successful match, a private pfn is obtained with
> restrictedmem_get_page() from the private fd and a shared pfn is
> obtained with the existing get_user_pages().
>   - For a failed match, KVM causes a KVM_EXIT_MEMORY_FAULT exit to
> userspace. Userspace can then convert the memory between private and
> shared in the host's view and retry the fault.
>
> Co-developed-by: Yu Zhang 
> Signed-off-by: Yu Zhang 
> Signed-off-by: Chao Peng 
> ---
>  arch/x86/kvm/mmu/mmu.c  | 56 +++--
>  arch/x86/kvm/mmu/mmu_internal.h | 14 -
>  arch/x86/kvm/mmu/mmutrace.h |  1 +
>  arch/x86/kvm/mmu/spte.h |  6 
>  arch/x86/kvm/mmu/tdp_mmu.c  |  3 +-
>  include/linux/kvm_host.h| 28 +
>  6 files changed, 103 insertions(+), 5 deletions(-)
>
> diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
> index 67a9823a8c35..10017a9f26ee 100644
> --- a/arch/x86/kvm/mmu/mmu.c
> +++ b/arch/x86/kvm/mmu/mmu.c
> @@ -3030,7 +3030,7 @@ static int host_pfn_mapping_level(struct kvm *kvm, gfn_t gfn,
>
>  int kvm_mmu_max_mapping_level(struct kvm *kvm,
> const struct kvm_memory_slot *slot, gfn_t gfn,
> -   int max_level)
> +   int max_level, bool is_private)
>  {
>   struct kvm_lpage_info *linfo;
>   int host_level;
> @@ -3042,6 +3042,9 @@ int kvm_mmu_max_mapping_level(struct kvm *kvm,
>   break;
>   }
>
> + if (is_private)
> + return max_level;
> +
>   if (max_level == PG_LEVEL_4K)
>   return PG_LEVEL_4K;
>
> @@ -3070,7 +3073,8 @@ void kvm_mmu_hugepage_adjust(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault
>* level, which will be used to do precise, accurate accounting.
>*/
>   fault->req_level = kvm_mmu_max_mapping_level(vcpu->kvm, slot,
> -  fault->gfn, fault->max_level);
> +  fault->gfn, fault->max_level,
> +  fault->is_private);
>   if (fault->req_level == PG_LEVEL_4K || fault->huge_page_disallowed)
>   return;
>
> @@ -4141,6 +4145,32 @@ void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, struct kvm_async_pf *work)
>   kvm_mmu_do_page_fault(vcpu, work->cr2_or_gpa, 0, true);
>  }
>
> +static inline u8 order_to_level(int order)
> +{
> + BUILD_BUG_ON(KVM_MAX_HUGEPAGE_LEVEL > PG_LEVEL_1G);
> +
> + if (order >= KVM_HPAGE_GFN_SHIFT(PG_LEVEL_1G))
> + return PG_LEVEL_1G;
> +
> + if (order >= KVM_HPAGE_GFN_SHIFT(PG_LEVEL_2M))
> + return PG_LEVEL_2M;
> +
> + return PG_LEVEL_4K;
> +}
> +
> +static int kvm_faultin_pfn_private(struct kvm_page_fault *fault)
> +{
> + int order;
> + struct kvm_memory_slot *slot = fault->slot;
> +
> + if (kvm_restricted_mem_get_pfn(slot, fault->gfn, &fault->pfn, &order))
> + return RET_PF_RETRY;
> +
> + fault->max_level = min(order_to_level(order), fault->max_level);
> + fault->map_writable = !(slot->flags & KVM_MEM_READONLY);
> + return RET_PF_CONTINUE;
> +}
> +
> static int kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
> {
>   struct kvm_memory_slot *slot = fault->slot;
>@@ -4173,6 +4203,22 @@ static int kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
>   return RET_PF_EMULATE;
>   }
>
>+  if (kvm_slot_can_be_private(slot) &&
>+  fault->is_private != kvm_mem_is_private(vcpu->kvm, fault->gfn)) {
>+  vcpu->run->exit_reason = KVM_EXIT_MEMORY_FAULT;
>+  if (fault->is_private)
>+  vcpu->run->memory.flags = KVM_MEMORY_EXIT_FLAG_PRIVATE;
>+  else
>+  vcpu->run->memory.flags = 0;
>+  vcpu->run->memory.padding = 0;
>+  vcpu->run->memory.gpa = fault->gfn << PAGE_SHIFT;
>+  vcpu->run->memory.size = PAGE_SIZE;
>+  return RET_PF_USER;
>+  }
>+
>+  if (fault->is_private)
>+  return kvm_faultin_pfn_private(fault);
>+

Since a memslot may not be backed by restricted memory at all, we should
also check that the memslot has been set up for private memory, with

if (fault->is_private && kvm_slot_can_be_private(slot))
return kvm_faultin_pfn_private(fault);
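
As a quick worked example of order_to_level() from the diff above, assuming
x86 with 4KiB base pages (so KVM_HPAGE_GFN_SHIFT(PG_LEVEL_2M) == 9 and
KVM_HPAGE_GFN_SHIFT(PG_LEVEL_1G) == 18):

	/*
	 * order_to_level(0)  == PG_LEVEL_4K   -- restrictedmem returned a 4KiB page
	 * order_to_level(9)  == PG_LEVEL_2M   -- 2MiB backing (2^9 * 4KiB)
	 * order_to_level(18) == PG_LEVEL_1G   -- 1GiB backing (2^18 * 4KiB)
	 *
	 * kvm_faultin_pfn_private() clamps fault->max_level to this value, so a
	 * private gfn is never mapped with a larger page than its backing store
	 * actually provides.
	 */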

Re: [PATCH v9 7/8] KVM: Handle page fault for private memory

2022-11-01 Thread Chao Peng
On Mon, Oct 31, 2022 at 05:02:50PM -0700, Isaku Yamahata wrote:
> On Fri, Oct 28, 2022 at 02:55:45PM +0800,
> Chao Peng  wrote:
> 
> > On Wed, Oct 26, 2022 at 02:54:25PM -0700, Isaku Yamahata wrote:
> > > On Tue, Oct 25, 2022 at 11:13:43PM +0800,
> > > Chao Peng  wrote:
> > > 
> > > > A memslot with KVM_MEM_PRIVATE set can include both fd-based private
> > > > memory and hva-based shared memory. Architecture code (like TDX code)
> > > > can tell whether the on-going fault is private or not. This patch adds
> > > > an 'is_private' field to kvm_page_fault to indicate this, and
> > > > architecture code is expected to set it.
> > > >
> > > > To handle page faults for such a memslot, the handling logic differs
> > > > depending on whether the fault is private or shared. KVM checks whether
> > > > 'is_private' matches the host's view of the page (maintained in
> > > > mem_attr_array).
> > > >   - For a successful match, a private pfn is obtained with
> > > > restrictedmem_get_page() from the private fd and a shared pfn is
> > > > obtained with the existing get_user_pages().
> > > >   - For a failed match, KVM causes a KVM_EXIT_MEMORY_FAULT exit to
> > > > userspace. Userspace can then convert the memory between private and
> > > > shared in the host's view and retry the fault.
> > > > 
> > > > Co-developed-by: Yu Zhang 
> > > > Signed-off-by: Yu Zhang 
> > > > Signed-off-by: Chao Peng 
> > > > ---
> > > >  arch/x86/kvm/mmu/mmu.c  | 56 +++--
> > > >  arch/x86/kvm/mmu/mmu_internal.h | 14 -
> > > >  arch/x86/kvm/mmu/mmutrace.h |  1 +
> > > >  arch/x86/kvm/mmu/spte.h |  6 
> > > >  arch/x86/kvm/mmu/tdp_mmu.c  |  3 +-
> > > >  include/linux/kvm_host.h| 28 +
> > > >  6 files changed, 103 insertions(+), 5 deletions(-)
> > > > 
> > > > diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
> > > > index 67a9823a8c35..10017a9f26ee 100644
> > > > --- a/arch/x86/kvm/mmu/mmu.c
> > > > +++ b/arch/x86/kvm/mmu/mmu.c
> > > > @@ -3030,7 +3030,7 @@ static int host_pfn_mapping_level(struct kvm *kvm, gfn_t gfn,
> > > >  
> > > >  int kvm_mmu_max_mapping_level(struct kvm *kvm,
> > > >   const struct kvm_memory_slot *slot, gfn_t gfn,
> > > > - int max_level)
> > > > + int max_level, bool is_private)
> > > >  {
> > > > struct kvm_lpage_info *linfo;
> > > > int host_level;
> > > > @@ -3042,6 +3042,9 @@ int kvm_mmu_max_mapping_level(struct kvm *kvm,
> > > > break;
> > > > }
> > > >  
> > > > +   if (is_private)
> > > > +   return max_level;
> > > 
> > > Below, PG_LEVEL_NUM is passed in by zap_collapsible_spte_range(), which
> > > doesn't make sense.
> > > 
> > > > +
> > > > if (max_level == PG_LEVEL_4K)
> > > > return PG_LEVEL_4K;
> > > >  
> > > > @@ -3070,7 +3073,8 @@ void kvm_mmu_hugepage_adjust(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault
> > > >  * level, which will be used to do precise, accurate accounting.
> > > >  */
> > > > fault->req_level = kvm_mmu_max_mapping_level(vcpu->kvm, slot,
> > > > -fault->gfn, fault->max_level);
> > > > +fault->gfn, fault->max_level,
> > > > +fault->is_private);
> > > > if (fault->req_level == PG_LEVEL_4K || fault->huge_page_disallowed)
> > > > return;
> > > >  
> > > > @@ -4141,6 +4145,32 @@ void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, struct kvm_async_pf *work)
> > > > kvm_mmu_do_page_fault(vcpu, work->cr2_or_gpa, 0, true);
> > > >  }
> > > >  
> > > > +static inline u8 order_to_level(int order)
> > > > +{
> > > > +   BUILD_BUG_ON(KVM_MAX_HUGEPAGE_LEVEL > PG_LEVEL_1G);
> > > > +
> > > > +   if (order >= KVM_HPAGE_GFN_SHIFT(PG_LEVEL_1G))
> > > > +   return PG_LEVEL_1G;
> > > > +
> > > > +   if (order >= KVM_HPAGE_GFN_SHIFT(PG_LEVEL_2M))
> > > > +   return PG_LEVEL_2M;
> > > > +
> > > > +   return PG_LEVEL_4K;
> > > > +}
> > > > +
> > > > +static int kvm_faultin_pfn_private(struct kvm_page_fault *fault)
> > > > +{
> > > > +   int order;
> > > > +   struct kvm_memory_slot *slot = fault->slot;
> > > > +
> > > > +   if (kvm_restricted_mem_get_pfn(slot, fault->gfn, &fault->pfn, &order))
> > > > +   return RET_PF_RETRY;
> > > > +
> > > > +   fault->max_level = min(order_to_level(order), fault->max_level);
> > > > +   fault->map_writable = !(slot->flags & KVM_MEM_READONLY);
> > > > +   return RET_PF_CONTINUE;
> > > > +}
> > > > +
> > > >  static int kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
> > > >  {
> > > > struct kvm_memory_slot *slot = fault->slot;
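
Expanding on Isaku's objection for readers following along (this paraphrases
the zap path rather than quoting the series):

	/*
	 * zap_collapsible_spte_range() asks "what is the largest level the
	 * backing allows?" by calling
	 *
	 *	kvm_mmu_max_mapping_level(kvm, slot, gfn, PG_LEVEL_NUM);
	 *
	 * With the early "if (is_private) return max_level;" above, a private
	 * gfn would get PG_LEVEL_NUM back -- the count of page levels, one
	 * past the largest valid mapping level -- instead of a real level, so
	 * the private case needs its own bound (e.g. derived from the backing
	 * page order) rather than returning max_level unexamined.
	 */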

Re: [PATCH v9 7/8] KVM: Handle page fault for private memory

2022-10-31 Thread Isaku Yamahata
On Fri, Oct 28, 2022 at 02:55:45PM +0800,
Chao Peng  wrote:

> On Wed, Oct 26, 2022 at 02:54:25PM -0700, Isaku Yamahata wrote:
> > On Tue, Oct 25, 2022 at 11:13:43PM +0800,
> > Chao Peng  wrote:
> > 
> > > A memslot with KVM_MEM_PRIVATE set can include both fd-based private
> > > memory and hva-based shared memory. Architecture code (like TDX code)
> > > can tell whether the on-going fault is private or not. This patch adds
> > > an 'is_private' field to kvm_page_fault to indicate this, and
> > > architecture code is expected to set it.
> > >
> > > To handle page faults for such a memslot, the handling logic differs
> > > depending on whether the fault is private or shared. KVM checks whether
> > > 'is_private' matches the host's view of the page (maintained in
> > > mem_attr_array).
> > >   - For a successful match, a private pfn is obtained with
> > > restrictedmem_get_page() from the private fd and a shared pfn is
> > > obtained with the existing get_user_pages().
> > >   - For a failed match, KVM causes a KVM_EXIT_MEMORY_FAULT exit to
> > > userspace. Userspace can then convert the memory between private and
> > > shared in the host's view and retry the fault.
> > > 
> > > Co-developed-by: Yu Zhang 
> > > Signed-off-by: Yu Zhang 
> > > Signed-off-by: Chao Peng 
> > > ---
> > >  arch/x86/kvm/mmu/mmu.c  | 56 +++--
> > >  arch/x86/kvm/mmu/mmu_internal.h | 14 -
> > >  arch/x86/kvm/mmu/mmutrace.h |  1 +
> > >  arch/x86/kvm/mmu/spte.h |  6 
> > >  arch/x86/kvm/mmu/tdp_mmu.c  |  3 +-
> > >  include/linux/kvm_host.h| 28 +
> > >  6 files changed, 103 insertions(+), 5 deletions(-)
> > > 
> > > diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
> > > index 67a9823a8c35..10017a9f26ee 100644
> > > --- a/arch/x86/kvm/mmu/mmu.c
> > > +++ b/arch/x86/kvm/mmu/mmu.c
> > > @@ -3030,7 +3030,7 @@ static int host_pfn_mapping_level(struct kvm *kvm, gfn_t gfn,
> > >  
> > >  int kvm_mmu_max_mapping_level(struct kvm *kvm,
> > > const struct kvm_memory_slot *slot, gfn_t gfn,
> > > -   int max_level)
> > > +   int max_level, bool is_private)
> > >  {
> > >   struct kvm_lpage_info *linfo;
> > >   int host_level;
> > > @@ -3042,6 +3042,9 @@ int kvm_mmu_max_mapping_level(struct kvm *kvm,
> > >   break;
> > >   }
> > >  
> > > + if (is_private)
> > > + return max_level;
> > 
> > Below, PG_LEVEL_NUM is passed in by zap_collapsible_spte_range(), which
> > doesn't make sense.
> > 
> > > +
> > >   if (max_level == PG_LEVEL_4K)
> > >   return PG_LEVEL_4K;
> > >  
> > > @@ -3070,7 +3073,8 @@ void kvm_mmu_hugepage_adjust(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault
> > >* level, which will be used to do precise, accurate accounting.
> > >*/
> > >   fault->req_level = kvm_mmu_max_mapping_level(vcpu->kvm, slot,
> > > -  fault->gfn, fault->max_level);
> > > +  fault->gfn, fault->max_level,
> > > +  fault->is_private);
> > >   if (fault->req_level == PG_LEVEL_4K || fault->huge_page_disallowed)
> > >   return;
> > >  
> > > @@ -4141,6 +4145,32 @@ void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, struct kvm_async_pf *work)
> > >   kvm_mmu_do_page_fault(vcpu, work->cr2_or_gpa, 0, true);
> > >  }
> > >  
> > > +static inline u8 order_to_level(int order)
> > > +{
> > > + BUILD_BUG_ON(KVM_MAX_HUGEPAGE_LEVEL > PG_LEVEL_1G);
> > > +
> > > + if (order >= KVM_HPAGE_GFN_SHIFT(PG_LEVEL_1G))
> > > + return PG_LEVEL_1G;
> > > +
> > > + if (order >= KVM_HPAGE_GFN_SHIFT(PG_LEVEL_2M))
> > > + return PG_LEVEL_2M;
> > > +
> > > + return PG_LEVEL_4K;
> > > +}
> > > +
> > > +static int kvm_faultin_pfn_private(struct kvm_page_fault *fault)
> > > +{
> > > + int order;
> > > + struct kvm_memory_slot *slot = fault->slot;
> > > +
> > > + if (kvm_restricted_mem_get_pfn(slot, fault->gfn, &fault->pfn, &order))
> > > + return RET_PF_RETRY;
> > > +
> > > + fault->max_level = min(order_to_level(order), fault->max_level);
> > > + fault->map_writable = !(slot->flags & KVM_MEM_READONLY);
> > > + return RET_PF_CONTINUE;
> > > +}
> > > +
> > >  static int kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
> > >  {
> > >   struct kvm_memory_slot *slot = fault->slot;
> > > @@ -4173,6 +4203,22 @@ static int kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
> > >   return RET_PF_EMULATE;
> > >   }
> > >  
> > > + if (kvm_slot_can_be_private(slot) &&
> > > + fault->is_private != kvm_mem_is_private(vcpu->kvm, fault->gfn)) {
> > > + vcpu->run->exit_reason = KVM_EXIT_MEMORY_FAULT;
> > > + if (fault->is_private)
> > > + vcpu->run->memory.flags = KVM_MEMORY_EXIT_FLAG_PRIVATE;
> > > + else
> > > 
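
For reference, the "host's view" lookup referenced throughout this thread is a
simple xarray query; a sketch is below, assuming the mem_attr_array from this
series (the attribute flag name is assumed and may differ between revisions):

	/* Sketch only; the flag name is assumed, not quoted from the series. */
	static bool kvm_mem_is_private(struct kvm *kvm, gfn_t gfn)
	{
		/* One xarray entry per gfn; the private attribute bit set
		 * means the host currently considers this gfn private. */
		return xa_to_value(xa_load(&kvm->mem_attr_array, gfn)) &
		       KVM_MEMORY_ATTRIBUTE_PRIVATE;
	}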

Re: [PATCH v9 7/8] KVM: Handle page fault for private memory

2022-10-28 Thread Chao Peng
On Wed, Oct 26, 2022 at 02:54:25PM -0700, Isaku Yamahata wrote:
> On Tue, Oct 25, 2022 at 11:13:43PM +0800,
> Chao Peng  wrote:
> 
> > A memslot with KVM_MEM_PRIVATE set can include both fd-based private
> > memory and hva-based shared memory. Architecture code (like TDX code)
> > can tell whether the on-going fault is private or not. This patch adds
> > an 'is_private' field to kvm_page_fault to indicate this, and
> > architecture code is expected to set it.
> >
> > To handle page faults for such a memslot, the handling logic differs
> > depending on whether the fault is private or shared. KVM checks whether
> > 'is_private' matches the host's view of the page (maintained in
> > mem_attr_array).
> >   - For a successful match, a private pfn is obtained with
> > restrictedmem_get_page() from the private fd and a shared pfn is
> > obtained with the existing get_user_pages().
> >   - For a failed match, KVM causes a KVM_EXIT_MEMORY_FAULT exit to
> > userspace. Userspace can then convert the memory between private and
> > shared in the host's view and retry the fault.
> > 
> > Co-developed-by: Yu Zhang 
> > Signed-off-by: Yu Zhang 
> > Signed-off-by: Chao Peng 
> > ---
> >  arch/x86/kvm/mmu/mmu.c  | 56 +++--
> >  arch/x86/kvm/mmu/mmu_internal.h | 14 -
> >  arch/x86/kvm/mmu/mmutrace.h |  1 +
> >  arch/x86/kvm/mmu/spte.h |  6 
> >  arch/x86/kvm/mmu/tdp_mmu.c  |  3 +-
> >  include/linux/kvm_host.h| 28 +
> >  6 files changed, 103 insertions(+), 5 deletions(-)
> > 
> > diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
> > index 67a9823a8c35..10017a9f26ee 100644
> > --- a/arch/x86/kvm/mmu/mmu.c
> > +++ b/arch/x86/kvm/mmu/mmu.c
> > @@ -3030,7 +3030,7 @@ static int host_pfn_mapping_level(struct kvm *kvm, gfn_t gfn,
> >  
> >  int kvm_mmu_max_mapping_level(struct kvm *kvm,
> >   const struct kvm_memory_slot *slot, gfn_t gfn,
> > - int max_level)
> > + int max_level, bool is_private)
> >  {
> > struct kvm_lpage_info *linfo;
> > int host_level;
> > @@ -3042,6 +3042,9 @@ int kvm_mmu_max_mapping_level(struct kvm *kvm,
> > break;
> > }
> >  
> > +   if (is_private)
> > +   return max_level;
> 
> Below, PG_LEVEL_NUM is passed in by zap_collapsible_spte_range(), which
> doesn't make sense.
> 
> > +
> > if (max_level == PG_LEVEL_4K)
> > return PG_LEVEL_4K;
> >  
> > @@ -3070,7 +3073,8 @@ void kvm_mmu_hugepage_adjust(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault
> >  * level, which will be used to do precise, accurate accounting.
> >  */
> > fault->req_level = kvm_mmu_max_mapping_level(vcpu->kvm, slot,
> > -fault->gfn, fault->max_level);
> > +fault->gfn, fault->max_level,
> > +fault->is_private);
> > if (fault->req_level == PG_LEVEL_4K || fault->huge_page_disallowed)
> > return;
> >  
> > @@ -4141,6 +4145,32 @@ void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, struct kvm_async_pf *work)
> > kvm_mmu_do_page_fault(vcpu, work->cr2_or_gpa, 0, true);
> >  }
> >  
> > +static inline u8 order_to_level(int order)
> > +{
> > +   BUILD_BUG_ON(KVM_MAX_HUGEPAGE_LEVEL > PG_LEVEL_1G);
> > +
> > +   if (order >= KVM_HPAGE_GFN_SHIFT(PG_LEVEL_1G))
> > +   return PG_LEVEL_1G;
> > +
> > +   if (order >= KVM_HPAGE_GFN_SHIFT(PG_LEVEL_2M))
> > +   return PG_LEVEL_2M;
> > +
> > +   return PG_LEVEL_4K;
> > +}
> > +
> > +static int kvm_faultin_pfn_private(struct kvm_page_fault *fault)
> > +{
> > +   int order;
> > +   struct kvm_memory_slot *slot = fault->slot;
> > +
> > +   if (kvm_restricted_mem_get_pfn(slot, fault->gfn, &fault->pfn, &order))
> > +   return RET_PF_RETRY;
> > +
> > +   fault->max_level = min(order_to_level(order), fault->max_level);
> > +   fault->map_writable = !(slot->flags & KVM_MEM_READONLY);
> > +   return RET_PF_CONTINUE;
> > +}
> > +
> >  static int kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
> >  {
> > struct kvm_memory_slot *slot = fault->slot;
> > @@ -4173,6 +4203,22 @@ static int kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
> > return RET_PF_EMULATE;
> > }
> >  
> > +   if (kvm_slot_can_be_private(slot) &&
> > +   fault->is_private != kvm_mem_is_private(vcpu->kvm, fault->gfn)) {
> > +   vcpu->run->exit_reason = KVM_EXIT_MEMORY_FAULT;
> > +   if (fault->is_private)
> > +   vcpu->run->memory.flags = KVM_MEMORY_EXIT_FLAG_PRIVATE;
> > +   else
> > +   vcpu->run->memory.flags = 0;
> > +   vcpu->run->memory.padding = 0;
> > +   vcpu->run->memory.gpa = fault->gfn << PAGE_SHIFT;
> > +   vcpu->run->memory.size = PAGE_SIZE;
> > +   return RET_PF_USER;
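
A quick sanity check on the exit payload above, assuming 4KiB pages
(PAGE_SHIFT == 12):

	/* A fault on gfn 0x1234 is reported to userspace as
	 * gpa  = 0x1234 << 12 = 0x1234000 and size = 0x1000,
	 * i.e. exactly the one guest page whose private/shared attribute
	 * userspace must reconcile before re-entering the guest. */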

Re: [PATCH v9 7/8] KVM: Handle page fault for private memory

2022-10-26 Thread Isaku Yamahata
On Tue, Oct 25, 2022 at 11:13:43PM +0800,
Chao Peng  wrote:

> A memslot with KVM_MEM_PRIVATE set can include both fd-based private
> memory and hva-based shared memory. Architecture code (like TDX code)
> can tell whether the on-going fault is private or not. This patch adds
> an 'is_private' field to kvm_page_fault to indicate this, and
> architecture code is expected to set it.
>
> To handle page faults for such a memslot, the handling logic differs
> depending on whether the fault is private or shared. KVM checks whether
> 'is_private' matches the host's view of the page (maintained in
> mem_attr_array).
>   - For a successful match, a private pfn is obtained with
> restrictedmem_get_page() from the private fd and a shared pfn is
> obtained with the existing get_user_pages().
>   - For a failed match, KVM causes a KVM_EXIT_MEMORY_FAULT exit to
> userspace. Userspace can then convert the memory between private and
> shared in the host's view and retry the fault.
> 
> Co-developed-by: Yu Zhang 
> Signed-off-by: Yu Zhang 
> Signed-off-by: Chao Peng 
> ---
>  arch/x86/kvm/mmu/mmu.c  | 56 +++--
>  arch/x86/kvm/mmu/mmu_internal.h | 14 -
>  arch/x86/kvm/mmu/mmutrace.h |  1 +
>  arch/x86/kvm/mmu/spte.h |  6 
>  arch/x86/kvm/mmu/tdp_mmu.c  |  3 +-
>  include/linux/kvm_host.h| 28 +
>  6 files changed, 103 insertions(+), 5 deletions(-)
> 
> diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
> index 67a9823a8c35..10017a9f26ee 100644
> --- a/arch/x86/kvm/mmu/mmu.c
> +++ b/arch/x86/kvm/mmu/mmu.c
> @@ -3030,7 +3030,7 @@ static int host_pfn_mapping_level(struct kvm *kvm, gfn_t gfn,
>  
>  int kvm_mmu_max_mapping_level(struct kvm *kvm,
> const struct kvm_memory_slot *slot, gfn_t gfn,
> -   int max_level)
> +   int max_level, bool is_private)
>  {
>   struct kvm_lpage_info *linfo;
>   int host_level;
> @@ -3042,6 +3042,9 @@ int kvm_mmu_max_mapping_level(struct kvm *kvm,
>   break;
>   }
>  
> + if (is_private)
> + return max_level;

Below, PG_LEVEL_NUM is passed in by zap_collapsible_spte_range(), which
doesn't make sense.

> +
>   if (max_level == PG_LEVEL_4K)
>   return PG_LEVEL_4K;
>  
> @@ -3070,7 +3073,8 @@ void kvm_mmu_hugepage_adjust(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault
>* level, which will be used to do precise, accurate accounting.
>*/
>   fault->req_level = kvm_mmu_max_mapping_level(vcpu->kvm, slot,
> -  fault->gfn, fault->max_level);
> +  fault->gfn, fault->max_level,
> +  fault->is_private);
>   if (fault->req_level == PG_LEVEL_4K || fault->huge_page_disallowed)
>   return;
>  
> @@ -4141,6 +4145,32 @@ void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, struct kvm_async_pf *work)
>   kvm_mmu_do_page_fault(vcpu, work->cr2_or_gpa, 0, true);
>  }
>  
> +static inline u8 order_to_level(int order)
> +{
> + BUILD_BUG_ON(KVM_MAX_HUGEPAGE_LEVEL > PG_LEVEL_1G);
> +
> + if (order >= KVM_HPAGE_GFN_SHIFT(PG_LEVEL_1G))
> + return PG_LEVEL_1G;
> +
> + if (order >= KVM_HPAGE_GFN_SHIFT(PG_LEVEL_2M))
> + return PG_LEVEL_2M;
> +
> + return PG_LEVEL_4K;
> +}
> +
> +static int kvm_faultin_pfn_private(struct kvm_page_fault *fault)
> +{
> + int order;
> + struct kvm_memory_slot *slot = fault->slot;
> +
> + if (kvm_restricted_mem_get_pfn(slot, fault->gfn, &fault->pfn, &order))
> + return RET_PF_RETRY;
> +
> + fault->max_level = min(order_to_level(order), fault->max_level);
> + fault->map_writable = !(slot->flags & KVM_MEM_READONLY);
> + return RET_PF_CONTINUE;
> +}
> +
>  static int kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
>  {
>   struct kvm_memory_slot *slot = fault->slot;
> @@ -4173,6 +4203,22 @@ static int kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
>   return RET_PF_EMULATE;
>   }
>  
> + if (kvm_slot_can_be_private(slot) &&
> + fault->is_private != kvm_mem_is_private(vcpu->kvm, fault->gfn)) {
> + vcpu->run->exit_reason = KVM_EXIT_MEMORY_FAULT;
> + if (fault->is_private)
> + vcpu->run->memory.flags = KVM_MEMORY_EXIT_FLAG_PRIVATE;
> + else
> + vcpu->run->memory.flags = 0;
> + vcpu->run->memory.padding = 0;
> + vcpu->run->memory.gpa = fault->gfn << PAGE_SHIFT;
> + vcpu->run->memory.size = PAGE_SIZE;
> + return RET_PF_USER;
> + }
> +
> + if (fault->is_private)
> + return kvm_faultin_pfn_private(fault);
> +
>   async = false;
>   fault->pfn = __gfn_to_pfn_memslot(slot, fault->gfn,
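
Pulling the thread's conclusions together, the faultin path ends up shaped
roughly as below.  This merges the v9 diff with Sean's rework from this
thread; it is an illustrative sketch, not the final upstream code.

	static int kvm_faultin_pfn(struct kvm_vcpu *vcpu,
				   struct kvm_page_fault *fault)
	{
		/* ... no-slot and emulation handling elided ... */

		/* Host and guest disagree on private vs. shared: exit to
		 * userspace so it can convert the gfn and retry. */
		if (fault->is_private != kvm_mem_is_private(vcpu->kvm, fault->gfn))
			return kvm_do_memory_fault_exit(vcpu, fault);

		/* Private faults go through restrictedmem.  The helper itself
		 * exits to userspace if the slot cannot be private, which also
		 * avoids the NULL slot->restricted_file dereference Ackerley
		 * reported. */
		if (fault->is_private)
			return kvm_faultin_pfn_private(vcpu, fault);

		/* Shared faults take the normal GUP-based path:
		 * fault->pfn = __gfn_to_pfn_memslot(...); */
		return RET_PF_CONTINUE;
	}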