Re: [PATCH v5 29/36] KVM: arm64: Use page-table to track page ownership

2021-03-15 Thread Will Deacon
On Mon, Mar 15, 2021 at 04:53:18PM +0000, Quentin Perret wrote:
> On Monday 15 Mar 2021 at 16:36:19 (+0000), Will Deacon wrote:
> > On Mon, Mar 15, 2021 at 02:35:29PM +0000, Quentin Perret wrote:
> > > As the host stage 2 will be identity mapped, all the .hyp memory regions
> > > and/or memory pages donated to protected guests will have to be marked
> > > invalid in the host stage 2 page-table. At the same time, the hypervisor
> > > will need a way to track the ownership of each physical page to ensure
> > > memory sharing or donation between entities (host, guests, hypervisor) is
> > > legal.
> > > 
> > > In order to enable this tracking at EL2, let's use the host stage 2
> > > page-table itself. The idea is to use the top bits of invalid mappings
> > > to store the unique identifier of the page owner. The page-table owner
> > > (the host) gets identifier 0 such that, at boot time, it owns the entire
> > > IPA space as the pgd starts zeroed.
> > > 
> > > Provide kvm_pgtable_stage2_set_owner() which allows to modify the
> > > ownership of pages in the host stage 2. It re-uses most of the map()
> > > logic, but ends up creating invalid mappings instead. This impacts
> > > how we do refcount as we now need to count invalid mappings when they
> > > are used for ownership tracking.
> > > 
> > > Signed-off-by: Quentin Perret 
> > > ---
> > >  arch/arm64/include/asm/kvm_pgtable.h |  21 +
> > >  arch/arm64/kvm/hyp/pgtable.c | 127 ++-
> > >  2 files changed, 124 insertions(+), 24 deletions(-)
> > > 
> > > diff --git a/arch/arm64/include/asm/kvm_pgtable.h 
> > > b/arch/arm64/include/asm/kvm_pgtable.h
> > > index 4ae19247837b..683e96abdc24 100644
> > > --- a/arch/arm64/include/asm/kvm_pgtable.h
> > > +++ b/arch/arm64/include/asm/kvm_pgtable.h
> > > @@ -238,6 +238,27 @@ int kvm_pgtable_stage2_map(struct kvm_pgtable *pgt, 
> > > u64 addr, u64 size,
> > >  u64 phys, enum kvm_pgtable_prot prot,
> > >  void *mc);
> > >  
> > > +/**
> > > + * kvm_pgtable_stage2_set_owner() - Annotate invalid mappings with 
> > > metadata
> > > + *   encoding the ownership of a page in 
> > > the
> > > + *   IPA space.
> > 
> > The function does more than this, though, as it will also go ahead and unmap
> > existing valid mappings which I think should be mentioned here, no?
> 
> Right, I see what you mean. How about:
> 
> 'Unmap and annotate pages in the IPA space to track ownership'

I think I'd go with:

'Unmap pages and annotate the invalid mappings with ownership metadata for
 the unmapped IPA range.'

as it's the page-table which is annotated, not the actual pages (which could
potentially be mapped by other page-tables).

Will


Re: [PATCH v5 29/36] KVM: arm64: Use page-table to track page ownership

2021-03-15 Thread Quentin Perret
On Monday 15 Mar 2021 at 16:36:19 (+0000), Will Deacon wrote:
> On Mon, Mar 15, 2021 at 02:35:29PM +0000, Quentin Perret wrote:
> > As the host stage 2 will be identity mapped, all the .hyp memory regions
> > and/or memory pages donated to protected guests will have to be marked
> > invalid in the host stage 2 page-table. At the same time, the hypervisor
> > will need a way to track the ownership of each physical page to ensure
> > memory sharing or donation between entities (host, guests, hypervisor) is
> > legal.
> > 
> > In order to enable this tracking at EL2, let's use the host stage 2
> > page-table itself. The idea is to use the top bits of invalid mappings
> > to store the unique identifier of the page owner. The page-table owner
> > (the host) gets identifier 0 such that, at boot time, it owns the entire
> > IPA space as the pgd starts zeroed.
> > 
> > Provide kvm_pgtable_stage2_set_owner() which allows to modify the
> > ownership of pages in the host stage 2. It re-uses most of the map()
> > logic, but ends up creating invalid mappings instead. This impacts
> > how we do refcount as we now need to count invalid mappings when they
> > are used for ownership tracking.
> > 
> > Signed-off-by: Quentin Perret 
> > ---
> >  arch/arm64/include/asm/kvm_pgtable.h |  21 +
> >  arch/arm64/kvm/hyp/pgtable.c | 127 ++-
> >  2 files changed, 124 insertions(+), 24 deletions(-)
> > 
> > diff --git a/arch/arm64/include/asm/kvm_pgtable.h 
> > b/arch/arm64/include/asm/kvm_pgtable.h
> > index 4ae19247837b..683e96abdc24 100644
> > --- a/arch/arm64/include/asm/kvm_pgtable.h
> > +++ b/arch/arm64/include/asm/kvm_pgtable.h
> > @@ -238,6 +238,27 @@ int kvm_pgtable_stage2_map(struct kvm_pgtable *pgt, 
> > u64 addr, u64 size,
> >u64 phys, enum kvm_pgtable_prot prot,
> >void *mc);
> >  
> > +/**
> > + * kvm_pgtable_stage2_set_owner() - Annotate invalid mappings with metadata
> > + * encoding the ownership of a page in the
> > + * IPA space.
> 
> The function does more than this, though, as it will also go ahead and unmap
> existing valid mappings which I think should be mentioned here, no?

Right, I see what you mean. How about:

'Unmap and annotate pages in the IPA space to track ownership'

> > +int kvm_pgtable_stage2_set_owner(struct kvm_pgtable *pgt, u64 addr, u64 
> > size,
> > +void *mc, u8 owner_id)
> > +{
> > +   int ret;
> > +   struct stage2_map_data map_data = {
> > +   .phys   = KVM_PHYS_INVALID,
> > +   .mmu= pgt->mmu,
> > +   .memcache   = mc,
> > +   .mm_ops = pgt->mm_ops,
> > +   .owner_id   = owner_id,
> > +   };
> > +   struct kvm_pgtable_walker walker = {
> > +   .cb = stage2_map_walker,
> > +   .flags  = KVM_PGTABLE_WALK_TABLE_PRE |
> > + KVM_PGTABLE_WALK_LEAF |
> > + KVM_PGTABLE_WALK_TABLE_POST,
> > +   .arg= &map_data,
> > +   };
> > +
> > +   if (owner_id > KVM_MAX_OWNER_ID)
> > +   return -EINVAL;
> > +
> > +   ret = kvm_pgtable_walk(pgt, addr, size, &walker);
> > +   dsb(ishst);
> 
> Why is the DSB needed here? afaict, we only ever unmap a valid entry (which
> will have a DSB as part of the TLBI sequence) or we update the owner for an
> existing invalid entry, in which case the walker doesn't care.

Indeed, that is now unnecessary. I'll remove it.

Thanks,
Quentin


Re: [PATCH v5 29/36] KVM: arm64: Use page-table to track page ownership

2021-03-15 Thread Will Deacon
On Mon, Mar 15, 2021 at 02:35:29PM +0000, Quentin Perret wrote:
> As the host stage 2 will be identity mapped, all the .hyp memory regions
> and/or memory pages donated to protected guests will have to be marked
> invalid in the host stage 2 page-table. At the same time, the hypervisor
> will need a way to track the ownership of each physical page to ensure
> memory sharing or donation between entities (host, guests, hypervisor) is
> legal.
> 
> In order to enable this tracking at EL2, let's use the host stage 2
> page-table itself. The idea is to use the top bits of invalid mappings
> to store the unique identifier of the page owner. The page-table owner
> (the host) gets identifier 0 such that, at boot time, it owns the entire
> IPA space as the pgd starts zeroed.
> 
> Provide kvm_pgtable_stage2_set_owner() which allows to modify the
> ownership of pages in the host stage 2. It re-uses most of the map()
> logic, but ends up creating invalid mappings instead. This impacts
> how we do refcount as we now need to count invalid mappings when they
> are used for ownership tracking.
> 
> Signed-off-by: Quentin Perret 
> ---
>  arch/arm64/include/asm/kvm_pgtable.h |  21 +
>  arch/arm64/kvm/hyp/pgtable.c | 127 ++-
>  2 files changed, 124 insertions(+), 24 deletions(-)
> 
> diff --git a/arch/arm64/include/asm/kvm_pgtable.h 
> b/arch/arm64/include/asm/kvm_pgtable.h
> index 4ae19247837b..683e96abdc24 100644
> --- a/arch/arm64/include/asm/kvm_pgtable.h
> +++ b/arch/arm64/include/asm/kvm_pgtable.h
> @@ -238,6 +238,27 @@ int kvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 
> addr, u64 size,
>  u64 phys, enum kvm_pgtable_prot prot,
>  void *mc);
>  
> +/**
> + * kvm_pgtable_stage2_set_owner() - Annotate invalid mappings with metadata
> + *   encoding the ownership of a page in the
> + *   IPA space.

The function does more than this, though, as it will also go ahead and unmap
existing valid mappings which I think should be mentioned here, no?

> +int kvm_pgtable_stage2_set_owner(struct kvm_pgtable *pgt, u64 addr, u64 size,
> +  void *mc, u8 owner_id)
> +{
> + int ret;
> + struct stage2_map_data map_data = {
> + .phys   = KVM_PHYS_INVALID,
> + .mmu= pgt->mmu,
> + .memcache   = mc,
> + .mm_ops = pgt->mm_ops,
> + .owner_id   = owner_id,
> + };
> + struct kvm_pgtable_walker walker = {
> + .cb = stage2_map_walker,
> + .flags  = KVM_PGTABLE_WALK_TABLE_PRE |
> +   KVM_PGTABLE_WALK_LEAF |
> +   KVM_PGTABLE_WALK_TABLE_POST,
> + .arg= &map_data,
> + };
> +
> + if (owner_id > KVM_MAX_OWNER_ID)
> + return -EINVAL;
> +
> + ret = kvm_pgtable_walk(pgt, addr, size, &walker);
> + dsb(ishst);

Why is the DSB needed here? afaict, we only ever unmap a valid entry (which
will have a DSB as part of the TLBI sequence) or we update the owner for an
existing invalid entry, in which case the walker doesn't care.

Will


[PATCH v5 29/36] KVM: arm64: Use page-table to track page ownership

2021-03-15 Thread Quentin Perret
As the host stage 2 will be identity mapped, all the .hyp memory regions
and/or memory pages donated to protected guests will have to be marked
invalid in the host stage 2 page-table. At the same time, the hypervisor
will need a way to track the ownership of each physical page to ensure
memory sharing or donation between entities (host, guests, hypervisor) is
legal.

In order to enable this tracking at EL2, let's use the host stage 2
page-table itself. The idea is to use the top bits of invalid mappings
to store the unique identifier of the page owner. The page-table owner
(the host) gets identifier 0 such that, at boot time, it owns the entire
IPA space as the pgd starts zeroed.

Provide kvm_pgtable_stage2_set_owner() which allows to modify the
ownership of pages in the host stage 2. It re-uses most of the map()
logic, but ends up creating invalid mappings instead. This impacts
how we do refcount as we now need to count invalid mappings when they
are used for ownership tracking.

Signed-off-by: Quentin Perret 
---
 arch/arm64/include/asm/kvm_pgtable.h |  21 +
 arch/arm64/kvm/hyp/pgtable.c | 127 ++-
 2 files changed, 124 insertions(+), 24 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_pgtable.h 
b/arch/arm64/include/asm/kvm_pgtable.h
index 4ae19247837b..683e96abdc24 100644
--- a/arch/arm64/include/asm/kvm_pgtable.h
+++ b/arch/arm64/include/asm/kvm_pgtable.h
@@ -238,6 +238,27 @@ int kvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 
addr, u64 size,
   u64 phys, enum kvm_pgtable_prot prot,
   void *mc);
 
+/**
+ * kvm_pgtable_stage2_set_owner() - Annotate invalid mappings with metadata
+ * encoding the ownership of a page in the
+ * IPA space.
+ * @pgt:   Page-table structure initialised by kvm_pgtable_stage2_init().
+ * @addr:  Base intermediate physical address to annotate.
+ * @size:  Size of the annotated range.
+ * @mc:    Cache of pre-allocated and zeroed memory from which to allocate
+ * page-table pages.
+ * @owner_id:  Unique identifier for the owner of the page.
+ *
+ * By default, all page-tables are owned by identifier 0. This function can be
+ * used to mark portions of the IPA space as owned by other entities. When a
+ * stage 2 is used with identity-mappings, these annotations allow to use the
+ * page-table data structure as a simple rmap.
+ *
+ * Return: 0 on success, negative error code on failure.
+ */
+int kvm_pgtable_stage2_set_owner(struct kvm_pgtable *pgt, u64 addr, u64 size,
+void *mc, u8 owner_id);
+
 /**
  * kvm_pgtable_stage2_unmap() - Remove a mapping from a guest stage-2 
page-table.
  * @pgt:   Page-table structure initialised by kvm_pgtable_stage2_init().
diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
index f37b4179b880..bd44e84dedc4 100644
--- a/arch/arm64/kvm/hyp/pgtable.c
+++ b/arch/arm64/kvm/hyp/pgtable.c
@@ -48,6 +48,9 @@
 KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W | \
 KVM_PTE_LEAF_ATTR_HI_S2_XN)
 
+#define KVM_INVALID_PTE_OWNER_MASK GENMASK(63, 56)
+#define KVM_MAX_OWNER_ID   1
+
 struct kvm_pgtable_walk_data {
struct kvm_pgtable  *pgt;
struct kvm_pgtable_walker   *walker;
@@ -67,6 +70,13 @@ static u64 kvm_granule_size(u32 level)
return BIT(kvm_granule_shift(level));
 }
 
+#define KVM_PHYS_INVALID (-1ULL)
+
+static bool kvm_phys_is_valid(u64 phys)
+{
+   return phys < 
BIT(id_aa64mmfr0_parange_to_phys_shift(ID_AA64MMFR0_PARANGE_MAX));
+}
+
 static bool kvm_block_mapping_supported(u64 addr, u64 end, u64 phys, u32 level)
 {
u64 granule = kvm_granule_size(level);
@@ -81,7 +91,10 @@ static bool kvm_block_mapping_supported(u64 addr, u64 end, 
u64 phys, u32 level)
if (granule > (end - addr))
return false;
 
-   return IS_ALIGNED(addr, granule) && IS_ALIGNED(phys, granule);
+   if (kvm_phys_is_valid(phys) && !IS_ALIGNED(phys, granule))
+   return false;
+
+   return IS_ALIGNED(addr, granule);
 }
 
 static u32 kvm_pgtable_idx(struct kvm_pgtable_walk_data *data, u32 level)
@@ -186,6 +199,11 @@ static kvm_pte_t kvm_init_valid_leaf_pte(u64 pa, kvm_pte_t 
attr, u32 level)
return pte;
 }
 
+static kvm_pte_t kvm_init_invalid_leaf_owner(u8 owner_id)
+{
+   return FIELD_PREP(KVM_INVALID_PTE_OWNER_MASK, owner_id);
+}
+
 static int kvm_pgtable_visitor_cb(struct kvm_pgtable_walk_data *data, u64 addr,
  u32 level, kvm_pte_t *ptep,
  enum kvm_pgtable_walk_flags flag)
@@ -440,6 +458,7 @@ void kvm_pgtable_hyp_destroy(struct kvm_pgtable *pgt)
 struct stage2_map_data {
u64 phys;
kvm_pte_t   attr;
+   u8