Re: [PATCH v3 17/21] KVM: arm64: Mark host bss and rodata section as shared

2021-08-03 Thread Fuad Tabba
Hi Quentin,

> > > +   ret = pkvm_create_mappings(__hyp_bss_end, __bss_stop, prot);
> >
> > nit: for clarity, I wonder if it might be good to create an alias of
> > __hyp_bss_end as __bss_start or something. When it's been moved here,
> > it sticks out a bit more and makes the reader wonder about the
> > significance of __hyp_bss_end.
>
> I understand what you mean, but I'm not sure this aliasing is really
> going to clarify things much. We have a comment in arm.c (see
> init_hyp_mode()) to explain exactly why we're doing this, so maybe it
> would be worth adding it here too. WDYT?

Not sure to be honest. Comments are good, until they're stale, and
replicating the comment increases the odds of that happening. No
strong opinion either way.

> > > +   if (ret)
> > > +   return ret;
> > > +
> > > return 0;
> > >  }
> > >
> > > @@ -148,6 +159,57 @@ static void hpool_put_page(void *addr)
> > > hyp_put_page(&hpool, addr);
> > >  }
> > >
> > > +static int finalize_host_mappings_walker(u64 addr, u64 end, u32 level,
> > > +kvm_pte_t *ptep,
> > > +enum kvm_pgtable_walk_flags flag,
> > > +void * const arg)
> > > +{
> > > +   enum kvm_pgtable_prot prot;
> > > +   enum pkvm_page_state state;
> > > +   kvm_pte_t pte = *ptep;
> > > +   phys_addr_t phys;
> > > +
> > > +   if (!kvm_pte_valid(pte))
> > > +   return 0;
> > > +
> > > +   if (level != (KVM_PGTABLE_MAX_LEVELS - 1))
> > > +   return -EINVAL;
> >
> > I know that it's not in scope here, but I'm wondering whether we
> > should be checking for KVM_PTE_TYPE_PAGE instead of the level. Maybe
>
> Well these would check different things no?
>
> > it would be good to have a helper somewhere for all these checks both
> > for clarity and to ensure that nothing has gone wrong with the pte.
>
> The reason I need this check is just to make sure the call to
> host_stage2_idmap_locked() further down is correct with a hardcoded
> PAGE_SIZE size. The alternative would be to not be lazy and actually
> compute the current granule size based on the level and use that, as
> that would make this code robust to using block mappings at EL2 stage-1
> in the future.
>
> And I'll fix this up for v4.

I get it now. Thanks!
/fuad


> Cheers,
> Quentin
>
> > > +
> > > +   phys = kvm_pte_to_phys(pte);
> > > +   if (!addr_is_memory(phys))
> > > +   return 0;
> > > +
> > > +   /*
> > > +* Adjust the host stage-2 mappings to match the ownership 
> > > attributes
> > > +* configured in the hypervisor stage-1.
> > > +*/
> > > +   state = pkvm_getstate(kvm_pgtable_hyp_pte_prot(pte));
> > > +   switch (state) {
> > > +   case PKVM_PAGE_OWNED:
> > > +   return host_stage2_set_owner_locked(phys, phys + 
> > > PAGE_SIZE, pkvm_hyp_id);
> > > +   case PKVM_PAGE_SHARED_OWNED:
> > > +   prot = pkvm_mkstate(KVM_PGTABLE_PROT_RWX, 
> > > PKVM_PAGE_SHARED_BORROWED);
> > > +   break;
> > > +   case PKVM_PAGE_SHARED_BORROWED:
> > > +   prot = pkvm_mkstate(KVM_PGTABLE_PROT_RWX, 
> > > PKVM_PAGE_SHARED_OWNED);
> > > +   break;
> > > +   default:
> > > +   return -EINVAL;
> > > +   }
> > > +
> > > +   return host_stage2_idmap_locked(phys, phys + PAGE_SIZE, prot);
> > > +}
> > > +
> > > +static int finalize_host_mappings(void)
> > > +{
> > > +   struct kvm_pgtable_walker walker = {
> > > +   .cb = finalize_host_mappings_walker,
> > > +   .flags  = KVM_PGTABLE_WALK_LEAF,
> > > +   };
> > > +
> > > +   return kvm_pgtable_walk(&pkvm_pgtable, 0, 
> > > BIT(pkvm_pgtable.ia_bits), &walker);
> > > +}
> > > +
> > >  void __noreturn __pkvm_init_finalise(void)
> > >  {
> > > struct kvm_host_data *host_data = this_cpu_ptr(&kvm_host_data);
> > > @@ -167,6 +229,10 @@ void __noreturn __pkvm_init_finalise(void)
> > > if (ret)
> > > goto out;
> > >
> > > +   ret = finalize_host_mappings();
> > > +   if (ret)
> > > +   goto out;
> > > +
> > > pkvm_pgtable_mm_ops = (struct kvm_pgtable_mm_ops) {
> > > .zalloc_page = hyp_zalloc_hyp_page,
> > > .phys_to_virt = hyp_phys_to_virt,
> > > --
> > > 2.32.0.432.gabb21c7263-goog
> > >
> >
> > Thanks,
> > /fuad
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v3 17/21] KVM: arm64: Mark host bss and rodata section as shared

2021-08-03 Thread Quentin Perret
On Tuesday 03 Aug 2021 at 07:02:42 (+0200), Fuad Tabba wrote:
> Hi Quentin,
> 
> On Thu, Jul 29, 2021 at 3:29 PM Quentin Perret  wrote:
> >
> > As the hypervisor maps the host's .bss and .rodata sections in its
> > stage-1, make sure to tag them as shared in hyp and host page-tables.
> >
> > But since the hypervisor relies on the presence of these mappings, we
> > cannot let the host in complete control of the memory regions -- it
> > must not unshare or donate them to another entity for example. To
> > prevent this, let's transfer the ownership of those ranges to the
> > hypervisor itself, and share the pages back with the host.
> >
> > Signed-off-by: Quentin Perret 
> > ---
> >  arch/arm64/kvm/hyp/nvhe/setup.c | 82 +
> >  1 file changed, 74 insertions(+), 8 deletions(-)
> >
> > diff --git a/arch/arm64/kvm/hyp/nvhe/setup.c 
> > b/arch/arm64/kvm/hyp/nvhe/setup.c
> > index 0b574d106519..7f557b264f62 100644
> > --- a/arch/arm64/kvm/hyp/nvhe/setup.c
> > +++ b/arch/arm64/kvm/hyp/nvhe/setup.c
> > @@ -58,6 +58,7 @@ static int recreate_hyp_mappings(phys_addr_t phys, 
> > unsigned long size,
> >  {
> > void *start, *end, *virt = hyp_phys_to_virt(phys);
> > unsigned long pgt_size = hyp_s1_pgtable_pages() << PAGE_SHIFT;
> > +   enum kvm_pgtable_prot prot;
> > int ret, i;
> >
> > /* Recreate the hyp page-table using the early page allocator */
> > @@ -83,10 +84,6 @@ static int recreate_hyp_mappings(phys_addr_t phys, 
> > unsigned long size,
> > if (ret)
> > return ret;
> >
> > -   ret = pkvm_create_mappings(__start_rodata, __end_rodata, 
> > PAGE_HYP_RO);
> > -   if (ret)
> > -   return ret;
> > -
> > ret = pkvm_create_mappings(__hyp_rodata_start, __hyp_rodata_end, 
> > PAGE_HYP_RO);
> > if (ret)
> > return ret;
> > @@ -95,10 +92,6 @@ static int recreate_hyp_mappings(phys_addr_t phys, 
> > unsigned long size,
> > if (ret)
> > return ret;
> >
> > -   ret = pkvm_create_mappings(__hyp_bss_end, __bss_stop, PAGE_HYP_RO);
> > -   if (ret)
> > -   return ret;
> > -
> > ret = pkvm_create_mappings(virt, virt + size, PAGE_HYP);
> > if (ret)
> > return ret;
> > @@ -117,6 +110,24 @@ static int recreate_hyp_mappings(phys_addr_t phys, 
> > unsigned long size,
> > return ret;
> > }
> >
> > +   /*
> > +* Map the host's .bss and .rodata sections RO in the hypervisor, 
> > but
> > +* transfer the ownerhsip from the host to the hypervisor itself to
> > +* make sure it can't be donated or shared with another entity.
> 
> nit: ownerhsip -> ownership
> 
> > +*
> > +* The ownership transtion requires matching changes in the host
> 
> nit: transtion -> transition
> 
> > +* stage-2. This will done later (see finalize_host_mappings()) 
> > once the
> 
> nit: will done -> will be done

Urgh, I clearly went too fast writing this, thanks!

> > +* hyp_vmemmap is addressable.
> > +*/
> > +   prot = pkvm_mkstate(PAGE_HYP_RO, PKVM_PAGE_SHARED_OWNED);
> > +   ret = pkvm_create_mappings(__start_rodata, __end_rodata, prot);
> > +   if (ret)
> > +   return ret;
> > +
> > +   ret = pkvm_create_mappings(__hyp_bss_end, __bss_stop, prot);
> 
> nit: for clarity, I wonder if it might be good to create an alias of
> __hyp_bss_end as __bss_start or something. When it's been moved here,
> it sticks out a bit more and makes the reader wonder about the
> significance of __hyp_bss_end.

I understand what you mean, but I'm not sure this aliasing is really
going to clarify things much. We have a comment in arm.c (see
init_hyp_mode()) to explain exactly why we're doing this, so maybe it
would be worth adding it here too. WDYT?

> > +   if (ret)
> > +   return ret;
> > +
> > return 0;
> >  }
> >
> > @@ -148,6 +159,57 @@ static void hpool_put_page(void *addr)
> > hyp_put_page(&hpool, addr);
> >  }
> >
> > +static int finalize_host_mappings_walker(u64 addr, u64 end, u32 level,
> > +kvm_pte_t *ptep,
> > +enum kvm_pgtable_walk_flags flag,
> > +void * const arg)
> > +{
> > +   enum kvm_pgtable_prot prot;
> > +   enum pkvm_page_state state;
> > +   kvm_pte_t pte = *ptep;
> > +   phys_addr_t phys;
> > +
> > +   if (!kvm_pte_valid(pte))
> > +   return 0;
> > +
> > +   if (level != (KVM_PGTABLE_MAX_LEVELS - 1))
> > +   return -EINVAL;
> 
> I know that it's not in scope here, but I'm wondering whether we
> should be checking for KVM_PTE_TYPE_PAGE instead of the level. Maybe

Well these would check different things no?

> it would be good to have a helper somewhere for all these checks both
> for clarity and to ensure that nothing has gone wrong with the pte.

Re: [PATCH v3 17/21] KVM: arm64: Mark host bss and rodata section as shared

2021-08-03 Thread Fuad Tabba
Hi Quentin,

On Thu, Jul 29, 2021 at 3:29 PM Quentin Perret  wrote:
>
> As the hypervisor maps the host's .bss and .rodata sections in its
> stage-1, make sure to tag them as shared in hyp and host page-tables.
>
> But since the hypervisor relies on the presence of these mappings, we
> cannot let the host in complete control of the memory regions -- it
> must not unshare or donate them to another entity for example. To
> prevent this, let's transfer the ownership of those ranges to the
> hypervisor itself, and share the pages back with the host.
>
> Signed-off-by: Quentin Perret 
> ---
>  arch/arm64/kvm/hyp/nvhe/setup.c | 82 +
>  1 file changed, 74 insertions(+), 8 deletions(-)
>
> diff --git a/arch/arm64/kvm/hyp/nvhe/setup.c b/arch/arm64/kvm/hyp/nvhe/setup.c
> index 0b574d106519..7f557b264f62 100644
> --- a/arch/arm64/kvm/hyp/nvhe/setup.c
> +++ b/arch/arm64/kvm/hyp/nvhe/setup.c
> @@ -58,6 +58,7 @@ static int recreate_hyp_mappings(phys_addr_t phys, unsigned 
> long size,
>  {
> void *start, *end, *virt = hyp_phys_to_virt(phys);
> unsigned long pgt_size = hyp_s1_pgtable_pages() << PAGE_SHIFT;
> +   enum kvm_pgtable_prot prot;
> int ret, i;
>
> /* Recreate the hyp page-table using the early page allocator */
> @@ -83,10 +84,6 @@ static int recreate_hyp_mappings(phys_addr_t phys, 
> unsigned long size,
> if (ret)
> return ret;
>
> -   ret = pkvm_create_mappings(__start_rodata, __end_rodata, PAGE_HYP_RO);
> -   if (ret)
> -   return ret;
> -
> ret = pkvm_create_mappings(__hyp_rodata_start, __hyp_rodata_end, 
> PAGE_HYP_RO);
> if (ret)
> return ret;
> @@ -95,10 +92,6 @@ static int recreate_hyp_mappings(phys_addr_t phys, 
> unsigned long size,
> if (ret)
> return ret;
>
> -   ret = pkvm_create_mappings(__hyp_bss_end, __bss_stop, PAGE_HYP_RO);
> -   if (ret)
> -   return ret;
> -
> ret = pkvm_create_mappings(virt, virt + size, PAGE_HYP);
> if (ret)
> return ret;
> @@ -117,6 +110,24 @@ static int recreate_hyp_mappings(phys_addr_t phys, 
> unsigned long size,
> return ret;
> }
>
> +   /*
> +* Map the host's .bss and .rodata sections RO in the hypervisor, but
> +* transfer the ownerhsip from the host to the hypervisor itself to
> +* make sure it can't be donated or shared with another entity.

nit: ownerhsip -> ownership

> +*
> +* The ownership transtion requires matching changes in the host

nit: transtion -> transition

> +* stage-2. This will done later (see finalize_host_mappings()) once 
> the

nit: will done -> will be done

> +* hyp_vmemmap is addressable.
> +*/
> +   prot = pkvm_mkstate(PAGE_HYP_RO, PKVM_PAGE_SHARED_OWNED);
> +   ret = pkvm_create_mappings(__start_rodata, __end_rodata, prot);
> +   if (ret)
> +   return ret;
> +
> +   ret = pkvm_create_mappings(__hyp_bss_end, __bss_stop, prot);

nit: for clarity, I wonder if it might be good to create an alias of
__hyp_bss_end as __bss_start or something. When it's been moved here,
it sticks out a bit more and makes the reader wonder about the
significance of __hyp_bss_end.

> +   if (ret)
> +   return ret;
> +
> return 0;
>  }
>
> @@ -148,6 +159,57 @@ static void hpool_put_page(void *addr)
> hyp_put_page(&hpool, addr);
>  }
>
> +static int finalize_host_mappings_walker(u64 addr, u64 end, u32 level,
> +kvm_pte_t *ptep,
> +enum kvm_pgtable_walk_flags flag,
> +void * const arg)
> +{
> +   enum kvm_pgtable_prot prot;
> +   enum pkvm_page_state state;
> +   kvm_pte_t pte = *ptep;
> +   phys_addr_t phys;
> +
> +   if (!kvm_pte_valid(pte))
> +   return 0;
> +
> +   if (level != (KVM_PGTABLE_MAX_LEVELS - 1))
> +   return -EINVAL;

I know that it's not in scope here, but I'm wondering whether we
should be checking for KVM_PTE_TYPE_PAGE instead of the level. Maybe
it would be good to have a helper somewhere for all these checks both
for clarity and to ensure that nothing has gone wrong with the pte.

> +
> +   phys = kvm_pte_to_phys(pte);
> +   if (!addr_is_memory(phys))
> +   return 0;
> +
> +   /*
> +* Adjust the host stage-2 mappings to match the ownership attributes
> +* configured in the hypervisor stage-1.
> +*/
> +   state = pkvm_getstate(kvm_pgtable_hyp_pte_prot(pte));
> +   switch (state) {
> +   case PKVM_PAGE_OWNED:
> +   return host_stage2_set_owner_locked(phys, phys + PAGE_SIZE, 
> pkvm_hyp_id);
> +   case PKVM_PAGE_SHARED_OWNED:
> +   prot = pkvm_mkstate(KVM_PGTABLE_PROT_RWX, 
> PKVM_PAGE_SHARED_BORROWED);
> +   break;

[PATCH v3 17/21] KVM: arm64: Mark host bss and rodata section as shared

2021-07-29 Thread Quentin Perret
As the hypervisor maps the host's .bss and .rodata sections in its
stage-1, make sure to tag them as shared in hyp and host page-tables.

But since the hypervisor relies on the presence of these mappings, we
cannot leave the host in complete control of the memory regions -- it
must not unshare or donate them to another entity for example. To
prevent this, let's transfer the ownership of those ranges to the
hypervisor itself, and share the pages back with the host.

Signed-off-by: Quentin Perret 
---
 arch/arm64/kvm/hyp/nvhe/setup.c | 82 +
 1 file changed, 74 insertions(+), 8 deletions(-)

diff --git a/arch/arm64/kvm/hyp/nvhe/setup.c b/arch/arm64/kvm/hyp/nvhe/setup.c
index 0b574d106519..7f557b264f62 100644
--- a/arch/arm64/kvm/hyp/nvhe/setup.c
+++ b/arch/arm64/kvm/hyp/nvhe/setup.c
@@ -58,6 +58,7 @@ static int recreate_hyp_mappings(phys_addr_t phys, unsigned 
long size,
 {
void *start, *end, *virt = hyp_phys_to_virt(phys);
unsigned long pgt_size = hyp_s1_pgtable_pages() << PAGE_SHIFT;
+   enum kvm_pgtable_prot prot;
int ret, i;
 
/* Recreate the hyp page-table using the early page allocator */
@@ -83,10 +84,6 @@ static int recreate_hyp_mappings(phys_addr_t phys, unsigned 
long size,
if (ret)
return ret;
 
-   ret = pkvm_create_mappings(__start_rodata, __end_rodata, PAGE_HYP_RO);
-   if (ret)
-   return ret;
-
ret = pkvm_create_mappings(__hyp_rodata_start, __hyp_rodata_end, 
PAGE_HYP_RO);
if (ret)
return ret;
@@ -95,10 +92,6 @@ static int recreate_hyp_mappings(phys_addr_t phys, unsigned 
long size,
if (ret)
return ret;
 
-   ret = pkvm_create_mappings(__hyp_bss_end, __bss_stop, PAGE_HYP_RO);
-   if (ret)
-   return ret;
-
ret = pkvm_create_mappings(virt, virt + size, PAGE_HYP);
if (ret)
return ret;
@@ -117,6 +110,24 @@ static int recreate_hyp_mappings(phys_addr_t phys, 
unsigned long size,
return ret;
}
 
+   /*
+* Map the host's .bss and .rodata sections RO in the hypervisor, but
+* transfer the ownership from the host to the hypervisor itself to
+* make sure it can't be donated or shared with another entity.
+*
+* The ownership transition requires matching changes in the host
+* stage-2. This will be done later (see finalize_host_mappings()) once the
+* hyp_vmemmap is addressable.
+*/
+   prot = pkvm_mkstate(PAGE_HYP_RO, PKVM_PAGE_SHARED_OWNED);
+   ret = pkvm_create_mappings(__start_rodata, __end_rodata, prot);
+   if (ret)
+   return ret;
+
+   ret = pkvm_create_mappings(__hyp_bss_end, __bss_stop, prot);
+   if (ret)
+   return ret;
+
return 0;
 }
 
@@ -148,6 +159,57 @@ static void hpool_put_page(void *addr)
hyp_put_page(&hpool, addr);
 }
 
+static int finalize_host_mappings_walker(u64 addr, u64 end, u32 level,
+kvm_pte_t *ptep,
+enum kvm_pgtable_walk_flags flag,
+void * const arg)
+{
+   enum kvm_pgtable_prot prot;
+   enum pkvm_page_state state;
+   kvm_pte_t pte = *ptep;
+   phys_addr_t phys;
+
+   if (!kvm_pte_valid(pte))
+   return 0;
+
+   if (level != (KVM_PGTABLE_MAX_LEVELS - 1))
+   return -EINVAL;
+
+   phys = kvm_pte_to_phys(pte);
+   if (!addr_is_memory(phys))
+   return 0;
+
+   /*
+* Adjust the host stage-2 mappings to match the ownership attributes
+* configured in the hypervisor stage-1.
+*/
+   state = pkvm_getstate(kvm_pgtable_hyp_pte_prot(pte));
+   switch (state) {
+   case PKVM_PAGE_OWNED:
+   return host_stage2_set_owner_locked(phys, phys + PAGE_SIZE, 
pkvm_hyp_id);
+   case PKVM_PAGE_SHARED_OWNED:
+   prot = pkvm_mkstate(KVM_PGTABLE_PROT_RWX, 
PKVM_PAGE_SHARED_BORROWED);
+   break;
+   case PKVM_PAGE_SHARED_BORROWED:
+   prot = pkvm_mkstate(KVM_PGTABLE_PROT_RWX, 
PKVM_PAGE_SHARED_OWNED);
+   break;
+   default:
+   return -EINVAL;
+   }
+
+   return host_stage2_idmap_locked(phys, phys + PAGE_SIZE, prot);
+}
+
+static int finalize_host_mappings(void)
+{
+   struct kvm_pgtable_walker walker = {
+   .cb = finalize_host_mappings_walker,
+   .flags  = KVM_PGTABLE_WALK_LEAF,
+   };
+
+   return kvm_pgtable_walk(&pkvm_pgtable, 0, BIT(pkvm_pgtable.ia_bits), 
&walker);
+}
+
 void __noreturn __pkvm_init_finalise(void)
 {
struct kvm_host_data *host_data = this_cpu_ptr(&kvm_host_data);
@@ -167,6 +229,10 @@ void __noreturn __pkvm_init_finalise(void)
if (ret)
goto out;
 
+   ret = finalize_host_mappings();
+   if (ret)
+   go