Re: [PATCH v16 1/3] mem_sharing: fix sharability check during fork reset

2020-04-22 Thread Tamas K Lengyel
On Wed, Apr 22, 2020 at 9:50 AM Roger Pau Monné  wrote:
>
> On Wed, Apr 22, 2020 at 06:42:42AM -0600, Tamas K Lengyel wrote:
> > On Wed, Apr 22, 2020 at 3:00 AM Roger Pau Monné  
> > wrote:
> > >
> > > On Tue, Apr 21, 2020 at 10:47:23AM -0700, Tamas K Lengyel wrote:
> > > > @@ -666,20 +670,31 @@ static int page_make_sharable(struct domain *d,
> > > >   */
> > > >  if ( page->count_info != (PGC_allocated | (2 + expected_refcnt)) )
> > > >  {
> > > > -spin_unlock(&d->page_alloc_lock);
> > > >  /* Return type count back to zero */
> > > >  put_page_and_type(page);
> > > > -return -E2BIG;
> > > > +rc = -E2BIG;
> > > > +goto out;
> > > > +}
> > > > +
> > > > +rc = 0;
> > > > +
> > > > +if ( validate_only )
> > > > +{
> > > > +put_page_and_type(page);
> > >
> > > You seem to check some page attributes but then put the page again,
> > > which looks racy to me. Since you put the page, couldn't the checks
> > > that you have performed be stale by the point the data is consumed by
> > > the caller?
> >
> > During fork reset when this is called with validate_only = true the
> > domain is paused. Furthermore, fork reset is only for forks that have
> > no device model running, so nothing is interacting with its memory
> > that could acquire extra references. So no, this isn't racy since
> > there is nothing to race against that I'm aware of. Also, this check
> > is really to check for special pages, all of which are setup during
> > the initial fork process, not during runtime of the fork.
>
> Right, it would feel safer to me however if you just return from
> page_make_sharable while having a page reference, and drop it in
> mem_sharing_fork_reset if the page shouldn't be removed from the fork.
>
> This way you could also avoid having to take an extra reference just
> after returning from nominate_page in mem_sharing_fork_reset.
> page_make_sharable already returns while having taken an extra
> reference to the page in the non validate only case anyway.

Ah yea, that would make sense. Good idea!

> > >
> > > > +goto out;
> > > >  }
> > > >
> > > >  page_set_owner(page, dom_cow);
> > > >  drop_dom_ref = !domain_adjust_tot_pages(d, -1);
> > > >  page_list_del(page, &d->page_list);
> > > > -spin_unlock(&d->page_alloc_lock);
> > > >
> > > > +out:
> > > > +if ( !validate_only )
> > > > +spin_unlock(&d->page_alloc_lock);
> > > >  if ( drop_dom_ref )
> > > >  put_domain(d);
> > > > -return 0;
> > > > +
> > > > +return rc;
> > > >  }
> > > >
> > > >  static int page_make_private(struct domain *d, struct page_info *page)
> > > > @@ -809,8 +824,8 @@ static int debug_gref(struct domain *d, grant_ref_t 
> > > > ref)
> > > >  return debug_gfn(d, gfn);
> > > >  }
> > > >
> > > > -static int nominate_page(struct domain *d, gfn_t gfn,
> > > > - int expected_refcnt, shr_handle_t *phandle)
> > > > +static int nominate_page(struct domain *d, gfn_t gfn, int 
> > > > expected_refcnt,
> > >
> > > Is there any reason for expected_refcnt to be signed? All callers use
> > > unsigned values.
> >
> > No reason. It's just how the code was written by the original author
> > and we never changed it.
>
> Since you are already changing those lines, can I ask you to also
> change it to unsigned in the places that you touch?

Sure thing.

Thanks,
Tamas



Re: [PATCH v16 1/3] mem_sharing: fix sharability check during fork reset

2020-04-22 Thread Roger Pau Monné
On Wed, Apr 22, 2020 at 06:42:42AM -0600, Tamas K Lengyel wrote:
> On Wed, Apr 22, 2020 at 3:00 AM Roger Pau Monné  wrote:
> >
> > On Tue, Apr 21, 2020 at 10:47:23AM -0700, Tamas K Lengyel wrote:
> > > @@ -666,20 +670,31 @@ static int page_make_sharable(struct domain *d,
> > >   */
> > >  if ( page->count_info != (PGC_allocated | (2 + expected_refcnt)) )
> > >  {
> > > -spin_unlock(&d->page_alloc_lock);
> > >  /* Return type count back to zero */
> > >  put_page_and_type(page);
> > > -return -E2BIG;
> > > +rc = -E2BIG;
> > > +goto out;
> > > +}
> > > +
> > > +rc = 0;
> > > +
> > > +if ( validate_only )
> > > +{
> > > +put_page_and_type(page);
> >
> > You seem to check some page attributes but then put the page again,
> > which looks racy to me. Since you put the page, couldn't the checks
> > that you have performed be stale by the point the data is consumed by
> > the caller?
> 
> During fork reset when this is called with validate_only = true the
> domain is paused. Furthermore, fork reset is only for forks that have
> no device model running, so nothing is interacting with its memory
> that could acquire extra references. So no, this isn't racy since
> there is nothing to race against that I'm aware of. Also, this check
> is really to check for special pages, all of which are setup during
> the initial fork process, not during runtime of the fork.

Right, it would feel safer to me however if you just return from
page_make_sharable while having a page reference, and drop it in
mem_sharing_fork_reset if the page shouldn't be removed from the fork.

This way you could also avoid having to take an extra reference just
after returning from nominate_page in mem_sharing_fork_reset.
page_make_sharable already returns while having taken an extra
reference to the page in the non validate only case anyway.

> >
> > > +goto out;
> > >  }
> > >
> > >  page_set_owner(page, dom_cow);
> > >  drop_dom_ref = !domain_adjust_tot_pages(d, -1);
> > >  page_list_del(page, &d->page_list);
> > > -spin_unlock(&d->page_alloc_lock);
> > >
> > > +out:
> > > +if ( !validate_only )
> > > +spin_unlock(&d->page_alloc_lock);
> > >  if ( drop_dom_ref )
> > >  put_domain(d);
> > > -return 0;
> > > +
> > > +return rc;
> > >  }
> > >
> > >  static int page_make_private(struct domain *d, struct page_info *page)
> > > @@ -809,8 +824,8 @@ static int debug_gref(struct domain *d, grant_ref_t 
> > > ref)
> > >  return debug_gfn(d, gfn);
> > >  }
> > >
> > > -static int nominate_page(struct domain *d, gfn_t gfn,
> > > - int expected_refcnt, shr_handle_t *phandle)
> > > +static int nominate_page(struct domain *d, gfn_t gfn, int 
> > > expected_refcnt,
> >
> > Is there any reason for expected_refcnt to be signed? All callers use
> > unsigned values.
> 
> No reason. It's just how the code was written by the original author
> and we never changed it.

Since you are already changing those lines, can I ask you to also
change it to unsigned in the places that you touch?

Thanks, Roger.



Re: [PATCH v16 1/3] mem_sharing: fix sharability check during fork reset

2020-04-22 Thread Tamas K Lengyel
On Wed, Apr 22, 2020 at 3:00 AM Roger Pau Monné  wrote:
>
> On Tue, Apr 21, 2020 at 10:47:23AM -0700, Tamas K Lengyel wrote:
> > When resetting a VM fork we ought to only remove pages that were allocated 
> > for
> > the fork during it's execution and the contents copied over from the parent.
> > This can be determined if the page is sharable as special pages used by the
> > fork for other purposes will not pass this test.
>
> Would it be easier to just check if the page refcount is > 1? (as I
> expect Xen is also holding a reference to this page)

That by itself is not necessarily enough.

>
> > Unfortunately during the fork
> > reset loop we only partially check whether that's the case. A page's type 
> > may
> > indicate it is sharable (pass p2m_is_sharable) but that's not a sufficient
> > check by itself. All checks that are normally performed before a page is
> > converted to the sharable type need to be performed to avoid removing pages
> > from the p2m that may be used for other purposes. For example, currently the
> > reset loop also removes the vcpu info pages from the p2m, potentially 
> > putting
> > the guest into infinite page-fault loops.
> >
> > For this we extend the existing nominate_page and page_make_sharable 
> > functions
> > to perform a validation-only run without actually converting the page.
>
> Maybe you could split that chunk into a separate helper that just
> performs the checks?

I think it's fine this way that we just bail half-way through the
process of making the page shared. Splitting this out to a helper
would require a lot more code-shuffling.

>
> > Signed-off-by: Tamas K Lengyel 
> > ---
> >  xen/arch/x86/mm/mem_sharing.c | 79 ++-
> >  1 file changed, 50 insertions(+), 29 deletions(-)
> >
> > diff --git a/xen/arch/x86/mm/mem_sharing.c b/xen/arch/x86/mm/mem_sharing.c
> > index e572e9e39d..d8ed660abb 100644
> > --- a/xen/arch/x86/mm/mem_sharing.c
> > +++ b/xen/arch/x86/mm/mem_sharing.c
> > @@ -633,31 +633,35 @@ unsigned int mem_sharing_get_nr_shared_mfns(void)
> >  /* Functions that change a page's type and ownership */
> >  static int page_make_sharable(struct domain *d,
> >struct page_info *page,
> > -  int expected_refcnt)
> > +  int expected_refcnt,
> > +  bool validate_only)
> >  {
> > -bool_t drop_dom_ref;
> > +int rc;
> > +bool drop_dom_ref = false;
> >
> > -spin_lock(&d->page_alloc_lock);
> > +/* caller already has the lock when validating only */
> > +if ( !validate_only )
> > +spin_lock(&d->page_alloc_lock);
>
> page_alloc_lock seems to be used as a recursive lock by some callers,
> could you do the same here?

We can do that, yes.

>
> >
> >  if ( d->is_dying )
> >  {
> > -spin_unlock(&d->page_alloc_lock);
> > -return -EBUSY;
> > +rc = -EBUSY;
> > +goto out;
> >  }
> >
> >  /* Change page type and count atomically */
> >  if ( !get_page_and_type(page, d, PGT_shared_page) )
> >  {
> > -spin_unlock(&d->page_alloc_lock);
> > -return -EINVAL;
> > +rc = -EINVAL;
> > +goto out;
> >  }
> >
> >  /* Check it wasn't already sharable and undo if it was */
> >  if ( (page->u.inuse.type_info & PGT_count_mask) != 1 )
> >  {
> > -spin_unlock(&d->page_alloc_lock);
> >  put_page_and_type(page);
> > -return -EEXIST;
> > +rc = -EEXIST;
> > +goto out;
> >  }
> >
> >  /*
> > @@ -666,20 +670,31 @@ static int page_make_sharable(struct domain *d,
> >   */
> >  if ( page->count_info != (PGC_allocated | (2 + expected_refcnt)) )
> >  {
> > -spin_unlock(&d->page_alloc_lock);
> >  /* Return type count back to zero */
> >  put_page_and_type(page);
> > -return -E2BIG;
> > +rc = -E2BIG;
> > +goto out;
> > +}
> > +
> > +rc = 0;
> > +
> > +if ( validate_only )
> > +{
> > +put_page_and_type(page);
>
> You seem to check some page attributes but then put the page again,
> which looks racy to me. Since you put the page, couldn't the checks
> that you have performed be stale by the point the data is consumed by
> the caller?

During fork reset when this is called with validate_only = true the
domain is paused. Furthermore, fork reset is only for forks that have
no device model running, so nothing is interacting with its memory
that could acquire extra references. So no, this isn't racy since
there is nothing to race against that I'm aware of. Also, this check
is really to check for special pages, all of which are setup during
the initial fork process, not during runtime of the fork.

>
> > +goto out;
> >  }
> >
> >  page_set_owner(page, dom_cow);
> >  drop_dom_ref = !domain_adjust_tot_pages(d, -1);
> >  page_list_del(page, &d->page_list);
> > -spin_unlock(&d->page

Re: [PATCH v16 1/3] mem_sharing: fix sharability check during fork reset

2020-04-22 Thread Roger Pau Monné
On Tue, Apr 21, 2020 at 10:47:23AM -0700, Tamas K Lengyel wrote:
> When resetting a VM fork we ought to only remove pages that were allocated for
> the fork during it's execution and the contents copied over from the parent.
> This can be determined if the page is sharable as special pages used by the
> fork for other purposes will not pass this test.

Would it be easier to just check if the page refcount is > 1? (as I
expect Xen is also holding a reference to this page)

> Unfortunately during the fork
> reset loop we only partially check whether that's the case. A page's type may
> indicate it is sharable (pass p2m_is_sharable) but that's not a sufficient
> check by itself. All checks that are normally performed before a page is
> converted to the sharable type need to be performed to avoid removing pages
> from the p2m that may be used for other purposes. For example, currently the
> reset loop also removes the vcpu info pages from the p2m, potentially putting
> the guest into infinite page-fault loops.
> 
> For this we extend the existing nominate_page and page_make_sharable functions
> to perform a validation-only run without actually converting the page.

Maybe you could split that chunk into a separate helper that just
performs the checks?

> Signed-off-by: Tamas K Lengyel 
> ---
>  xen/arch/x86/mm/mem_sharing.c | 79 ++-
>  1 file changed, 50 insertions(+), 29 deletions(-)
> 
> diff --git a/xen/arch/x86/mm/mem_sharing.c b/xen/arch/x86/mm/mem_sharing.c
> index e572e9e39d..d8ed660abb 100644
> --- a/xen/arch/x86/mm/mem_sharing.c
> +++ b/xen/arch/x86/mm/mem_sharing.c
> @@ -633,31 +633,35 @@ unsigned int mem_sharing_get_nr_shared_mfns(void)
>  /* Functions that change a page's type and ownership */
>  static int page_make_sharable(struct domain *d,
>struct page_info *page,
> -  int expected_refcnt)
> +  int expected_refcnt,
> +  bool validate_only)
>  {
> -bool_t drop_dom_ref;
> +int rc;
> +bool drop_dom_ref = false;
>  
> -spin_lock(&d->page_alloc_lock);
> +/* caller already has the lock when validating only */
> +if ( !validate_only )
> +spin_lock(&d->page_alloc_lock);

page_alloc_lock seems to be used as a recursive lock by some callers,
could you do the same here?

>  
>  if ( d->is_dying )
>  {
> -spin_unlock(&d->page_alloc_lock);
> -return -EBUSY;
> +rc = -EBUSY;
> +goto out;
>  }
>  
>  /* Change page type and count atomically */
>  if ( !get_page_and_type(page, d, PGT_shared_page) )
>  {
> -spin_unlock(&d->page_alloc_lock);
> -return -EINVAL;
> +rc = -EINVAL;
> +goto out;
>  }
>  
>  /* Check it wasn't already sharable and undo if it was */
>  if ( (page->u.inuse.type_info & PGT_count_mask) != 1 )
>  {
> -spin_unlock(&d->page_alloc_lock);
>  put_page_and_type(page);
> -return -EEXIST;
> +rc = -EEXIST;
> +goto out;
>  }
>  
>  /*
> @@ -666,20 +670,31 @@ static int page_make_sharable(struct domain *d,
>   */
>  if ( page->count_info != (PGC_allocated | (2 + expected_refcnt)) )
>  {
> -spin_unlock(&d->page_alloc_lock);
>  /* Return type count back to zero */
>  put_page_and_type(page);
> -return -E2BIG;
> +rc = -E2BIG;
> +goto out;
> +}
> +
> +rc = 0;
> +
> +if ( validate_only )
> +{
> +put_page_and_type(page);

You seem to check some page attributes but then put the page again,
which looks racy to me. Since you put the page, couldn't the checks
that you have performed be stale by the point the data is consumed by
the caller?

> +goto out;
>  }
>  
>  page_set_owner(page, dom_cow);
>  drop_dom_ref = !domain_adjust_tot_pages(d, -1);
>  page_list_del(page, &d->page_list);
> -spin_unlock(&d->page_alloc_lock);
>  
> +out:
> +if ( !validate_only )
> +spin_unlock(&d->page_alloc_lock);
>  if ( drop_dom_ref )
>  put_domain(d);
> -return 0;
> +
> +return rc;
>  }
>  
>  static int page_make_private(struct domain *d, struct page_info *page)
> @@ -809,8 +824,8 @@ static int debug_gref(struct domain *d, grant_ref_t ref)
>  return debug_gfn(d, gfn);
>  }
>  
> -static int nominate_page(struct domain *d, gfn_t gfn,
> - int expected_refcnt, shr_handle_t *phandle)
> +static int nominate_page(struct domain *d, gfn_t gfn, int expected_refcnt,

Is there any reason for expected_refcnt to be signed? All callers use
unsigned values.

Thanks, Roger.



[PATCH v16 1/3] mem_sharing: fix sharability check during fork reset

2020-04-21 Thread Tamas K Lengyel
When resetting a VM fork we ought to only remove pages that were allocated for
the fork during it's execution and the contents copied over from the parent.
This can be determined if the page is sharable as special pages used by the
fork for other purposes will not pass this test. Unfortunately during the fork
reset loop we only partially check whether that's the case. A page's type may
indicate it is sharable (pass p2m_is_sharable) but that's not a sufficient
check by itself. All checks that are normally performed before a page is
converted to the sharable type need to be performed to avoid removing pages
from the p2m that may be used for other purposes. For example, currently the
reset loop also removes the vcpu info pages from the p2m, potentially putting
the guest into infinite page-fault loops.

For this we extend the existing nominate_page and page_make_sharable functions
to perform a validation-only run without actually converting the page.

Signed-off-by: Tamas K Lengyel 
---
 xen/arch/x86/mm/mem_sharing.c | 79 ++-
 1 file changed, 50 insertions(+), 29 deletions(-)

diff --git a/xen/arch/x86/mm/mem_sharing.c b/xen/arch/x86/mm/mem_sharing.c
index e572e9e39d..d8ed660abb 100644
--- a/xen/arch/x86/mm/mem_sharing.c
+++ b/xen/arch/x86/mm/mem_sharing.c
@@ -633,31 +633,35 @@ unsigned int mem_sharing_get_nr_shared_mfns(void)
 /* Functions that change a page's type and ownership */
 static int page_make_sharable(struct domain *d,
   struct page_info *page,
-  int expected_refcnt)
+  int expected_refcnt,
+  bool validate_only)
 {
-bool_t drop_dom_ref;
+int rc;
+bool drop_dom_ref = false;
 
-spin_lock(&d->page_alloc_lock);
+/* caller already has the lock when validating only */
+if ( !validate_only )
+spin_lock(&d->page_alloc_lock);
 
 if ( d->is_dying )
 {
-spin_unlock(&d->page_alloc_lock);
-return -EBUSY;
+rc = -EBUSY;
+goto out;
 }
 
 /* Change page type and count atomically */
 if ( !get_page_and_type(page, d, PGT_shared_page) )
 {
-spin_unlock(&d->page_alloc_lock);
-return -EINVAL;
+rc = -EINVAL;
+goto out;
 }
 
 /* Check it wasn't already sharable and undo if it was */
 if ( (page->u.inuse.type_info & PGT_count_mask) != 1 )
 {
-spin_unlock(&d->page_alloc_lock);
 put_page_and_type(page);
-return -EEXIST;
+rc = -EEXIST;
+goto out;
 }
 
 /*
@@ -666,20 +670,31 @@ static int page_make_sharable(struct domain *d,
  */
 if ( page->count_info != (PGC_allocated | (2 + expected_refcnt)) )
 {
-spin_unlock(&d->page_alloc_lock);
 /* Return type count back to zero */
 put_page_and_type(page);
-return -E2BIG;
+rc = -E2BIG;
+goto out;
+}
+
+rc = 0;
+
+if ( validate_only )
+{
+put_page_and_type(page);
+goto out;
 }
 
 page_set_owner(page, dom_cow);
 drop_dom_ref = !domain_adjust_tot_pages(d, -1);
 page_list_del(page, &d->page_list);
-spin_unlock(&d->page_alloc_lock);
 
+out:
+if ( !validate_only )
+spin_unlock(&d->page_alloc_lock);
 if ( drop_dom_ref )
 put_domain(d);
-return 0;
+
+return rc;
 }
 
 static int page_make_private(struct domain *d, struct page_info *page)
@@ -809,8 +824,8 @@ static int debug_gref(struct domain *d, grant_ref_t ref)
 return debug_gfn(d, gfn);
 }
 
-static int nominate_page(struct domain *d, gfn_t gfn,
- int expected_refcnt, shr_handle_t *phandle)
+static int nominate_page(struct domain *d, gfn_t gfn, int expected_refcnt,
+ bool validate_only, shr_handle_t *phandle)
 {
 struct p2m_domain *hp2m = p2m_get_hostp2m(d);
 p2m_type_t p2mt;
@@ -879,8 +894,8 @@ static int nominate_page(struct domain *d, gfn_t gfn,
 }
 
 /* Try to convert the mfn to the sharable type */
-ret = page_make_sharable(d, page, expected_refcnt);
-if ( ret )
+ret = page_make_sharable(d, page, expected_refcnt, validate_only);
+if ( ret || validate_only )
 goto out;
 
 /*
@@ -1392,13 +1407,13 @@ static int range_share(struct domain *d, struct domain 
*cd,
  * We only break out if we run out of memory as individual pages may
  * legitimately be unsharable and we just want to skip over those.
  */
-rc = nominate_page(d, _gfn(start), 0, &sh);
+rc = nominate_page(d, _gfn(start), 0, false, &sh);
 if ( rc == -ENOMEM )
 break;
 
 if ( !rc )
 {
-rc = nominate_page(cd, _gfn(start), 0, &ch);
+rc = nominate_page(cd, _gfn(start), 0, false, &ch);
 if ( rc == -ENOMEM )
 break;
 
@@ -1476,7 +1491,7 @@ int mem_sharing_fork_page(struct domain *d, gfn_t gfn, 
bool uns