Re: [PATCH v2 04/11] xen/memory: Fix acquire_resource size semantics

2020-09-25 Thread Jan Beulich
On 22.09.2020 20:24, Andrew Cooper wrote:
> --- a/xen/common/memory.c
> +++ b/xen/common/memory.c
> @@ -1007,6 +1007,26 @@ static long xatp_permission_check(struct domain *d, 
> unsigned int space)
>  return xsm_add_to_physmap(XSM_TARGET, current->domain, d);
>  }
>  
> +/*
> + * Return 0 on any kind of error.  Caller converts to -EINVAL.
> + *
> + * All nonzero values should be repeatable (i.e. derived from some fixed
> + * property of the domain), and describe the full resource (i.e. mapping the
> + * result of this call will be the entire resource).
> + */
> +static unsigned int resource_max_frames(struct domain *d,

With the lockless intentions I think this could be const from
here on through all the descendants. With this
Reviewed-by: Jan Beulich 
albeit I have one more minor remark:

> @@ -1058,6 +1066,27 @@ static int acquire_resource(
>  if ( rc )
>  goto out;
>  
> +max_frames = resource_max_frames(d, xmar.type, xmar.id);
> +
> +rc = -EINVAL;
> +if ( !max_frames )
> +goto out;
> +
> +if ( guest_handle_is_null(xmar.frame_list) )
> +{
> +if ( xmar.nr_frames )
> +goto out;
> +
> +xmar.nr_frames = max_frames;
> +
> +rc = -EFAULT;
> +if ( __copy_field_to_guest(arg, &xmar, nr_frames) )
> +goto out;
> +
> +rc = 0;
> +goto out;
> +}

That's a lot of "goto out" here. I don't suppose I could talk you
into reducing their amount some, since at least the last two look
to be easy to fold?

Jan



Re: [PATCH v2 04/11] xen/memory: Fix acquire_resource size semantics

2020-09-24 Thread Andrew Cooper
On 24/09/2020 11:06, Paul Durrant wrote:
>> diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
>> index d1cfc8fb4a..e82307bdae 100644
>> --- a/xen/arch/x86/mm.c
>> +++ b/xen/arch/x86/mm.c
>> @@ -4591,6 +4591,26 @@ int xenmem_add_to_physmap_one(
>>  return rc;
>>  }
>>
>> +unsigned int arch_resource_max_frames(
>> +struct domain *d, unsigned int type, unsigned int id)
>> +{
>> +unsigned int nr = 0;
>> +
>> +switch ( type )
>> +{
>> +#ifdef CONFIG_HVM
>> +case XENMEM_resource_ioreq_server:
>> +if ( !is_hvm_domain(d) )
>> +break;
>> +/* One frame for the buf-ioreq ring, and one frame per 128 vcpus. */
>> +nr = 1 + DIV_ROUND_UP(d->max_vcpus * sizeof(struct ioreq), 
>> PAGE_SIZE);
> The buf-ioreq ring is optional

Yes, but its position within the resource, and effect on the position
of the ioreq page(s), is not.

>  so a caller using this value may still get a resource acquisition failure 
> unless the id is used to actually look up and check the ioreq server in 
> question for the actual maximum.

Yes, but that is potentially true of *any* acquisition attempt, even if
the id matches, because maybe someone else has destroyed the ioreq
server, or the domain, in the meantime.


What we have is an mmap() where the caller needs to know to not try and
map page 0 for an ioreq server where buf-ioreq doesn't exist.

This is a direct consequence of:

#define XENMEM_resource_ioreq_server_frame_bufioreq 0
#define XENMEM_resource_ioreq_server_frame_ioreq(n) (1 + (n))

and in practice, what a qemu/demu/other needs to do to map just the
ioreq frames (in a manner compatible with >127 vcpu HVM domains) is to
query the resource size and map size-1 pages from offset 1. 

~Andrew



RE: [PATCH v2 04/11] xen/memory: Fix acquire_resource size semantics

2020-09-24 Thread Paul Durrant
> -Original Message-
> From: Andrew Cooper 
> Sent: 24 September 2020 11:58
> To: p...@xen.org; 'Xen-devel' 
> Cc: 'George Dunlap' ; 'Ian Jackson' 
> ; 'Jan Beulich'
> ; 'Stefano Stabellini' ; 'Wei Liu' 
> ; 'Julien
> Grall' ; 'Michał Leszczyński' ; 
> 'Hubert Jasudowicz'
> ; 'Tamas K Lengyel' 
> Subject: Re: [PATCH v2 04/11] xen/memory: Fix acquire_resource size semantics
> 
> On 24/09/2020 11:06, Paul Durrant wrote:
> >> diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
> >> index d1cfc8fb4a..e82307bdae 100644
> >> --- a/xen/arch/x86/mm.c
> >> +++ b/xen/arch/x86/mm.c
> >> @@ -4591,6 +4591,26 @@ int xenmem_add_to_physmap_one(
> >>  return rc;
> >>  }
> >>
> >> +unsigned int arch_resource_max_frames(
> >> +struct domain *d, unsigned int type, unsigned int id)
> >> +{
> >> +unsigned int nr = 0;
> >> +
> >> +switch ( type )
> >> +{
> >> +#ifdef CONFIG_HVM
> >> +case XENMEM_resource_ioreq_server:
> >> +if ( !is_hvm_domain(d) )
> >> +break;
> >> +/* One frame for the buf-ioreq ring, and one frame per 128 vcpus. 
> >> */
> >> +nr = 1 + DIV_ROUND_UP(d->max_vcpus * sizeof(struct ioreq), 
> >> PAGE_SIZE);
> > The buf-ioreq ring is optional
> 
> Yes, but its position within the resource, and effect on the position
> of the ioreq page(s), is not.

Oh yes, I was forgetting that this is fixed so...

Reviewed-by: Paul Durrant 

> 
> >  so a caller using this value may still get a resource acquisition failure 
> > unless the id is used to
> actually look up and check the ioreq server in question for the actual 
> maximum.
> 
> Yes, but that is potentially true of *any* acquisition attempt, even if
> the id matches, because maybe someone else has destroyed the ioreq
> server, or the domain, in the meantime.
> 
> 
> What we have is an mmap() where the caller needs to know to not try and
> map page 0 for an ioreq server where buf-ioreq doesn't exist.
> 
> This is a direct consequence of:
> 
> #define XENMEM_resource_ioreq_server_frame_bufioreq 0
> #define XENMEM_resource_ioreq_server_frame_ioreq(n) (1 + (n))
> 
> and in practice, what a qemu/demu/other needs to do to map just the
> ioreq frames (in a manner compatible with >127 vcpu HVM domains) is to
> query the resource size and map size-1 pages from offset 1.

Yes.

  Paul

> 
> ~Andrew




RE: [PATCH v2 04/11] xen/memory: Fix acquire_resource size semantics

2020-09-24 Thread Paul Durrant
> -Original Message-
> From: Andrew Cooper 
> Sent: 22 September 2020 19:25
> To: Xen-devel 
> Cc: Andrew Cooper ; George Dunlap 
> ; Ian
> Jackson ; Jan Beulich ; Stefano 
> Stabellini
> ; Wei Liu ; Julien Grall 
> ; Paul Durrant
> ; Michał Leszczyński ; Hubert 
> Jasudowicz
> ; Tamas K Lengyel 
> Subject: [PATCH v2 04/11] xen/memory: Fix acquire_resource size semantics
> 
> Calling XENMEM_acquire_resource with a NULL frame_list is a request for the
> size of the resource, but the returned 32 is bogus.
> 
> If someone tries to follow it for XENMEM_resource_ioreq_server, the acquire
> call will fail as IOREQ servers currently top out at 2 frames, and it is only
> half the size of the default grant table limit for guests.
> 
> Also, no users actually request a resource size, because it was never wired up
> in the sole implementation of resource acquisition in Linux.
> 
> Introduce a new resource_max_frames() to calculate the size of a resource, and
> implement it in the IOREQ and grant subsystems.
> 
> It is impossible to guarantee that a mapping call following a successful size
> call will succeed (e.g. The target IOREQ server gets destroyed, or the domain
> switches from grant v2 to v1).  Document the restriction, and use the
> flexibility to simplify the paths to be lockless.
> 
> Signed-off-by: Andrew Cooper 
> ---
> CC: George Dunlap 
> CC: Ian Jackson 
> CC: Jan Beulich 
> CC: Stefano Stabellini 
> CC: Wei Liu 
> CC: Julien Grall 
> CC: Paul Durrant 
> CC: Michał Leszczyński 
> CC: Hubert Jasudowicz 
> CC: Tamas K Lengyel 
> 
> v2:
>  * Spelling fixes
>  * Add more local variables.
>  * Don't return any status frames on ARM where v2 support is compiled out.
> ---
>  xen/arch/x86/mm.c | 20 
>  xen/common/grant_table.c  | 23 ++
>  xen/common/memory.c   | 55 
> +--
>  xen/include/asm-x86/mm.h  |  3 +++
>  xen/include/public/memory.h   | 16 +
>  xen/include/xen/grant_table.h |  8 +++
>  xen/include/xen/mm.h  |  6 +
>  7 files changed, 114 insertions(+), 17 deletions(-)
> 
> diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
> index d1cfc8fb4a..e82307bdae 100644
> --- a/xen/arch/x86/mm.c
> +++ b/xen/arch/x86/mm.c
> @@ -4591,6 +4591,26 @@ int xenmem_add_to_physmap_one(
>  return rc;
>  }
> 
> +unsigned int arch_resource_max_frames(
> +struct domain *d, unsigned int type, unsigned int id)
> +{
> +unsigned int nr = 0;
> +
> +switch ( type )
> +{
> +#ifdef CONFIG_HVM
> +case XENMEM_resource_ioreq_server:
> +if ( !is_hvm_domain(d) )
> +break;
> +/* One frame for the buf-ioreq ring, and one frame per 128 vcpus. */
> +nr = 1 + DIV_ROUND_UP(d->max_vcpus * sizeof(struct ioreq), 
> PAGE_SIZE);

The buf-ioreq ring is optional so a caller using this value may still get a 
resource acquisition failure unless the id is used to actually look up and 
check the ioreq server in question for the actual maximum. So this needs to 
call into a new function in ioreq.c.

  Paul

> +break;
> +#endif
> +}
> +
> +return nr;
> +}
> +
>  int arch_acquire_resource(struct domain *d, unsigned int type,
>unsigned int id, unsigned long frame,
>unsigned int nr_frames, xen_pfn_t mfn_list[])
> diff --git a/xen/common/grant_table.c b/xen/common/grant_table.c
> index 912f07be47..8c401a5540 100644
> --- a/xen/common/grant_table.c
> +++ b/xen/common/grant_table.c
> @@ -4013,6 +4013,29 @@ static int gnttab_get_shared_frame_mfn(struct domain 
> *d,
>  return 0;
>  }
> 
> +unsigned int gnttab_resource_max_frames(struct domain *d, unsigned int id)
> +{
> +const struct grant_table *gt = d->grant_table;
> +unsigned int nr = 0;
> +
> +/* Don't need the grant lock.  This limit is fixed at domain create 
> time. */
> +switch ( id )
> +{
> +case XENMEM_resource_grant_table_id_shared:
> +nr = gt->max_grant_frames;
> +break;
> +
> +case XENMEM_resource_grant_table_id_status:
> +if ( GNTTAB_MAX_VERSION < 2 )
> +break;
> +
> +nr = grant_to_status_frames(gt->max_grant_frames);
> +break;
> +}
> +
> +return nr;
> +}
> +
>  int gnttab_acquire_resource(
>  struct domain *d, unsigned int id, unsigned long frame,
>  unsigned int nr_frames, xen_pfn_t mfn_list[])
> diff --git a/xen/common/memory.c b/xen/common/memory.c
> index 177fc378d9..c559935732 100644
> --- a/xen/common/memory.c
> +++ b/xen

[PATCH v2 04/11] xen/memory: Fix acquire_resource size semantics

2020-09-22 Thread Andrew Cooper
Calling XENMEM_acquire_resource with a NULL frame_list is a request for the
size of the resource, but the returned 32 is bogus.

If someone tries to follow it for XENMEM_resource_ioreq_server, the acquire
call will fail as IOREQ servers currently top out at 2 frames, and it is only
half the size of the default grant table limit for guests.

Also, no users actually request a resource size, because it was never wired up
in the sole implementation of resource acquisition in Linux.

Introduce a new resource_max_frames() to calculate the size of a resource, and
implement it in the IOREQ and grant subsystems.

It is impossible to guarantee that a mapping call following a successful size
call will succeed (e.g. The target IOREQ server gets destroyed, or the domain
switches from grant v2 to v1).  Document the restriction, and use the
flexibility to simplify the paths to be lockless.

Signed-off-by: Andrew Cooper 
---
CC: George Dunlap 
CC: Ian Jackson 
CC: Jan Beulich 
CC: Stefano Stabellini 
CC: Wei Liu 
CC: Julien Grall 
CC: Paul Durrant 
CC: Michał Leszczyński 
CC: Hubert Jasudowicz 
CC: Tamas K Lengyel 

v2:
 * Spelling fixes
 * Add more local variables.
 * Don't return any status frames on ARM where v2 support is compiled out.
---
 xen/arch/x86/mm.c | 20 
 xen/common/grant_table.c  | 23 ++
 xen/common/memory.c   | 55 +--
 xen/include/asm-x86/mm.h  |  3 +++
 xen/include/public/memory.h   | 16 +
 xen/include/xen/grant_table.h |  8 +++
 xen/include/xen/mm.h  |  6 +
 7 files changed, 114 insertions(+), 17 deletions(-)

diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
index d1cfc8fb4a..e82307bdae 100644
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -4591,6 +4591,26 @@ int xenmem_add_to_physmap_one(
 return rc;
 }
 
+unsigned int arch_resource_max_frames(
+struct domain *d, unsigned int type, unsigned int id)
+{
+unsigned int nr = 0;
+
+switch ( type )
+{
+#ifdef CONFIG_HVM
+case XENMEM_resource_ioreq_server:
+if ( !is_hvm_domain(d) )
+break;
+/* One frame for the buf-ioreq ring, and one frame per 128 vcpus. */
+nr = 1 + DIV_ROUND_UP(d->max_vcpus * sizeof(struct ioreq), PAGE_SIZE);
+break;
+#endif
+}
+
+return nr;
+}
+
 int arch_acquire_resource(struct domain *d, unsigned int type,
   unsigned int id, unsigned long frame,
   unsigned int nr_frames, xen_pfn_t mfn_list[])
diff --git a/xen/common/grant_table.c b/xen/common/grant_table.c
index 912f07be47..8c401a5540 100644
--- a/xen/common/grant_table.c
+++ b/xen/common/grant_table.c
@@ -4013,6 +4013,29 @@ static int gnttab_get_shared_frame_mfn(struct domain *d,
 return 0;
 }
 
+unsigned int gnttab_resource_max_frames(struct domain *d, unsigned int id)
+{
+const struct grant_table *gt = d->grant_table;
+unsigned int nr = 0;
+
+/* Don't need the grant lock.  This limit is fixed at domain create time. 
*/
+switch ( id )
+{
+case XENMEM_resource_grant_table_id_shared:
+nr = gt->max_grant_frames;
+break;
+
+case XENMEM_resource_grant_table_id_status:
+if ( GNTTAB_MAX_VERSION < 2 )
+break;
+
+nr = grant_to_status_frames(gt->max_grant_frames);
+break;
+}
+
+return nr;
+}
+
 int gnttab_acquire_resource(
 struct domain *d, unsigned int id, unsigned long frame,
 unsigned int nr_frames, xen_pfn_t mfn_list[])
diff --git a/xen/common/memory.c b/xen/common/memory.c
index 177fc378d9..c559935732 100644
--- a/xen/common/memory.c
+++ b/xen/common/memory.c
@@ -1007,6 +1007,26 @@ static long xatp_permission_check(struct domain *d, 
unsigned int space)
 return xsm_add_to_physmap(XSM_TARGET, current->domain, d);
 }
 
+/*
+ * Return 0 on any kind of error.  Caller converts to -EINVAL.
+ *
+ * All nonzero values should be repeatable (i.e. derived from some fixed
+ * property of the domain), and describe the full resource (i.e. mapping the
+ * result of this call will be the entire resource).
+ */
+static unsigned int resource_max_frames(struct domain *d,
+unsigned int type, unsigned int id)
+{
+switch ( type )
+{
+case XENMEM_resource_grant_table:
+return gnttab_resource_max_frames(d, id);
+
+default:
+return arch_resource_max_frames(d, type, id);
+}
+}
+
 static int acquire_resource(
 XEN_GUEST_HANDLE_PARAM(xen_mem_acquire_resource_t) arg)
 {
@@ -1018,6 +1038,7 @@ static int acquire_resource(
  * use-cases then per-CPU arrays or heap allocations may be required.
  */
 xen_pfn_t mfn_list[32];
+unsigned int max_frames;
 int rc;
 
 /*
@@ -1034,19 +1055,6 @@ static int acquire_resource(
 if ( xmar.pad != 0 )
 return -EINVAL;
 
-if ( guest_handle_is_null(xmar.frame_list) )
-{
-if ( xmar.nr_frames )
-re