Re: [RFC for-4.20 v1 1/1] x86/hvm: Introduce Xen-wide ASID Allocator

2024-06-27 Thread Jan Beulich
On 27.06.2024 15:41, Vaishali Thakkar wrote:
> On 6/13/24 1:04 PM, Jan Beulich wrote:
>> On 24.05.2024 14:31, Vaishali Thakkar wrote:
>>> -void hvm_asid_flush_core(void)
>>> +void hvm_asid_flush_all(void)
>>>   {
>>> -struct hvm_asid_data *data = &this_cpu(hvm_asid_data);
>>> +struct hvm_asid_data *data = &asid_data;
>>>   
>>> -if ( data->disabled )
>>> +if ( data->disabled)
>>>   return;
>>>   
>>> -if ( likely(++data->core_asid_generation != 0) )
>>> +if ( likely(++data->asid_generation != 0) )
>>>   return;
>>>   
>>>   /*
>>> - * ASID generations are 64 bit.  Overflow of generations never happens.
>>> - * For safety, we simply disable ASIDs, so correctness is established; 
>>> it
>>> - * only runs a bit slower.
>>> - */
>>> +* ASID generations are 64 bit.  Overflow of generations never happens.
>>> +* For safety, we simply disable ASIDs, so correctness is established; 
>>> it
>>> +* only runs a bit slower.
>>> +*/
>>
>> Please don't screw up indentation; this comment was well-formed before. What
>> I question is whether, with the ultimate purpose in mind, the comment 
>> actually
>> will continue to be correct. We can't simply disable ASIDs when we have SEV
>> VMs running, can we?
> 
> You're right about SEV VMs. But wouldn't we still want to have a way to
> disable ASIDs when no SEV VMs are running?

Possibly. Yet that still would render this comment stale in the common case,
as, the way it's written, it suggests that simply disabling ASIDs on the fly
is an okay thing to do.

>>> +c = &cpu_data[cpu];
>>> +/* Check for erratum #170, and leave ASIDs disabled if it's 
>>> present. */
>>> +if ( !cpu_has_amd_erratum(c, AMD_ERRATUM_170) )
>>> +nasids += cpuid_ebx(0x8000000aU);
>>
>> Why += ? Don't you mean to establish the minimum across all CPUs? Which would
>> be assuming there might be an asymmetry, which generally we assume there
>> isn't.
>> And if you invoke CPUID, you'll need to do so on the very CPU, not many times
>> in a row on the BSP.
> 
> Hmm, I'm not sure I understand your point completely. Just to clarify,
> do you mean that even if we assume there is no asymmetry, we should
> find and establish the minimum ASID count across all online CPUs and
> ensure that the CPUID instruction is executed on the respective CPU?

No, I mean that
- if we assume there may be asymmetry, CPUID will need invoking once on
  every CPU (including ones later being hot-onlined),
- if we assume no asymmetry, there's no need for accumulation.
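
To make that concrete, a minimal sketch of the asymmetry-tolerant shape
(CPUID run once on each CPU from its own bring-up path, so hot-onlined CPUs
are covered too, keeping the minimum rather than a sum) could look like the
following. The helper name, the min_nasids variable and the call site are
assumptions for illustration, not part of this patch:

static uint32_t min_nasids = ~0u;

/* Called on each CPU from its own (SVM) bring-up path, not looped on the BSP. */
static void asid_update_limit(void)
{
    struct cpuinfo_x86 *c = &cpu_data[smp_processor_id()];
    /* CPUID leaf 0x8000000a, EBX: number of ASIDs this CPU supports. */
    uint32_t nasids = cpuid_ebx(0x8000000aU);

    /* Erratum #170 parts contribute 0, i.e. ASIDs stay disabled. */
    if ( cpu_has_amd_erratum(c, AMD_ERRATUM_170) )
        nasids = 0;

    /* CPU bring-up is serialised, so a plain min() suffices here. */
    min_nasids = min(min_nasids, nasids);
}

hvm_asid_init() would then consume min_nasids once the initially present
CPUs have been brought up.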

>>> --- a/xen/arch/x86/mm/hap/hap.c
>>> +++ b/xen/arch/x86/mm/hap/hap.c
>>> @@ -739,13 +739,13 @@ static bool cf_check flush_tlb(const unsigned long 
>>> *vcpu_bitmap)
>>>   if ( !flush_vcpu(v, vcpu_bitmap) )
>>>   continue;
>>>   
>>> -hvm_asid_flush_vcpu(v);
>>> -
>>>   cpu = read_atomic(&v->dirty_cpu);
>>>   if ( cpu != this_cpu && is_vcpu_dirty_cpu(cpu) && v->is_running )
>>>   __cpumask_set_cpu(cpu, mask);
>>>   }
>>>   
>>> +hvm_asid_flush_domain(d);
>>
>> Hmm, that's potentially much more flushing than is needed here. There
>> surely wants to be a way to flush at a granularity smaller than the
>> entire domain. (Likely applies elsewhere as well.)
> 
> I see, does it mean we still need a way to flush at the vcpu level in the
> case of HAP?

For correctness it's not really "need", but for performance I'm pretty sure
it's going to be "want".
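
For what it's worth, a sketch of one finer-grained direction on the SVM side
(not what this patch implements; the helper below is hypothetical and the
constant is defined locally for the sketch):

/* Value 3 = "flush this guest's TLB entries"; assumes FlushByAsid support. */
#define SKETCH_TLB_CTRL_FLUSH_ASID 3

static void flush_one_vcpu_asid(struct vcpu *v)
{
    /*
     * Ask only this vCPU to drop the TLB entries tagged with its (now
     * per-domain) ASID at its next VMRUN, instead of bumping the domain's
     * generation and forcing a fresh ASID onto every vCPU.
     */
    v->arch.hvm.svm.vmcb->tlb_control = SKETCH_TLB_CTRL_FLUSH_ASID;
}

In the flush_tlb() loop above this would replace the single
hvm_asid_flush_domain(d) call with a per-vCPU call inside the loop; VMX
would need an analogous INVVPID-based helper.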

Jan



Re: [RFC for-4.20 v1 1/1] x86/hvm: Introduce Xen-wide ASID Allocator

2024-06-27 Thread Vaishali Thakkar

On 6/13/24 1:04 PM, Jan Beulich wrote:

On 24.05.2024 14:31, Vaishali Thakkar wrote:

--- a/xen/arch/x86/hvm/asid.c
+++ b/xen/arch/x86/hvm/asid.c
@@ -27,8 +27,8 @@ boolean_param("asid", opt_asid_enabled);
   * the TLB.
   *
   * Sketch of the Implementation:
- *
- * ASIDs are a CPU-local resource.  As preemption of ASIDs is not possible,
+ * TODO(vaishali): Update this comment
+ * ASIDs are Xen-wide resource.  As preemption of ASIDs is not possible,
   * ASIDs are assigned in a round-robin scheme.  To minimize the overhead of
   * ASID invalidation, at the time of a TLB flush,  ASIDs are tagged with a
   * 64-bit generation.  Only on a generation overflow the code needs to
@@ -38,20 +38,21 @@ boolean_param("asid", opt_asid_enabled);
   * ASID useage to retain correctness.
   */
  
-/* Per-CPU ASID management. */
+/* Xen-wide ASID management */
  struct hvm_asid_data {
-   uint64_t core_asid_generation;
+   uint64_t asid_generation;
 uint32_t next_asid;
 uint32_t max_asid;
+   uint32_t min_asid;
 bool disabled;
  };
  
-static DEFINE_PER_CPU(struct hvm_asid_data, hvm_asid_data);
+static struct hvm_asid_data asid_data;
  
  void hvm_asid_init(int nasids)
  {
  static int8_t g_disabled = -1;
-struct hvm_asid_data *data = &this_cpu(hvm_asid_data);
+struct hvm_asid_data *data = &asid_data;
  
  data->max_asid = nasids - 1;
  data->disabled = !opt_asid_enabled || (nasids <= 1);
@@ -64,67 +65,73 @@ void hvm_asid_init(int nasids)
  }
  
  /* Zero indicates 'invalid generation', so we start the count at one. */
-data->core_asid_generation = 1;
+data->asid_generation = 1;
  
  /* Zero indicates 'ASIDs disabled', so we start the count at one. */
  data->next_asid = 1;


Both of these want to become compile-time initializers now, I think. The
comments would move there, but especially the second one also looks to
need updating.


Ack, will fix this.
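
For illustration, the compile-time initializer could take roughly this shape
(a sketch using this patch's field names; the second comment's wording is
only a suggested update):

static struct hvm_asid_data asid_data = {
    /* Zero indicates 'invalid generation', so the count starts at one. */
    .asid_generation = 1,
    /* ASID 0 is never handed out to guests, so allocation starts at one. */
    .next_asid = 1,
};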


  }
  
-void hvm_asid_flush_vcpu_asid(struct hvm_vcpu_asid *asid)
+void hvm_asid_flush_domain_asid(struct hvm_domain_asid *asid)
  {
  write_atomic(&asid->generation, 0);
  }
  
-void hvm_asid_flush_vcpu(struct vcpu *v)
+void hvm_asid_flush_domain(struct domain *d)
  {
-hvm_asid_flush_vcpu_asid(&v->arch.hvm.n1asid);
-hvm_asid_flush_vcpu_asid(&vcpu_nestedhvm(v).nv_n2asid);
+hvm_asid_flush_domain_asid(&d->arch.hvm.n1asid);
+//hvm_asid_flush_domain_asid(&vcpu_nestedhvm(v).nv_n2asid);


While in Lisbon Andrew indicated to not specifically bother about the nested
case, I don't think he meant by this to outright break it. Hence imo
something like this will need taking care of (which it being commented out
may or may not indicate is the plan).



I've already mentioned the reason for commenting out this code for v1 in
the cover letter. But in case you missed it, TL;DR: I wanted to know the
nested virtualization project's plans with regard to this before making
the changes. And it was commented out just to present the idea and for
testing. Also, I have put a few TODOs in this RFC for v2 which indicate
the plan to handle the nested virt parts.


Anyway, as discussed at the Xen Summit and indicated by Andrew, in v2 I'll 
make the changes kind of similar to what's happening in the non-nested virt 
code.
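
For reference, a rough sketch of what "similar to the non-nested handling"
could look like, assuming the nested ASID also becomes a per-domain
hvm_domain_asid (the n2asid field below is hypothetical, not part of this
patch):

void hvm_asid_flush_domain(struct domain *d)
{
    hvm_asid_flush_domain_asid(&d->arch.hvm.n1asid);
    /* Hypothetical v2 shape: nested ASID kept per domain, flushed alike. */
    hvm_asid_flush_domain_asid(&d->arch.hvm.n2asid);
}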



  }
  
-void hvm_asid_flush_core(void)
+void hvm_asid_flush_all(void)
  {
-struct hvm_asid_data *data = &this_cpu(hvm_asid_data);
+struct hvm_asid_data *data = &asid_data;
  
-if ( data->disabled )
+if ( data->disabled)
  return;
  
-if ( likely(++data->core_asid_generation != 0) )
+if ( likely(++data->asid_generation != 0) )
  return;
  
  /*
- * ASID generations are 64 bit.  Overflow of generations never happens.
- * For safety, we simply disable ASIDs, so correctness is established; it
- * only runs a bit slower.
- */
+* ASID generations are 64 bit.  Overflow of generations never happens.
+* For safety, we simply disable ASIDs, so correctness is established; it
+* only runs a bit slower.
+*/


Please don't screw up indentation; this comment was well-formed before. What
I question is whether, with the ultimate purpose in mind, the comment actually
will continue to be correct. We can't simply disable ASIDs when we have SEV
VMs running, can we?


You're right about SEV VMs. But wouldn't we still want to have a way to
disable ASIDs when no SEV VMs are running?



  printk("HVM: ASID generation overrun. Disabling ASIDs.\n");
  data->disabled = 1;
  }
  
-bool hvm_asid_handle_vmenter(struct hvm_vcpu_asid *asid)
+/* This function is called only when first vmenter happens after creating a 
new domain */
+bool hvm_asid_handle_vmenter(struct hvm_domain_asid *asid)


With what the comment says, the function likely wants giving a different
name.


Ack.


  {
-struct hvm_asid_data *data = &this_cpu(hvm_asid_data);
+struct hvm_asid_data *data = &asid_data;
  
  /* On erratum #170 systems 

Re: [RFC for-4.20 v1 1/1] x86/hvm: Introduce Xen-wide ASID Allocator

2024-06-13 Thread Jan Beulich
On 24.05.2024 14:31, Vaishali Thakkar wrote:
> --- a/xen/arch/x86/hvm/asid.c
> +++ b/xen/arch/x86/hvm/asid.c
> @@ -27,8 +27,8 @@ boolean_param("asid", opt_asid_enabled);
>   * the TLB.
>   *
>   * Sketch of the Implementation:
> - *
> - * ASIDs are a CPU-local resource.  As preemption of ASIDs is not possible,
> + * TODO(vaishali): Update this comment
> + * ASIDs are Xen-wide resource.  As preemption of ASIDs is not possible,
>   * ASIDs are assigned in a round-robin scheme.  To minimize the overhead of
>   * ASID invalidation, at the time of a TLB flush,  ASIDs are tagged with a
>   * 64-bit generation.  Only on a generation overflow the code needs to
> @@ -38,20 +38,21 @@ boolean_param("asid", opt_asid_enabled);
>   * ASID useage to retain correctness.
>   */
>  
> -/* Per-CPU ASID management. */
> +/* Xen-wide ASID management */
>  struct hvm_asid_data {
> -   uint64_t core_asid_generation;
> +   uint64_t asid_generation;
> uint32_t next_asid;
> uint32_t max_asid;
> +   uint32_t min_asid;
> bool disabled;
>  };
>  
> -static DEFINE_PER_CPU(struct hvm_asid_data, hvm_asid_data);
> +static struct hvm_asid_data asid_data;
>  
>  void hvm_asid_init(int nasids)
>  {
>  static int8_t g_disabled = -1;
> -struct hvm_asid_data *data = &this_cpu(hvm_asid_data);
> +struct hvm_asid_data *data = &asid_data;
>  
>  data->max_asid = nasids - 1;
>  data->disabled = !opt_asid_enabled || (nasids <= 1);
> @@ -64,67 +65,73 @@ void hvm_asid_init(int nasids)
>  }
>  
>  /* Zero indicates 'invalid generation', so we start the count at one. */
> -data->core_asid_generation = 1;
> +data->asid_generation = 1;
>  
>  /* Zero indicates 'ASIDs disabled', so we start the count at one. */
>  data->next_asid = 1;

Both of these want to become compile-time initializers now, I think. The
comments would move there, but especially the second one also looks to
need updating.

>  }
>  
> -void hvm_asid_flush_vcpu_asid(struct hvm_vcpu_asid *asid)
> +void hvm_asid_flush_domain_asid(struct hvm_domain_asid *asid)
>  {
>  write_atomic(&asid->generation, 0);
>  }
>  
> -void hvm_asid_flush_vcpu(struct vcpu *v)
> +void hvm_asid_flush_domain(struct domain *d)
>  {
> -hvm_asid_flush_vcpu_asid(&v->arch.hvm.n1asid);
> -hvm_asid_flush_vcpu_asid(&vcpu_nestedhvm(v).nv_n2asid);
> +hvm_asid_flush_domain_asid(&d->arch.hvm.n1asid);
> +//hvm_asid_flush_domain_asid(&vcpu_nestedhvm(v).nv_n2asid);

While in Lisbon Andrew indicated to not specifically bother about the nested
case, I don't think he meant by this to outright break it. Hence imo
something like this will need taking care of (which it being commented out
may or may not indicate is the plan).

>  }
>  
> -void hvm_asid_flush_core(void)
> +void hvm_asid_flush_all(void)
>  {
> -struct hvm_asid_data *data = &this_cpu(hvm_asid_data);
> +struct hvm_asid_data *data = &asid_data;
>  
> -if ( data->disabled )
> +if ( data->disabled)
>  return;
>  
> -if ( likely(++data->core_asid_generation != 0) )
> +if ( likely(++data->asid_generation != 0) )
>  return;
>  
>  /*
> - * ASID generations are 64 bit.  Overflow of generations never happens.
> - * For safety, we simply disable ASIDs, so correctness is established; it
> - * only runs a bit slower.
> - */
> +* ASID generations are 64 bit.  Overflow of generations never happens.
> +* For safety, we simply disable ASIDs, so correctness is established; it
> +* only runs a bit slower.
> +*/

Please don't screw up indentation; this comment was well-formed before. What
I question is whether, with the ultimate purpose in mind, the comment actually
will continue to be correct. We can't simply disable ASIDs when we have SEV
VMs running, can we?

>  printk("HVM: ASID generation overrun. Disabling ASIDs.\n");
>  data->disabled = 1;
>  }
>  
> -bool hvm_asid_handle_vmenter(struct hvm_vcpu_asid *asid)
> +/* This function is called only when first vmenter happens after creating a 
> new domain */
> +bool hvm_asid_handle_vmenter(struct hvm_domain_asid *asid)

With what the comment says, the function likely wants giving a different
name.

>  {
> -struct hvm_asid_data *data = &this_cpu(hvm_asid_data);
> +struct hvm_asid_data *data = &asid_data;
>  
>  /* On erratum #170 systems we must flush the TLB. 
>   * Generation overruns are taken here, too. */
>  if ( data->disabled )
>  goto disabled;
>  
> -/* Test if VCPU has valid ASID. */
> -if ( read_atomic(&asid->generation) == data->core_asid_generation )
> +/* Test if domain has valid ASID. */
> +if ( read_atomic(&asid->generation) == data->asid_generation )
>  return 0;
>  
>  /* If there are no free ASIDs, need to go to a new generation */
>  if ( unlikely(data->next_asid > data->max_asid) )
>  {
> -hvm_asid_flush_core();
> +// TODO(vaishali): Add a check to pick the asid from the reclaimable 
> 

[RFC for-4.20 v1 1/1] x86/hvm: Introduce Xen-wide ASID Allocator

2024-05-24 Thread Vaishali Thakkar
Currently ASID generation and management is done per-PCPU. This
scheme is incompatible with SEV technologies, as SEV VMs need to
have a fixed ASID associated with all vcpus of the VM throughout
its lifetime.

This commit introduces a Xen-wide allocator which initializes
the ASIDs at the start of Xen and allows each domain to keep a
fixed ASID throughout its lifecycle. Having a fixed ASID for
non-SEV domains also presents us with the opportunity to further
make use of AMD instructions like TLBSYNC and INVLPGB for
broadcasting TLB invalidations.

Signed-off-by: Vaishali Thakkar 
---
 xen/arch/x86/flushtlb.c|  1 -
 xen/arch/x86/hvm/asid.c| 61 ++
 xen/arch/x86/hvm/emulate.c |  3 --
 xen/arch/x86/hvm/hvm.c |  2 +-
 xen/arch/x86/hvm/nestedhvm.c   |  4 +-
 xen/arch/x86/hvm/svm/asid.c| 28 +++-
 xen/arch/x86/hvm/svm/nestedsvm.c   |  2 +-
 xen/arch/x86/hvm/svm/svm.c | 33 ++
 xen/arch/x86/hvm/svm/svm.h |  1 -
 xen/arch/x86/hvm/vmx/vmcs.c|  2 +-
 xen/arch/x86/hvm/vmx/vmx.c | 15 +++
 xen/arch/x86/hvm/vmx/vvmx.c|  6 +--
 xen/arch/x86/include/asm/hvm/asid.h| 19 
 xen/arch/x86/include/asm/hvm/domain.h  |  1 +
 xen/arch/x86/include/asm/hvm/hvm.h |  2 +-
 xen/arch/x86/include/asm/hvm/svm/svm.h |  1 +
 xen/arch/x86/include/asm/hvm/vcpu.h|  6 +--
 xen/arch/x86/include/asm/hvm/vmx/vmx.h |  3 +-
 xen/arch/x86/mm/hap/hap.c  |  4 +-
 xen/arch/x86/mm/p2m.c  |  6 +--
 xen/arch/x86/mm/paging.c   |  2 +-
 xen/arch/x86/setup.c   |  7 +++
 22 files changed, 108 insertions(+), 101 deletions(-)

diff --git a/xen/arch/x86/flushtlb.c b/xen/arch/x86/flushtlb.c
index 18748b2bc8..69d72944d7 100644
--- a/xen/arch/x86/flushtlb.c
+++ b/xen/arch/x86/flushtlb.c
@@ -124,7 +124,6 @@ void switch_cr3_cr4(unsigned long cr3, unsigned long cr4)
 
 if ( tlb_clk_enabled )
 t = pre_flush();
-hvm_flush_guest_tlbs();
 
 old_cr4 = read_cr4();
 ASSERT(!(old_cr4 & X86_CR4_PCIDE) || !(old_cr4 & X86_CR4_PGE));
diff --git a/xen/arch/x86/hvm/asid.c b/xen/arch/x86/hvm/asid.c
index 8d27b7dba1..f343bfcdb9 100644
--- a/xen/arch/x86/hvm/asid.c
+++ b/xen/arch/x86/hvm/asid.c
@@ -27,8 +27,8 @@ boolean_param("asid", opt_asid_enabled);
  * the TLB.
  *
  * Sketch of the Implementation:
- *
- * ASIDs are a CPU-local resource.  As preemption of ASIDs is not possible,
+ * TODO(vaishali): Update this comment
+ * ASIDs are Xen-wide resource.  As preemption of ASIDs is not possible,
  * ASIDs are assigned in a round-robin scheme.  To minimize the overhead of
  * ASID invalidation, at the time of a TLB flush,  ASIDs are tagged with a
  * 64-bit generation.  Only on a generation overflow the code needs to
@@ -38,20 +38,21 @@ boolean_param("asid", opt_asid_enabled);
  * ASID useage to retain correctness.
  */
 
-/* Per-CPU ASID management. */
+/* Xen-wide ASID management */
 struct hvm_asid_data {
-   uint64_t core_asid_generation;
+   uint64_t asid_generation;
uint32_t next_asid;
uint32_t max_asid;
+   uint32_t min_asid;
bool disabled;
 };
 
-static DEFINE_PER_CPU(struct hvm_asid_data, hvm_asid_data);
+static struct hvm_asid_data asid_data;
 
 void hvm_asid_init(int nasids)
 {
 static int8_t g_disabled = -1;
-struct hvm_asid_data *data = &this_cpu(hvm_asid_data);
+struct hvm_asid_data *data = &asid_data;
 
 data->max_asid = nasids - 1;
 data->disabled = !opt_asid_enabled || (nasids <= 1);
@@ -64,67 +65,73 @@ void hvm_asid_init(int nasids)
 }
 
 /* Zero indicates 'invalid generation', so we start the count at one. */
-data->core_asid_generation = 1;
+data->asid_generation = 1;
 
 /* Zero indicates 'ASIDs disabled', so we start the count at one. */
 data->next_asid = 1;
 }
 
-void hvm_asid_flush_vcpu_asid(struct hvm_vcpu_asid *asid)
+void hvm_asid_flush_domain_asid(struct hvm_domain_asid *asid)
 {
 write_atomic(&asid->generation, 0);
 }
 
-void hvm_asid_flush_vcpu(struct vcpu *v)
+void hvm_asid_flush_domain(struct domain *d)
 {
-hvm_asid_flush_vcpu_asid(&v->arch.hvm.n1asid);
-hvm_asid_flush_vcpu_asid(&vcpu_nestedhvm(v).nv_n2asid);
+hvm_asid_flush_domain_asid(&d->arch.hvm.n1asid);
+//hvm_asid_flush_domain_asid(&vcpu_nestedhvm(v).nv_n2asid);
 }
 
-void hvm_asid_flush_core(void)
+void hvm_asid_flush_all(void)
 {
-struct hvm_asid_data *data = &this_cpu(hvm_asid_data);
+struct hvm_asid_data *data = &asid_data;
 
-if ( data->disabled )
+if ( data->disabled)
 return;
 
-if ( likely(++data->core_asid_generation != 0) )
+if ( likely(++data->asid_generation != 0) )
 return;
 
 /*
- * ASID generations are 64 bit.  Overflow of generations never happens.
- * For safety, we simply disable ASIDs, so correctness is established; it
- * only runs a bit slower.
- */
+* ASI