Re: [RFC PATCH 3/7] x86: Grant AMX permission for guest

2022-01-21 Thread Yang Zhong
On Tue, Jan 18, 2022 at 02:06:55PM +0100, Paolo Bonzini wrote:
> Sorry, hit send on the wrong window.  This is the only patch that
> will require a bit more work.
> 
> On 1/18/22 13:52, Paolo Bonzini wrote:
> >>@@ -124,6 +150,8 @@ void x86_cpus_init(X86MachineState *x86ms,
> >>int default_cpu_version)
> >>  MachineState *ms = MACHINE(x86ms);
> >>  MachineClass *mc = MACHINE_GET_CLASS(x86ms);
> >>+    /* Request AMX pemission for guest */
> >>+    x86_xsave_req_perm();
> >>  x86_cpu_set_default_version(default_cpu_version);
> >
> >This should be done before creating a CPU with support for state
> >component 18.  It happens in kvm_init_vcpu, with the following
> >call stack:
> >
> > kvm_init_vcpu
> > kvm_vcpu_thread_fn
> > kvm_start_vcpu_thread
> > qemu_init_vcpu
> > x86_cpu_realizefn
> >
> >The issue however is that this has to be done before
> >KVM_GET_SUPPORTED_CPUID and KVM_CHECK_EXTENSION(KVM_CAP_XSAVE2).
> >
> >For the former, you can assume that anything returned by
> >ARCH_GET_XCOMP_GUEST_PERM will be returned by
> >KVM_GET_SUPPORTED_CPUID in CPUID[0xD].EDX:EAX, so you can:
> >
> >- add it to kvm_arch_get_supported_cpuid
> 
> ... together with the other special cases (otherwise
> x86_cpu_get_supported_feature_word complains that XTILEDATA is not
> available)
> 
> - change kvm_cpu_xsave_init to use host_cpuid instead of
> kvm_arch_get_supported_cpuid.
> 
> - call ARCH_REQ_XCOMP_GUEST_PERM from
> x86_cpu_enable_xsave_components, with a conditional like
> 
> if (kvm_enabled()) {
> kvm_request_xsave_components(cpu, mask);
> }
> 
> KVM_CHECK_EXTENSION(KVM_CAP_XSAVE2) is actually not a problem; the
> ioctl is only called from kvm_arch_init_vcpu and therefore after
> x86_cpu_enable_xsave_components.
>
  
  Paolo, thanks too much for those detailed steps!
  I have completed the new patch according to those steps, and work well.

  Since this is only big change patch, the next version will be removed RFC.

  Thanks!
  Yang  
  
 
> Thanks,
> 
> Paolo



Re: [RFC PATCH 3/7] x86: Grant AMX permission for guest

2022-01-18 Thread Paolo Bonzini
Sorry, hit send on the wrong window.  This is the only patch that will 
require a bit more work.


On 1/18/22 13:52, Paolo Bonzini wrote:
@@ -124,6 +150,8 @@ void x86_cpus_init(X86MachineState *x86ms, int 
default_cpu_version)

  MachineState *ms = MACHINE(x86ms);
  MachineClass *mc = MACHINE_GET_CLASS(x86ms);
+    /* Request AMX pemission for guest */
+    x86_xsave_req_perm();
  x86_cpu_set_default_version(default_cpu_version);


This should be done before creating a CPU with support for state 
component 18.  It happens in kvm_init_vcpu, with the following call stack:


 kvm_init_vcpu
 kvm_vcpu_thread_fn
 kvm_start_vcpu_thread
 qemu_init_vcpu
 x86_cpu_realizefn

The issue however is that this has to be done before 
KVM_GET_SUPPORTED_CPUID and KVM_CHECK_EXTENSION(KVM_CAP_XSAVE2).


For the former, you can assume that anything returned by 
ARCH_GET_XCOMP_GUEST_PERM will be returned by KVM_GET_SUPPORTED_CPUID in 
CPUID[0xD].EDX:EAX, so you can:


- add it to kvm_arch_get_supported_cpuid


... together with the other special cases (otherwise 
x86_cpu_get_supported_feature_word complains that XTILEDATA is not 
available)


- change kvm_cpu_xsave_init to use host_cpuid instead of 
kvm_arch_get_supported_cpuid.


- call ARCH_REQ_XCOMP_GUEST_PERM from x86_cpu_enable_xsave_components, 
with a conditional like


if (kvm_enabled()) {
kvm_request_xsave_components(cpu, mask);
}

KVM_CHECK_EXTENSION(KVM_CAP_XSAVE2) is actually not a problem; the ioctl 
is only called from kvm_arch_init_vcpu and therefore after 
x86_cpu_enable_xsave_components.


Thanks,

Paolo




Re: [RFC PATCH 3/7] x86: Grant AMX permission for guest

2022-01-18 Thread Paolo Bonzini

On 1/7/22 10:31, Yang Zhong wrote:

+static void x86_xsave_req_perm(void)
+{
+unsigned long bitmask;
+
+long rc = syscall(SYS_arch_prctl, ARCH_REQ_XCOMP_GUEST_PERM,
+  XSTATE_XTILE_DATA_BIT);
+if (rc) {
+/*
+ * The older kernel version(<5.15) can't support
+ * ARCH_REQ_XCOMP_GUEST_PERM and directly return.
+ */
+return;
+}
+
+rc = syscall(SYS_arch_prctl, ARCH_GET_XCOMP_GUEST_PERM, &bitmask);
+if (rc) {
+error_report("prctl(ARCH_GET_XCOMP_GUEST_PERM) error: %ld", rc);
+} else if (!(bitmask & XFEATURE_XTILE_MASK)) {
+error_report("prctl(ARCH_REQ_XCOMP_GUEST_PERM) failure "
+ "and bitmask=0x%lx", bitmask);
+exit(EXIT_FAILURE);
+}
+}
+
  void x86_cpus_init(X86MachineState *x86ms, int default_cpu_version)
  {
  int i;
@@ -124,6 +150,8 @@ void x86_cpus_init(X86MachineState *x86ms, int 
default_cpu_version)
  MachineState *ms = MACHINE(x86ms);
  MachineClass *mc = MACHINE_GET_CLASS(x86ms);
  
+/* Request AMX pemission for guest */

+x86_xsave_req_perm();
  x86_cpu_set_default_version(default_cpu_version);
  


This should be done before creating a CPU with support for state 
component 18.  It happens in kvm_init_vcpu, with the following call stack:


kvm_init_vcpu
kvm_vcpu_thread_fn
kvm_start_vcpu_thread
qemu_init_vcpu
x86_cpu_realizefn

The issue however is that this has to be done before 
KVM_GET_SUPPORTED_CPUID and KVM_CHECK_EXTENSION(KVM_CAP_XSAVE2).


For the former, you can assume that anything returned by 
ARCH_GET_XCOMP_GUEST_PERM will be returned by KVM_GET_SUPPORTED_CPUID in 
CPUID[0xD].EDX:EAX, so you can:


- add it to kvm_arch_get_supported_cpuid



Re: [RFC PATCH 3/7] x86: Grant AMX permission for guest

2022-01-10 Thread Yang Zhong
On Mon, Jan 10, 2022 at 04:36:13PM +0800, Tian, Kevin wrote:
> > From: Zhong, Yang 
> > Sent: Friday, January 7, 2022 5:32 PM
> >
> > Kernel mechanism for dynamically enabled XSAVE features
> 
> there is no definition of "dynamically-enabled XSAVE features).
> 

  Thanks!


> > asks userspace VMM requesting guest permission if it wants
> > to expose the features. Only with the permission, kernel
> > can try to enable the features when detecting the intention
> > from guest in runtime.
> >
> > Qemu should request the permission for guest only once
> > before the first vCPU is created. KVM checks the guest
> > permission when Qemu advertises the features, and the
> > advertising operation fails w/o permission.
> 
> what about below?
> 
> "Kernel allocates 4K xstate buffer by default. For XSAVE features
> which require large state component (e.g. AMX), Linux kernel
> dynamically expands the xstate buffer only after the process has
> acquired the necessary permissions. Those are called dynamically-
> enabled XSAVE features (or dynamic xfeatures).
> 
> There are separate permissions for native tasks and guests.
> 
> Qemu should request the guest permissions for dynamic xfeatures
> which will be exposed to the guest. This only needs to be done
> once before the first vcpu is created."


  This is clearer. Will update this in new version, thanks!


> 
> >
> > Signed-off-by: Yang Zhong 
> > Signed-off-by: Jing Liu 
> > ---
> >  target/i386/cpu.h |  7 +++
> >  hw/i386/x86.c | 28 
> >  2 files changed, 35 insertions(+)
> >
> > diff --git a/target/i386/cpu.h b/target/i386/cpu.h
> > index 768a8218be..79023fe723 100644
> > --- a/target/i386/cpu.h
> > +++ b/target/i386/cpu.h
> > @@ -549,6 +549,13 @@ typedef enum X86Seg {
> >  #define XSTATE_ZMM_Hi256_MASK   (1ULL << XSTATE_ZMM_Hi256_BIT)
> >  #define XSTATE_Hi16_ZMM_MASK(1ULL << XSTATE_Hi16_ZMM_BIT)
> >  #define XSTATE_PKRU_MASK(1ULL << XSTATE_PKRU_BIT)
> > +#define XSTATE_XTILE_CFG_MASK   (1ULL << XSTATE_XTILE_CFG_BIT)
> > +#define XSTATE_XTILE_DATA_MASK  (1ULL << XSTATE_XTILE_DATA_BIT)
> > +#define XFEATURE_XTILE_MASK (XSTATE_XTILE_CFG_MASK \
> > + | XSTATE_XTILE_DATA_MASK)
> > +
> > +#define ARCH_GET_XCOMP_GUEST_PERM   0x1024
> > +#define ARCH_REQ_XCOMP_GUEST_PERM   0x1025
> >
> >  /* CPUID feature words */
> >  typedef enum FeatureWord {
> > diff --git a/hw/i386/x86.c b/hw/i386/x86.c
> > index b84840a1bb..0a204c375e 100644
> > --- a/hw/i386/x86.c
> > +++ b/hw/i386/x86.c
> > @@ -41,6 +41,8 @@
> >  #include "sysemu/cpu-timers.h"
> >  #include "trace.h"
> >
> > +#include 
> > +
> >  #include "hw/i386/x86.h"
> >  #include "target/i386/cpu.h"
> >  #include "hw/i386/topology.h"
> > @@ -117,6 +119,30 @@ out:
> >  object_unref(cpu);
> >  }
> >
> > +static void x86_xsave_req_perm(void)
> > +{
> > +unsigned long bitmask;
> > +
> > +long rc = syscall(SYS_arch_prctl, ARCH_REQ_XCOMP_GUEST_PERM,
> > +  XSTATE_XTILE_DATA_BIT);
> 
> Should we do it based on the cpuid for the first vcpu?


  This permission is requested before vcpu init, so put it in
  x86_cpus_init(). If the host kernel does not include AMX changes, or
  the latest kernel(include AMX) install on previous generation x86
  platform, this syscall() will directly return. I ever put this
  permission request in the vcpu create function, but it's hard
  to find a good location to handle this. As for cpuid, you mean
  I need check host cpuid info? to check if this host cpu can support
  AMX? thanks!

  Yang   
   
> 
> > +if (rc) {
> > +/*
> > + * The older kernel version(<5.15) can't support
> > + * ARCH_REQ_XCOMP_GUEST_PERM and directly return.
> > + */
> > +return;
> > +}
> > +
> > +rc = syscall(SYS_arch_prctl, ARCH_GET_XCOMP_GUEST_PERM, &bitmask);
> > +if (rc) {
> > +error_report("prctl(ARCH_GET_XCOMP_GUEST_PERM) error: %ld", rc);
> > +} else if (!(bitmask & XFEATURE_XTILE_MASK)) {
> > +error_report("prctl(ARCH_REQ_XCOMP_GUEST_PERM) failure "
> > + "and bitmask=0x%lx", bitmask);
> > +exit(EXIT_FAILURE);
> > +}
> > +}
> > +
> >  void x86_cpus_init(X86MachineState *x86ms, int default_cpu_version)
> >  {
> >  int i;
> > @@ -124,6 +150,8 @@ void x86_cpus_init(X86MachineState *x86ms, int
> > default_cpu_version)
> >  MachineState *ms = MACHINE(x86ms);
> >  MachineClass *mc = MACHINE_GET_CLASS(x86ms);
> >
> > +/* Request AMX pemission for guest */
> > +x86_xsave_req_perm();
> >  x86_cpu_set_default_version(default_cpu_version);
> >
> >  /*



RE: [RFC PATCH 3/7] x86: Grant AMX permission for guest

2022-01-10 Thread Tian, Kevin
> From: Zhong, Yang 
> Sent: Friday, January 7, 2022 5:32 PM
> 
> Kernel mechanism for dynamically enabled XSAVE features

there is no definition of "dynamically-enabled XSAVE features).

> asks userspace VMM requesting guest permission if it wants
> to expose the features. Only with the permission, kernel
> can try to enable the features when detecting the intention
> from guest in runtime.
> 
> Qemu should request the permission for guest only once
> before the first vCPU is created. KVM checks the guest
> permission when Qemu advertises the features, and the
> advertising operation fails w/o permission.

what about below?

"Kernel allocates 4K xstate buffer by default. For XSAVE features
which require large state component (e.g. AMX), Linux kernel 
dynamically expands the xstate buffer only after the process has
acquired the necessary permissions. Those are called dynamically-
enabled XSAVE features (or dynamic xfeatures).

There are separate permissions for native tasks and guests.

Qemu should request the guest permissions for dynamic xfeatures 
which will be exposed to the guest. This only needs to be done
once before the first vcpu is created."

> 
> Signed-off-by: Yang Zhong 
> Signed-off-by: Jing Liu 
> ---
>  target/i386/cpu.h |  7 +++
>  hw/i386/x86.c | 28 
>  2 files changed, 35 insertions(+)
> 
> diff --git a/target/i386/cpu.h b/target/i386/cpu.h
> index 768a8218be..79023fe723 100644
> --- a/target/i386/cpu.h
> +++ b/target/i386/cpu.h
> @@ -549,6 +549,13 @@ typedef enum X86Seg {
>  #define XSTATE_ZMM_Hi256_MASK   (1ULL << XSTATE_ZMM_Hi256_BIT)
>  #define XSTATE_Hi16_ZMM_MASK(1ULL << XSTATE_Hi16_ZMM_BIT)
>  #define XSTATE_PKRU_MASK(1ULL << XSTATE_PKRU_BIT)
> +#define XSTATE_XTILE_CFG_MASK   (1ULL << XSTATE_XTILE_CFG_BIT)
> +#define XSTATE_XTILE_DATA_MASK  (1ULL << XSTATE_XTILE_DATA_BIT)
> +#define XFEATURE_XTILE_MASK (XSTATE_XTILE_CFG_MASK \
> + | XSTATE_XTILE_DATA_MASK)
> +
> +#define ARCH_GET_XCOMP_GUEST_PERM   0x1024
> +#define ARCH_REQ_XCOMP_GUEST_PERM   0x1025
> 
>  /* CPUID feature words */
>  typedef enum FeatureWord {
> diff --git a/hw/i386/x86.c b/hw/i386/x86.c
> index b84840a1bb..0a204c375e 100644
> --- a/hw/i386/x86.c
> +++ b/hw/i386/x86.c
> @@ -41,6 +41,8 @@
>  #include "sysemu/cpu-timers.h"
>  #include "trace.h"
> 
> +#include 
> +
>  #include "hw/i386/x86.h"
>  #include "target/i386/cpu.h"
>  #include "hw/i386/topology.h"
> @@ -117,6 +119,30 @@ out:
>  object_unref(cpu);
>  }
> 
> +static void x86_xsave_req_perm(void)
> +{
> +unsigned long bitmask;
> +
> +long rc = syscall(SYS_arch_prctl, ARCH_REQ_XCOMP_GUEST_PERM,
> +  XSTATE_XTILE_DATA_BIT);

Should we do it based on the cpuid for the first vcpu?

> +if (rc) {
> +/*
> + * The older kernel version(<5.15) can't support
> + * ARCH_REQ_XCOMP_GUEST_PERM and directly return.
> + */
> +return;
> +}
> +
> +rc = syscall(SYS_arch_prctl, ARCH_GET_XCOMP_GUEST_PERM, &bitmask);
> +if (rc) {
> +error_report("prctl(ARCH_GET_XCOMP_GUEST_PERM) error: %ld", rc);
> +} else if (!(bitmask & XFEATURE_XTILE_MASK)) {
> +error_report("prctl(ARCH_REQ_XCOMP_GUEST_PERM) failure "
> + "and bitmask=0x%lx", bitmask);
> +exit(EXIT_FAILURE);
> +}
> +}
> +
>  void x86_cpus_init(X86MachineState *x86ms, int default_cpu_version)
>  {
>  int i;
> @@ -124,6 +150,8 @@ void x86_cpus_init(X86MachineState *x86ms, int
> default_cpu_version)
>  MachineState *ms = MACHINE(x86ms);
>  MachineClass *mc = MACHINE_GET_CLASS(x86ms);
> 
> +/* Request AMX pemission for guest */
> +x86_xsave_req_perm();
>  x86_cpu_set_default_version(default_cpu_version);
> 
>  /*



[RFC PATCH 3/7] x86: Grant AMX permission for guest

2022-01-07 Thread Yang Zhong
Kernel mechanism for dynamically enabled XSAVE features
asks userspace VMM requesting guest permission if it wants
to expose the features. Only with the permission, kernel
can try to enable the features when detecting the intention
from guest in runtime.

Qemu should request the permission for guest only once
before the first vCPU is created. KVM checks the guest
permission when Qemu advertises the features, and the
advertising operation fails w/o permission.

Signed-off-by: Yang Zhong 
Signed-off-by: Jing Liu 
---
 target/i386/cpu.h |  7 +++
 hw/i386/x86.c | 28 
 2 files changed, 35 insertions(+)

diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index 768a8218be..79023fe723 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -549,6 +549,13 @@ typedef enum X86Seg {
 #define XSTATE_ZMM_Hi256_MASK   (1ULL << XSTATE_ZMM_Hi256_BIT)
 #define XSTATE_Hi16_ZMM_MASK(1ULL << XSTATE_Hi16_ZMM_BIT)
 #define XSTATE_PKRU_MASK(1ULL << XSTATE_PKRU_BIT)
+#define XSTATE_XTILE_CFG_MASK   (1ULL << XSTATE_XTILE_CFG_BIT)
+#define XSTATE_XTILE_DATA_MASK  (1ULL << XSTATE_XTILE_DATA_BIT)
+#define XFEATURE_XTILE_MASK (XSTATE_XTILE_CFG_MASK \
+ | XSTATE_XTILE_DATA_MASK)
+
+#define ARCH_GET_XCOMP_GUEST_PERM   0x1024
+#define ARCH_REQ_XCOMP_GUEST_PERM   0x1025
 
 /* CPUID feature words */
 typedef enum FeatureWord {
diff --git a/hw/i386/x86.c b/hw/i386/x86.c
index b84840a1bb..0a204c375e 100644
--- a/hw/i386/x86.c
+++ b/hw/i386/x86.c
@@ -41,6 +41,8 @@
 #include "sysemu/cpu-timers.h"
 #include "trace.h"
 
+#include 
+
 #include "hw/i386/x86.h"
 #include "target/i386/cpu.h"
 #include "hw/i386/topology.h"
@@ -117,6 +119,30 @@ out:
 object_unref(cpu);
 }
 
+static void x86_xsave_req_perm(void)
+{
+unsigned long bitmask;
+
+long rc = syscall(SYS_arch_prctl, ARCH_REQ_XCOMP_GUEST_PERM,
+  XSTATE_XTILE_DATA_BIT);
+if (rc) {
+/*
+ * The older kernel version(<5.15) can't support
+ * ARCH_REQ_XCOMP_GUEST_PERM and directly return.
+ */
+return;
+}
+
+rc = syscall(SYS_arch_prctl, ARCH_GET_XCOMP_GUEST_PERM, &bitmask);
+if (rc) {
+error_report("prctl(ARCH_GET_XCOMP_GUEST_PERM) error: %ld", rc);
+} else if (!(bitmask & XFEATURE_XTILE_MASK)) {
+error_report("prctl(ARCH_REQ_XCOMP_GUEST_PERM) failure "
+ "and bitmask=0x%lx", bitmask);
+exit(EXIT_FAILURE);
+}
+}
+
 void x86_cpus_init(X86MachineState *x86ms, int default_cpu_version)
 {
 int i;
@@ -124,6 +150,8 @@ void x86_cpus_init(X86MachineState *x86ms, int 
default_cpu_version)
 MachineState *ms = MACHINE(x86ms);
 MachineClass *mc = MACHINE_GET_CLASS(x86ms);
 
+/* Request AMX pemission for guest */
+x86_xsave_req_perm();
 x86_cpu_set_default_version(default_cpu_version);
 
 /*