Dave Martin <dave.mar...@arm.com> writes:

> This patch adds the necessary API extensions to allow userspace to
> detect SVE support for guests and enable it.
>
> A new capability KVM_CAP_ARM_SVE is defined to allow userspace to
> detect the availability of the KVM SVE API extensions in the usual
> way.
>
> Userspace needs to enable SVE explicitly per vcpu and configure the
> set of SVE vector lengths available to the guest before the vcpu is
> allowed to run.  For these purposes, a new arm64-specific vcpu
> ioctl KVM_ARM_SVE_CONFIG is added, with the following subcommands
> (in rough order of expected use):
>
> KVM_ARM_SVE_CONFIG_QUERY: report the set of vector lengths
>     supported by this host.
>
>     The resulting set can be supplied directly to
>     KVM_ARM_SVE_CONFIG_SET in order to obtain the maximal possible
>     set, or used to inform userspace's decision on the appropriate
>     set of vector lengths (possibly taking into account the
>     configuration of other nodes in the cluster so that the VM can
>     migrate freely).
>
> KVM_ARM_SVE_CONFIG_SET: enable SVE for this vcpu and configure the
>     set of vector lengths it offers to the guest.
>
>     This can only be done once, before the vcpu is run.
>
> KVM_ARM_SVE_CONFIG_GET: report the set of vector lengths available
>     to the guest on this vcpu (for use when snapshotting or
>     migrating a VM).
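
So to check my understanding, for the common case the expected userspace
flow would be something like this? (my sketch, error handling elided;
vcpu_fd is the vcpu file descriptor):

    struct kvm_sve_vls vls;

    /* ask the host which vector lengths it can offer */
    memset(&vls, 0, sizeof(vls));
    vls.cmd = KVM_ARM_SVE_CONFIG_QUERY;
    ioctl(vcpu_fd, KVM_ARM_SVE_CONFIG, &vls);

    /* optionally intersect vls.required_vqs with other hosts' sets here */

    /* enable SVE with that set, before the vcpu first runs */
    vls.cmd = KVM_ARM_SVE_CONFIG_SET;
    ioctl(vcpu_fd, KVM_ARM_SVE_CONFIG, &vls);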
>
> Signed-off-by: Dave Martin <dave.mar...@arm.com>
> ---
>
> Changes since RFCv1:
>
>  * The new feature bit for PREFERRED_TARGET / VCPU_INIT is gone in
>    favour of a capability and a new ioctl to enable/configure SVE.
>
>    Perhaps the SVE configuration could be done via device attributes,
>    but it still has to be done early, so crowbarring support for this
>    behind a generic API may cause more trouble than it solves.
>
>    This is still up for discussion if anybody feels strongly about it.
>
>  * An ioctl KVM_ARM_SVE_CONFIG has been added to report the set of
>    vector lengths available and configure SVE for a vcpu.
>
>    To reduce ioctl namespace pollution the new operations are grouped
>    as subcommands under a single ioctl, since they use the same
>    argument format anyway.
> ---
>  arch/arm64/include/asm/kvm_host.h |   8 +-
>  arch/arm64/include/uapi/asm/kvm.h |  14 ++++
>  arch/arm64/kvm/guest.c            | 164 +++++++++++++++++++++++++++++++++++++-
>  arch/arm64/kvm/reset.c            |  50 ++++++++++++
>  include/uapi/linux/kvm.h          |   4 +
>  5 files changed, 238 insertions(+), 2 deletions(-)
>
> diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
> index bbde597..5225485 100644
> --- a/arch/arm64/include/asm/kvm_host.h
> +++ b/arch/arm64/include/asm/kvm_host.h
> @@ -52,6 +52,12 @@
>
>  DECLARE_STATIC_KEY_FALSE(userspace_irqchip_in_use);
>
> +#ifdef CONFIG_ARM64_SVE
> +bool kvm_sve_supported(void);
> +#else
> +static inline bool kvm_sve_supported(void) { return false; }
> +#endif
> +
>  int __attribute_const__ kvm_target_cpu(void);
>  int kvm_reset_vcpu(struct kvm_vcpu *vcpu);
>  int kvm_arch_dev_ioctl_check_extension(struct kvm *kvm, long ext);
> @@ -441,7 +447,7 @@ static inline void kvm_arch_sync_events(struct kvm *kvm) {}
>  static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
>  static inline void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu) {}
>
> -static inline void kvm_arm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {}
> +void kvm_arm_arch_vcpu_uninit(struct kvm_vcpu *vcpu);
>
>  void kvm_arm_init_debug(void);
>  void kvm_arm_setup_debug(struct kvm_vcpu *vcpu);
> diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h
> index 1ff68fa..94f6932 100644
> --- a/arch/arm64/include/uapi/asm/kvm.h
> +++ b/arch/arm64/include/uapi/asm/kvm.h
> @@ -32,6 +32,7 @@
>  #define KVM_NR_SPSR  5
>
>  #ifndef __ASSEMBLY__
> +#include <linux/kernel.h>
>  #include <linux/psci.h>
>  #include <linux/types.h>
>  #include <asm/ptrace.h>
> @@ -108,6 +109,19 @@ struct kvm_vcpu_init {
>       __u32 features[7];
>  };
>
> +/* Vector length set for KVM_ARM_SVE_CONFIG */
> +struct kvm_sve_vls {
> +     __u16 cmd;
> +     __u16 max_vq;
> +     __u16 _reserved[2];
> +     __u64 required_vqs[__KERNEL_DIV_ROUND_UP(SVE_VQ_MAX - SVE_VQ_MIN + 1, 64)];
> +};
> +
> +/* values for cmd: */
> +#define KVM_ARM_SVE_CONFIG_QUERY     0 /* query what the host can support */
> +#define KVM_ARM_SVE_CONFIG_SET               1 /* enable SVE for vcpu and set VLs */
> +#define KVM_ARM_SVE_CONFIG_GET               2 /* read the set of VLs for a vcpu */
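
To check my reading of the encoding here: bit (vq - SVE_VQ_MIN) of
required_vqs[] stands for the vector length of vq * 128 bits, so a
userspace helper testing membership would look roughly like this
(untested sketch):

    static bool vls_test_vq(const struct kvm_sve_vls *vls, unsigned int vq)
    {
            /* SVE_VQ_MIN == 1, so vq maps to bit (vq - 1) of the bitmap */
            return vls->required_vqs[(vq - SVE_VQ_MIN) / 64] &
                   ((__u64)1 << ((vq - SVE_VQ_MIN) % 64));
    }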
> +
>  struct kvm_sregs {
>  };
>
> diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c
> index 331b85e..d96145a 100644
> --- a/arch/arm64/kvm/guest.c
> +++ b/arch/arm64/kvm/guest.c
> @@ -26,6 +26,9 @@
>  #include <linux/module.h>
>  #include <linux/vmalloc.h>
>  #include <linux/fs.h>
> +#include <linux/slab.h>
> +#include <linux/string.h>
> +#include <linux/types.h>
>  #include <kvm/arm_psci.h>
>  #include <asm/cputype.h>
>  #include <linux/uaccess.h>
> @@ -56,6 +59,11 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
>       return 0;
>  }
>
> +void kvm_arm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
> +{
> +     kfree(vcpu->arch.sve_state);
> +}
> +
>  static u64 core_reg_offset_from_id(u64 id)
>  {
>       return id & ~(KVM_REG_ARCH_MASK | KVM_REG_SIZE_MASK | KVM_REG_ARM_CORE);
> @@ -546,10 +554,164 @@ int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init)
>       return 0;
>  }
>
> +#define VQS_PER_U64 64
> +#define vq_word(vqs, vq) (&(vqs)[((vq) - SVE_VQ_MIN) / VQS_PER_U64])
> +#define vq_mask(vq) ((u64)1 << (((vq) - SVE_VQ_MIN) % VQS_PER_U64))
> +
> +static void set_vq(u64 *vqs, unsigned int vq)
> +{
> +     *vq_word(vqs, vq) |= vq_mask(vq);
> +}
> +
> +static bool vq_set(const u64 *vqs, unsigned int vq)
> +{
> +     return *vq_word(vqs, vq) & vq_mask(vq);
> +}
> +
> +static int kvm_vcpu_set_sve_vls(struct kvm_vcpu *vcpu, struct kvm_sve_vls *vls,
> +             struct kvm_sve_vls __user *userp)
> +{
> +     unsigned int vq, max_vq;
> +     int ret;
> +
> +     if (vcpu->arch.has_run_once || vcpu_has_sve(vcpu))
> +             return -EBADFD; /* too late, or already configured */
> +
> +     BUG_ON(vcpu->arch.sve_max_vl || vcpu->arch.sve_state);
> +
> +     if (vls->max_vq < SVE_VQ_MIN || vls->max_vq > SVE_VQ_MAX)
> +             return -EINVAL;
> +
> +     max_vq = 0;
> +     for (vq = SVE_VQ_MIN; vq <= vls->max_vq; ++vq) {
> +             bool available = sve_vq_available(vq);
> +             bool required = vq_set(vls->required_vqs, vq);
> +
> +             if (required != available)
> +                     break;
> +
> +             if (required)
> +                     max_vq = vq;
> +     }
> +
> +     if (max_vq < SVE_VQ_MIN)
> +             return -EINVAL;
> +
> +     vls->max_vq = max_vq;
> +     ret = put_user(vls->max_vq, &userp->max_vq);
> +     if (ret)
> +             return ret;
> +
> +     /*
> +      * kvm_reset_vcpu() may already have run in KVM_VCPU_INIT, so we
> +      * rely on kzalloc() being sufficient to reset the guest SVE
> +      * state here for a new vcpu.
> +      *
> +      * Subsequent resets after vcpu initialisation are handled by
> +      * kvm_reset_sve().
> +      */
> +     vcpu->arch.sve_state = kzalloc(SVE_SIG_REGS_SIZE(vls->max_vq),
> +                                    GFP_KERNEL);
> +     if (!vcpu->arch.sve_state)
> +             return -ENOMEM;
> +
> +     vcpu->arch.flags |= KVM_ARM64_GUEST_HAS_SVE;
> +     vcpu->arch.sve_max_vl = sve_vl_from_vq(vls->max_vq);
> +
> +     return 0;
> +}
> +
> +static int __kvm_vcpu_query_sve_vls(struct kvm_sve_vls *vls,
> +             unsigned int max_vq, struct kvm_sve_vls __user *userp)
> +{
> +     unsigned int vq, max_available_vq;
> +
> +     memset(&vls->required_vqs, 0, sizeof(vls->required_vqs));
> +
> +     BUG_ON(max_vq < SVE_VQ_MIN || max_vq > SVE_VQ_MAX);
> +
> +     max_available_vq = 0;
> +     for (vq = SVE_VQ_MIN; vq <= max_vq; ++vq)
> +             if (sve_vq_available(vq)) {
> +                     set_vq(vls->required_vqs, vq);
> +                     max_available_vq = vq;
> +             }
> +
> +     if (WARN_ON(max_available_vq < SVE_VQ_MIN))
> +             return -EIO;
> +
> +     vls->max_vq = max_available_vq;
> +     if (copy_to_user(userp, vls, sizeof(*vls)))
> +             return -EFAULT;
> +
> +     return 0;
> +}
> +
> +static int kvm_vcpu_query_sve_vls(struct kvm_vcpu *vcpu, struct kvm_sve_vls *vls,
> +             struct kvm_sve_vls __user *userp)
> +{
> +     BUG_ON(!sve_vl_valid(sve_max_vl));
> +
> +     return __kvm_vcpu_query_sve_vls(vls,
> +                     sve_vq_from_vl(sve_max_vl), userp);
> +}
> +
> +static int kvm_vcpu_get_sve_vls(struct kvm_vcpu *vcpu, struct kvm_sve_vls *vls,
> +             struct kvm_sve_vls __user *userp)
> +{
> +     if (!vcpu_has_sve(vcpu))
> +             return -EBADFD; /* not configured yet */
> +
> +     BUG_ON(!sve_vl_valid(vcpu->arch.sve_max_vl));
> +
> +     return __kvm_vcpu_query_sve_vls(vls,
> +                     sve_vq_from_vl(vcpu->arch.sve_max_vl), userp);
> +}
> +
> +static int kvm_vcpu_sve_config(struct kvm_vcpu *vcpu,
> +                            struct kvm_sve_vls __user *userp)
> +{
> +     struct kvm_sve_vls vls;
> +
> +     if (!kvm_sve_supported())
> +             return -EINVAL;
> +
> +     if (copy_from_user(&vls, userp, sizeof(vls)))
> +             return -EFAULT;
> +
> +     /*
> +      * For forwards compatibility, flush any set bits in _reserved[]
> +      * to tell userspace that we didn't look at them:
> +      */
> +     memset(&vls._reserved, 0, sizeof vls._reserved);
> +
> +     switch (vls.cmd) {
> +     case KVM_ARM_SVE_CONFIG_QUERY:
> +             return kvm_vcpu_query_sve_vls(vcpu, &vls, userp);
> +
> +     case KVM_ARM_SVE_CONFIG_SET:
> +             return kvm_vcpu_set_sve_vls(vcpu, &vls, userp);
> +
> +     case KVM_ARM_SVE_CONFIG_GET:
> +             return kvm_vcpu_get_sve_vls(vcpu, &vls, userp);
> +
> +     default:
> +             return -EINVAL;
> +     }
> +}
> +
>  int kvm_arm_arch_vcpu_ioctl(struct kvm_vcpu *vcpu,
>                           unsigned int ioctl, unsigned long arg)
>  {
> -     return -EINVAL;
> +     void __user *userp = (void __user *)arg;
> +
> +     switch (ioctl) {
> +     case KVM_ARM_SVE_CONFIG:
> +             return kvm_vcpu_sve_config(vcpu, userp);
> +
> +     default:
> +             return -EINVAL;
> +     }
>  }
>
>  int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
> diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c
> index e37c78b..c2edcde 100644
> --- a/arch/arm64/kvm/reset.c
> +++ b/arch/arm64/kvm/reset.c
> @@ -19,10 +19,12 @@
>   * along with this program.  If not, see <http://www.gnu.org/licenses/>.
>   */
>
> +#include <linux/atomic.h>
>  #include <linux/errno.h>
>  #include <linux/kvm_host.h>
>  #include <linux/kvm.h>
>  #include <linux/hw_breakpoint.h>
> +#include <linux/string.h>
>
>  #include <kvm/arm_arch_timer.h>
>
> @@ -54,6 +56,31 @@ static bool cpu_has_32bit_el1(void)
>       return !!(pfr0 & 0x20);
>  }
>
> +#ifdef CONFIG_ARM64_SVE
> +bool kvm_sve_supported(void)
> +{
> +     static bool warn_printed = false;
> +
> +     if (!system_supports_sve())
> +             return false;
> +
> +     /*
> +      * For now, consider the hardware broken if implementation
> +      * differences between CPUs in the system result in the set of
> +      * vector lengths safely virtualisable for guests being less
> +      * than the set provided to userspace:
> +      */
> +     if (sve_max_virtualisable_vl != sve_max_vl) {
> +             if (!xchg(&warn_printed, true))
> +                     kvm_err("Hardware SVE implementations mismatched: suppressing SVE for guests.");

This seems like you are re-inventing WARN_ONCE for the sake of having
"kvm [%i]: " in your printk string.

> +
> +             return false;
> +     }
> +
> +     return true;
> +}
> +#endif
> +
>  /**
>   * kvm_arch_dev_ioctl_check_extension
>   *
> @@ -85,6 +112,9 @@ int kvm_arch_dev_ioctl_check_extension(struct kvm *kvm, long ext)
>       case KVM_CAP_VCPU_EVENTS:
>               r = 1;
>               break;
> +     case KVM_CAP_ARM_SVE:
> +             r = kvm_sve_supported();
> +             break;

For debugging we actually use the return value to indicate how many
WP/BPs we have. We could do the same here for the maximum number of
VQs, but I guess KVM_ARM_SVE_CONFIG_QUERY reports a much richer set of
information. However, this raises the question of how useful all this
extra information really is to userspace?

A dumber implementation would be:

QEMU                 |        Kernel

KVM_CAP_ARM_SVE  --------->
                              Max VQ=n
           VQ/0  <---------

We want n < max VQ

KVM_ARM_SVE_CONFIG(n) ---->
                              Unsupported VQ
           EINVAL <--------

Weird HW can't support our choice of n.
Give up or try another value.

KVM_ARM_SVE_CONFIG(n-1) --->
                              That's OK
           0 (OK) <---------

It imposes more heavy lifting on the userspace side of things, but I
would expect that in the "normal" case sane hardware supports all VLs
from max VQ down to 1. And for the cases where it doesn't, iterating
through several KVM_ARM_SVE_CONFIG attempts is a start-up cost, not a
runtime one.
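
In userspace terms, roughly (sketch; this assumes the capability check
returns the host's max VQ and that KVM_ARM_SVE_CONFIG takes a plain
integer in this scheme, neither of which the current patch does):

    int vq = ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_ARM_SVE);

    while (vq >= 1 && ioctl(vcpu_fd, KVM_ARM_SVE_CONFIG, vq) < 0)
            vq--;           /* weird HW: retry with a shorter max VL */

    if (vq < 1)
            abort();        /* no supportable vector length: give up */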

This would mean one capability and one SVE_CONFIG sub-command with a single
parameter. We could always extend the interface later, but I wonder if
we are gold-plating the API too early here?

What do the maintainers think?


>       default:
>               r = 0;
>       }
> @@ -92,6 +122,21 @@ int kvm_arch_dev_ioctl_check_extension(struct kvm *kvm, long ext)
>       return r;
>  }
>
> +int kvm_reset_sve(struct kvm_vcpu *vcpu)
> +{
> +     if (!vcpu_has_sve(vcpu))
> +             return 0;
> +
> +     if (WARN_ON(!vcpu->arch.sve_state ||
> +                 !sve_vl_valid(vcpu->arch.sve_max_vl)))
> +             return -EIO;

Using WARN_ON for its side effects seems sketchy for some reason, but
while BUG_ON can compile away to nothing, WARN_ON has been designed to
always give you the result of the condition, so never mind...
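
i.e. this pattern is safe (sketch):

    if (WARN_ON(!vcpu->arch.sve_state))     /* evaluates to the condition */
            return -EIO;

because WARN_ON() evaluates and returns its argument even when
CONFIG_BUG is disabled.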

> +
> +     memset(vcpu->arch.sve_state, 0,
> +            SVE_SIG_REGS_SIZE(sve_vq_from_vl(vcpu->arch.sve_max_vl)));
> +
> +     return 0;
> +}
> +
>  /**
>   * kvm_reset_vcpu - sets core registers and sys_regs to reset value
>   * @vcpu: The VCPU pointer
> @@ -103,6 +148,7 @@ int kvm_arch_dev_ioctl_check_extension(struct kvm *kvm, long ext)
>  int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
>  {
>       const struct kvm_regs *cpu_reset;
> +     int ret;
>
>       switch (vcpu->arch.target) {
>       default:
> @@ -120,6 +166,10 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
>       /* Reset core registers */
>       memcpy(vcpu_gp_regs(vcpu), cpu_reset, sizeof(*cpu_reset));
>
> +     ret = kvm_reset_sve(vcpu);
> +     if (ret)
> +             return ret;
> +
>       /* Reset system registers */
>       kvm_reset_sys_regs(vcpu);
>
> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
> index 7c3c5cc..488ca56 100644
> --- a/include/uapi/linux/kvm.h
> +++ b/include/uapi/linux/kvm.h
> @@ -953,6 +953,7 @@ struct kvm_ppc_resize_hpt {
>  #define KVM_CAP_NESTED_STATE 157
>  #define KVM_CAP_ARM_INJECT_SERROR_ESR 158
>  #define KVM_CAP_MSR_PLATFORM_INFO 159
> +#define KVM_CAP_ARM_SVE 160
>
>  #ifdef KVM_CAP_IRQ_ROUTING
>
> @@ -1400,6 +1401,9 @@ struct kvm_enc_region {
>  #define KVM_GET_NESTED_STATE         _IOWR(KVMIO, 0xbe, struct kvm_nested_state)
>  #define KVM_SET_NESTED_STATE         _IOW(KVMIO,  0xbf, struct kvm_nested_state)
>
> +/* Available with KVM_CAP_ARM_SVE */
> +#define KVM_ARM_SVE_CONFIG     _IOWR(KVMIO,  0xc0, struct kvm_sve_vls)
> +
>  /* Secure Encrypted Virtualization command */
>  enum sev_cmd_id {
>       /* Guest initialization commands */


--
Alex Bennée