On 13/04/2017 22:19, Radim Krčmář wrote: > This patch allows userspace to tell how many VCPUs it is going to use, > which can save memory when allocating the kvm->vcpus array. This will > be done with a new KVM_CREATE_VM2 IOCTL. > > An alternative would be to redo kvm->vcpus as a list or protect the > array with RCU. RCU is slower and a list is not even practical as > kvm->vcpus are being used for index-based accesses. > > We could have an IOCTL that is called in between KVM_CREATE_VM and first > KVM_CREATE_VCPU and sets the size of the vcpus array, but we'd be making > one useless allocation. Knowing the desired number of VCPUs from the > beginning is seems best for now. > > This patch also prepares generic code for architectures that will set > KVM_CONFIGURABLE_MAX_VCPUS to a non-zero value.
Why is KVM_MAX_VCPU_ID or KVM_MAX_VCPUS not enough? Paolo > A disputable decision is that KVM_CREATE_VM2 actually works even if > KVM_CAP_CONFIGURABLE_MAX_VCPUS is 0, but uses that capability for its > detection. > > Signed-off-by: Radim Krčmář <rkrc...@redhat.com> > --- > Documentation/virtual/kvm/api.txt | 28 ++++++++++++++++++++++++ > include/linux/kvm_host.h | 3 +++ > include/uapi/linux/kvm.h | 8 +++++++ > virt/kvm/kvm_main.c | 45 > +++++++++++++++++++++++++++++++++------ > 4 files changed, 77 insertions(+), 7 deletions(-) > > diff --git a/Documentation/virtual/kvm/api.txt > b/Documentation/virtual/kvm/api.txt > index e60be91d8036..461130adbdc7 100644 > --- a/Documentation/virtual/kvm/api.txt > +++ b/Documentation/virtual/kvm/api.txt > @@ -3259,6 +3259,34 @@ Otherwise, if the MCE is a corrected error, KVM will > just > store it in the corresponding bank (provided this bank is > not holding a previously reported uncorrected error). > > + > +4.107 KVM_CREATE_VM2 > + > +Capability: KVM_CAP_CONFIGURABLE_MAX_VCPUS > +Architectures: all > +Type: system ioctl > +Parameters: struct kvm_vm_config > +Returns: a VM fd that can be used to control the new virtual machine, > + -E2BIG if the value of max_vcpus is not supported > + > +This is an extension of KVM_CREATE_VM that allows the user to pass more > +information through > + > +struct kvm_vm_config { > + __u64 type; > + __u32 max_vcpus; > + __u8 reserved[52]; > +}; > + > +type is the argument to KVM_CREATE_VM > + > +max_vcpus is the desired maximal number of VCPUs, it must not exceed the > value > +returned by KVM_CAP_CONFIGURABLE_MAX_VCPUS. Value of 0 treated as if > userspace > +passed the value returned by KVM_CAP_MAX_VCPU instead. > + > +reserved is must be 0. > + > + > 5. The kvm_run structure > ------------------------ > > diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h > index 6ba7bc831094..b875c0997328 100644 > --- a/include/linux/kvm_host.h > +++ b/include/linux/kvm_host.h > @@ -39,6 +39,9 @@ > #ifndef KVM_MAX_VCPU_ID > #define KVM_MAX_VCPU_ID KVM_MAX_VCPUS > #endif > +#ifndef KVM_CONFIGURABLE_MAX_VCPUS > +#define KVM_CONFIGURABLE_MAX_VCPUS 0U > +#endif > > /* > * The bit 16 ~ bit 31 of kvm_memory_region::flags are internally used > diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h > index 6180ea50e9ef..8349c73b3517 100644 > --- a/include/uapi/linux/kvm.h > +++ b/include/uapi/linux/kvm.h > @@ -83,6 +83,12 @@ struct kvm_debug_guest { > > /* *** End of deprecated interfaces *** */ > > +/* for KVM_CREATE_VM2 */ > +struct kvm_vm_config { > + __u64 type; > + __u32 max_vcpus; > + __u8 reserved[52]; > +}; > > /* for KVM_CREATE_MEMORY_REGION */ > struct kvm_memory_region { > @@ -713,6 +719,7 @@ struct kvm_ppc_resize_hpt { > */ > #define KVM_GET_API_VERSION _IO(KVMIO, 0x00) > #define KVM_CREATE_VM _IO(KVMIO, 0x01) /* returns a VM fd */ > +#define KVM_CREATE_VM2 _IOR(KVMIO, 0x01, struct kvm_vm_config) > #define KVM_GET_MSR_INDEX_LIST _IOWR(KVMIO, 0x02, struct kvm_msr_list) > > #define KVM_S390_ENABLE_SIE _IO(KVMIO, 0x06) > @@ -892,6 +899,7 @@ struct kvm_ppc_resize_hpt { > #define KVM_CAP_MIPS_64BIT 139 > #define KVM_CAP_S390_GS 140 > #define KVM_CAP_S390_AIS 141 > +#define KVM_CAP_CONFIGURABLE_MAX_VCPUS 142 > > #ifdef KVM_CAP_IRQ_ROUTING > > diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c > index 0f1579f118b4..9ef52fa006ec 100644 > --- a/virt/kvm/kvm_main.c > +++ b/virt/kvm/kvm_main.c > @@ -629,11 +629,20 @@ static inline void kvm_free_vm(struct kvm *kvm) > kfree(kvm); > } > > -static struct kvm *kvm_create_vm(unsigned long type) > +static struct kvm *kvm_create_vm(struct kvm_vm_config *vm_config) > { > int r, i; > - struct kvm *kvm = kvm_alloc_vm(KVM_MAX_VCPUS); > + struct kvm *kvm; > > + if (!KVM_CONFIGURABLE_MAX_VCPUS && vm_config->max_vcpus) > + return ERR_PTR(-EINVAL); > + if (vm_config->max_vcpus > KVM_CONFIGURABLE_MAX_VCPUS) > + return ERR_PTR(-E2BIG); > + > + if (!vm_config->max_vcpus) > + vm_config->max_vcpus = KVM_MAX_VCPUS; > + > + kvm = kvm_alloc_vm(vm_config->max_vcpus); > if (!kvm) > return ERR_PTR(-ENOMEM); > > @@ -647,7 +656,7 @@ static struct kvm *kvm_create_vm(unsigned long type) > refcount_set(&kvm->users_count, 1); > INIT_LIST_HEAD(&kvm->devices); > > - r = kvm_arch_init_vm(kvm, type); > + r = kvm_arch_init_vm(kvm, vm_config->type); > if (r) > goto out_err_no_disable; > > @@ -2957,6 +2966,8 @@ static long kvm_vm_ioctl_check_extension_generic(struct > kvm *kvm, long arg) > #endif > case KVM_CAP_MAX_VCPU_ID: > return KVM_MAX_VCPU_ID; > + case KVM_CAP_CONFIGURABLE_MAX_VCPUS: > + return KVM_CONFIGURABLE_MAX_VCPUS; > default: > break; > } > @@ -3182,13 +3193,13 @@ static struct file_operations kvm_vm_fops = { > .llseek = noop_llseek, > }; > > -static int kvm_dev_ioctl_create_vm(unsigned long type) > +static int kvm_dev_ioctl_create_vm(struct kvm_vm_config *vm_config) > { > int r; > struct kvm *kvm; > struct file *file; > > - kvm = kvm_create_vm(type); > + kvm = kvm_create_vm(vm_config); > if (IS_ERR(kvm)) > return PTR_ERR(kvm); > #ifdef CONFIG_KVM_MMIO > @@ -3223,6 +3234,7 @@ static int kvm_dev_ioctl_create_vm(unsigned long type) > static long kvm_dev_ioctl(struct file *filp, > unsigned int ioctl, unsigned long arg) > { > + void __user *argp = (void __user *)arg; > long r = -EINVAL; > > switch (ioctl) { > @@ -3231,9 +3243,28 @@ static long kvm_dev_ioctl(struct file *filp, > goto out; > r = KVM_API_VERSION; > break; > - case KVM_CREATE_VM: > - r = kvm_dev_ioctl_create_vm(arg); > + case KVM_CREATE_VM: { > + struct kvm_vm_config vm_config = {.type = arg}; > + > + r = kvm_dev_ioctl_create_vm(&vm_config); > break; > + } > + case KVM_CREATE_VM2: { > + struct kvm_vm_config vm_config, check_reserved = {}; > + > + r = -EFAULT; > + if (copy_from_user(&vm_config, argp, sizeof vm_config)) > + goto out; > + > + r = -EINVAL; > + check_reserved.type = vm_config.type; > + check_reserved.max_vcpus = vm_config.max_vcpus; > + if (memcmp(&vm_config, &check_reserved, sizeof check_reserved)) > + goto out; > + > + r = kvm_dev_ioctl_create_vm(&vm_config); > + break; > + } > case KVM_CHECK_EXTENSION: > r = kvm_vm_ioctl_check_extension_generic(NULL, arg); > break; >