On 26/04/28 04:25PM, Ackerley Tng via B4 Relay wrote:
> From: Ackerley Tng <[email protected]>
> 
> Introduce basic support for KVM_SET_MEMORY_ATTRIBUTES2 in guest_memfd,
> which just updates attributes tracked by guest_memfd.
> 
> Validate input fields in general. Guard usage of KVM_SET_MEMORY_ATTRIBUTES2
> by making sure requested attributes are supported for this instance of kvm.
> 
> A new KVM_SET_MEMORY_ATTRIBUTES2 is defined as _IOWR, i.e. it supports
> reads by userspace in addition to writes (unlike the write-only
> KVM_SET_MEMORY_ATTRIBUTES), so that the kernel can provide error
> details back to userspace. This will be used in a later patch.
> 
> The two ioctls use their corresponding structs with no overlap, but
> backward compatibility is baked in for future support of
> KVM_SET_MEMORY_ATTRIBUTES2 and struct kvm_memory_attributes2 in the VM
> ioctl.
> 
> The process of setting memory attributes is set up such that the latter half
> will not fail due to allocation. Any necessary checks are performed before
> the point of no return.
> 
> Signed-off-by: Ackerley Tng <[email protected]>
> Co-developed-by: Vishal Annapurve <[email protected]>
> Signed-off-by: Vishal Annapurve <[email protected]>
> Co-developed-by: Sean Christopherson <[email protected]>
> Signed-off-by: Sean Christopherson <[email protected]>
> ---
>  include/uapi/linux/kvm.h |  13 ++++++
>  virt/kvm/Kconfig         |   1 +
>  virt/kvm/guest_memfd.c   | 114 
> +++++++++++++++++++++++++++++++++++++++++++++++
>  virt/kvm/kvm_main.c      |  12 +++++
>  4 files changed, 140 insertions(+)
> 
> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
> index 6c8afa2047bf3..e6bbf68a83813 100644
> --- a/include/uapi/linux/kvm.h
> +++ b/include/uapi/linux/kvm.h
> @@ -1648,6 +1648,19 @@ struct kvm_memory_attributes {
>       __u64 flags;
>  };
>  
> +#define KVM_SET_MEMORY_ATTRIBUTES2              _IOWR(KVMIO,  0xd2, struct 
> kvm_memory_attributes2)
> +
> +struct kvm_memory_attributes2 {
> +     union {
> +             __u64 address;
> +             __u64 offset;
> +     };
> +     __u64 size;
> +     __u64 attributes;
> +     __u64 flags;
> +     __u64 reserved[12];
> +};
> +
>  #define KVM_MEMORY_ATTRIBUTE_PRIVATE           (1ULL << 3)
>  
>  #define KVM_CREATE_GUEST_MEMFD       _IOWR(KVMIO,  0xd4, struct 
> kvm_create_guest_memfd)
> diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig
> index 3fea89c45cfb4..e371e079e2c50 100644
> --- a/virt/kvm/Kconfig
> +++ b/virt/kvm/Kconfig
> @@ -109,6 +109,7 @@ config KVM_VM_MEMORY_ATTRIBUTES
>  
>  config KVM_GUEST_MEMFD
>         select XARRAY_MULTI
> +       select KVM_MEMORY_ATTRIBUTES
>         bool
>  
>  config HAVE_KVM_ARCH_GMEM_PREPARE
> diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c
> index 506219e2359eb..9a26eca717047 100644
> --- a/virt/kvm/guest_memfd.c
> +++ b/virt/kvm/guest_memfd.c
> @@ -552,11 +552,125 @@ unsigned long kvm_gmem_get_memory_attributes(struct 
> kvm *kvm, gfn_t gfn)
>  }
>  EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_gmem_get_memory_attributes);
>  
> +/*
> + * Preallocate memory for attributes to be stored on a maple tree, pointed to
> + * by mas.  Adjacent ranges with attributes identical to the new attributes
> + * will be merged.  Also sets mas's bounds up for storing attributes.
> + *
> + * This maintains the invariant that ranges with the same attributes will
> + * always be merged.
> + */
> +static int kvm_gmem_mas_preallocate(struct ma_state *mas, u64 attributes,
> +                                 pgoff_t start, size_t nr_pages)
> +{
> +     pgoff_t end = start + nr_pages;
> +     pgoff_t last = end - 1;
> +     void *entry;
> +
> +     /* Try extending range. entry is NULL on overflow/wrap-around. */
> +     mas_set_range(mas, end, end);
> +     entry = mas_find(mas, end);

Please read the documentation as I believe you have a bug here.  What
happens if there is another range stored higher than end + 1?

Do you have testing of these functions somewhere?

> +     if (entry && xa_to_value(entry) == attributes)
> +             last = mas->last;
> +
> +     if (start > 0) {
> +             mas_set_range(mas, start - 1, start - 1);
> +             entry = mas_find(mas, start - 1);
> +             if (entry && xa_to_value(entry) == attributes)
> +                     start = mas->index;
> +     }
> +
> +     mas_set_range(mas, start, last);
> +     return mas_preallocate(mas, xa_mk_value(attributes), GFP_KERNEL);
> +}
> +
> +static int __kvm_gmem_set_attributes(struct inode *inode, pgoff_t start,
> +                                  size_t nr_pages, uint64_t attrs)
> +{
> +     struct address_space *mapping = inode->i_mapping;
> +     struct gmem_inode *gi = GMEM_I(inode);
> +     pgoff_t end = start + nr_pages;
> +     struct maple_tree *mt;
> +     struct ma_state mas;
> +     int r;
> +
> +     mt = &gi->attributes;
> +
> +     filemap_invalidate_lock(mapping);
> +
> +     mas_init(&mas, mt, start);
> +     r = kvm_gmem_mas_preallocate(&mas, attrs, start, nr_pages);
> +     if (r)
> +             goto out;
> +
> +     /*
> +      * From this point on guest_memfd has performed necessary
> +      * checks and can proceed to do guest-breaking changes.
> +      */
> +
> +     kvm_gmem_invalidate_begin(inode, start, end);
> +     mas_store_prealloc(&mas, xa_mk_value(attrs));
> +     kvm_gmem_invalidate_end(inode, start, end);
> +out:
> +     filemap_invalidate_unlock(mapping);
> +     return r;
> +}
> +
> +static long kvm_gmem_set_attributes(struct file *file, void __user *argp)
> +{
> +     struct gmem_file *f = file->private_data;
> +     struct inode *inode = file_inode(file);
> +     struct kvm_memory_attributes2 attrs;
> +     size_t nr_pages;
> +     pgoff_t index;
> +     int i;
> +
> +     if (copy_from_user(&attrs, argp, sizeof(attrs)))
> +             return -EFAULT;
> +
> +     if (attrs.flags)
> +             return -EINVAL;
> +     for (i = 0; i < ARRAY_SIZE(attrs.reserved); i++) {
> +             if (attrs.reserved[i])
> +                     return -EINVAL;
> +     }
> +     if (attrs.attributes & ~kvm_supported_mem_attributes(f->kvm))
> +             return -EINVAL;
> +     if (attrs.size == 0 || attrs.offset + attrs.size < attrs.offset)
> +             return -EINVAL;
> +     if (!PAGE_ALIGNED(attrs.offset) || !PAGE_ALIGNED(attrs.size))
> +             return -EINVAL;
> +
> +     if (attrs.offset >= i_size_read(inode) ||
> +         attrs.offset + attrs.size > i_size_read(inode))
> +             return -EINVAL;
> +
> +     nr_pages = attrs.size >> PAGE_SHIFT;
> +     index = attrs.offset >> PAGE_SHIFT;
> +     return __kvm_gmem_set_attributes(inode, index, nr_pages,
> +                                      attrs.attributes);
> +}
> +
> +static long kvm_gmem_ioctl(struct file *file, unsigned int ioctl,
> +                        unsigned long arg)
> +{
> +     switch (ioctl) {
> +     case KVM_SET_MEMORY_ATTRIBUTES2:
> +             if (vm_memory_attributes)
> +                     return -ENOTTY;
> +
> +             return kvm_gmem_set_attributes(file, (void __user *)arg);
> +     default:
> +             return -ENOTTY;
> +     }
> +}
> +
>  static struct file_operations kvm_gmem_fops = {
>       .mmap           = kvm_gmem_mmap,
>       .open           = generic_file_open,
>       .release        = kvm_gmem_release,
>       .fallocate      = kvm_gmem_fallocate,
> +     .unlocked_ioctl = kvm_gmem_ioctl,
>  };
>  
>  static int kvm_gmem_migrate_folio(struct address_space *mapping,
> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> index ff20e63143642..4d7bf52b7b717 100644
> --- a/virt/kvm/kvm_main.c
> +++ b/virt/kvm/kvm_main.c
> @@ -110,6 +110,18 @@ 
> EXPORT_SYMBOL_FOR_KVM_INTERNAL(STATIC_CALL_KEY(__kvm_get_memory_attributes));
>  
> EXPORT_SYMBOL_FOR_KVM_INTERNAL(STATIC_CALL_TRAMP(__kvm_get_memory_attributes));
>  #endif
>  
> +#define MEMORY_ATTRIBUTES_MATCH(one, two)                            \
> +     static_assert(offsetof(struct kvm_memory_attributes, one) ==    \
> +                   offsetof(struct kvm_memory_attributes2, two));    \
> +     static_assert(sizeof_field(struct kvm_memory_attributes, one) ==\
> +                   sizeof_field(struct kvm_memory_attributes2, two))
> +
> +/* Ensure the common parts of the two structs are identical. */
> +MEMORY_ATTRIBUTES_MATCH(address, address);
> +MEMORY_ATTRIBUTES_MATCH(size, size);
> +MEMORY_ATTRIBUTES_MATCH(attributes, attributes);
> +MEMORY_ATTRIBUTES_MATCH(flags, flags);
> +
>  /*
>   * Ordering of locks:
>   *
> 
> -- 
> 2.54.0.545.g6539524ca2-goog
> 
> 
> 

Reply via email to