On Thu, Mar 10, 2022, Chao Peng wrote:
> Add 'notifier' to memslot to make it a memfile_notifier node and then
> register it to memory backing store via memfile_register_notifier() when
> memslot gets created. When memslot is deleted, do the reverse with
> memfile_unregister_notifier(). Note each KVM memslot can be registered
> to different memory backing stores (or the same backing store but at
> different offset) independently.
> 
> Signed-off-by: Yu Zhang <yu.c.zh...@linux.intel.com>
> Signed-off-by: Chao Peng <chao.p.p...@linux.intel.com>
> ---
>  include/linux/kvm_host.h |  1 +
>  virt/kvm/kvm_main.c      | 75 ++++++++++++++++++++++++++++++++++++----
>  2 files changed, 70 insertions(+), 6 deletions(-)
> 
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index 6e1d770d6bf8..9b175aeca63f 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -567,6 +567,7 @@ struct kvm_memory_slot {
>       struct file *private_file;
>       loff_t private_offset;
>       struct memfile_pfn_ops *pfn_ops;
> +     struct memfile_notifier notifier;
>  };
>  
>  static inline bool kvm_slot_is_private(const struct kvm_memory_slot *slot)
> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> index d11a2628b548..67349421eae3 100644
> --- a/virt/kvm/kvm_main.c
> +++ b/virt/kvm/kvm_main.c
> @@ -840,6 +840,37 @@ static int kvm_init_mmu_notifier(struct kvm *kvm)
>  
>  #endif /* CONFIG_MMU_NOTIFIER && KVM_ARCH_WANT_MMU_NOTIFIER */
>  
> +#ifdef CONFIG_MEMFILE_NOTIFIER
> +static inline int kvm_memfile_register(struct kvm_memory_slot *slot)

This is a good opportunity to hide away the memfile details a bit.  Maybe
kvm_private_mem_{,un}register()?

> +{
> +     return memfile_register_notifier(file_inode(slot->private_file),
> +                                      &slot->notifier,
> +                                      &slot->pfn_ops);
> +}
> +
> +static inline void kvm_memfile_unregister(struct kvm_memory_slot *slot)
> +{
> +     if (slot->private_file) {
> +             memfile_unregister_notifier(file_inode(slot->private_file),
> +                                         &slot->notifier);
> +             fput(slot->private_file);

This should not do fput(); it makes the helper imbalanced with respect to the
register path and will likely lead to double fput().  Indeed, if preparing the
region fails, __kvm_set_memory_region() will double up on fput() due to checking
its local "file" for null, not slot->private_file for null.

> +             slot->private_file = NULL;
> +     }
> +}
> +
> +#else /* !CONFIG_MEMFILE_NOTIFIER */
> +
> +static inline int kvm_memfile_register(struct kvm_memory_slot *slot)
> +{

This should WARN_ON_ONCE().  Ditto for unregister.

> +     return -EOPNOTSUPP;
> +}
> +
> +static inline void kvm_memfile_unregister(struct kvm_memory_slot *slot)
> +{
> +}
> +
> +#endif /* CONFIG_MEMFILE_NOTIFIER */
> +
>  #ifdef CONFIG_HAVE_KVM_PM_NOTIFIER
>  static int kvm_pm_notifier_call(struct notifier_block *bl,
>                               unsigned long state,
> @@ -884,6 +915,9 @@ static void kvm_destroy_dirty_bitmap(struct 
> kvm_memory_slot *memslot)
>  /* This does not remove the slot from struct kvm_memslots data structures */
>  static void kvm_free_memslot(struct kvm *kvm, struct kvm_memory_slot *slot)
>  {
> +     if (slot->flags & KVM_MEM_PRIVATE)
> +             kvm_memfile_unregister(slot);

With fput() moved out of unregister, this needs to be:

        if (slot->flags & KVM_MEM_PRIVATE) {
                kvm_private_mem_unregister(slot);
                fput(slot->private_file);
        }
> +
>       kvm_destroy_dirty_bitmap(slot);
>  
>       kvm_arch_free_memslot(kvm, slot);
> @@ -1738,6 +1772,12 @@ static int kvm_set_memslot(struct kvm *kvm,
>               kvm_invalidate_memslot(kvm, old, invalid_slot);
>       }
>  
> +     if (new->flags & KVM_MEM_PRIVATE && change == KVM_MR_CREATE) {
> +             r = kvm_memfile_register(new);
> +             if (r)
> +                     return r;
> +     }

This belongs in kvm_prepare_memory_region().  The shenanigans for DELETE and
MOVE are special.

> +
>       r = kvm_prepare_memory_region(kvm, old, new, change);
>       if (r) {
>               /*
> @@ -1752,6 +1792,10 @@ static int kvm_set_memslot(struct kvm *kvm,
>               } else {
>                       mutex_unlock(&kvm->slots_arch_lock);
>               }
> +
> +             if (new->flags & KVM_MEM_PRIVATE && change == KVM_MR_CREATE)
> +                     kvm_memfile_unregister(new);
> +
>               return r;
>       }
>  
> @@ -1817,6 +1861,7 @@ int __kvm_set_memory_region(struct kvm *kvm,
>       enum kvm_mr_change change;
>       unsigned long npages;
>       gfn_t base_gfn;
> +     struct file *file = NULL;

Nit, naming this private_file would help understand its use.  Though I think
it's easier to not have a local variable.  More below.

>       int as_id, id;
>       int r;
>  
> @@ -1890,14 +1935,24 @@ int __kvm_set_memory_region(struct kvm *kvm,
>                       return 0;
>       }
>  
> +     if (mem->flags & KVM_MEM_PRIVATE) {
> +             file = fdget(region_ext->private_fd).file;

This can use fget() instead of fdget().

> +             if (!file)
> +                     return -EINVAL;
> +     }
> +
>       if ((change == KVM_MR_CREATE || change == KVM_MR_MOVE) &&
> -         kvm_check_memslot_overlap(slots, id, base_gfn, base_gfn + npages))
> -             return -EEXIST;
> +         kvm_check_memslot_overlap(slots, id, base_gfn, base_gfn + npages)) {
> +             r = -EEXIST;
> +             goto out;
> +     }
>  
>       /* Allocate a slot that will persist in the memslot. */
>       new = kzalloc(sizeof(*new), GFP_KERNEL_ACCOUNT);
> -     if (!new)
> -             return -ENOMEM;
> +     if (!new) {
> +             r = -ENOMEM;
> +             goto out;
> +     }
>  
>       new->as_id = as_id;
>       new->id = id;
> @@ -1905,10 +1960,18 @@ int __kvm_set_memory_region(struct kvm *kvm,
>       new->npages = npages;
>       new->flags = mem->flags;
>       new->userspace_addr = mem->userspace_addr;
> +     new->private_file = file;
> +     new->private_offset = mem->flags & KVM_MEM_PRIVATE ?
> +                           region_ext->private_offset : 0;

"new" is zero-allocated, so all the private stuff, including the fget(), can be
wrapped in a single KVM_MEM_PRIVATE check.  Moving fget() reduces the number
of gotos needed (the above -EEXIST and -ENOMEM paths don't need to be modified).

>       r = kvm_set_memslot(kvm, old, new, change);
> -     if (r)
> -             kfree(new);
> +     if (!r)
> +             return r;

Use goto, e.g.

        if (r)
                goto out;

        return 0;

Burying the happy path in a taken if-statement is confusing and error prone,
mostly because it breaks well-established kernel patterns.  Note, there's no
need for a separate out_free since new->private_file will be NULL in either
case.  I don't have a strong preference, I just find it easier to read code
that's more explicit, but I'm a-ok collapsing them into a single label.

        if ((change == KVM_MR_CREATE || change == KVM_MR_MOVE) &&
            kvm_check_memslot_overlap(slots, id, base_gfn, base_gfn + npages))
                return -EEXIST;

        /* Allocate a slot that will persist in the memslot. */
        new = kzalloc(sizeof(*new), GFP_KERNEL_ACCOUNT);
        if (!new)
                return -ENOMEM;

        new->as_id = as_id;
        new->id = id;
        new->base_gfn = base_gfn;
        new->npages = npages;
        new->flags = mem->flags;
        new->userspace_addr = mem->userspace_addr;

        if (mem->flags & KVM_MEM_PRIVATE) {
                new->private_file = fget(mem->private_fd);
                if (!new->private_file) {
                        r = -EINVAL;
                        goto out_free;
                }
                new->private_offset = mem->private_offset;
        }

        r = kvm_set_memslot(kvm, old, new, change);
        if (r)
                goto out;

        return 0;

out:
        if (new->private_file)
                fput(new->private_file);

out_free:
        kfree(new);
        return r;

Reply via email to