Sean Christopherson <sea...@google.com> writes: > On Mon, Aug 07, 2023, Ackerley Tng wrote: >> KVM_LINK_GUEST_MEMFD will link a gmem fd's underlying inode to a new >> file (and fd). >> >> Signed-off-by: Ackerley Tng <ackerley...@google.com> >> --- >> include/uapi/linux/kvm.h | 8 +++++ >> virt/kvm/guest_mem.c | 73 ++++++++++++++++++++++++++++++++++++++++ >> virt/kvm/kvm_main.c | 10 ++++++ >> virt/kvm/kvm_mm.h | 7 ++++ >> 4 files changed, 98 insertions(+) >> >> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h >> index eb900344a054..d0e2a2ce0df2 100644 >> --- a/include/uapi/linux/kvm.h >> +++ b/include/uapi/linux/kvm.h >> @@ -2299,4 +2299,12 @@ struct kvm_create_guest_memfd { >> __u64 reserved[6]; >> }; >> >> +#define KVM_LINK_GUEST_MEMFD _IOWR(KVMIO, 0xd5, struct >> kvm_link_guest_memfd) >> + >> +struct kvm_link_guest_memfd { >> + __u64 fd; >> + __u64 flags; >> + __u64 reserved[6]; >> +}; >> + >> #endif /* __LINUX_KVM_H */ >> diff --git a/virt/kvm/guest_mem.c b/virt/kvm/guest_mem.c >> index 30d0ab8745ee..1b3df273f785 100644 >> --- a/virt/kvm/guest_mem.c >> +++ b/virt/kvm/guest_mem.c >> @@ -477,6 +477,79 @@ int kvm_gmem_create(struct kvm *kvm, struct >> kvm_create_guest_memfd *args) >> return __kvm_gmem_create(kvm, size, flags, kvm_gmem_mnt); >> } >> >> +static inline void __kvm_gmem_do_link(struct inode *inode) >> +{ >> + /* Refer to simple_link() */ >> + >> + inode->i_ctime = current_time(inode); >> + inc_nlink(inode); >> + >> + /* >> + * ihold() to add additional reference to inode for reference in dentry, >> + * created in kvm_gmem_alloc_file() -> alloc_file_pseudo(). This is not >> + * necessary when creating a new file because alloc_inode() creates >> + * inodes with i_count = 1, which is the refcount for the dentry in the >> + * file. >> + */ >> + ihold(inode); >> + >> + /* >> + * dget() and d_instantiate() complete the setup of a dentry, but those >> + * have already been done in kvm_gmem_alloc_file() -> >> + * alloc_file_pseudo() >> + */ >> +}
Thanks Sean, we're just circling back to this series, working on a next revision. > > Does this have to be done before the fd is exposed to userspace, or can it be > done after? Does "exposed to userspace" mean the call to get_unused_fd_flags(), where an fd is reserved? Do you mean to make this reservation as late as possible? > If it can be done after, I'd prefer to have the allocation helper > also install the fd, and also rename it to something that better conveys that > it's allocating more than just the file, e.g. that it allocates and initialize > kvm_gmem too. > > Completely untested, but this is what I'm thinkin/hoping. > > static int kvm_gmem_alloc_view(struct kvm *kvm, struct inode *inode, > struct vfsmount *mnt) Will rename this kvm_gmem_alloc_view(), that naming totally makes sense, and attaches a meaning to the struct file as a view into the memory. > { > struct file *file; > struct kvm_gmem *gmem; > > gmem = kzalloc(sizeof(*gmem), GFP_KERNEL); > if (!gmem) > return -ENOMEM; > > fd = get_unused_fd_flags(0); > if (fd < 0) { > r = fd; > goto err_fd; > } Do you see the fd as part of the view? I thought the fd is just a handle to the view (struct file). > > file = alloc_file_pseudo(inode, mnt, "kvm-gmem", O_RDWR, > &kvm_gmem_fops); > if (IS_ERR(file)) { > r = PTR_ERR(file); > goto err_file; > } > > file->f_flags |= O_LARGEFILE; > file->f_mapping = inode->i_mapping; > > kvm_get_kvm(kvm); > gmem->kvm = kvm; > xa_init(&gmem->bindings); > > file->private_data = gmem; > > list_add(&gmem->entry, &inode->i_mapping->private_list); > > fd_install(fd, file); > > return 0; > err: > put_unused_fd(fd); > err_fd: > kfree(gmem); > return r; > } > > static int __kvm_gmem_create(struct kvm *kvm, loff_t size, u64 flags, > struct vfsmount *mnt) > { > const char *anon_name = "[kvm-gmem]"; > const struct qstr qname = QSTR_INIT(anon_name, strlen(anon_name)); > struct inode *inode; > struct file *file; > int fd, err; > > inode = alloc_anon_inode(mnt->mnt_sb); > if (IS_ERR(inode)) > return PTR_ERR(inode); > > err = security_inode_init_security_anon(inode, &qname, NULL); > if (err) > goto err; > > inode->i_private = (void *)(unsigned long)flags; > inode->i_op = &kvm_gmem_iops; > inode->i_mapping->a_ops = &kvm_gmem_aops; > inode->i_mode |= S_IFREG; > inode->i_size = size; > mapping_set_gfp_mask(inode->i_mapping, GFP_HIGHUSER); > mapping_set_large_folios(inode->i_mapping); > mapping_set_unevictable(inode->i_mapping); > mapping_set_unmovable(inode->i_mapping); > > fd = kvm_gmem_alloc_view(kvm, inode, mnt); > if (fd < 0) { > err = fd; > goto err; > } > return fd; > err: > iput(inode); > return err; > }