From: John Groves <[email protected]> - The new GET_DAXDEV message/response is added. - The famfs.c:famfs_teardown() function is added as a primary teardown function for famfs. - The GET_DAXDEV command is triggered by the famfs_update_daxdev_table() call, if there are any daxdevs in the subject fmap that are not represented in the daxdev_table yet. - fs/namei.c: export may_open_dev()
Signed-off-by: John Groves <[email protected]> --- fs/fuse/famfs.c | 230 +++++++++++++++++++++++++++++++++++++- fs/fuse/famfs_kfmap.h | 26 +++++ fs/fuse/fuse_i.h | 19 ++++ fs/fuse/inode.c | 7 +- fs/namei.c | 1 + include/uapi/linux/fuse.h | 20 ++++ 6 files changed, 301 insertions(+), 2 deletions(-) diff --git a/fs/fuse/famfs.c b/fs/fuse/famfs.c index a9728e11f1dd..7aa2eb2e99bf 100644 --- a/fs/fuse/famfs.c +++ b/fs/fuse/famfs.c @@ -21,6 +21,231 @@ #include "famfs_kfmap.h" #include "fuse_i.h" +/* + * famfs_teardown() + * + * Deallocate famfs metadata for a fuse_conn + */ +void +famfs_teardown(struct fuse_conn *fc) +{ + struct famfs_dax_devlist *devlist = fc->dax_devlist; + int i; + + fc->dax_devlist = NULL; + + if (!devlist) + return; + + if (!devlist->devlist) + goto out; + + /* Close & release all the daxdevs in our table */ + for (i = 0; i < devlist->nslots; i++) { + struct famfs_daxdev *dd = &devlist->devlist[i]; + + if (!dd->valid) + continue; + + /* Release reference from dax_dev_get() */ + if (dd->devp) + put_dax(dd->devp); + + kfree(dd->name); + } + kfree(devlist->devlist); + +out: + kfree(devlist); +} + +static int +famfs_verify_daxdev(const char *pathname, dev_t *devno) +{ + struct inode *inode; + struct path path; + int err; + + if (!pathname || !*pathname) + return -EINVAL; + + err = kern_path(pathname, LOOKUP_FOLLOW, &path); + if (err) + return err; + + inode = d_backing_inode(path.dentry); + if (!S_ISCHR(inode->i_mode)) { + err = -EINVAL; + goto out_path_put; + } + + if (!may_open_dev(&path)) { /* had to export this */ + err = -EACCES; + goto out_path_put; + } + + *devno = inode->i_rdev; + +out_path_put: + path_put(&path); + return err; +} + +/** + * famfs_fuse_get_daxdev() - Retrieve info for a DAX device from fuse server + * + * Send a GET_DAXDEV message to the fuse server to retrieve info on a + * dax device. 
+ * + * @fm: fuse_mount + * @index: the index of the dax device; daxdevs are referred to by index + * in fmaps, and the server resolves the index to a particular daxdev + * + * Returns: 0=success + * -errno=failure + */ +static int +famfs_fuse_get_daxdev(struct fuse_mount *fm, const u64 index) +{ + struct fuse_daxdev_out daxdev_out = { 0 }; + struct fuse_conn *fc = fm->fc; + struct famfs_daxdev *daxdev; + int rc; + + FUSE_ARGS(args); + + /* Store the daxdev in our table */ + if (index >= fc->dax_devlist->nslots) { + pr_err("%s: index(%lld) > nslots(%d)\n", + __func__, index, fc->dax_devlist->nslots); + return -EINVAL; + } + + args.opcode = FUSE_GET_DAXDEV; + args.nodeid = index; + + args.in_numargs = 0; + + args.out_numargs = 1; + args.out_args[0].size = sizeof(daxdev_out); + args.out_args[0].value = &daxdev_out; + + /* Send GET_DAXDEV command */ + rc = fuse_simple_request(fm, &args); + if (rc) { + pr_err("%s: rc=%d from fuse_simple_request()\n", + __func__, rc); + /* Error will be that the payload is smaller than FMAP_BUFSIZE, + * which is the max we can handle. Empty payload handled below. 
+ */ + return rc; + } + + scoped_guard(rwsem_write, &fc->famfs_devlist_sem) { + daxdev = &fc->dax_devlist->devlist[index]; + + /* Abort if daxdev is now valid (races are possible here) */ + if (daxdev->valid) { + pr_debug("%s: daxdev already known\n", __func__); + return 0; + } + + /* Verify dev is valid and can be opened and gets the devno */ + rc = famfs_verify_daxdev(daxdev_out.name, &daxdev->devno); + if (rc) { + pr_err("%s: rc=%d from famfs_verify_daxdev()\n", + __func__, rc); + return rc; + } + + daxdev->name = kstrdup(daxdev_out.name, GFP_KERNEL); + if (!daxdev->name) + return -ENOMEM; + + /* This will fail if it's not a dax device */ + daxdev->devp = dax_dev_get(daxdev->devno); + if (!daxdev->devp) { + pr_warn("%s: device %s not found or not dax\n", + __func__, daxdev_out.name); + kfree(daxdev->name); + daxdev->name = NULL; + return -ENODEV; + } + + wmb(); /* All other fields must be visible before valid */ + daxdev->valid = 1; + } + + return 0; +} + +/** + * famfs_update_daxdev_table() - Update the daxdev table + * @fm: fuse_mount + * @meta: famfs_file_meta, in-memory format, built from a GET_FMAP response + * + * This function is called for each new file fmap, to verify whether all + * referenced daxdevs are already known (i.e. in the table). 
Any daxdev + * indices referenced in @meta but not in the table will be retrieved via + * famfs_fuse_get_daxdev() and added to the table + * + * Return: 0=success + * -errno=failure + */ +static int +famfs_update_daxdev_table( + struct fuse_mount *fm, + const struct famfs_file_meta *meta) +{ + struct famfs_dax_devlist *local_devlist; + struct fuse_conn *fc = fm->fc; + int indices_to_fetch[MAX_DAXDEVS]; + int n_to_fetch = 0; + int err; + + /* First time through we will need to allocate the dax_devlist */ + if (!fc->dax_devlist) { + local_devlist = kcalloc(1, sizeof(*fc->dax_devlist), GFP_KERNEL); + if (!local_devlist) + return -ENOMEM; + + local_devlist->nslots = MAX_DAXDEVS; + + local_devlist->devlist = kcalloc(MAX_DAXDEVS, + sizeof(struct famfs_daxdev), + GFP_KERNEL); + if (!local_devlist->devlist) { + kfree(local_devlist); + return -ENOMEM; + } + + /* We don't need famfs_devlist_sem here because we use cmpxchg */ + if (cmpxchg(&fc->dax_devlist, NULL, local_devlist) != NULL) { + kfree(local_devlist->devlist); + kfree(local_devlist); /* another thread beat us to it */ + } + } + + /* Collect indices that need fetching while holding read lock */ + scoped_guard(rwsem_read, &fc->famfs_devlist_sem) { + unsigned long i; + + for_each_set_bit(i, (unsigned long *)&meta->dev_bitmap, MAX_DAXDEVS) { + if (!(fc->dax_devlist->devlist[i].valid)) + indices_to_fetch[n_to_fetch++] = i; + } + } + + /* Fetch needed daxdevs outside the read lock */ + for (int j = 0; j < n_to_fetch; j++) { + err = famfs_fuse_get_daxdev(fm, indices_to_fetch[j]); + if (err) + pr_err("%s: failed to get daxdev=%d\n", + __func__, indices_to_fetch[j]); + } + + return 0; +} /***************************************************************************/ @@ -184,7 +409,7 @@ famfs_fuse_meta_alloc( /* ie_in = one interleaved extent in fmap_buf */ ie_in = fmap_buf + next_offset; - /* Move past one interleaved extent header in fmap_buf */ + /* Move past 1 interleaved extent header in fmap_buf */ next_offset += 
sizeof(*ie_in); if (next_offset > fmap_buf_size) { pr_err("%s:%d: fmap_buf underflow offset/size %ld/%ld\n", @@ -329,6 +554,9 @@ famfs_file_init_dax( if (rc) goto errout; + /* Make sure this fmap doesn't reference any unknown daxdevs */ + famfs_update_daxdev_table(fm, meta); + /* Publish the famfs metadata on fi->famfs_meta */ inode_lock(inode); diff --git a/fs/fuse/famfs_kfmap.h b/fs/fuse/famfs_kfmap.h index 18ab22bcc5a1..eb9f70b5cb81 100644 --- a/fs/fuse/famfs_kfmap.h +++ b/fs/fuse/famfs_kfmap.h @@ -64,4 +64,30 @@ struct famfs_file_meta { }; }; +/* + * famfs_daxdev - tracking struct for a daxdev within a famfs file system + * + * This is the in-memory daxdev metadata that is populated by parsing + * the responses to GET_FMAP messages + */ +struct famfs_daxdev { + /* Include dev uuid? */ + bool valid; + bool error; + dev_t devno; + struct dax_device *devp; + char *name; +}; + +#define MAX_DAXDEVS 24 + +/* + * famfs_dax_devlist - list of famfs_daxdev's + */ +struct famfs_dax_devlist { + int nslots; + int ndevs; + struct famfs_daxdev *devlist; +}; + #endif /* FAMFS_KFMAP_H */ diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index dbfec5b9c6e1..83e24cee994b 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -1006,6 +1006,11 @@ struct fuse_conn { /* Request timeout (in jiffies). 
0 = no timeout */ unsigned int req_timeout; } timeout; + +#if IS_ENABLED(CONFIG_FUSE_FAMFS_DAX) + struct rw_semaphore famfs_devlist_sem; + struct famfs_dax_devlist *dax_devlist; +#endif }; /* @@ -1647,6 +1652,8 @@ int famfs_file_init_dax(struct fuse_mount *fm, size_t fmap_size); void __famfs_meta_free(void *map); +void famfs_teardown(struct fuse_conn *fc); + /* Set fi->famfs_meta = NULL regardless of prior value */ static inline void famfs_meta_init(struct fuse_inode *fi) { @@ -1668,6 +1675,11 @@ static inline void famfs_meta_free(struct fuse_inode *fi) } } +static inline void famfs_init_devlist_sem(struct fuse_conn *fc) +{ + init_rwsem(&fc->famfs_devlist_sem); +} + static inline int fuse_file_famfs(struct fuse_inode *fi) { return (READ_ONCE(fi->famfs_meta) != NULL); @@ -1677,6 +1689,9 @@ int fuse_get_fmap(struct fuse_mount *fm, struct inode *inode); #else /* !CONFIG_FUSE_FAMFS_DAX */ +static inline void famfs_teardown(struct fuse_conn *fc) +{ +} static inline struct fuse_backing *famfs_meta_set(struct fuse_inode *fi, void *meta) { @@ -1687,6 +1702,10 @@ static inline void famfs_meta_free(struct fuse_inode *fi) { } +static inline void famfs_init_devlist_sem(struct fuse_conn *fc) +{ +} + static inline int fuse_file_famfs(struct fuse_inode *fi) { return 0; diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index b9933d0fbb9f..c5c7f2aeda3f 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -1047,6 +1047,9 @@ void fuse_conn_put(struct fuse_conn *fc) WARN_ON(atomic_read(&bucket->count) != 1); kfree(bucket); } + if (IS_ENABLED(CONFIG_FUSE_FAMFS_DAX)) + famfs_teardown(fc); + if (IS_ENABLED(CONFIG_FUSE_PASSTHROUGH)) fuse_backing_files_free(fc); call_rcu(&fc->rcu, delayed_release); @@ -1476,8 +1479,10 @@ static void process_init_reply(struct fuse_mount *fm, struct fuse_args *args, u64 in_flags = ((u64)ia->in.flags2 << 32) | ia->in.flags; - if (in_flags & FUSE_DAX_FMAP) + if (in_flags & FUSE_DAX_FMAP) { + famfs_init_devlist_sem(fc); fc->famfs_iomap = 1; + } } } else { 
ra_pages = fc->max_read / PAGE_SIZE; diff --git a/fs/namei.c b/fs/namei.c index cf16b6822dd3..99ac58975394 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -4171,6 +4171,7 @@ bool may_open_dev(const struct path *path) return !(path->mnt->mnt_flags & MNT_NODEV) && !(path->mnt->mnt_sb->s_iflags & SB_I_NODEV); } +EXPORT_SYMBOL(may_open_dev); static int may_open(struct mnt_idmap *idmap, const struct path *path, int acc_mode, int flag) diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h index cf678bebbfe0..1b82895108be 100644 --- a/include/uapi/linux/fuse.h +++ b/include/uapi/linux/fuse.h @@ -247,6 +247,9 @@ * - struct fuse_famfs_simple_ext * - struct fuse_famfs_iext * - struct fuse_famfs_fmap_header + * - Add the following structs for the GET_DAXDEV message and reply + * - struct fuse_get_daxdev_in + * - struct fuse_get_daxdev_out * - Add the following enumerated types * - enum fuse_famfs_file_type * - enum famfs_ext_type @@ -678,6 +681,7 @@ enum fuse_opcode { /* Famfs / devdax opcodes */ FUSE_GET_FMAP = 54, + FUSE_GET_DAXDEV = 55, /* CUSE specific operations */ CUSE_INIT = 4096, @@ -1369,6 +1373,22 @@ struct fuse_famfs_fmap_header { uint64_t reserved1; }; +struct fuse_get_daxdev_in { + uint32_t daxdev_num; +}; + +#define DAXDEV_NAME_MAX 256 + +/* fuse_daxdev_out has enough space for a uuid if we need it */ +struct fuse_daxdev_out { + uint16_t index; + uint16_t reserved; + uint32_t reserved2; + uint64_t reserved3; + uint64_t reserved4; + char name[DAXDEV_NAME_MAX]; +}; + static inline int32_t fmap_msg_min_size(void) { /* Smallest fmap message is a header plus one simple extent */ -- 2.52.0
