Add support to create a cdev multipath device. The functionality is much
the same as NVMe, where the cdev is created when a mpath device is set
live.

The driver must provide a mpath_head_template.cdev_ioctl callback to
actually handle the ioctl.

Structure mpath_chr_fops is exported so that the driver can use it to set
the cdev fops in its mpath_head_template.add_cdev callback.

The NVMe cdev ioctl handler has special handling for NVMe controller
commands. In this case, the SRCU read lock is dropped before executing the
ioctl. For reference, see nvme_ns_head_ctrl_ioctl(). This means that it is
not always possible to hold the SRCU read lock while calling the ioctl
callback. To handle this scenario, add template callbacks .ioctl_begin and
.ioctl_finish, to be called before and after the ioctl callback,
respectively. If .ioctl_begin returns data, then we know to drop the SRCU
lock before calling the ioctl callback, and then later call the
.ioctl_finish callback with that same data. For NVMe using libmultipath,
we would take a reference to the controller structure and pass a pointer
to the controller structure back in the .ioctl_begin callback, and use
that same data in the .ioctl_finish callback to put the reference to the
controller.

Signed-off-by: John Garry <[email protected]>
---
 include/linux/multipath.h |  18 ++++++
 lib/multipath.c           | 129 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 147 insertions(+)

diff --git a/include/linux/multipath.h b/include/linux/multipath.h
index 72186ab220083..3ac77c089a58c 100644
--- a/include/linux/multipath.h
+++ b/include/linux/multipath.h
@@ -4,8 +4,11 @@
 
 #include <linux/blkdev.h>
 #include <linux/blk-mq.h>
+#include <linux/cdev.h>
 #include <linux/srcu.h>
+#include <linux/io_uring/cmd.h>
 
+extern const struct file_operations mpath_chr_fops;
 extern const struct block_device_operations mpath_ops;
 
 enum mpath_iopolicy_e {
@@ -37,12 +40,24 @@ struct mpath_device {
 
 struct mpath_head_template {
        bool (*available_path)(struct mpath_device *);
+       int (*add_cdev)(struct mpath_head *);
+       void (*del_cdev)(struct mpath_head *);
        bool (*is_disabled)(struct mpath_device *);
        bool (*is_optimized)(struct mpath_device *);
        int (*get_nr_active)(struct mpath_device *);
+       long (*cdev_ioctl)(struct mpath_device *, unsigned int cmd,
+                               unsigned long arg, bool open_for_write);
+       int (*chr_uring_cmd)(struct mpath_device *,
+                               struct io_uring_cmd *ioucmd,
+                               unsigned int issue_flags);
+       int (*chr_uring_cmd_iopoll)(struct io_uring_cmd *ioucmd,
+                                struct io_comp_batch *iob,
+                                unsigned int poll_flags);
        enum mpath_iopolicy_e (*get_iopolicy)(struct mpath_head *);
        struct bio *(*clone_bio)(struct bio *);
        const struct attribute_group **device_groups;
+       void (*ioctl_begin)(struct mpath_device *, unsigned int cmd, void **);
+       void (*ioctl_finish)(void *opaque);
 };
 
 #define MPATH_HEAD_DISK_LIVE                   0
@@ -58,6 +73,9 @@ struct mpath_head {
        spinlock_t              requeue_lock;
        struct work_struct      requeue_work; /* work struct for requeue */
 
+       struct cdev             cdev;
+       struct device           cdev_device;
+
        void                    *drvdata;
        unsigned long           flags;
        struct gendisk          *disk;
diff --git a/lib/multipath.c b/lib/multipath.c
index 1232e057199ae..69e48ca3169c2 100644
--- a/lib/multipath.c
+++ b/lib/multipath.c
@@ -462,6 +462,122 @@ const struct block_device_operations mpath_ops = {
 };
 EXPORT_SYMBOL_GPL(mpath_ops);
 
+static int mpath_chr_open(struct inode *inode, struct file *file)
+{
+       struct cdev *cdev = file_inode(file)->i_cdev;
+       struct mpath_head *mpath_head =
+                       container_of(cdev, struct mpath_head, cdev);
+
+       return mpath_get_head(mpath_head);
+}
+
+static int mpath_chr_release(struct inode *inode, struct file *file)
+{
+       struct cdev *cdev = file_inode(file)->i_cdev;
+       struct mpath_head *mpath_head =
+                       container_of(cdev, struct mpath_head, cdev);
+
+       mpath_put_head(mpath_head);
+       return 0;
+}
+
+static long mpath_chr_ioctl(struct file *file, unsigned int cmd,
+               unsigned long arg)
+{
+       struct cdev *cdev = file_inode(file)->i_cdev;
+       struct mpath_head *mpath_head =
+                       container_of(cdev, struct mpath_head, cdev);
+       struct mpath_device *mpath_device;
+       int srcu_idx, err = -EWOULDBLOCK;
+       void *unlocked_ioctl_data = NULL;
+
+       srcu_idx = srcu_read_lock(&mpath_head->srcu);
+       mpath_device = mpath_find_path(mpath_head);
+       if (!mpath_device)
+               goto out_unlock;
+       if (mpath_head->mpdt->ioctl_begin)
+               mpath_head->mpdt->ioctl_begin(mpath_device, cmd,
+                                       &unlocked_ioctl_data);
+       if (unlocked_ioctl_data)
+               srcu_read_unlock(&mpath_head->srcu, srcu_idx);
+       err = mpath_head->mpdt->cdev_ioctl(mpath_device, cmd, arg,
+                                       file->f_mode & FMODE_WRITE);
+       if (unlocked_ioctl_data) {
+               mpath_head->mpdt->ioctl_finish(unlocked_ioctl_data);
+               return err;
+       }
+
+out_unlock:
+       srcu_read_unlock(&mpath_head->srcu, srcu_idx);
+       return err;
+}
+
+static int mpath_chr_uring_cmd(struct io_uring_cmd *ioucmd,
+               unsigned int issue_flags)
+{
+       struct cdev *cdev = file_inode(ioucmd->file)->i_cdev;
+       struct mpath_head *mpath_head =
+                       container_of(cdev, struct mpath_head, cdev);
+       struct mpath_device *mpath_device;
+       /* error code copied from nvme_ns_head_chr_uring_cmd */
+       int srcu_idx, ret = -EINVAL;
+
+       srcu_idx = srcu_read_lock(&mpath_head->srcu);
+       mpath_device = mpath_find_path(mpath_head);
+
+       if (!mpath_device)
+               goto out_unlock;
+
+       if (!mpath_head->mpdt->chr_uring_cmd) {
+               ret = -EOPNOTSUPP;
+               goto out_unlock;
+       }
+
+       ret = mpath_head->mpdt->chr_uring_cmd(mpath_device, ioucmd,
+                       issue_flags);
+out_unlock:
+       srcu_read_unlock(&mpath_head->srcu, srcu_idx);
+       return ret;
+}
+
+static int mpath_chr_uring_cmd_iopoll(struct io_uring_cmd *ioucmd,
+                                struct io_comp_batch *iob,
+                                unsigned int poll_flags)
+{
+       struct cdev *cdev = file_inode(ioucmd->file)->i_cdev;
+       struct mpath_head *mpath_head =
+                       container_of(cdev, struct mpath_head, cdev);
+
+       if (!mpath_head->mpdt->chr_uring_cmd_iopoll)
+               return -EOPNOTSUPP;
+
+       return mpath_head->mpdt->chr_uring_cmd_iopoll(ioucmd, iob, poll_flags);
+}
+
+const struct file_operations mpath_chr_fops = {
+       .owner          = THIS_MODULE,
+       .open           = mpath_chr_open,
+       .release        = mpath_chr_release,
+       .unlocked_ioctl = mpath_chr_ioctl,
+       .compat_ioctl   = compat_ptr_ioctl,
+       .uring_cmd      = mpath_chr_uring_cmd,
+       .uring_cmd_iopoll = mpath_chr_uring_cmd_iopoll,
+};
+EXPORT_SYMBOL_GPL(mpath_chr_fops);
+
+static int mpath_head_add_cdev(struct mpath_head *mpath_head)
+{
+       if (mpath_head->mpdt->add_cdev)
+               return mpath_head->mpdt->add_cdev(mpath_head);
+       return 0;
+}
+
+static void mpath_head_del_cdev(struct mpath_head *mpath_head)
+{
+       if (mpath_head->mpdt->del_cdev)
+               mpath_head->mpdt->del_cdev(mpath_head);
+}
+
 static void multipath_partition_scan_work(struct work_struct *work)
 {
        struct mpath_head *mpath_head =
@@ -504,6 +620,7 @@ void mpath_remove_disk(struct mpath_head *mpath_head)
                 */
                mpath_schedule_requeue_work(mpath_head);
 
+               mpath_head_del_cdev(mpath_head);
                mpath_synchronize(mpath_head);
                del_gendisk(disk);
        }
@@ -526,6 +643,16 @@ EXPORT_SYMBOL_GPL(mpath_put_disk);
 int mpath_alloc_head_disk(struct mpath_head *mpath_head,
                        struct queue_limits *lim, int numa_node)
 {
+       /* Do limited sanity checks on template */
+       if (!mpath_head->mpdt->ioctl_begin ^ !mpath_head->mpdt->ioctl_finish)
+               return -EINVAL;
+
+       if (!mpath_head->mpdt->add_cdev ^ !mpath_head->mpdt->del_cdev)
+               return -EINVAL;
+
+       if (!mpath_head->mpdt->add_cdev ^ !mpath_head->mpdt->cdev_ioctl)
+               return -EINVAL;
+
        mpath_head->disk = blk_alloc_disk(lim, numa_node);
        if (IS_ERR(mpath_head->disk))
                return PTR_ERR(mpath_head->disk);
@@ -555,6 +682,8 @@ void mpath_device_set_live(struct mpath_device *mpath_device)
                        clear_bit(MPATH_HEAD_DISK_LIVE, &mpath_head->flags);
                        return;
                }
+
+               mpath_head_add_cdev(mpath_head);
                queue_work(mpath_wq, &mpath_head->partition_scan_work);
        }
 
-- 
2.43.5


Reply via email to