Introduce IORING_OP_IOCTL_PT for asynchronous ioctl. It bypasses the
block layer and reaches the underlying block driver managing the
block device, by calling the newly introduced "async_ioctl"
block-device operation.
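
For driver writers, a minimal sketch of the new hook follows. It assumes
only the contract visible in this patch: async_ioctl() returns
-EIOCBQUEUED once the command is queued, and the driver later calls
ioctx->pt_complete(ioctx, result). All mydrv_* names are illustrative
placeholders, not part of this series:

  /* illustrative driver-side handler; mydrv_* names are placeholders */
  static void mydrv_cmd_done(struct pt_ioctl_ctx *ioctx, long result)
  {
          /* hand the result back to io_uring; this posts the CQE */
          ioctx->pt_complete(ioctx, result);
  }

  static int mydrv_async_ioctl(struct block_device *bdev, fmode_t mode,
                               unsigned int cmd, unsigned long arg,
                               struct pt_ioctl_ctx *ioctx)
  {
          int ret;

          if (cmd != MYDRV_IOCTL_PASSTHRU)        /* placeholder cmd */
                  return -ENOTTY;

          /* queue the command; completion path calls mydrv_cmd_done() */
          ret = mydrv_queue_cmd(bdev, (void __user *)arg, ioctx);
          if (ret)
                  return ret;
          return -EIOCBQUEUED;
  }

  static const struct block_device_operations mydrv_fops = {
          .owner          = THIS_MODULE,
          .async_ioctl    = mydrv_async_ioctl,
  };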
The requested operation may complete synchronously, in which case the
CQE is posted on the fly. For asynchronous completion, the lower layer
calls the completion callback supplied by io_uring.
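
As a usage sketch, a userspace submission could look as follows. This
requires headers with this patch applied for the ioctl_cmd/ioctl_arg sqe
fields and the IORING_OP_IOCTL_PT opcode; liburing is used only for queue
setup, and bdev_fd/cmd/argp are placeholders for a real block device and
driver command:

  #include <string.h>
  #include <liburing.h>

  static int submit_pt_ioctl(int bdev_fd, unsigned int cmd, void *argp)
  {
          struct io_uring ring;
          struct io_uring_sqe *sqe;
          struct io_uring_cqe *cqe;
          int ret;

          ret = io_uring_queue_init(8, &ring, 0);
          if (ret)
                  return ret;

          sqe = io_uring_get_sqe(&ring);
          memset(sqe, 0, sizeof(*sqe));
          sqe->opcode = IORING_OP_IOCTL_PT;
          sqe->fd = bdev_fd;
          sqe->ioctl_cmd = cmd;                   /* union with len */
          sqe->ioctl_arg = (unsigned long)argp;   /* union with off */

          io_uring_submit(&ring);
          io_uring_wait_cqe(&ring, &cqe);
          ret = cqe->res;                         /* ioctl return value */
          io_uring_cqe_seen(&ring, cqe);
          io_uring_queue_exit(&ring);
          return ret;
  }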

Signed-off-by: Kanchan Joshi <josh...@samsung.com>
Signed-off-by: Anuj Gupta <anuj2...@samsung.com>
---
 fs/io_uring.c                 | 77 +++++++++++++++++++++++++++++++++++
 include/uapi/linux/io_uring.h |  7 +++-
 2 files changed, 83 insertions(+), 1 deletion(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 985a9e3f976d..c15852dfb727 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -468,6 +468,19 @@ struct io_rw {
        u64                             len;
 };
 
+/*
+ * A passthru ioctl bypasses the block layer and reaches the block device
+ * driver via the async_ioctl() block-dev operation.
+ */
+struct io_pt_ioctl {
+       struct file                     *file;
+       /* arg and cmd like regular ioctl */
+       u64                             arg;
+       u32                             cmd;
+       /* defined by block layer */
+       struct pt_ioctl_ctx             ioctx;
+};
+
 struct io_connect {
        struct file                     *file;
        struct sockaddr __user          *addr;
@@ -699,6 +712,7 @@ struct io_kiocb {
                struct io_shutdown      shutdown;
                struct io_rename        rename;
                struct io_unlink        unlink;
+               struct io_pt_ioctl      ptioctl;
                /* use only after cleaning per-op data, see io_clean_op() */
                struct io_completion    compl;
        };
@@ -824,6 +838,10 @@ static const struct io_op_def io_op_defs[] = {
                .needs_file             = 1,
                .work_flags             = IO_WQ_WORK_BLKCG,
        },
+       [IORING_OP_IOCTL_PT] = {
+               .needs_file             = 1,
+               .work_flags             = IO_WQ_WORK_MM,
+       },
        [IORING_OP_READ_FIXED] = {
                .needs_file             = 1,
                .unbound_nonreg_file    = 1,
@@ -3704,6 +3722,60 @@ static int io_write(struct io_kiocb *req, bool force_nonblock,
        return ret;
 }
 
+static int io_pt_ioctl_prep(struct io_kiocb *req,
+                           const struct io_uring_sqe *sqe)
+{
+       unsigned int cmd = READ_ONCE(sqe->ioctl_cmd);
+       unsigned long arg = READ_ONCE(sqe->ioctl_arg);
+       struct io_ring_ctx *ctx = req->ctx;
+       struct block_device *bdev = I_BDEV(req->file->f_mapping->host);
+       struct gendisk *disk = NULL;
+
+       disk = bdev->bd_disk;
+       if (!disk || !disk->fops || !disk->fops->async_ioctl)
+               return -EOPNOTSUPP;
+       /* for sqpoll, use sqo_task */
+       if (ctx->flags & IORING_SETUP_SQPOLL)
+               req->ptioctl.ioctx.task = ctx->sqo_task;
+       else
+               req->ptioctl.ioctx.task = current;
+
+       req->ptioctl.arg = arg;
+       req->ptioctl.cmd = cmd;
+       return 0;
+}
+
+void pt_complete(struct pt_ioctl_ctx *ptioc, long ret)
+{
+       struct io_kiocb *req = container_of(ptioc, struct io_kiocb,
+                                           ptioctl.ioctx);
+
+       if (ret < 0)
+               req_set_fail_links(req);
+       io_req_complete(req, ret);
+}
+
+static int io_pt_ioctl(struct io_kiocb *req, bool force_nonblock)
+{
+       long ret = 0;
+       struct block_device *bdev = I_BDEV(req->file->f_mapping->host);
+       fmode_t mode = req->file->f_mode;
+       struct gendisk *disk = NULL;
+
+       disk = bdev->bd_disk;
+       /* set up callback for async */
+       req->ptioctl.ioctx.pt_complete = pt_complete;
+
+       ret = disk->fops->async_ioctl(bdev, mode, req->ptioctl.cmd,
+                               req->ptioctl.arg, &req->ptioctl.ioctx);
+       if (ret == -EIOCBQUEUED) /* async completion */
+               return 0;
+       if (ret < 0)
+               req_set_fail_links(req);
+
+       io_req_complete(req, ret);
+       return 0;
+}
+
 static int io_renameat_prep(struct io_kiocb *req,
                            const struct io_uring_sqe *sqe)
 {
@@ -6078,6 +6150,8 @@ static int io_req_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
                return io_renameat_prep(req, sqe);
        case IORING_OP_UNLINKAT:
                return io_unlinkat_prep(req, sqe);
+       case IORING_OP_IOCTL_PT:
+               return io_pt_ioctl_prep(req, sqe);
        }
 
        printk_once(KERN_WARNING "io_uring: unhandled opcode %d\n",
@@ -6337,6 +6411,9 @@ static int io_issue_sqe(struct io_kiocb *req, bool force_nonblock,
        case IORING_OP_UNLINKAT:
                ret = io_unlinkat(req, force_nonblock);
                break;
+       case IORING_OP_IOCTL_PT:
+               ret = io_pt_ioctl(req, force_nonblock);
+               break;
        default:
                ret = -EINVAL;
                break;
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index d31a2a1e8ef9..60671e2b00ba 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -22,12 +22,16 @@ struct io_uring_sqe {
        union {
                __u64   off;    /* offset into file */
                __u64   addr2;
+               __u64   ioctl_arg;
        };
        union {
                __u64   addr;   /* pointer to buffer or iovecs */
                __u64   splice_off_in;
        };
-       __u32   len;            /* buffer size or number of iovecs */
+       union {
+               __u32   len;    /* buffer size or number of iovecs */
+               __u32   ioctl_cmd;
+       };
        union {
                __kernel_rwf_t  rw_flags;
                __u32           fsync_flags;
@@ -137,6 +141,7 @@ enum {
        IORING_OP_SHUTDOWN,
        IORING_OP_RENAMEAT,
        IORING_OP_UNLINKAT,
+       IORING_OP_IOCTL_PT,
 
        /* this goes last, obviously */
        IORING_OP_LAST,
-- 
2.25.1
