On 2020/06/18 2:27, Kanchan Joshi wrote:
> From: Selvakumar S <selvakuma...@samsung.com>
> 
> Introduce three new opcodes for zone-append -
> 
>    IORING_OP_ZONE_APPEND     : non-vectored, similar to IORING_OP_WRITE
>    IORING_OP_ZONE_APPENDV    : vectored, similar to IORING_OP_WRITEV
>    IORING_OP_ZONE_APPEND_FIXED : append using fixed-buffers
> 
> Repurpose cqe->flags to return zone-relative offset.
> 
> Signed-off-by: SelvaKumar S <selvakuma...@samsung.com>
> Signed-off-by: Kanchan Joshi <josh...@samsung.com>
> Signed-off-by: Nitesh Shetty <nj.she...@samsung.com>
> Signed-off-by: Javier Gonzalez <javier.g...@samsung.com>
> ---
>  fs/io_uring.c                 | 72 +++++++++++++++++++++++++++++++++++++++++--
>  include/uapi/linux/io_uring.h |  8 ++++-
>  2 files changed, 77 insertions(+), 3 deletions(-)
> 
> diff --git a/fs/io_uring.c b/fs/io_uring.c
> index 155f3d8..c14c873 100644
> --- a/fs/io_uring.c
> +++ b/fs/io_uring.c
> @@ -649,6 +649,10 @@ struct io_kiocb {
>       unsigned long           fsize;
>       u64                     user_data;
>       u32                     result;
> +#ifdef CONFIG_BLK_DEV_ZONED
> +     /* zone-relative offset for append, in bytes */
> +     u32                     append_offset;

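This can overflow: the field holds a zone-relative offset in bytes, and a u32
can only represent offsets below 4 GiB, so a larger zone would truncate the
value. A u64 is needed.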

> +#endif
>       u32                     sequence;
>  
>       struct list_head        link_list;
> @@ -875,6 +879,26 @@ static const struct io_op_def io_op_defs[] = {
>               .hash_reg_file          = 1,
>               .unbound_nonreg_file    = 1,
>       },
> +     [IORING_OP_ZONE_APPEND] = {
> +             .needs_mm               = 1,
> +             .needs_file             = 1,
> +             .unbound_nonreg_file    = 1,
> +             .pollout                = 1,
> +     },
> +     [IORING_OP_ZONE_APPENDV] = {
> +            .async_ctx              = 1,
> +            .needs_mm               = 1,
> +            .needs_file             = 1,
> +            .hash_reg_file          = 1,
> +            .unbound_nonreg_file    = 1,
> +            .pollout                 = 1,
> +     },
> +     [IORING_OP_ZONE_APPEND_FIXED] = {
> +            .needs_file             = 1,
> +            .hash_reg_file          = 1,
> +            .unbound_nonreg_file    = 1,
> +            .pollout                 = 1,
> +     },
>  };
>  
>  static void io_wq_submit_work(struct io_wq_work **workptr);
> @@ -1285,7 +1309,16 @@ static void __io_cqring_fill_event(struct io_kiocb *req, long res, long cflags)
>       if (likely(cqe)) {
>               WRITE_ONCE(cqe->user_data, req->user_data);
>               WRITE_ONCE(cqe->res, res);
> +#ifdef CONFIG_BLK_DEV_ZONED
> +             if (req->opcode == IORING_OP_ZONE_APPEND ||
> +                             req->opcode == IORING_OP_ZONE_APPENDV ||
> +                             req->opcode == IORING_OP_ZONE_APPEND_FIXED)
> +                     WRITE_ONCE(cqe->res2, req->append_offset);
> +             else
> +                     WRITE_ONCE(cqe->flags, cflags);
> +#else
>               WRITE_ONCE(cqe->flags, cflags);
> +#endif
>       } else if (ctx->cq_overflow_flushed) {
>               WRITE_ONCE(ctx->rings->cq_overflow,
>                               atomic_inc_return(&ctx->cached_cq_overflow));
> @@ -1961,6 +1994,9 @@ static void io_complete_rw_common(struct kiocb *kiocb, long res)
>  static void io_complete_rw(struct kiocb *kiocb, long res, long res2)
>  {
>       struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw.kiocb);
> +#ifdef CONFIG_BLK_DEV_ZONED
> +     req->append_offset = (u32)res2;
> +#endif
>  
>       io_complete_rw_common(kiocb, res);
>       io_put_req(req);
> @@ -1976,6 +2012,9 @@ static void io_complete_rw_iopoll(struct kiocb *kiocb, long res, long res2)
>       if (res != req->result)
>               req_set_fail_links(req);
>       req->result = res;
> +#ifdef CONFIG_BLK_DEV_ZONED
> +     req->append_offset = (u32)res2;
> +#endif
>       if (res != -EAGAIN)
>               WRITE_ONCE(req->iopoll_completed, 1);
>  }
> @@ -2408,7 +2447,8 @@ static ssize_t io_import_iovec(int rw, struct io_kiocb *req,
>       u8 opcode;
>  
>       opcode = req->opcode;
> -     if (opcode == IORING_OP_READ_FIXED || opcode == IORING_OP_WRITE_FIXED) {
> +     if (opcode == IORING_OP_READ_FIXED || opcode == IORING_OP_WRITE_FIXED ||
> +                     opcode == IORING_OP_ZONE_APPEND_FIXED) {
>               *iovec = NULL;
>               return io_import_fixed(req, rw, iter);
>       }
> @@ -2417,7 +2457,8 @@ static ssize_t io_import_iovec(int rw, struct io_kiocb *req,
>       if (req->buf_index && !(req->flags & REQ_F_BUFFER_SELECT))
>               return -EINVAL;
>  
> -     if (opcode == IORING_OP_READ || opcode == IORING_OP_WRITE) {
> +     if (opcode == IORING_OP_READ || opcode == IORING_OP_WRITE ||
> +                     opcode == IORING_OP_ZONE_APPEND) {
>               if (req->flags & REQ_F_BUFFER_SELECT) {
>                       buf = io_rw_buffer_select(req, &sqe_len, needs_lock);
>                       if (IS_ERR(buf)) {
> @@ -2704,6 +2745,9 @@ static int io_write(struct io_kiocb *req, bool force_nonblock)
>               req->rw.kiocb.ki_flags &= ~IOCB_NOWAIT;
>  
>       req->result = 0;
> +#ifdef CONFIG_BLK_DEV_ZONED
> +     req->append_offset = 0;
> +#endif
>       io_size = ret;
>       if (req->flags & REQ_F_LINK_HEAD)
>               req->result = io_size;
> @@ -2738,6 +2782,13 @@ static int io_write(struct io_kiocb *req, bool force_nonblock)
>                       __sb_writers_release(file_inode(req->file)->i_sb,
>                                               SB_FREEZE_WRITE);
>               }
> +#ifdef CONFIG_BLK_DEV_ZONED
> +             if (req->opcode == IORING_OP_ZONE_APPEND ||
> +                             req->opcode == IORING_OP_ZONE_APPENDV ||
> +                             req->opcode == IORING_OP_ZONE_APPEND_FIXED)
> +                     kiocb->ki_flags |= IOCB_ZONE_APPEND;
> +#endif
> +
>               kiocb->ki_flags |= IOCB_WRITE;
>  
>               if (!force_nonblock)
> @@ -4906,6 +4957,12 @@ static int io_req_defer_prep(struct io_kiocb *req,
>       case IORING_OP_WRITEV:
>       case IORING_OP_WRITE_FIXED:
>       case IORING_OP_WRITE:
> +#ifdef CONFIG_BLK_DEV_ZONED
> +     fallthrough;
> +     case IORING_OP_ZONE_APPEND:
> +     case IORING_OP_ZONE_APPENDV:
> +     case IORING_OP_ZONE_APPEND_FIXED:
> +#endif
>               ret = io_write_prep(req, sqe, true);
>               break;
>       case IORING_OP_POLL_ADD:
> @@ -5038,6 +5095,12 @@ static void io_cleanup_req(struct io_kiocb *req)
>       case IORING_OP_WRITEV:
>       case IORING_OP_WRITE_FIXED:
>       case IORING_OP_WRITE:
> +#ifdef CONFIG_BLK_DEV_ZONED
> +     fallthrough;
> +     case IORING_OP_ZONE_APPEND:
> +     case IORING_OP_ZONE_APPENDV:
> +     case IORING_OP_ZONE_APPEND_FIXED:
> +#endif
>               if (io->rw.iov != io->rw.fast_iov)
>                       kfree(io->rw.iov);
>               break;
> @@ -5086,6 +5149,11 @@ static int io_issue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
>               }
>               ret = io_read(req, force_nonblock);
>               break;
> +#ifdef CONFIG_BLK_DEV_ZONED
> +     case IORING_OP_ZONE_APPEND:
> +     case IORING_OP_ZONE_APPENDV:
> +     case IORING_OP_ZONE_APPEND_FIXED:
> +#endif
>       case IORING_OP_WRITEV:
>       case IORING_OP_WRITE_FIXED:
>       case IORING_OP_WRITE:
> diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
> index 92c2269..6c8e932 100644
> --- a/include/uapi/linux/io_uring.h
> +++ b/include/uapi/linux/io_uring.h
> @@ -130,6 +130,9 @@ enum {
>       IORING_OP_PROVIDE_BUFFERS,
>       IORING_OP_REMOVE_BUFFERS,
>       IORING_OP_TEE,
> +     IORING_OP_ZONE_APPEND,
> +     IORING_OP_ZONE_APPENDV,
> +     IORING_OP_ZONE_APPEND_FIXED,
>  
>       /* this goes last, obviously */
>       IORING_OP_LAST,
> @@ -157,7 +160,10 @@ enum {
>  struct io_uring_cqe {
>       __u64   user_data;      /* sqe->data submission passed back */
>       __s32   res;            /* result code for this event */
> -     __u32   flags;
> +     union {
> +             __u32   res2; /* res2 like aio, currently used for zone-append */
> +             __u32   flags;
> +     };
>  };
>  
>  /*
> 


-- 
Damien Le Moal
Western Digital Research

Reply via email to