On Wed, Jul 16, 2025 at 02:38:24PM -0400, Brian Song wrote: > This work provides an initial implementation of fuse-over-io_uring > support for QEMU export. According to the fuse-over-io_uring protocol > specification, the userspace side must create the same number of queues > as the number of CPUs (nr_cpu), just like the kernel. Currently, each > queue contains only a single SQE entry, which is used to validate the > correctness of the fuse-over-io_uring functionality. > > All FUSE read and write operations interact with the kernel via io > vectors embedded in the SQE entry during submission and CQE fetching. > The req_header and op_payload members of each entry are included as > parts of the io vector: req_header carries the FUSE operation header, > and op_payload carries the data payload, such as file attributes in a > getattr reply, file content in a read reply, or file content being > written to the FUSE client in a write operation. > > At present, multi-threading support is still incomplete. In addition, > handling connection termination and managing the "drained" state of a > FUSE block export in QEMU remain as pending work. > > Suggested-by: Kevin Wolf <kw...@redhat.com> > Suggested-by: Stefan Hajnoczi <stefa...@redhat.com> > Signed-off-by: Brian Song <hibrians...@gmail.com> > > --- > block/export/fuse.c | 423 +++++++++++++++++++++++++-- > docs/tools/qemu-storage-daemon.rst | 10 +- > qapi/block-export.json | 6 +- > storage-daemon/qemu-storage-daemon.c | 1 + > util/fdmon-io_uring.c | 5 +- > 5 files changed, 420 insertions(+), 25 deletions(-)
Here is feedback from a first pass over this patch. > > diff --git a/block/export/fuse.c b/block/export/fuse.c > index c0ad4696ce..637d36186a 100644 > --- a/block/export/fuse.c > +++ b/block/export/fuse.c > @@ -48,6 +48,11 @@ > #include <linux/fs.h> > #endif > > +#define FUSE_DEFAULT_MAX_PAGES_PER_REQ 32 > + > +/* room needed in buffer to accommodate header */ > +#define FUSE_BUFFER_HEADER_SIZE 0x1000 > + > /* Prevent overly long bounce buffer allocations */ > #define FUSE_MAX_READ_BYTES (MIN(BDRV_REQUEST_MAX_BYTES, 1 * 1024 * 1024)) > /* > @@ -64,6 +69,26 @@ > > typedef struct FuseExport FuseExport; > > +struct FuseQueue; Use "typedef struct FuseQueue FuseQueue;" here... > + > +typedef struct FuseRingEnt { > + /* back pointer */ > + struct FuseQueue *q; ...and then this can be "FuseQueue *q;" so that QEMU coding style is followed. > + > + /* commit id of a fuse request */ > + uint64_t req_commit_id; > + > + /* fuse request header and payload */ > + struct fuse_uring_req_header *req_header; > + void *op_payload; > + size_t req_payload_sz; > + > + /* The vector passed to the kernel */ > + struct iovec iov[2]; > + > + CqeHandler fuse_cqe_handler; > +} FuseRingEnt; > + > /* > * One FUSE "queue", representing one FUSE FD from which requests are fetched > * and processed. Each queue is tied to an AioContext. > @@ -73,6 +98,7 @@ typedef struct FuseQueue { > > AioContext *ctx; > int fuse_fd; > + int qid; Could this go inside #ifdef CONFIG_LINUX_IO_URING? It seems to be specific to FUSE-over-io_uring. > > /* > * The request buffer must be able to hold a full write, and/or at least > @@ -109,6 +135,17 @@ typedef struct FuseQueue { > * Free this buffer with qemu_vfree(). > */ > void *spillover_buf; > + > +#ifdef CONFIG_LINUX_IO_URING > + FuseRingEnt ent; > + > + /* > + * TODO > + * Support multi-threaded FUSE over io_uring by using eventfd and > allocating > + * an extra SQE for each thread to be notified when the connection > + * shuts down. > + */ eventfd and the extra SQE won't be necessary because aio_bh_schedule_oneshot() can be used to cause threads to execute a function. (I think this comment effectively says that connection shutdown still needs to be implemented. The implementation details don't matter at this point.) > +#endif > } FuseQueue; > > /* > @@ -148,6 +185,7 @@ struct FuseExport { > bool growable; > /* Whether allow_other was used as a mount option or not */ > bool allow_other; > + bool is_uring; > > mode_t st_mode; > uid_t st_uid; > @@ -257,6 +295,126 @@ static const BlockDevOps fuse_export_blk_dev_ops = { > .drained_poll = fuse_export_drained_poll, > }; > > +#ifdef CONFIG_LINUX_IO_URING > +static void coroutine_fn fuse_uring_co_process_request(FuseRingEnt *ent); > + > +static void coroutine_fn co_fuse_uring_queue_handle_cqes(void *opaque) > +{ > + CqeHandler *cqe_handler = opaque; > + FuseRingEnt *ent = container_of(cqe_handler, FuseRingEnt, > fuse_cqe_handler); Passing ent in opaque instead of cqe_handler would simplify this. 
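i.e. something like this (untested sketch):

    static void coroutine_fn co_fuse_uring_queue_handle_cqes(void *opaque)
    {
        FuseRingEnt *ent = opaque;    /* passed directly, no container_of() */
        FuseExport *exp = ent->q->exp;

        fuse_uring_co_process_request(ent);

        fuse_dec_in_flight(exp);
    }

together with passing ent at the qemu_coroutine_create() call site below:

    co = qemu_coroutine_create(co_fuse_uring_queue_handle_cqes, ent);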
> + FuseExport *exp = ent->q->exp; > + > + fuse_uring_co_process_request(ent); > + > + fuse_dec_in_flight(exp); > +} > + > +static void fuse_uring_cqe_handler(CqeHandler *cqe_handler) > +{ > + FuseRingEnt *ent = container_of(cqe_handler, FuseRingEnt, > fuse_cqe_handler); > + FuseQueue *q = ent->q; > + Coroutine *co; > + FuseExport *exp = ent->q->exp; > + > + int err = cqe_handler->cqe.res; > + if (err != 0) { > + /* TODO end_conn support */ > + > + /* -ENOTCONN is ok on umount */ > + if (err != -EINTR && err != -EOPNOTSUPP && > + err != -EAGAIN && err != -ENOTCONN) { > + fuse_export_halt(exp); > + } > + } else { > + co = qemu_coroutine_create(co_fuse_uring_queue_handle_cqes, > + cqe_handler); > + /* Decremented by co_fuse_uring_queue_handle_cqes() */ > + fuse_inc_in_flight(q->exp); Can this be moved inside co_fuse_uring_queue_handle_cqes() to avoid calling inc/dec from different functions? That would make the code easier to understand and more robust against future bugs. > + qemu_coroutine_enter(co); > + } > +} > + > +static void fuse_uring_sqe_set_req_data(struct fuse_uring_cmd_req *req, > + const unsigned int qid, > + const unsigned int commit_id) > +{ > + req->qid = qid; > + req->commit_id = commit_id; > + req->flags = 0; > +} > + > +static void fuse_uring_sqe_prepare(struct io_uring_sqe *sqe, FuseRingEnt > *ent, > + __u32 cmd_op) > +{ > + sqe->opcode = IORING_OP_URING_CMD; > + > + sqe->fd = ent->q->fuse_fd; > + sqe->rw_flags = 0; > + sqe->ioprio = 0; > + sqe->off = 0; > + > + sqe->cmd_op = cmd_op; > + sqe->__pad1 = 0; > +} > + > +static void fuse_uring_prep_sqe_register(struct io_uring_sqe *sqe, void > *opaque) > +{ > + FuseQueue *q = opaque; > + struct fuse_uring_cmd_req *req = (void *)&sqe->cmd[0]; > + > + fuse_uring_sqe_prepare(sqe, &q->ent, FUSE_IO_URING_CMD_REGISTER); > + > + sqe->addr = (uint64_t)(q->ent.iov); > + sqe->len = 2; > + > + fuse_uring_sqe_set_req_data(req, q->qid, 0); > +} > + > +static void fuse_uring_start(FuseExport *exp, struct fuse_init_out *out) > +{ > + /* > + * Since we didn't enable the FUSE_MAX_PAGES feature, the value of > + * fc->max_pages should be FUSE_DEFAULT_MAX_PAGES_PER_REQ, which is set > by > + * the kernel by default. Also, max_write should not exceed > + * FUSE_DEFAULT_MAX_PAGES_PER_REQ * PAGE_SIZE. > + */ > + size_t bufsize = out->max_write + FUSE_BUFFER_HEADER_SIZE; > + > + if (!(out->flags & FUSE_MAX_PAGES)) { > + /* > + * bufsize = MIN(FUSE_DEFAULT_MAX_PAGES_PER_REQ * > + * qemu_real_host_page_size() + FUSE_BUFFER_HEADER_SIZE, > bufsize); > + */ > + bufsize = FUSE_DEFAULT_MAX_PAGES_PER_REQ * qemu_real_host_page_size() > + + FUSE_BUFFER_HEADER_SIZE; > + } > + > + for (int i = 0; i < exp->num_queues; i++) { > + FuseQueue *q = &exp->queues[i]; > + > + q->ent.q = q; > + > + q->ent.req_header = g_malloc0(sizeof(struct fuse_uring_req_header)); It's probably easier to embed the header as a FuseRingEnt field instead of heap allocating it. 
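Roughly like this (sketch, not compiled):

    typedef struct FuseRingEnt {
        /* back pointer */
        FuseQueue *q;

        /* commit id of a fuse request */
        uint64_t req_commit_id;

        /* fuse request header, embedded instead of heap-allocated */
        struct fuse_uring_req_header req_header;

        /* payload buffer */
        void *op_payload;
        size_t req_payload_sz;

        /* The vector passed to the kernel */
        struct iovec iov[2];

        CqeHandler fuse_cqe_handler;
    } FuseRingEnt;

Then iov[0] becomes { &q->ent.req_header, sizeof(q->ent.req_header) } and the g_malloc0()/g_free() pair for the header goes away.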
> + q->ent.req_payload_sz = bufsize - FUSE_BUFFER_HEADER_SIZE; > + q->ent.op_payload = g_malloc0(q->ent.req_payload_sz); > + > + q->ent.iov[0] = (struct iovec) { > + q->ent.req_header, > + sizeof(struct fuse_uring_req_header) > + }; > + q->ent.iov[1] = (struct iovec) { > + q->ent.op_payload, > + q->ent.req_payload_sz > + }; > + > + exp->queues[i].ent.fuse_cqe_handler.cb = fuse_uring_cqe_handler; > + > + aio_add_sqe(fuse_uring_prep_sqe_register, &(exp->queues[i]), > + &(exp->queues[i].ent.fuse_cqe_handler)); > + } > +} > +#endif > + > static int fuse_export_create(BlockExport *blk_exp, > BlockExportOptions *blk_exp_args, > AioContext *const *multithread, > @@ -280,6 +438,7 @@ static int fuse_export_create(BlockExport *blk_exp, > > for (size_t i = 0; i < mt_count; i++) { > exp->queues[i] = (FuseQueue) { > + .qid = i, > .exp = exp, > .ctx = multithread[i], > .fuse_fd = -1, > @@ -293,6 +452,7 @@ static int fuse_export_create(BlockExport *blk_exp, > exp->num_queues = 1; > exp->queues = g_new(FuseQueue, 1); > exp->queues[0] = (FuseQueue) { > + .qid = 0, > .exp = exp, > .ctx = exp->common.ctx, > .fuse_fd = -1, > @@ -312,6 +472,8 @@ static int fuse_export_create(BlockExport *blk_exp, > } > } > > + exp->is_uring = args->uring ? true : false; > + > blk_set_dev_ops(exp->common.blk, &fuse_export_blk_dev_ops, exp); > > /* > @@ -597,6 +759,22 @@ static void read_from_fuse_fd(void *opaque) > qemu_coroutine_enter(co); > } > > +#ifdef CONFIG_LINUX_IO_URING > +static void fuse_export_delete_uring(FuseExport *exp) > +{ > + exp->is_uring = false; > + > + /* > + * TODO > + * end_conn handling > + */ > + for (size_t qid = 0; qid < exp->num_queues; qid++) { > + g_free(exp->queues[qid].ent.req_header); > + g_free(exp->queues[qid].ent.op_payload); > + } > +} > +#endif > + > static void fuse_export_shutdown(BlockExport *blk_exp) > { > FuseExport *exp = container_of(blk_exp, FuseExport, common); > @@ -618,6 +796,11 @@ static void fuse_export_delete(BlockExport *blk_exp) > { > FuseExport *exp = container_of(blk_exp, FuseExport, common); > > +#ifdef CONFIG_LINUX_IO_URING > + if (exp->is_uring) > + fuse_export_delete_uring(exp); > +#endif > + > for (int i = 0; i < exp->num_queues; i++) { > FuseQueue *q = &exp->queues[i]; > > @@ -687,15 +870,22 @@ static ssize_t coroutine_fn > fuse_co_init(FuseExport *exp, struct fuse_init_out *out, > uint32_t max_readahead, uint32_t flags) > { > - const uint32_t supported_flags = FUSE_ASYNC_READ | FUSE_ASYNC_DIO; > + const uint32_t supported_flags = FUSE_ASYNC_READ | FUSE_ASYNC_DIO > + | FUSE_INIT_EXT; > + uint64_t outargflags = flags; > + > +#ifdef CONFIG_LINUX_IO_URING > + if (exp->is_uring) > + outargflags |= FUSE_OVER_IO_URING; > +#endif > > *out = (struct fuse_init_out) { > .major = FUSE_KERNEL_VERSION, > .minor = FUSE_KERNEL_MINOR_VERSION, > .max_readahead = max_readahead, > .max_write = FUSE_MAX_WRITE_BYTES, > - .flags = flags & supported_flags, > - .flags2 = 0, > + .flags = outargflags & supported_flags, > + .flags2 = outargflags >> 32, > > /* libfuse maximum: 2^16 - 1 */ > .max_background = UINT16_MAX, > @@ -943,6 +1133,9 @@ fuse_co_read(FuseExport *exp, void **bufptr, uint64_t > offset, uint32_t size) > * Data in @in_place_buf is assumed to be overwritten after yielding, so will > * be copied to a bounce buffer beforehand. @spillover_buf in contrast is > * assumed to be exclusively owned and will be used as-is. > + * In FUSE-over-io_uring mode, the actual op_payload content is stored in > + * @spillover_buf. 
To ensure this buffer is used for writing, @in_place_buf > + * is explicitly set to NULL. > * Return the number of bytes written to *out on success, and -errno on > error. > */ > static ssize_t coroutine_fn > @@ -950,8 +1143,8 @@ fuse_co_write(FuseExport *exp, struct fuse_write_out > *out, > uint64_t offset, uint32_t size, > const void *in_place_buf, const void *spillover_buf) > { > - size_t in_place_size; > - void *copied; > + size_t in_place_size = 0; > + void *copied = NULL; > int64_t blk_len; > int ret; > struct iovec iov[2]; > @@ -966,10 +1159,12 @@ fuse_co_write(FuseExport *exp, struct fuse_write_out > *out, > return -EACCES; > } > > - /* Must copy to bounce buffer before potentially yielding */ > - in_place_size = MIN(size, FUSE_IN_PLACE_WRITE_BYTES); > - copied = blk_blockalign(exp->common.blk, in_place_size); > - memcpy(copied, in_place_buf, in_place_size); > + if (in_place_buf) { > + /* Must copy to bounce buffer before potentially yielding */ > + in_place_size = MIN(size, FUSE_IN_PLACE_WRITE_BYTES); > + copied = blk_blockalign(exp->common.blk, in_place_size); > + memcpy(copied, in_place_buf, in_place_size); > + } > > /** > * Clients will expect short writes at EOF, so we have to limit > @@ -993,26 +1188,37 @@ fuse_co_write(FuseExport *exp, struct fuse_write_out > *out, > } > } > > - iov[0] = (struct iovec) { > - .iov_base = copied, > - .iov_len = in_place_size, > - }; > - if (size > FUSE_IN_PLACE_WRITE_BYTES) { > - assert(size - FUSE_IN_PLACE_WRITE_BYTES <= FUSE_SPILLOVER_BUF_SIZE); > - iov[1] = (struct iovec) { > - .iov_base = (void *)spillover_buf, > - .iov_len = size - FUSE_IN_PLACE_WRITE_BYTES, > + if (in_place_buf) { > + iov[0] = (struct iovec) { > + .iov_base = copied, > + .iov_len = in_place_size, > }; > - qemu_iovec_init_external(&qiov, iov, 2); > + if (size > FUSE_IN_PLACE_WRITE_BYTES) { > + assert(size - FUSE_IN_PLACE_WRITE_BYTES <= > FUSE_SPILLOVER_BUF_SIZE); > + iov[1] = (struct iovec) { > + .iov_base = (void *)spillover_buf, > + .iov_len = size - FUSE_IN_PLACE_WRITE_BYTES, > + }; > + qemu_iovec_init_external(&qiov, iov, 2); > + } else { > + qemu_iovec_init_external(&qiov, iov, 1); > + } > } else { > + /* fuse over io_uring */ > + iov[0] = (struct iovec) { > + .iov_base = (void *)spillover_buf, > + .iov_len = size, > + }; > qemu_iovec_init_external(&qiov, iov, 1); > } > + > ret = blk_co_pwritev(exp->common.blk, offset, size, &qiov, 0); > if (ret < 0) { > goto fail_free_buffer; > } > > - qemu_vfree(copied); > + if (in_place_buf) > + qemu_vfree(copied); > > *out = (struct fuse_write_out) { > .size = size, > @@ -1020,7 +1226,9 @@ fuse_co_write(FuseExport *exp, struct fuse_write_out > *out, > return sizeof(*out); > > fail_free_buffer: > - qemu_vfree(copied); > + if (in_place_buf) { > + qemu_vfree(copied); > + } > return ret; > } > > @@ -1409,6 +1617,12 @@ fuse_co_process_request(FuseQueue *q, void > *spillover_buf) > const struct fuse_init_in *in = FUSE_IN_OP_STRUCT(init, q); > ret = fuse_co_init(exp, FUSE_OUT_OP_STRUCT(init, out_buf), > in->max_readahead, in->flags); > +#ifdef CONFIG_LINUX_IO_URING > + /* Set up fuse over io_uring after replying to the first FUSE_INIT */ > + if (exp->is_uring) { > + fuse_uring_start(exp, FUSE_OUT_OP_STRUCT(init, out_buf)); > + } > +#endif > break; > } > > @@ -1515,6 +1729,173 @@ fuse_co_process_request(FuseQueue *q, void > *spillover_buf) > qemu_vfree(spillover_buf); > } > > +#ifdef CONFIG_LINUX_IO_URING > +static void fuse_uring_prep_sqe_commit(struct io_uring_sqe *sqe, void > *opaque) > +{ > + FuseRingEnt *ent = opaque; > + struct 
fuse_uring_cmd_req *req = (void *)&sqe->cmd[0]; > + > + fuse_uring_sqe_prepare(sqe, ent, FUSE_IO_URING_CMD_COMMIT_AND_FETCH); > + fuse_uring_sqe_set_req_data(req, ent->q->qid, > + ent->req_commit_id); > +} > + > +static void > +fuse_uring_write_response(FuseRingEnt *ent, uint32_t req_id, ssize_t ret, > + const void *out_op_hdr, const void *buf) > +{ > + struct fuse_uring_req_header *rrh = ent->req_header; > + struct fuse_out_header *out_header = (struct fuse_out_header > *)&rrh->in_out; > + struct fuse_uring_ent_in_out *ent_in_out = > + (struct fuse_uring_ent_in_out *)&rrh->ring_ent_in_out; > + > + if (buf) { > + memcpy(ent->op_payload, buf, ret); > + } else if (ret > 0) { > + if (ret > ent->req_payload_sz) { > + warn_report("data size %zu exceeds payload buffer size %zu", > + ret, ent->req_payload_sz); > + ret = -EINVAL; > + } else { > + memcpy(ent->op_payload, out_op_hdr, ret); > + } > + } > + > + out_header->error = ret < 0 ? ret : 0; > + out_header->unique = req_id; > + /* out_header->len = ret > 0 ? ret : 0; */ > + ent_in_out->payload_sz = ret > 0 ? ret : 0; > + > + aio_add_sqe(fuse_uring_prep_sqe_commit, ent, > + &ent->fuse_cqe_handler); > +} > + > +static void coroutine_fn fuse_uring_co_process_request(FuseRingEnt *ent) > +{ > + FuseQueue *q = ent->q; > + FuseExport *exp = q->exp; > + struct fuse_uring_req_header *rrh = ent->req_header; > + struct fuse_uring_ent_in_out *ent_in_out = > + (struct fuse_uring_ent_in_out *)&rrh->ring_ent_in_out; > + > + char out_op_hdr[MAX_CONST(sizeof(struct fuse_init_out), > + MAX_CONST(sizeof(struct fuse_open_out), > + MAX_CONST(sizeof(struct fuse_attr_out), > + MAX_CONST(sizeof(struct fuse_write_out), > + sizeof(struct fuse_lseek_out)))))]; A union would be cleaner because the casts below could be avoided and MAX_CONST() would be unnecessary: union { struct fuse_init_out init; struct fuse_open_out open; struct fuse_attr_out attr; struct fuse_write_out write; struct fuse_lseek_out lseek; } out_op_hdr; > + > + void *out_data_buffer = NULL; > + > + uint32_t opcode; > + uint64_t req_id; > + > + struct fuse_in_header *in_hdr = (struct fuse_in_header *)&rrh->in_out; > + opcode = in_hdr->opcode; > + req_id = in_hdr->unique; > + > + ent->req_commit_id = ent_in_out->commit_id; > + > + if (unlikely(ent->req_commit_id == 0)) { > + /* > + * If this happens kernel will not find the response - it will > + * be stuck forever - better to abort immediately.
> + */ > + error_report("If this happens kernel will not find the response" > + " - it will be stuck forever - better to abort immediately."); > + fuse_export_halt(exp); > + fuse_dec_in_flight(exp); > + return; > + } > + > + ssize_t ret; > + > + switch (opcode) { > + case FUSE_OPEN: > + ret = fuse_co_open(exp, (struct fuse_open_out *)out_op_hdr); > + break; > + > + case FUSE_RELEASE: > + ret = 0; > + break; > + > + case FUSE_LOOKUP: > + ret = -ENOENT; /* There is no node but the root node */ > + break; > + > + case FUSE_GETATTR: > + ret = fuse_co_getattr(exp, (struct fuse_attr_out *)out_op_hdr); > + break; > + > + case FUSE_SETATTR: { > + const struct fuse_setattr_in *in = > + (const struct fuse_setattr_in *)&rrh->op_in; > + ret = fuse_co_setattr(exp, (struct fuse_attr_out *)out_op_hdr, > + in->valid, in->size, in->mode, in->uid, > in->gid); > + break; > + } > + > + case FUSE_READ: { > + const struct fuse_read_in *in = > + (const struct fuse_read_in *)&rrh->op_in; > + ret = fuse_co_read(exp, &out_data_buffer, in->offset, in->size); > + break; > + } > + > + case FUSE_WRITE: { > + const struct fuse_write_in *in = > + (const struct fuse_write_in *)&rrh->op_in; > + > + assert(in->size == ent_in_out->payload_sz); > + > + /* > + * poll_fuse_fd() has checked that in_hdr->len matches the number of > + * bytes read, which cannot exceed the max_write value we set > + * (FUSE_MAX_WRITE_BYTES). So we know that FUSE_MAX_WRITE_BYTES >= > + * in_hdr->len >= in->size + X, so this assertion must hold. > + */ > + assert(in->size <= FUSE_MAX_WRITE_BYTES); > + > + ret = fuse_co_write(exp, (struct fuse_write_out *)out_op_hdr, > + in->offset, in->size, NULL, ent->op_payload); > + break; > + } > + > + case FUSE_FALLOCATE: { > + const struct fuse_fallocate_in *in = > + (const struct fuse_fallocate_in *)&rrh->op_in; > + ret = fuse_co_fallocate(exp, in->offset, in->length, in->mode); > + break; > + } > + > + case FUSE_FSYNC: > + ret = fuse_co_fsync(exp); > + break; > + > + case FUSE_FLUSH: > + ret = fuse_co_flush(exp); > + break; > + > +#ifdef CONFIG_FUSE_LSEEK > + case FUSE_LSEEK: { > + const struct fuse_lseek_in *in = > + (const struct fuse_lseek_in *)&rrh->op_in; > + ret = fuse_co_lseek(exp, (struct fuse_lseek_out *)out_op_hdr, > + in->offset, in->whence); > + break; > + } > +#endif > + > + default: > + ret = -ENOSYS; > + } It would be nice to reuse the non-io_uring code rather than duplicating the switch statement that covers each FUSE opcode. Is the memory layout so different that the code cannot be shared? > + > + fuse_uring_write_response(ent, req_id, ret, out_op_hdr, out_data_buffer); > + > + if (out_data_buffer) > + qemu_vfree(out_data_buffer); > +} > +#endif > + > const BlockExportDriver blk_exp_fuse = { > .type = BLOCK_EXPORT_TYPE_FUSE, > .instance_size = sizeof(FuseExport), > diff --git a/docs/tools/qemu-storage-daemon.rst > b/docs/tools/qemu-storage-daemon.rst > index 35ab2d7807..4ec0648e95 100644 > --- a/docs/tools/qemu-storage-daemon.rst > +++ b/docs/tools/qemu-storage-daemon.rst > @@ -78,7 +78,7 @@ Standard options: > .. 
option:: --export > [type=]nbd,id=<id>,node-name=<node-name>[,name=<export-name>][,writable=on|off][,bitmap=<name>] > --export > [type=]vhost-user-blk,id=<id>,node-name=<node-name>,addr.type=unix,addr.path=<socket-path>[,writable=on|off][,logical-block-size=<block-size>][,num-queues=<num-queues>] > --export > [type=]vhost-user-blk,id=<id>,node-name=<node-name>,addr.type=fd,addr.str=<fd>[,writable=on|off][,logical-block-size=<block-size>][,num-queues=<num-queues>] > - --export > [type=]fuse,id=<id>,node-name=<node-name>,mountpoint=<file>[,growable=on|off][,writable=on|off][,allow-other=on|off|auto] > + --export > [type=]fuse,id=<id>,node-name=<node-name>,mountpoint=<file>[,growable=on|off][,writable=on|off][,allow-other=on|off|auto][,uring=on|off] > --export > [type=]vduse-blk,id=<id>,node-name=<node-name>,name=<vduse-name>[,writable=on|off][,num-queues=<num-queues>][,queue-size=<queue-size>][,logical-block-size=<block-size>][,serial=<serial-number>] > > is a block export definition. ``node-name`` is the block node that should > be > @@ -111,7 +111,13 @@ Standard options: > that enabling this option as a non-root user requires enabling the > user_allow_other option in the global fuse.conf configuration file. > Setting > ``allow-other`` to auto (the default) will try enabling this option, and on > - error fall back to disabling it. > + error fall back to disabling it. Once ``uring`` is enabled > + (off by default), the initialization of FUSE-over-io_uring-related settings > + will be performed in the FUSE_INIT request handler. This setup bypasses > + the traditional /dev/fuse communication mechanism and instead uses io_uring > + for handling FUSE operations. > + > + > > The ``vduse-blk`` export type takes a ``name`` (must be unique across the > host) > to create the VDUSE device. > diff --git a/qapi/block-export.json b/qapi/block-export.json > index 9ae703ad01..7d14f3f1ba 100644 > --- a/qapi/block-export.json > +++ b/qapi/block-export.json > @@ -184,12 +184,16 @@ > # mount the export with allow_other, and if that fails, try again > # without. (since 6.1; default: auto) > # > +# @uring: If we enable uring option, it will enable FUSE over io_uring > +# feature for QEMU FUSE export. 
(default: false) > +# > # Since: 6.0 > ## > { 'struct': 'BlockExportOptionsFuse', > 'data': { 'mountpoint': 'str', > '*growable': 'bool', > - '*allow-other': 'FuseExportAllowOther' }, > + '*allow-other': 'FuseExportAllowOther', > + '*uring': 'bool' }, > 'if': 'CONFIG_FUSE' } > > ## > diff --git a/storage-daemon/qemu-storage-daemon.c > b/storage-daemon/qemu-storage-daemon.c > index eb72561358..803538db29 100644 > --- a/storage-daemon/qemu-storage-daemon.c > +++ b/storage-daemon/qemu-storage-daemon.c > @@ -107,6 +107,7 @@ static void help(void) > #ifdef CONFIG_FUSE > " --export [type=]fuse,id=<id>,node-name=<node-name>,mountpoint=<file>\n" > " [,growable=on|off][,writable=on|off][,allow-other=on|off|auto]\n" > +" [,fuse-over-uring=on|off]" > " export the specified block node over FUSE\n" > "\n" > #endif /* CONFIG_FUSE */ > diff --git a/util/fdmon-io_uring.c b/util/fdmon-io_uring.c > index d2433d1d99..68d3fe8e01 100644 > --- a/util/fdmon-io_uring.c > +++ b/util/fdmon-io_uring.c > @@ -452,10 +452,13 @@ static const FDMonOps fdmon_io_uring_ops = { > void fdmon_io_uring_setup(AioContext *ctx, Error **errp) > { > int ret; > + int flags; > > ctx->io_uring_fd_tag = NULL; > + flags = IORING_SETUP_SQE128; > > - ret = io_uring_queue_init(FDMON_IO_URING_ENTRIES, &ctx->fdmon_io_uring, > 0); > + ret = io_uring_queue_init(FDMON_IO_URING_ENTRIES, > + &ctx->fdmon_io_uring, flags); > if (ret != 0) { > error_setg_errno(errp, -ret, "Failed to initialize io_uring"); > return; > -- > 2.50.1 >
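Coming back to the duplicated opcode switch in fuse_uring_co_process_request(): if the differences really are only where the request header and payload live and how the reply is sent, then maybe both paths could call a shared dispatcher. A rough sketch (fuse_co_dispatch_request is a hypothetical name, not an existing function):

    /*
     * Decode one FUSE request given its opcode, input arguments, and write
     * payload, and produce the output header (and, for reads, a data
     * buffer). Could be used by both the /dev/fuse and FUSE-over-io_uring
     * paths.
     */
    static ssize_t coroutine_fn
    fuse_co_dispatch_request(FuseExport *exp, uint32_t opcode,
                             const void *op_in, void *out_op_hdr,
                             void **out_data_buffer, const void *write_buf);

fuse_co_process_request() and fuse_uring_co_process_request() would then only differ in how they locate op_in/write_buf and in how they send the reply.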
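Also, several conditionals are missing braces, which QEMU coding style (enforced by scripts/checkpatch.pl) requires even for single statements, e.g.:

    if (exp->is_uring) {
        fuse_export_delete_uring(exp);
    }

The same applies to the 'if (exp->is_uring)' in fuse_co_init() and the 'if (in_place_buf)'/'if (out_data_buffer)' qemu_vfree() calls.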
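One inconsistency I noticed: qapi/block-export.json adds the option as '*uring' and qemu-storage-daemon.rst documents ',uring=on|off', but the help() text in qemu-storage-daemon.c prints ',fuse-over-uring=on|off'. These should all use the same name. The added help line is also missing its trailing \n, so the output runs into the following line; something like:

    " [,growable=on|off][,writable=on|off][,allow-other=on|off|auto]\n"
    " [,uring=on|off]\n"

While at it, the @uring QAPI documentation could be worded more directly, e.g. "@uring: Use FUSE-over-io_uring for processing requests. (default: false)".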
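In util/fdmon-io_uring.c, a comment would help explain why IORING_SETUP_SQE128 is now set - presumably because struct fuse_uring_cmd_req does not fit into the cmd area of a regular SQE:

    /*
     * FUSE-over-io_uring submits IORING_OP_URING_CMD SQEs whose payload
     * (struct fuse_uring_cmd_req) is larger than the 16 bytes available
     * in a regular SQE, so 128-byte SQEs are required.
     */
    flags = IORING_SETUP_SQE128;

It's also worth thinking about whether every AioContext should pay for the doubled SQ ring size even when no FUSE export is active.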