On Wed, May 28, 2025 at 03:09:13PM -0400, Stefan Hajnoczi wrote: > io_uring may not be available at runtime due to system policies (e.g. > the io_uring_disabled sysctl) or creation could fail due to file > descriptor resource limits. > > Handle failure scenarios as follows: > > If another AioContext already has io_uring, then fail AioContext > creation so that the aio_add_sqe() API is available uniformly from all > QEMU threads. Otherwise fall back to epoll(7) if io_uring is > unavailable. > > Notes: > - Update the comment about selecting the fastest fdmon implementation. > At this point it's not about speed anymore, it's about aio_add_sqe() > API availability. > - Uppercase the error message when converting from error_report() to > error_setg_errno() for consistency (but there are instances of > lowercase in the codebase). > - It's easier to move the #ifdefs from aio-posix.h to aio-posix.c. > > Signed-off-by: Stefan Hajnoczi <stefa...@redhat.com> > --- > util/aio-posix.h | 12 ++---------- > util/aio-posix.c | 29 ++++++++++++++++++++++++++--- > util/fdmon-io_uring.c | 8 ++++---- > 3 files changed, 32 insertions(+), 17 deletions(-) > > diff --git a/util/aio-posix.h b/util/aio-posix.h > index f9994ed79e..6f9d97d866 100644 > --- a/util/aio-posix.h > +++ b/util/aio-posix.h > @@ -18,6 +18,7 @@ > #define AIO_POSIX_H > > #include "block/aio.h" > +#include "qapi/error.h" > > struct AioHandler { > GPollFD pfd; > @@ -72,17 +73,8 @@ static inline void fdmon_epoll_disable(AioContext *ctx) > #endif /* !CONFIG_EPOLL_CREATE1 */ > > #ifdef CONFIG_LINUX_IO_URING > -bool fdmon_io_uring_setup(AioContext *ctx); > +void fdmon_io_uring_setup(AioContext *ctx, Error **errp);
Why change the return type to void? That forces you to have to check errp. If you still return bool (true for errp unchanged, false when errp set), callers might have a simpler interface. > void fdmon_io_uring_destroy(AioContext *ctx); > -#else > -static inline bool fdmon_io_uring_setup(AioContext *ctx) > -{ > - return false; > -} > - > -static inline void fdmon_io_uring_destroy(AioContext *ctx) > -{ > -} > #endif /* !CONFIG_LINUX_IO_URING */ > > #endif /* AIO_POSIX_H */ > diff --git a/util/aio-posix.c b/util/aio-posix.c > index fa047fc7ad..44b3df61f9 100644 > --- a/util/aio-posix.c > +++ b/util/aio-posix.c > @@ -16,6 +16,7 @@ > #include "qemu/osdep.h" > #include "block/block.h" > #include "block/thread-pool.h" > +#include "qapi/error.h" > #include "qemu/main-loop.h" > #include "qemu/lockcnt.h" > #include "qemu/rcu.h" > @@ -717,17 +718,39 @@ void aio_context_setup(AioContext *ctx, Error **errp) > ctx->epollfd = -1; > ctx->epollfd_tag = NULL; > > - /* Use the fastest fd monitoring implementation if available */ > - if (fdmon_io_uring_setup(ctx)) { > - return; > +#ifdef CONFIG_LINUX_IO_URING > + { > + static bool need_io_uring; > + Error *local_err = NULL; /* ERRP_GUARD() doesn't handle error_abort > */ Really? I thought that was part of why we added ERRP_GUARD, so that error_abort is pinned closer to the spot where the error is triggered rather than where it was chained. But your use of errp is a bit different than usual here, so you may be correct that it doesn't handle error_abort in the way that you want (allowing a graceful downgrade to epoll if it is the first failure, but aborting if it is the second AioContext that fails). > + > + /* io_uring takes precedence because it provides aio_add_sqe() > support */ > + fdmon_io_uring_setup(ctx, &local_err); > + if (!local_err) { > + /* > + * If one AioContext gets io_uring, then all AioContexts need > io_uring > + * so that aio_add_sqe() support is available across all threads. > + */ > + need_io_uring = true; > + return; > + } > + if (need_io_uring) { > + error_propagate(errp, local_err); > + return; > + } > + > + warn_report_err_once(local_err); /* frees local_err */ > + local_err = NULL; > } > +#endif /* CONFIG_LINUX_IO_URING */ > > fdmon_epoll_setup(ctx); > } > > void aio_context_destroy(AioContext *ctx) > { > +#ifdef CONFIG_LINUX_IO_URING > fdmon_io_uring_destroy(ctx); > +#endif > > qemu_lockcnt_lock(&ctx->list_lock); > fdmon_epoll_disable(ctx); > diff --git a/util/fdmon-io_uring.c b/util/fdmon-io_uring.c > index 2092d08d24..ef1a866a03 100644 > --- a/util/fdmon-io_uring.c > +++ b/util/fdmon-io_uring.c > @@ -45,6 +45,7 @@ > > #include "qemu/osdep.h" > #include <poll.h> > +#include "qapi/error.h" > #include "qemu/rcu_queue.h" > #include "aio-posix.h" > > @@ -361,7 +362,7 @@ static const FDMonOps fdmon_io_uring_ops = { > .gsource_dispatch = fdmon_io_uring_gsource_dispatch, > }; > > -bool fdmon_io_uring_setup(AioContext *ctx) > +void fdmon_io_uring_setup(AioContext *ctx, Error **errp) > { > int ret; > > @@ -369,15 +370,14 @@ bool fdmon_io_uring_setup(AioContext *ctx) > > ret = io_uring_queue_init(FDMON_IO_URING_ENTRIES, &ctx->fdmon_io_uring, > 0); > if (ret != 0) { > - return false; > + error_setg_errno(errp, -ret, "Failed to initialize io_uring"); > + return; This is where I question whether you should still return bool. > } > > QSLIST_INIT(&ctx->submit_list); > ctx->fdmon_ops = &fdmon_io_uring_ops; > ctx->io_uring_fd_tag = g_source_add_unix_fd(&ctx->source, > ctx->fdmon_io_uring.ring_fd, G_IO_IN); > - > - return true; > } > > void fdmon_io_uring_destroy(AioContext *ctx) > -- > 2.49.0 > > -- Eric Blake, Principal Software Engineer Red Hat, Inc. Virtualization: qemu.org | libguestfs.org