This patch introduces a Linux-aio backend that is disabled by default. To use this backend effectively, the user should disable caching and select it with the appropriate -aio option. For instance:
qemu-system-x86_64 -drive foo.img,cache=off -aio linux There's no universal way to wait asynchronously with linux-aio. At some point, signals were added to signal completion. More recently, an eventfd interface was added. This patch relies on the latter. We try hard to detect whether the right support is available in configure to avoid compile failures. Signed-off-by: Anthony Liguori <[EMAIL PROTECTED]> diff --git a/Makefile.target b/Makefile.target index f635d68..289887c 100644 --- a/Makefile.target +++ b/Makefile.target @@ -487,6 +487,9 @@ OBJS+=block-raw-win32.o else OBJS+=block-raw-posix.o aio-posix.o endif +ifdef CONFIG_LINUX_AIO +OBJS+=aio-linux.o +endif LIBS+=-lz ifdef CONFIG_ALSA diff --git a/aio-linux.c b/aio-linux.c new file mode 100644 index 0000000..f5c222b --- /dev/null +++ b/aio-linux.c @@ -0,0 +1,210 @@ +/* + * QEMU Linux AIO Support + * + * Copyright IBM, Corp. 2008 + * + * Authors: + * Anthony Liguori <[EMAIL PROTECTED]> + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. 
+ * + */ + +#include "qemu-common.h" +#include "qemu-char.h" +#include "block.h" +#include "block_int.h" +#include "block-aio.h" +#include "sysemu.h" + +#include <sys/types.h> +#include <sys/syscall.h> +#include <linux/aio_abi.h> + +int eventfd(unsigned int initval) +{ + return syscall(SYS_eventfd, initval); +} + +int io_setup(unsigned nr_reqs, aio_context_t *ctx_id) +{ + return syscall(SYS_io_setup, nr_reqs, ctx_id); +} + +int io_destroy(aio_context_t ctx_id) +{ + return syscall(SYS_io_destroy, ctx_id); +} + +int io_getevents(aio_context_t ctx_id, long min_nr, long nr, + struct io_event *events, struct timespec *timeout) +{ + return syscall(SYS_io_getevents, ctx_id, min_nr, nr, events, timeout); +} + +int io_submit(aio_context_t ctx_id, long nr, struct iocb **iocb) +{ + return syscall(SYS_io_submit, ctx_id, nr, iocb); +} + +int io_cancel(aio_context_t ctx_id, struct iocb *iocb, struct io_event *result) +{ + return syscall(SYS_io_cancel, ctx_id, iocb, result); +} + +typedef struct LinuxAIOCB { + BlockDriverAIOCB common; + struct iocb iocb; +} LinuxAIOCB; + +static int aio_efd; +static aio_context_t aio_ctxt_id; +static int outstanding_requests; + +static BlockDriverAIOCB *la_submit(BlockDriverState *bs, + int fd, int64_t sector_num, + void *buf, int nb_sectors, int write, + BlockDriverCompletionFunc *cb, + void *opaque) +{ + LinuxAIOCB *aiocb; + struct iocb *iocbs[1]; + int err; + + aiocb = qemu_aio_get(bs, cb, opaque); + if (!aiocb) { + printf("returning null??\n"); + return NULL; + } + + if (write) + aiocb->iocb.aio_lio_opcode = IOCB_CMD_PWRITE; + else + aiocb->iocb.aio_lio_opcode = IOCB_CMD_PREAD; + + aiocb->iocb.aio_data = (unsigned long)aiocb; + aiocb->iocb.aio_fildes = fd; + aiocb->iocb.aio_flags = IOCB_FLAG_RESFD; + aiocb->iocb.aio_resfd = aio_efd; + aiocb->iocb.aio_buf = (unsigned long)buf; + aiocb->iocb.aio_nbytes = nb_sectors * 512; + aiocb->iocb.aio_offset = sector_num * 512; + + iocbs[0] = &aiocb->iocb; + + do { + err = io_submit(aio_ctxt_id, 1, iocbs); 
+ } while (err == -1 && errno == EINTR); + + if (err != 1) { + fprintf(stderr, "failed to submit aio request: %m\n"); + exit(1); + } + + outstanding_requests++; + + return &aiocb->common; +} + +static void la_wait(void) +{ + main_loop_wait(10); +} + +static void la_flush(void) +{ + while (outstanding_requests) + la_wait(); +} + +static void la_cancel(BlockDriverAIOCB *baiocb) +{ + LinuxAIOCB *aiocb = (void *)baiocb; + struct io_event result; + int err; + + do { + err = io_cancel(aio_ctxt_id, &aiocb->iocb, &result); + } while (err == -1 && errno == EINTR); + + /* it may have happened... we probably should check and complete */ + + outstanding_requests--; + + qemu_aio_release(aiocb); +} + +static void la_completion(void *opaque) +{ + struct io_event events[256]; + struct timespec ts = {0, 0}; + uint64_t count; + int i, ret; + + do { + ret = read(aio_efd, &count, sizeof(count)); + } while (ret == -1 && errno == EINTR); + + if (ret != 8) { + fprintf(stderr, "bad read from eventfd\n"); + exit(1); + } + + do { + ret = io_getevents(aio_ctxt_id, count, ARRAY_SIZE(events), + events, &ts); + } while (ret == -1 && errno == EINTR); + + if (ret < count) { + fprintf(stderr, "io_getevents failed\n"); + exit(1); + } + + for (i = 0; i < ret; i++) { + LinuxAIOCB *aiocb; + int res; + + aiocb = (LinuxAIOCB *)(unsigned long)events[i].data; + res = events[i].res; + + if (res > 0) + res = 0; + + aiocb->common.cb(aiocb->common.opaque, res); + qemu_aio_release(aiocb); + + outstanding_requests--; + } +} + +static void la_init(void) +{ + aio_efd = eventfd(0); + if (aio_efd == -1) { + fprintf(stderr, "failed to allocate aio fd\n"); + exit(1); + } + + if (io_setup(256, &aio_ctxt_id) == -1) { + fprintf(stderr, "failed to initialize linux aio\n"); + exit(1); + } + + qemu_set_fd_handler2(aio_efd, NULL, la_completion, NULL, NULL); +} + +static AIODriver linux_aio_drv = { + .name = "linux", + .aiocb_size = sizeof(LinuxAIOCB), + .aio_init = la_init, + .aio_wait = la_wait, + .aio_flush = la_flush, + 
.aio_submit = la_submit, + .aio_cancel = la_cancel, +}; + +int linux_aio_init(void) +{ + return qemu_register_aio(&linux_aio_drv); +} diff --git a/block-aio.h b/block-aio.h index 2fe8c58..6e82cb5 100644 --- a/block-aio.h +++ b/block-aio.h @@ -42,5 +42,6 @@ int qemu_set_aio_driver(const char *name); extern AIODriver *aio_drv; int posix_aio_init(void); +int linux_aio_init(void); #endif diff --git a/block.c b/block.c index 44cb747..259bf3a 100644 --- a/block.c +++ b/block.c @@ -1349,6 +1349,11 @@ void bdrv_init(void) bdrv_register(&bdrv_qcow2); bdrv_register(&bdrv_parallels); #ifndef _WIN32 +#ifndef QEMU_IMG +#ifdef CONFIG_LINUX_AIO + linux_aio_init(); +#endif +#endif posix_aio_init(); #endif } diff --git a/configure b/configure index 85cb68a..95fb88f 100755 --- a/configure +++ b/configure @@ -109,6 +109,7 @@ darwin_user="no" build_docs="no" uname_release="" curses="yes" +linux_aio="yes" # OS specific targetos=`uname -s` @@ -326,6 +327,8 @@ for opt do ;; --disable-curses) curses="no" ;; + --disable-linux-aio) linux_aio="no" + ;; *) echo "ERROR: unknown option $opt"; show_help="yes" ;; esac @@ -418,6 +421,7 @@ echo " --enable-fmod enable FMOD audio driver" echo " --enable-dsound enable DirectSound audio driver" echo " --disable-vnc-tls disable TLS encryption for VNC server" echo " --disable-curses disable curses output" +echo " --disable-linux-aio disable Linux AIO support" echo " --enable-system enable all system emulation targets" echo " --disable-system disable all system emulation targets" echo " --enable-linux-user enable all linux usermode emulation targets" @@ -687,6 +691,24 @@ EOF fi fi # test "$curses" +# linux aio probe + +if test "$linux_aio" = "yes" ; then + linux_aio=no + cat > $TMPC <<EOF +#include <linux/aio_abi.h> +#include <unistd.h> +#include <sys/syscall.h> +#ifndef SYS_eventfd +#error No eventfd support +#endif +int main(void) { struct iocb iocb; (void)iocb.aio_resfd; return 0; } +EOF + if $cc $ARCH_CFLAGS -o $TMPE $TMPC 2> /dev/null ; then + 
linux_aio=yes + fi +fi + # Check if tools are available to build documentation. if [ -x "`which texi2html 2>/dev/null`" ] && \ [ -x "`which pod2man 2>/dev/null`" ]; then @@ -738,6 +760,7 @@ echo "SDL support $sdl" if test "$sdl" != "no" ; then echo "SDL static link $sdl_static" fi +echo "Linux AIO support $linux_aio" echo "curses support $curses" echo "mingw32 support $mingw32" echo "Adlib support $adlib" @@ -1001,6 +1024,10 @@ if test "$curses" = "yes" ; then echo "CONFIG_CURSES=yes" >> $config_mak echo "CURSES_LIBS=-lcurses" >> $config_mak fi +if test "$linux_aio" = "yes" ; then + echo "#define CONFIG_LINUX_AIO 1" >> $config_h + echo "CONFIG_LINUX_AIO=yes" >> $config_mak +fi # XXX: suppress that if [ "$bsd" = "yes" ] ; then ------------------------------------------------------------------------- This SF.net email is sponsored by the 2008 JavaOne(SM) Conference Don't miss this year's exciting event. There's still time to save $100. Use priority code J8TL2D2. http://ad.doubleclick.net/clk;198757673;13503038;p?http://java.sun.com/javaone _______________________________________________ kvm-devel mailing list kvm-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/kvm-devel