[Qemu-block] [PATCH v4 3/6] qemu-io: Allow unaligned access by default
There's no reason to require the user to specify a flag just so they can pass in unaligned numbers. Keep 'read -p' and 'write -p' as no-ops so that I don't have to hunt down and update all users of qemu-io, but otherwise make their behavior default as 'read' and 'write'. Also fix 'write -z', 'readv', 'writev', 'writev', 'aio_read', 'aio_write', and 'aio_write -z'. For now, 'read -b', 'multiwrite', 'write -b', and 'write -c' still require alignment. qemu-iotest 23 is updated to match, as the only test that was previously explicitly expecting an error on an unaligned request. Signed-off-by: Eric Blake--- qemu-io-cmds.c | 63 +- tests/qemu-iotests/023.out | 2160 +--- 2 files changed, 1452 insertions(+), 771 deletions(-) diff --git a/qemu-io-cmds.c b/qemu-io-cmds.c index dc6b0dc..8bcf742 100644 --- a/qemu-io-cmds.c +++ b/qemu-io-cmds.c @@ -395,12 +395,6 @@ create_iovec(BlockBackend *blk, QEMUIOVector *qiov, char **argv, int nr_iov, goto fail; } -if (len & 0x1ff) { -printf("length argument %" PRId64 - " is not sector aligned\n", len); -goto fail; -} - sizes[i] = len; count += len; } @@ -634,7 +628,7 @@ static void read_help(void) " -b, -- read from the VM state rather than the virtual disk\n" " -C, -- report statistics in a machine parsable format\n" " -l, -- length for pattern verification (only with -P)\n" -" -p, -- allow unaligned access\n" +" -p, -- ignored for back-compat\n" " -P, -- use a pattern to verify read data\n" " -q, -- quiet mode, do not show I/O statistics\n" " -s, -- start offset for pattern verification (only with -P)\n" @@ -650,7 +644,7 @@ static const cmdinfo_t read_cmd = { .cfunc = read_f, .argmin = 2, .argmax = -1, -.args = "[-abCpqv] [-P pattern [-s off] [-l len]] off len", +.args = "[-abCqv] [-P pattern [-s off] [-l len]] off len", .oneline= "reads a number of bytes at a specified offset", .help = read_help, }; @@ -658,7 +652,7 @@ static const cmdinfo_t read_cmd = { static int read_f(BlockBackend *blk, int argc, char **argv) { struct timeval t1, t2; -bool Cflag = false, pflag = false, qflag = false, vflag = false; +bool Cflag = false, qflag = false, vflag = false; bool Pflag = false, sflag = false, lflag = false, bflag = false; int c, cnt; char *buf; @@ -686,7 +680,7 @@ static int read_f(BlockBackend *blk, int argc, char **argv) } break; case 'p': -pflag = true; +/* Ignored for back-compat */ break; case 'P': Pflag = true; @@ -718,11 +712,6 @@ static int read_f(BlockBackend *blk, int argc, char **argv) return qemuio_command_usage(_cmd); } -if (bflag && pflag) { -printf("-b and -p cannot be specified at the same time\n"); -return 0; -} - offset = cvtnum(argv[optind]); if (offset < 0) { print_cvtnum_err(offset, argv[optind]); @@ -753,7 +742,7 @@ static int read_f(BlockBackend *blk, int argc, char **argv) return 0; } -if (!pflag) { +if (bflag) { if (offset & 0x1ff) { printf("offset %" PRId64 " is not sector aligned\n", offset); @@ -890,12 +879,6 @@ static int readv_f(BlockBackend *blk, int argc, char **argv) } optind++; -if (offset & 0x1ff) { -printf("offset %" PRId64 " is not sector aligned\n", - offset); -return 0; -} - nr_iov = argc - optind; buf = create_iovec(blk, , [optind], nr_iov, 0xab); if (buf == NULL) { @@ -952,7 +935,7 @@ static void write_help(void) " filled with a set pattern (0xcdcdcdcd).\n" " -b, -- write to the VM state rather than the virtual disk\n" " -c, -- write compressed data with blk_write_compressed\n" -" -p, -- allow unaligned access\n" +" -p, -- ignored for back-compat\n" " -P, -- use different pattern to fill file\n" " -C, -- report statistics in a machine parsable format\n" " -q, -- quiet mode, do not show I/O statistics\n" @@ -968,7 +951,7 @@ static const cmdinfo_t write_cmd = { .cfunc = write_f, .argmin = 2, .argmax = -1, -.args = "[-bcCpqz] [-P pattern ] off len", +.args = "[-bcCqz] [-P pattern ] off len", .oneline= "writes a number of bytes at a specified offset", .help = write_help, }; @@ -976,7 +959,7 @@ static const cmdinfo_t write_cmd = { static int write_f(BlockBackend *blk, int argc, char **argv) { struct timeval t1, t2; -bool Cflag = false, pflag = false, qflag = false, bflag = false; +bool Cflag = false, qflag = false, bflag = false; bool Pflag = false, zflag = false, cflag = false; int c, cnt; char *buf = NULL; @@ -998,7 +981,7 @@ static int write_f(BlockBackend *blk, int argc,
[Qemu-block] [PATCH v4 4/6] qemu-io: Add 'write -f' to test FUA flag
Make it easier to test block drivers with BDRV_REQ_FUA in .supported_write_flags, by adding the '-f' flag to qemu-io to conditionally pass the flag through to specific writes ('write', 'write -z', 'writev', 'aio_write', 'aio_write -z'). You'll want to use 'qemu-io -t none' to actually make -f useful (as otherwise, the default writethrough mode automatically sets the FUA bit on every write). Signed-off-by: Eric Blake--- qemu-io-cmds.c | 57 + 1 file changed, 41 insertions(+), 16 deletions(-) diff --git a/qemu-io-cmds.c b/qemu-io-cmds.c index 8bcf742..ba811fe 100644 --- a/qemu-io-cmds.c +++ b/qemu-io-cmds.c @@ -428,13 +428,13 @@ static int do_pread(BlockBackend *blk, char *buf, int64_t offset, } static int do_pwrite(BlockBackend *blk, char *buf, int64_t offset, - int64_t count, int64_t *total) + int64_t count, int flags, int64_t *total) { if (count > INT_MAX) { return -ERANGE; } -*total = blk_pwrite(blk, offset, (uint8_t *)buf, count, 0); +*total = blk_pwrite(blk, offset, (uint8_t *)buf, count, flags); if (*total < 0) { return *total; } @@ -446,6 +446,7 @@ typedef struct { int64_t offset; int64_t count; int64_t *total; +int flags; int ret; bool done; } CoWriteZeroes; @@ -454,7 +455,8 @@ static void coroutine_fn co_write_zeroes_entry(void *opaque) { CoWriteZeroes *data = opaque; -data->ret = blk_co_write_zeroes(data->blk, data->offset, data->count, 0); +data->ret = blk_co_write_zeroes(data->blk, data->offset, data->count, +data->flags); data->done = true; if (data->ret < 0) { *data->total = data->ret; @@ -465,7 +467,7 @@ static void coroutine_fn co_write_zeroes_entry(void *opaque) } static int do_co_write_zeroes(BlockBackend *blk, int64_t offset, int64_t count, - int64_t *total) + int flags, int64_t *total) { Coroutine *co; CoWriteZeroes data = { @@ -473,6 +475,7 @@ static int do_co_write_zeroes(BlockBackend *blk, int64_t offset, int64_t count, .offset = offset, .count = count, .total = total, +.flags = flags, .done = false, }; @@ -558,11 +561,11 @@ static int do_aio_readv(BlockBackend *blk, QEMUIOVector *qiov, } static int do_aio_writev(BlockBackend *blk, QEMUIOVector *qiov, - int64_t offset, int *total) + int64_t offset, int flags, int *total) { int async_ret = NOT_DONE; -blk_aio_pwritev(blk, offset, qiov, 0, aio_rw_done, _ret); +blk_aio_pwritev(blk, offset, qiov, flags, aio_rw_done, _ret); while (async_ret == NOT_DONE) { main_loop_wait(false); } @@ -935,6 +938,7 @@ static void write_help(void) " filled with a set pattern (0xcdcdcdcd).\n" " -b, -- write to the VM state rather than the virtual disk\n" " -c, -- write compressed data with blk_write_compressed\n" +" -f, -- use Force Unit Access semantics\n" " -p, -- ignored for back-compat\n" " -P, -- use different pattern to fill file\n" " -C, -- report statistics in a machine parsable format\n" @@ -951,7 +955,7 @@ static const cmdinfo_t write_cmd = { .cfunc = write_f, .argmin = 2, .argmax = -1, -.args = "[-bcCqz] [-P pattern ] off len", +.args = "[-bcCfqz] [-P pattern ] off len", .oneline= "writes a number of bytes at a specified offset", .help = write_help, }; @@ -961,6 +965,7 @@ static int write_f(BlockBackend *blk, int argc, char **argv) struct timeval t1, t2; bool Cflag = false, qflag = false, bflag = false; bool Pflag = false, zflag = false, cflag = false; +int flags = 0; int c, cnt; char *buf = NULL; int64_t offset; @@ -969,7 +974,7 @@ static int write_f(BlockBackend *blk, int argc, char **argv) int64_t total = 0; int pattern = 0xcd; -while ((c = getopt(argc, argv, "bcCpP:qz")) != -1) { +while ((c = getopt(argc, argv, "bcCfpP:qz")) != -1) { switch (c) { case 'b': bflag = true; @@ -980,6 +985,9 @@ static int write_f(BlockBackend *blk, int argc, char **argv) case 'C': Cflag = true; break; +case 'f': +flags |= BDRV_REQ_FUA; +break; case 'p': /* Ignored for back-compat */ break; @@ -1010,6 +1018,11 @@ static int write_f(BlockBackend *blk, int argc, char **argv) return 0; } +if ((flags & BDRV_REQ_FUA) && (bflag + cflag)) { +printf("-f and -b or -c cannot be specified at the same time\n"); +return 0; +} + if (zflag && Pflag) { printf("-z and -P cannot be specified at the same time\n"); return 0; @@ -1054,11 +1067,11 @@ static int write_f(BlockBackend *blk, int
[Qemu-block] [PATCH v4 5/6] qemu-io: Add 'open -u' to set BDRV_O_UNMAP after the fact
When opening a file from the command line, qemu-io defaults to BDRV_O_UNMAP but allows -d to give full control to disable unmaps. But when opening via the 'open' command, qemu-io did not set BDRV_O_UNMAP, and had no way to allow it. Make it at least possible to symmetrically test things: 'qemu-io -d ignore' at the CLI now matches 'qemu-io> open' in batch mode, and 'qemu-io' or 'qemu-io -d unmap' at the CLI matches 'qemu-io> open -u'. Signed-off-by: Eric Blake--- qemu-io.c | 8 ++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/qemu-io.c b/qemu-io.c index 4aba7e0..2196159 100644 --- a/qemu-io.c +++ b/qemu-io.c @@ -107,6 +107,7 @@ static void open_help(void) " -r, -- open file read-only\n" " -s, -- use snapshot file\n" " -n, -- disable host cache\n" +" -u, -- allow discard and zero operations to unmap\n" " -o, -- options to be given to the block driver" "\n"); } @@ -120,7 +121,7 @@ static const cmdinfo_t open_cmd = { .argmin = 1, .argmax = -1, .flags = CMD_NOFILE_OK, -.args = "[-Crsn] [-o options] [path]", +.args = "[-Crsnu] [-o options] [path]", .oneline= "open the file specified by path", .help = open_help, }; @@ -144,7 +145,7 @@ static int open_f(BlockBackend *blk, int argc, char **argv) QemuOpts *qopts; QDict *opts; -while ((c = getopt(argc, argv, "snrgo:")) != -1) { +while ((c = getopt(argc, argv, "snrguo:")) != -1) { switch (c) { case 's': flags |= BDRV_O_SNAPSHOT; @@ -156,6 +157,9 @@ static int open_f(BlockBackend *blk, int argc, char **argv) case 'r': readonly = 1; break; +case 'u': +flags |= BDRV_O_UNMAP; +break; case 'o': if (imageOpts) { printf("--image-opts and 'open -o' are mutually exclusive\n"); -- 2.5.5
[Qemu-block] [PATCH v4 0/6] qemu-io: UI enhancements
While working on NBD, I found myself cursing the qemu-io UI for not letting me test various scenarios, particularly after fixing NBD to serve at byte granularity [1]. And in the process of writing these qemu-io enhancements, I also managed to flush out several other bugs in the block layer proper, with fixes posted separately, such as loss of BDRV_REQ_FUA during write_zeroes [2] 2.7 material, depends on Kevin's block-next: git://repo.or.cz/qemu/kevin.git block-next and on my pending "block: kill sector-based blk_write/read" https://lists.gnu.org/archive/html/qemu-devel/2016-05/msg00557.html Previously posted as part of a larger NBD series [3] at v3, hence this series starts life at v4. [1] commit df7b97ff [2] https://lists.gnu.org/archive/html/qemu-devel/2016-05/msg00285.html [3] https://lists.gnu.org/archive/html/qemu-devel/2016-04/msg03526.html Also available as a tag at this location: git fetch git://repo.or.cz/qemu/ericb.git nbd-qemu-io-v4 Changes since then: More cleanups Include readv/writev and aio_read/aio_write. Update qemu-iotests 23 to match new 'write' semantics [kwolf] 001/6:[] [--] 'qemu-io: Add missing option documentation' 002/6:[down] 'qemu-io: Use bool for command line flags' 003/6:[down] 'qemu-io: Allow unaligned access by default' 004/6:[0056] [FC] 'qemu-io: Add 'write -f' to test FUA flag' 005/6:[] [--] 'qemu-io: Add 'open -u' to set BDRV_O_UNMAP after the fact' 006/6:[0026] [FC] 'qemu-io: Add 'write -z -u' to test MAY_UNMAP flag' Eric Blake (6): qemu-io: Add missing option documentation qemu-io: Use bool for command line flags qemu-io: Allow unaligned access by default qemu-io: Add 'write -f' to test FUA flag qemu-io: Add 'open -u' to set BDRV_O_UNMAP after the fact qemu-io: Add 'write -z -u' to test MAY_UNMAP flag qemu-io-cmds.c | 222 ++--- qemu-io.c | 12 +- tests/qemu-iotests/023.out | 2160 +--- 3 files changed, 1562 insertions(+), 832 deletions(-) -- 2.5.5
[Qemu-block] [PATCH v4 6/6] qemu-io: Add 'write -z -u' to test MAY_UNMAP flag
Make it easier to control whether the BDRV_REQ_MAY_UNMAP flag can be passed through a write_zeroes command, by adding the '-u' flag to qemu-io 'write -z' and 'aio_write -z'. To be useful, the device has to be opened with 'qemu-io -d unmap' (or the just-added 'open -u' subcommand). Signed-off-by: Eric Blake--- qemu-io-cmds.c | 24 +--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/qemu-io-cmds.c b/qemu-io-cmds.c index ba811fe..e71bc5c 100644 --- a/qemu-io-cmds.c +++ b/qemu-io-cmds.c @@ -943,6 +943,7 @@ static void write_help(void) " -P, -- use different pattern to fill file\n" " -C, -- report statistics in a machine parsable format\n" " -q, -- quiet mode, do not show I/O statistics\n" +" -u, -- with -z, allow unmapping\n" " -z, -- write zeroes using blk_co_write_zeroes\n" "\n"); } @@ -955,7 +956,7 @@ static const cmdinfo_t write_cmd = { .cfunc = write_f, .argmin = 2, .argmax = -1, -.args = "[-bcCfqz] [-P pattern ] off len", +.args = "[-bcCfquz] [-P pattern ] off len", .oneline= "writes a number of bytes at a specified offset", .help = write_help, }; @@ -974,7 +975,7 @@ static int write_f(BlockBackend *blk, int argc, char **argv) int64_t total = 0; int pattern = 0xcd; -while ((c = getopt(argc, argv, "bcCfpP:qz")) != -1) { +while ((c = getopt(argc, argv, "bcCfpP:quz")) != -1) { switch (c) { case 'b': bflag = true; @@ -1001,6 +1002,9 @@ static int write_f(BlockBackend *blk, int argc, char **argv) case 'q': qflag = true; break; +case 'u': +flags |= BDRV_REQ_MAY_UNMAP; +break; case 'z': zflag = true; break; @@ -1023,6 +1027,11 @@ static int write_f(BlockBackend *blk, int argc, char **argv) return 0; } +if ((flags & BDRV_REQ_MAY_UNMAP) && !zflag) { +printf("-u requires -z to be specified\n"); +return 0; +} + if (zflag && Pflag) { printf("-z and -P cannot be specified at the same time\n"); return 0; @@ -1561,6 +1570,7 @@ static void aio_write_help(void) " -C, -- report statistics in a machine parsable format\n" " -f, -- use Force Unit Access semantics\n" " -q, -- quiet mode, do not show I/O statistics\n" +" -u, -- with -z, allow unmapping\n" " -z, -- write zeroes using blk_aio_write_zeroes\n" "\n"); } @@ -1572,7 +1582,7 @@ static const cmdinfo_t aio_write_cmd = { .cfunc = aio_write_f, .argmin = 2, .argmax = -1, -.args = "[-Cfqz] [-P pattern ] off len [len..]", +.args = "[-Cfquz] [-P pattern ] off len [len..]", .oneline= "asynchronously writes a number of bytes", .help = aio_write_help, }; @@ -1596,6 +1606,9 @@ static int aio_write_f(BlockBackend *blk, int argc, char **argv) case 'q': ctx->qflag = true; break; +case 'u': +flags |= BDRV_REQ_MAY_UNMAP; +break; case 'P': pattern = parse_pattern(optarg); if (pattern < 0) { @@ -1623,6 +1636,11 @@ static int aio_write_f(BlockBackend *blk, int argc, char **argv) return 0; } +if ((flags & BDRV_REQ_MAY_UNMAP) && !ctx->zflag) { +printf("-u requires -z to be specified\n"); +return 0; +} + if (ctx->zflag && ctx->Pflag) { printf("-z and -P cannot be specified at the same time\n"); g_free(ctx); -- 2.5.5
[Qemu-block] [PATCH v4 1/6] qemu-io: Add missing option documentation
Commit 499afa2 added --image-opts, but forgot to document it in --help. Likewise for commit 9e8f183 and -d/--discard. Finally, commit 10d9d75 removed -g/--growable, but forgot to cull it from the valid short options. Signed-off-by: Eric Blake--- qemu-io.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/qemu-io.c b/qemu-io.c index 0598251..4aba7e0 100644 --- a/qemu-io.c +++ b/qemu-io.c @@ -221,6 +221,7 @@ static void usage(const char *name) "\n" " --object OBJECTDEF define an object such as 'secret' for\n" " passwords and/or encryption keys\n" +" --image-opts treat file as option string\n" " -c, --cmd STRING execute command with its arguments\n" " from the given string\n" " -f, --format FMT specifies the block driver to use\n" @@ -230,6 +231,7 @@ static void usage(const char *name) " -m, --misalign misalign allocations for O_DIRECT\n" " -k, --native-aio use kernel AIO implementation (on Linux only)\n" " -t, --cache=MODE use the given cache mode for the image\n" +" -d, --discard=MODE use the given discard mode for the image\n" " -T, --trace FILE enable trace events listed in the given file\n" " -h, --help display this help and exit\n" " -V, --versionoutput version information and exit\n" @@ -410,7 +412,7 @@ static QemuOptsList file_opts = { int main(int argc, char **argv) { int readonly = 0; -const char *sopt = "hVc:d:f:rsnmgkt:T:"; +const char *sopt = "hVc:d:f:rsnmkt:T:"; const struct option lopt[] = { { "help", no_argument, NULL, 'h' }, { "version", no_argument, NULL, 'V' }, -- 2.5.5
[Qemu-block] [PATCH v4 2/6] qemu-io: Use bool for command line flags
We require a C99 compiler; let's use it to express what we really mean. Signed-off-by: Eric Blake--- qemu-io-cmds.c | 94 +- 1 file changed, 47 insertions(+), 47 deletions(-) diff --git a/qemu-io-cmds.c b/qemu-io-cmds.c index 0bbbc72..dc6b0dc 100644 --- a/qemu-io-cmds.c +++ b/qemu-io-cmds.c @@ -345,7 +345,7 @@ static void dump_buffer(const void *buffer, int64_t offset, int64_t len) } static void print_report(const char *op, struct timeval *t, int64_t offset, - int64_t count, int64_t total, int cnt, int Cflag) + int64_t count, int64_t total, int cnt, bool Cflag) { char s1[64], s2[64], ts[64]; @@ -658,8 +658,8 @@ static const cmdinfo_t read_cmd = { static int read_f(BlockBackend *blk, int argc, char **argv) { struct timeval t1, t2; -int Cflag = 0, pflag = 0, qflag = 0, vflag = 0; -int Pflag = 0, sflag = 0, lflag = 0, bflag = 0; +bool Cflag = false, pflag = false, qflag = false, vflag = false; +bool Pflag = false, sflag = false, lflag = false, bflag = false; int c, cnt; char *buf; int64_t offset; @@ -672,13 +672,13 @@ static int read_f(BlockBackend *blk, int argc, char **argv) while ((c = getopt(argc, argv, "bCl:pP:qs:v")) != -1) { switch (c) { case 'b': -bflag = 1; +bflag = true; break; case 'C': -Cflag = 1; +Cflag = true; break; case 'l': -lflag = 1; +lflag = true; pattern_count = cvtnum(optarg); if (pattern_count < 0) { print_cvtnum_err(pattern_count, optarg); @@ -686,20 +686,20 @@ static int read_f(BlockBackend *blk, int argc, char **argv) } break; case 'p': -pflag = 1; +pflag = true; break; case 'P': -Pflag = 1; +Pflag = true; pattern = parse_pattern(optarg); if (pattern < 0) { return 0; } break; case 'q': -qflag = 1; +qflag = true; break; case 's': -sflag = 1; +sflag = true; pattern_offset = cvtnum(optarg); if (pattern_offset < 0) { print_cvtnum_err(pattern_offset, optarg); @@ -707,7 +707,7 @@ static int read_f(BlockBackend *blk, int argc, char **argv) } break; case 'v': -vflag = 1; +vflag = true; break; default: return qemuio_command_usage(_cmd); @@ -844,7 +844,7 @@ static const cmdinfo_t readv_cmd = { static int readv_f(BlockBackend *blk, int argc, char **argv) { struct timeval t1, t2; -int Cflag = 0, qflag = 0, vflag = 0; +bool Cflag = false, qflag = false, vflag = false; int c, cnt; char *buf; int64_t offset; @@ -853,25 +853,25 @@ static int readv_f(BlockBackend *blk, int argc, char **argv) int nr_iov; QEMUIOVector qiov; int pattern = 0; -int Pflag = 0; +bool Pflag = false; while ((c = getopt(argc, argv, "CP:qv")) != -1) { switch (c) { case 'C': -Cflag = 1; +Cflag = true; break; case 'P': -Pflag = 1; +Pflag = true; pattern = parse_pattern(optarg); if (pattern < 0) { return 0; } break; case 'q': -qflag = 1; +qflag = true; break; case 'v': -vflag = 1; +vflag = true; break; default: return qemuio_command_usage(_cmd); @@ -976,8 +976,8 @@ static const cmdinfo_t write_cmd = { static int write_f(BlockBackend *blk, int argc, char **argv) { struct timeval t1, t2; -int Cflag = 0, pflag = 0, qflag = 0, bflag = 0, Pflag = 0, zflag = 0; -int cflag = 0; +bool Cflag = false, pflag = false, qflag = false, bflag = false; +bool Pflag = false, zflag = false, cflag = false; int c, cnt; char *buf = NULL; int64_t offset; @@ -989,29 +989,29 @@ static int write_f(BlockBackend *blk, int argc, char **argv) while ((c = getopt(argc, argv, "bcCpP:qz")) != -1) { switch (c) { case 'b': -bflag = 1; +bflag = true; break; case 'c': -cflag = 1; +cflag = true; break; case 'C': -Cflag = 1; +Cflag = true; break; case 'p': -pflag = 1; +pflag = true; break; case 'P': -Pflag = 1; +Pflag = true; pattern = parse_pattern(optarg); if (pattern < 0) { return 0; }
[Qemu-block] [PATCH v6 17/20] nbd: Switch to byte-based block access
Sector-based blk_read() should die; switch to byte-based blk_pread() instead. Signed-off-by: Eric Blake--- qemu-nbd.c | 11 +++ 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/qemu-nbd.c b/qemu-nbd.c index c55b40f..c07ceef 100644 --- a/qemu-nbd.c +++ b/qemu-nbd.c @@ -159,12 +159,13 @@ static int find_partition(BlockBackend *blk, int partition, off_t *offset, off_t *size) { struct partition_record mbr[4]; -uint8_t data[512]; +uint8_t data[BDRV_SECTOR_SIZE]; int i; int ext_partnum = 4; int ret; -if ((ret = blk_read(blk, 0, data, 1)) < 0) { +ret = blk_pread(blk, 0, data, sizeof(data)); +if (ret < 0) { error_report("error while reading: %s", strerror(-ret)); exit(EXIT_FAILURE); } @@ -182,10 +183,12 @@ static int find_partition(BlockBackend *blk, int partition, if (mbr[i].system == 0xF || mbr[i].system == 0x5) { struct partition_record ext[4]; -uint8_t data1[512]; +uint8_t data1[BDRV_SECTOR_SIZE]; int j; -if ((ret = blk_read(blk, mbr[i].start_sector_abs, data1, 1)) < 0) { +ret = blk_pread(blk, mbr[i].start_sector_abs << BDRV_SECTOR_BITS, +data1, sizeof(data1)); +if (ret < 0) { error_report("error while reading: %s", strerror(-ret)); exit(EXIT_FAILURE); } -- 2.5.5
Re: [Qemu-block] [Qemu-devel] [PATCH 1/2] block: Invalidate all children
On Wed, 05/04 12:10, Kevin Wolf wrote: > Am 19.04.2016 um 03:42 hat Fam Zheng geschrieben: > > Currently we only recurse to bs->file, which will miss the children in > > quorum > > and VMDK. > > > > Recurse into the whole subtree to avoid that. > > > > Signed-off-by: Fam Zheng> > --- > > block.c | 20 ++-- > > 1 file changed, 14 insertions(+), 6 deletions(-) > > > > diff --git a/block.c b/block.c > > index d4939b4..fa8b38f 100644 > > --- a/block.c > > +++ b/block.c > > @@ -3201,6 +3201,7 @@ void bdrv_init_with_whitelist(void) > > > > void bdrv_invalidate_cache(BlockDriverState *bs, Error **errp) > > { > > +BdrvChild *child; > > Error *local_err = NULL; > > int ret; > > > > @@ -3215,13 +3216,20 @@ void bdrv_invalidate_cache(BlockDriverState *bs, > > Error **errp) > > > > if (bs->drv->bdrv_invalidate_cache) { > > bs->drv->bdrv_invalidate_cache(bs, _err); > > -} else if (bs->file) { > > -bdrv_invalidate_cache(bs->file->bs, _err); > > The old behaviour was that we only recurse for bs->file if the block > driver doesn't have its own implementation. > > This means that in qcow2, for example, we call bdrv_invalidate_cache() > explicitly for bs->file. If we can already invalidate it here, the call > inside qcow2 and probably other drivers could go away. Yes, will update. Thanks. Fam
Re: [Qemu-block] [PATCH 2/2] block: Inactivate all children
On Wed, 05/04 12:12, Kevin Wolf wrote: > Am 19.04.2016 um 03:42 hat Fam Zheng geschrieben: > > Currently we only inactivate the top BDS. Actually bdrv_inactivate > > should be the opposite of bdrv_invalidate_cache. > > > > Recurse into the whole subtree instead. > > > > Signed-off-by: Fam Zheng> > Did you actually test this? > > I would expect that bs->drv->bdrv_inactivate() fails now (as in > assertion failure) if it has anything to flush to the image because > bs->file has already be inactivated before. I think children need to be > inactived after their parents. OK, my test apparently failed to trigger that bdrv_pwritv() path. Good catch! > > Nodes with multiple parents could actually become even more > interesting... I'll make it two passes recursion: one for calling drv->bdrv_inactivate and the other for setting BDRV_O_INACTIVATE. Fam
[Qemu-block] [PATCH v6 13/20] pflash: Switch to byte-based block access
Sector-based blk_write() should die; switch to byte-based blk_pwrite() instead. Likewise for blk_read(). Signed-off-by: Eric Blake--- hw/block/pflash_cfi01.c | 12 ++-- hw/block/pflash_cfi02.c | 12 ++-- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/hw/block/pflash_cfi01.c b/hw/block/pflash_cfi01.c index 106a775..3a1f85d 100644 --- a/hw/block/pflash_cfi01.c +++ b/hw/block/pflash_cfi01.c @@ -413,11 +413,11 @@ static void pflash_update(pflash_t *pfl, int offset, int offset_end; if (pfl->blk) { offset_end = offset + size; -/* round to sectors */ -offset = offset >> 9; -offset_end = (offset_end + 511) >> 9; -blk_write(pfl->blk, offset, pfl->storage + (offset << 9), - offset_end - offset); +/* widen to sector boundaries */ +offset = QEMU_ALIGN_DOWN(offset, BDRV_SECTOR_SIZE); +offset_end = QEMU_ALIGN_UP(offset_end, BDRV_SECTOR_SIZE); +blk_pwrite(pfl->blk, offset, pfl->storage + offset, + offset_end - offset, 0); } } @@ -739,7 +739,7 @@ static void pflash_cfi01_realize(DeviceState *dev, Error **errp) if (pfl->blk) { /* read the initial flash content */ -ret = blk_read(pfl->blk, 0, pfl->storage, total_len >> 9); +ret = blk_pread(pfl->blk, 0, pfl->storage, total_len); if (ret < 0) { vmstate_unregister_ram(>mem, DEVICE(pfl)); diff --git a/hw/block/pflash_cfi02.c b/hw/block/pflash_cfi02.c index b13172c..5f10610 100644 --- a/hw/block/pflash_cfi02.c +++ b/hw/block/pflash_cfi02.c @@ -253,11 +253,11 @@ static void pflash_update(pflash_t *pfl, int offset, int offset_end; if (pfl->blk) { offset_end = offset + size; -/* round to sectors */ -offset = offset >> 9; -offset_end = (offset_end + 511) >> 9; -blk_write(pfl->blk, offset, pfl->storage + (offset << 9), - offset_end - offset); +/* widen to sector boundaries */ +offset = QEMU_ALIGN_DOWN(offset, BDRV_SECTOR_SIZE); +offset_end = QEMU_ALIGN_UP(offset_end, BDRV_SECTOR_SIZE); +blk_pwrite(pfl->blk, offset, pfl->storage + offset, + offset_end - offset, 0); } } @@ -622,7 +622,7 @@ static void pflash_cfi02_realize(DeviceState *dev, Error **errp) pfl->chip_len = chip_len; if (pfl->blk) { /* read the initial flash content */ -ret = blk_read(pfl->blk, 0, pfl->storage, chip_len >> 9); +ret = blk_pread(pfl->blk, 0, pfl->storage, chip_len); if (ret < 0) { vmstate_unregister_ram(>orig_mem, DEVICE(pfl)); error_setg(errp, "failed to read the initial flash content"); -- 2.5.5
[Qemu-block] [PATCH v6 20/20] block: Kill unused sector-based blk_* functions
Now that there are no remaining clients, we can drop the sector-based blk_read(), blk_write(), blk_aio_readv(), and blk_aio_writev(). Sadly, there are still remaining sector-based interfaces, such as blk_*discard(), or blk_write_compressed(); those will have to wait for another day. Signed-off-by: Eric Blake--- include/sysemu/block-backend.h | 10 - block/block-backend.c | 51 -- 2 files changed, 61 deletions(-) diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h index 8d7839c..457efd9 100644 --- a/include/sysemu/block-backend.h +++ b/include/sysemu/block-backend.h @@ -90,12 +90,8 @@ void blk_attach_dev_nofail(BlockBackend *blk, void *dev); void blk_detach_dev(BlockBackend *blk, void *dev); void *blk_get_attached_dev(BlockBackend *blk); void blk_set_dev_ops(BlockBackend *blk, const BlockDevOps *ops, void *opaque); -int blk_read(BlockBackend *blk, int64_t sector_num, uint8_t *buf, - int nb_sectors); int blk_pread_unthrottled(BlockBackend *blk, int64_t offset, uint8_t *buf, int count); -int blk_write(BlockBackend *blk, int64_t sector_num, const uint8_t *buf, - int nb_sectors); int blk_write_zeroes(BlockBackend *blk, int64_t offset, int count, BdrvRequestFlags flags); BlockAIOCB *blk_aio_write_zeroes(BlockBackend *blk, int64_t offset, @@ -107,15 +103,9 @@ int blk_pwrite(BlockBackend *blk, int64_t offset, const void *buf, int count, int64_t blk_getlength(BlockBackend *blk); void blk_get_geometry(BlockBackend *blk, uint64_t *nb_sectors_ptr); int64_t blk_nb_sectors(BlockBackend *blk); -BlockAIOCB *blk_aio_readv(BlockBackend *blk, int64_t sector_num, - QEMUIOVector *iov, int nb_sectors, - BlockCompletionFunc *cb, void *opaque); BlockAIOCB *blk_aio_preadv(BlockBackend *blk, int64_t offset, QEMUIOVector *iov, BdrvRequestFlags flags, BlockCompletionFunc *cb, void *opaque); -BlockAIOCB *blk_aio_writev(BlockBackend *blk, int64_t sector_num, - QEMUIOVector *iov, int nb_sectors, - BlockCompletionFunc *cb, void *opaque); BlockAIOCB *blk_aio_pwritev(BlockBackend *blk, int64_t offset, QEMUIOVector *iov, BdrvRequestFlags flags, BlockCompletionFunc *cb, void *opaque); diff --git a/block/block-backend.c b/block/block-backend.c index 104852d..0551be4 100644 --- a/block/block-backend.c +++ b/block/block-backend.c @@ -772,24 +772,6 @@ static int blk_prw(BlockBackend *blk, int64_t offset, uint8_t *buf, return rwco.ret; } -static int blk_rw(BlockBackend *blk, int64_t sector_num, uint8_t *buf, - int nb_sectors, CoroutineEntry co_entry, - BdrvRequestFlags flags) -{ -if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) { -return -EINVAL; -} - -return blk_prw(blk, sector_num << BDRV_SECTOR_BITS, buf, - nb_sectors << BDRV_SECTOR_BITS, co_entry, flags); -} - -int blk_read(BlockBackend *blk, int64_t sector_num, uint8_t *buf, - int nb_sectors) -{ -return blk_rw(blk, sector_num, buf, nb_sectors, blk_read_entry, 0); -} - int blk_pread_unthrottled(BlockBackend *blk, int64_t offset, uint8_t *buf, int count) { @@ -807,13 +789,6 @@ int blk_pread_unthrottled(BlockBackend *blk, int64_t offset, uint8_t *buf, return ret; } -int blk_write(BlockBackend *blk, int64_t sector_num, const uint8_t *buf, - int nb_sectors) -{ -return blk_rw(blk, sector_num, (uint8_t*) buf, nb_sectors, - blk_write_entry, 0); -} - int blk_write_zeroes(BlockBackend *blk, int64_t offset, int count, BdrvRequestFlags flags) { @@ -985,19 +960,6 @@ int64_t blk_nb_sectors(BlockBackend *blk) return bdrv_nb_sectors(blk_bs(blk)); } -BlockAIOCB *blk_aio_readv(BlockBackend *blk, int64_t sector_num, - QEMUIOVector *iov, int nb_sectors, - BlockCompletionFunc *cb, void *opaque) -{ -if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) { -return blk_abort_aio_request(blk, cb, opaque, -EINVAL); -} - -assert(nb_sectors << BDRV_SECTOR_BITS == iov->size); -return blk_aio_prwv(blk, sector_num << BDRV_SECTOR_BITS, iov->size, iov, -blk_aio_read_entry, 0, cb, opaque); -} - BlockAIOCB *blk_aio_preadv(BlockBackend *blk, int64_t offset, QEMUIOVector *iov, BdrvRequestFlags flags, BlockCompletionFunc *cb, void *opaque) @@ -1006,19 +968,6 @@ BlockAIOCB *blk_aio_preadv(BlockBackend *blk, int64_t offset, blk_aio_read_entry, flags, cb, opaque); } -BlockAIOCB *blk_aio_writev(BlockBackend *blk, int64_t sector_num, -
[Qemu-block] [PATCH v6 19/20] qemu-io: Switch to byte-based block access
qemu-io is the last user of several sector-based interfaces. This patch upgrades to the new interfaces under the hood, then deletes the resulting dead code. Note that for maximum back-compat, while the -p option is no longer required to get blk_pread(), it is still needed to allow for unaligned access; this is because qemu-iotest 23 relies on qemu-io rejecting unaligned accesses without -p. A later patch may clean up the interface to be more user-friendly, but it's better to separate what's done under the hood from what the user sees. Signed-off-by: Eric Blake--- qemu-io-cmds.c | 62 ++ 1 file changed, 10 insertions(+), 52 deletions(-) diff --git a/qemu-io-cmds.c b/qemu-io-cmds.c index a3e3982..0bbbc72 100644 --- a/qemu-io-cmds.c +++ b/qemu-io-cmds.c @@ -419,40 +419,6 @@ fail: return buf; } -static int do_read(BlockBackend *blk, char *buf, int64_t offset, int64_t count, - int64_t *total) -{ -int ret; - -if (count >> 9 > INT_MAX) { -return -ERANGE; -} - -ret = blk_read(blk, offset >> 9, (uint8_t *)buf, count >> 9); -if (ret < 0) { -return ret; -} -*total = count; -return 1; -} - -static int do_write(BlockBackend *blk, char *buf, int64_t offset, int64_t count, -int64_t *total) -{ -int ret; - -if (count >> 9 > INT_MAX) { -return -ERANGE; -} - -ret = blk_write(blk, offset >> 9, (uint8_t *)buf, count >> 9); -if (ret < 0) { -return ret; -} -*total = count; -return 1; -} - static int do_pread(BlockBackend *blk, char *buf, int64_t offset, int64_t count, int64_t *total) { @@ -588,8 +554,7 @@ static int do_aio_readv(BlockBackend *blk, QEMUIOVector *qiov, { int async_ret = NOT_DONE; -blk_aio_readv(blk, offset >> 9, qiov, qiov->size >> 9, - aio_rw_done, _ret); +blk_aio_preadv(blk, offset, qiov, 0, aio_rw_done, _ret); while (async_ret == NOT_DONE) { main_loop_wait(false); } @@ -603,8 +568,7 @@ static int do_aio_writev(BlockBackend *blk, QEMUIOVector *qiov, { int async_ret = NOT_DONE; -blk_aio_writev(blk, offset >> 9, qiov, qiov->size >> 9, - aio_rw_done, _ret); +blk_aio_pwritev(blk, offset, qiov, 0, aio_rw_done, _ret); while (async_ret == NOT_DONE) { main_loop_wait(false); } @@ -670,7 +634,7 @@ static void read_help(void) " -b, -- read from the VM state rather than the virtual disk\n" " -C, -- report statistics in a machine parsable format\n" " -l, -- length for pattern verification (only with -P)\n" -" -p, -- use blk_pread to read the file\n" +" -p, -- allow unaligned access\n" " -P, -- use a pattern to verify read data\n" " -q, -- quiet mode, do not show I/O statistics\n" " -s, -- start offset for pattern verification (only with -P)\n" @@ -805,12 +769,10 @@ static int read_f(BlockBackend *blk, int argc, char **argv) buf = qemu_io_alloc(blk, count, 0xab); gettimeofday(, NULL); -if (pflag) { -cnt = do_pread(blk, buf, offset, count, ); -} else if (bflag) { +if (bflag) { cnt = do_load_vmstate(blk, buf, offset, count, ); } else { -cnt = do_read(blk, buf, offset, count, ); +cnt = do_pread(blk, buf, offset, count, ); } gettimeofday(, NULL); @@ -990,7 +952,7 @@ static void write_help(void) " filled with a set pattern (0xcdcdcdcd).\n" " -b, -- write to the VM state rather than the virtual disk\n" " -c, -- write compressed data with blk_write_compressed\n" -" -p, -- use blk_pwrite to write the file\n" +" -p, -- allow unaligned access\n" " -P, -- use different pattern to fill file\n" " -C, -- report statistics in a machine parsable format\n" " -q, -- quiet mode, do not show I/O statistics\n" @@ -1106,16 +1068,14 @@ static int write_f(BlockBackend *blk, int argc, char **argv) } gettimeofday(, NULL); -if (pflag) { -cnt = do_pwrite(blk, buf, offset, count, ); -} else if (bflag) { +if (bflag) { cnt = do_save_vmstate(blk, buf, offset, count, ); } else if (zflag) { cnt = do_co_write_zeroes(blk, offset, count, ); } else if (cflag) { cnt = do_write_compressed(blk, buf, offset, count, ); } else { -cnt = do_write(blk, buf, offset, count, ); +cnt = do_pwrite(blk, buf, offset, count, ); } gettimeofday(, NULL); @@ -1592,8 +1552,7 @@ static int aio_read_f(BlockBackend *blk, int argc, char **argv) gettimeofday(>t1, NULL); block_acct_start(blk_get_stats(blk), >acct, ctx->qiov.size, BLOCK_ACCT_READ); -blk_aio_readv(blk, ctx->offset >> 9, >qiov, - ctx->qiov.size >> 9, aio_read_done, ctx); +blk_aio_preadv(blk, ctx->offset, >qiov, 0, aio_read_done, ctx); return 0; } @@ -1717,8 +1676,7 @@ static int aio_write_f(BlockBackend *blk, int argc, char **argv)
[Qemu-block] [PATCH v6 11/20] nand: Switch to byte-based block access
Sector-based blk_write() should die; switch to byte-based blk_pwrite() instead. Likewise for blk_read(). This file is doing some complex computations to map various flash page sizes (256, 512, and 2048) atop generic uses of 512-byte sector operations. Perhaps someone will want to tidy up the file for fewer gymnastics in managing addresses and offsets, and less wasteful visits of 256-byte pages, but it was out of scope for this series, where I just went with the mechanical conversion. Signed-off-by: Eric Blake--- v5: fix missing edit --- hw/block/nand.c | 36 +++- 1 file changed, 23 insertions(+), 13 deletions(-) diff --git a/hw/block/nand.c b/hw/block/nand.c index 29c6596..c69e675 100644 --- a/hw/block/nand.c +++ b/hw/block/nand.c @@ -663,7 +663,8 @@ static void glue(nand_blk_write_, PAGE_SIZE)(NANDFlashState *s) sector = SECTOR(s->addr); off = (s->addr & PAGE_MASK) + s->offset; soff = SECTOR_OFFSET(s->addr); -if (blk_read(s->blk, sector, iobuf, PAGE_SECTORS) < 0) { +if (blk_pread(s->blk, sector << BDRV_SECTOR_BITS, iobuf, + PAGE_SECTORS << BDRV_SECTOR_BITS) < 0) { printf("%s: read error in sector %" PRIu64 "\n", __func__, sector); return; } @@ -675,21 +676,24 @@ static void glue(nand_blk_write_, PAGE_SIZE)(NANDFlashState *s) MIN(OOB_SIZE, off + s->iolen - PAGE_SIZE)); } -if (blk_write(s->blk, sector, iobuf, PAGE_SECTORS) < 0) { +if (blk_pwrite(s->blk, sector << BDRV_SECTOR_BITS, iobuf, + PAGE_SECTORS << BDRV_SECTOR_BITS, 0) < 0) { printf("%s: write error in sector %" PRIu64 "\n", __func__, sector); } } else { off = PAGE_START(s->addr) + (s->addr & PAGE_MASK) + s->offset; sector = off >> 9; soff = off & 0x1ff; -if (blk_read(s->blk, sector, iobuf, PAGE_SECTORS + 2) < 0) { +if (blk_pread(s->blk, sector << BDRV_SECTOR_BITS, iobuf, + (PAGE_SECTORS + 2) << BDRV_SECTOR_BITS) < 0) { printf("%s: read error in sector %" PRIu64 "\n", __func__, sector); return; } mem_and(iobuf + soff, s->io, s->iolen); -if (blk_write(s->blk, sector, iobuf, PAGE_SECTORS + 2) < 0) { +if (blk_pwrite(s->blk, sector << BDRV_SECTOR_BITS, iobuf, + (PAGE_SECTORS + 2) << BDRV_SECTOR_BITS, 0) < 0) { printf("%s: write error in sector %" PRIu64 "\n", __func__, sector); } } @@ -716,17 +720,20 @@ static void glue(nand_blk_erase_, PAGE_SIZE)(NANDFlashState *s) i = SECTOR(addr); page = SECTOR(addr + (1 << (ADDR_SHIFT + s->erase_shift))); for (; i < page; i ++) -if (blk_write(s->blk, i, iobuf, 1) < 0) { +if (blk_pwrite(s->blk, i << BDRV_SECTOR_BITS, iobuf, + BDRV_SECTOR_SIZE, 0) < 0) { printf("%s: write error in sector %" PRIu64 "\n", __func__, i); } } else { addr = PAGE_START(addr); page = addr >> 9; -if (blk_read(s->blk, page, iobuf, 1) < 0) { +if (blk_pread(s->blk, page << BDRV_SECTOR_BITS, iobuf, + BDRV_SECTOR_SIZE) < 0) { printf("%s: read error in sector %" PRIu64 "\n", __func__, page); } memset(iobuf + (addr & 0x1ff), 0xff, (~addr & 0x1ff) + 1); -if (blk_write(s->blk, page, iobuf, 1) < 0) { +if (blk_pwrite(s->blk, page << BDRV_SECTOR_BITS, iobuf, + BDRV_SECTOR_SIZE, 0) < 0) { printf("%s: write error in sector %" PRIu64 "\n", __func__, page); } @@ -734,18 +741,20 @@ static void glue(nand_blk_erase_, PAGE_SIZE)(NANDFlashState *s) i = (addr & ~0x1ff) + 0x200; for (addr += ((PAGE_SIZE + OOB_SIZE) << s->erase_shift) - 0x200; i < addr; i += 0x200) { -if (blk_write(s->blk, i >> 9, iobuf, 1) < 0) { +if (blk_pwrite(s->blk, i, iobuf, BDRV_SECTOR_SIZE, 0) < 0) { printf("%s: write error in sector %" PRIu64 "\n", __func__, i >> 9); } } page = i >> 9; -if (blk_read(s->blk, page, iobuf, 1) < 0) { +if (blk_pread(s->blk, page << BDRV_SECTOR_BITS, iobuf, + BDRV_SECTOR_SIZE) < 0) { printf("%s: read error in sector %" PRIu64 "\n", __func__, page); } memset(iobuf, 0xff, ((addr - 1) & 0x1ff) + 1); -if (blk_write(s->blk, page, iobuf, 1) < 0) { +if (blk_pwrite(s->blk, page << BDRV_SECTOR_BITS, iobuf, + BDRV_SECTOR_SIZE, 0) < 0) { printf("%s: write error in sector %" PRIu64 "\n", __func__, page); } } @@ -760,7 +769,8 @@ static void glue(nand_blk_load_, PAGE_SIZE)(NANDFlashState *s, if (s->blk) {
[Qemu-block] [PATCH v6 18/20] qemu-img: Switch to byte-based block access
Sector-based blk_write() should die; switch to byte-based blk_pwrite() instead. Likewise for blk_read(). Signed-off-by: Eric Blake--- qemu-img.c | 28 +++- 1 file changed, 19 insertions(+), 9 deletions(-) diff --git a/qemu-img.c b/qemu-img.c index 76430a8..491a460 100644 --- a/qemu-img.c +++ b/qemu-img.c @@ -1088,7 +1088,8 @@ static int check_empty_sectors(BlockBackend *blk, int64_t sect_num, uint8_t *buffer, bool quiet) { int pnum, ret = 0; -ret = blk_read(blk, sect_num, buffer, sect_count); +ret = blk_pread(blk, sect_num << BDRV_SECTOR_BITS, buffer, +sect_count << BDRV_SECTOR_BITS); if (ret < 0) { error_report("Error while reading offset %" PRId64 " of %s: %s", sectors_to_bytes(sect_num), filename, strerror(-ret)); @@ -1301,7 +1302,8 @@ static int img_compare(int argc, char **argv) nb_sectors = MIN(pnum1, pnum2); } else if (allocated1 == allocated2) { if (allocated1) { -ret = blk_read(blk1, sector_num, buf1, nb_sectors); +ret = blk_pread(blk1, sector_num << BDRV_SECTOR_BITS, buf1, +nb_sectors << BDRV_SECTOR_BITS); if (ret < 0) { error_report("Error while reading offset %" PRId64 " of %s:" " %s", sectors_to_bytes(sector_num), filename1, @@ -1309,7 +1311,8 @@ static int img_compare(int argc, char **argv) ret = 4; goto out; } -ret = blk_read(blk2, sector_num, buf2, nb_sectors); +ret = blk_pread(blk2, sector_num << BDRV_SECTOR_BITS, buf2, +nb_sectors << BDRV_SECTOR_BITS); if (ret < 0) { error_report("Error while reading offset %" PRId64 " of %s: %s", sectors_to_bytes(sector_num), @@ -1522,7 +1525,9 @@ static int convert_read(ImgConvertState *s, int64_t sector_num, int nb_sectors, bs_sectors = s->src_sectors[s->src_cur]; n = MIN(nb_sectors, bs_sectors - (sector_num - s->src_cur_offset)); -ret = blk_read(blk, sector_num - s->src_cur_offset, buf, n); +ret = blk_pread(blk, +(sector_num - s->src_cur_offset) << BDRV_SECTOR_BITS, +buf, n << BDRV_SECTOR_BITS); if (ret < 0) { return ret; } @@ -1577,7 +1582,8 @@ static int convert_write(ImgConvertState *s, int64_t sector_num, int nb_sectors, if (!s->min_sparse || is_allocated_sectors_min(buf, n, , s->min_sparse)) { -ret = blk_write(s->target, sector_num, buf, n); +ret = blk_pwrite(s->target, sector_num << BDRV_SECTOR_BITS, + buf, n << BDRV_SECTOR_BITS, 0); if (ret < 0) { return ret; } @@ -3024,7 +3030,8 @@ static int img_rebase(int argc, char **argv) n = old_backing_num_sectors - sector; } -ret = blk_read(blk_old_backing, sector, buf_old, n); +ret = blk_pread(blk_old_backing, sector << BDRV_SECTOR_BITS, +buf_old, n << BDRV_SECTOR_BITS); if (ret < 0) { error_report("error while reading from old backing file"); goto out; @@ -3038,7 +3045,8 @@ static int img_rebase(int argc, char **argv) n = new_backing_num_sectors - sector; } -ret = blk_read(blk_new_backing, sector, buf_new, n); +ret = blk_pread(blk_new_backing, sector << BDRV_SECTOR_BITS, +buf_new, n << BDRV_SECTOR_BITS); if (ret < 0) { error_report("error while reading from new backing file"); goto out; @@ -3054,8 +3062,10 @@ static int img_rebase(int argc, char **argv) if (compare_sectors(buf_old + written * 512, buf_new + written * 512, n - written, )) { -ret = blk_write(blk, sector + written, -buf_old + written * 512, pnum); +ret = blk_pwrite(blk, + (sector + written) << BDRV_SECTOR_BITS, + buf_old + written * 512, + pnum << BDRV_SECTOR_BITS, 0); if (ret < 0) { error_report("Error while writing to COW image: %s", strerror(-ret)); -- 2.5.5
[Qemu-block] [PATCH v6 10/20] fdc: Switch to byte-based block access
Sector-based blk_write() should die; switch to byte-based blk_pwrite() instead. Likewise for blk_read(). Signed-off-by: Eric Blake--- hw/block/fdc.c | 25 + 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/hw/block/fdc.c b/hw/block/fdc.c index 3722275..f73af7d 100644 --- a/hw/block/fdc.c +++ b/hw/block/fdc.c @@ -223,6 +223,13 @@ static int fd_sector(FDrive *drv) NUM_SIDES(drv)); } +/* Returns current position, in bytes, for given drive */ +static int fd_offset(FDrive *drv) +{ +g_assert(fd_sector(drv) < INT_MAX >> BDRV_SECTOR_BITS); +return fd_sector(drv) << BDRV_SECTOR_BITS; +} + /* Seek to a new position: * returns 0 if already on right track * returns 1 if track changed @@ -1629,8 +1636,8 @@ static int fdctrl_transfer_handler (void *opaque, int nchan, if (fdctrl->data_dir != FD_DIR_WRITE || len < FD_SECTOR_LEN || rel_pos != 0) { /* READ & SCAN commands and realign to a sector for WRITE */ -if (blk_read(cur_drv->blk, fd_sector(cur_drv), - fdctrl->fifo, 1) < 0) { +if (blk_pread(cur_drv->blk, fd_offset(cur_drv), + fdctrl->fifo, BDRV_SECTOR_SIZE) < 0) { FLOPPY_DPRINTF("Floppy: error getting sector %d\n", fd_sector(cur_drv)); /* Sure, image size is too small... */ @@ -1657,8 +1664,8 @@ static int fdctrl_transfer_handler (void *opaque, int nchan, k->read_memory(fdctrl->dma, nchan, fdctrl->fifo + rel_pos, fdctrl->data_pos, len); -if (blk_write(cur_drv->blk, fd_sector(cur_drv), - fdctrl->fifo, 1) < 0) { +if (blk_pwrite(cur_drv->blk, fd_offset(cur_drv), + fdctrl->fifo, BDRV_SECTOR_SIZE, 0) < 0) { FLOPPY_DPRINTF("error writing sector %d\n", fd_sector(cur_drv)); fdctrl_stop_transfer(fdctrl, FD_SR0_ABNTERM | FD_SR0_SEEK, 0x00, 0x00); @@ -1741,7 +1748,8 @@ static uint32_t fdctrl_read_data(FDCtrl *fdctrl) fd_sector(cur_drv)); return 0; } -if (blk_read(cur_drv->blk, fd_sector(cur_drv), fdctrl->fifo, 1) +if (blk_pread(cur_drv->blk, fd_offset(cur_drv), fdctrl->fifo, + BDRV_SECTOR_SIZE) < 0) { FLOPPY_DPRINTF("error getting sector %d\n", fd_sector(cur_drv)); @@ -1820,7 +1828,8 @@ static void fdctrl_format_sector(FDCtrl *fdctrl) } memset(fdctrl->fifo, 0, FD_SECTOR_LEN); if (cur_drv->blk == NULL || -blk_write(cur_drv->blk, fd_sector(cur_drv), fdctrl->fifo, 1) < 0) { +blk_pwrite(cur_drv->blk, fd_offset(cur_drv), fdctrl->fifo, + BDRV_SECTOR_SIZE, 0) < 0) { FLOPPY_DPRINTF("error formatting sector %d\n", fd_sector(cur_drv)); fdctrl_stop_transfer(fdctrl, FD_SR0_ABNTERM | FD_SR0_SEEK, 0x00, 0x00); } else { @@ -2243,8 +2252,8 @@ static void fdctrl_write_data(FDCtrl *fdctrl, uint32_t value) if (pos == FD_SECTOR_LEN - 1 || fdctrl->data_pos == fdctrl->data_len) { cur_drv = get_cur_drv(fdctrl); -if (blk_write(cur_drv->blk, fd_sector(cur_drv), fdctrl->fifo, 1) -< 0) { +if (blk_pwrite(cur_drv->blk, fd_offset(cur_drv), fdctrl->fifo, + BDRV_SECTOR_SIZE, 0) < 0) { FLOPPY_DPRINTF("error writing sector %d\n", fd_sector(cur_drv)); break; -- 2.5.5
[Qemu-block] [PATCH v6 12/20] onenand: Switch to byte-based block access
Sector-based blk_write() should die; switch to byte-based blk_pwrite() instead. Likewise for blk_read(). This particular device picks its size during onenand_initfn(), and can be at most 0x8000 bytes; therefore, shifting an 'int sec' request to get back to a byte offset should never overflow 32 bits. But adding assertions to document that point should not hurt. Signed-off-by: Eric Blake--- v5: compile-tested, add asserts that size never exceeds 0x8000, so that we can consistently use uint32_t --- hw/block/onenand.c | 41 +++-- 1 file changed, 27 insertions(+), 14 deletions(-) diff --git a/hw/block/onenand.c b/hw/block/onenand.c index 883f4b1..8d84227 100644 --- a/hw/block/onenand.c +++ b/hw/block/onenand.c @@ -224,7 +224,8 @@ static void onenand_reset(OneNANDState *s, int cold) /* Lock the whole flash */ memset(s->blockwp, ONEN_LOCK_LOCKED, s->blocks); -if (s->blk_cur && blk_read(s->blk_cur, 0, s->boot[0], 8) < 0) { +if (s->blk_cur && blk_pread(s->blk_cur, 0, s->boot[0], +8 << BDRV_SECTOR_BITS) < 0) { hw_error("%s: Loading the BootRAM failed.\n", __func__); } } @@ -240,8 +241,11 @@ static void onenand_system_reset(DeviceState *dev) static inline int onenand_load_main(OneNANDState *s, int sec, int secn, void *dest) { +assert(UINT32_MAX >> BDRV_SECTOR_BITS > sec); +assert(UINT32_MAX >> BDRV_SECTOR_BITS > secn); if (s->blk_cur) { -return blk_read(s->blk_cur, sec, dest, secn) < 0; +return blk_pread(s->blk_cur, sec << BDRV_SECTOR_BITS, dest, + secn << BDRV_SECTOR_BITS) < 0; } else if (sec + secn > s->secs_cur) { return 1; } @@ -257,19 +261,22 @@ static inline int onenand_prog_main(OneNANDState *s, int sec, int secn, int result = 0; if (secn > 0) { -uint32_t size = (uint32_t)secn * 512; +uint32_t size = secn << BDRV_SECTOR_BITS; +uint32_t offset = sec << BDRV_SECTOR_BITS; +assert(UINT32_MAX >> BDRV_SECTOR_BITS > sec); +assert(UINT32_MAX >> BDRV_SECTOR_BITS > secn); const uint8_t *sp = (const uint8_t *)src; uint8_t *dp = 0; if (s->blk_cur) { dp = g_malloc(size); -if (!dp || blk_read(s->blk_cur, sec, dp, secn) < 0) { +if (!dp || blk_pread(s->blk_cur, offset, dp, size) < 0) { result = 1; } } else { if (sec + secn > s->secs_cur) { result = 1; } else { -dp = (uint8_t *)s->current + (sec << 9); +dp = (uint8_t *)s->current + offset; } } if (!result) { @@ -278,7 +285,7 @@ static inline int onenand_prog_main(OneNANDState *s, int sec, int secn, dp[i] &= sp[i]; } if (s->blk_cur) { -result = blk_write(s->blk_cur, sec, dp, secn) < 0; +result = blk_pwrite(s->blk_cur, offset, dp, size, 0) < 0; } } if (dp && s->blk_cur) { @@ -295,7 +302,8 @@ static inline int onenand_load_spare(OneNANDState *s, int sec, int secn, uint8_t buf[512]; if (s->blk_cur) { -if (blk_read(s->blk_cur, s->secs_cur + (sec >> 5), buf, 1) < 0) { +uint32_t offset = (s->secs_cur + (sec >> 5)) << BDRV_SECTOR_BITS; +if (blk_pread(s->blk_cur, offset, buf, BDRV_SECTOR_SIZE) < 0) { return 1; } memcpy(dest, buf + ((sec & 31) << 4), secn << 4); @@ -304,7 +312,7 @@ static inline int onenand_load_spare(OneNANDState *s, int sec, int secn, } else { memcpy(dest, s->current + (s->secs_cur << 9) + (sec << 4), secn << 4); } - + return 0; } @@ -315,10 +323,12 @@ static inline int onenand_prog_spare(OneNANDState *s, int sec, int secn, if (secn > 0) { const uint8_t *sp = (const uint8_t *)src; uint8_t *dp = 0, *dpp = 0; +uint32_t offset = (s->secs_cur + (sec >> 5)) << BDRV_SECTOR_BITS; +assert(UINT32_MAX >> BDRV_SECTOR_BITS > s->secs_cur + (sec >> 5)); if (s->blk_cur) { dp = g_malloc(512); if (!dp -|| blk_read(s->blk_cur, s->secs_cur + (sec >> 5), dp, 1) < 0) { +|| blk_pread(s->blk_cur, offset, dp, BDRV_SECTOR_SIZE) < 0) { result = 1; } else { dpp = dp + ((sec & 31) << 4); @@ -336,8 +346,8 @@ static inline int onenand_prog_spare(OneNANDState *s, int sec, int secn, dpp[i] &= sp[i]; } if (s->blk_cur) { -result = blk_write(s->blk_cur, s->secs_cur + (sec >> 5), - dp, 1) < 0; +result = blk_pwrite(s->blk_cur, offset, dp, +BDRV_SECTOR_SIZE, 0) < 0; } }
[Qemu-block] [PATCH v6 16/20] atapi: Switch to byte-based block access
Sector-based blk_read() should die; switch to byte-based blk_pread() instead. Add new defines ATAPI_SECTOR_BITS and ATAPI_SECTOR_SIZE to use anywhere we were previously scaling BDRV_SECTOR_* by 4, for better legibility. Signed-off-by: Eric Blake--- v4: add new defines for use in more places [jsnow] --- hw/ide/atapi.c | 19 +++ 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/hw/ide/atapi.c b/hw/ide/atapi.c index 2bb606c..95056d9 100644 --- a/hw/ide/atapi.c +++ b/hw/ide/atapi.c @@ -28,6 +28,9 @@ #include "hw/scsi/scsi.h" #include "sysemu/block-backend.h" +#define ATAPI_SECTOR_BITS (2 + BDRV_SECTOR_BITS) +#define ATAPI_SECTOR_SIZE (1 << ATAPI_SECTOR_BITS) + static void ide_atapi_cmd_read_dma_cb(void *opaque, int ret); static void padstr8(uint8_t *buf, int buf_size, const char *src) @@ -111,7 +114,7 @@ cd_read_sector_sync(IDEState *s) { int ret; block_acct_start(blk_get_stats(s->blk), >acct, - 4 * BDRV_SECTOR_SIZE, BLOCK_ACCT_READ); + ATAPI_SECTOR_SIZE, BLOCK_ACCT_READ); #ifdef DEBUG_IDE_ATAPI printf("cd_read_sector_sync: lba=%d\n", s->lba); @@ -119,12 +122,12 @@ cd_read_sector_sync(IDEState *s) switch (s->cd_sector_size) { case 2048: -ret = blk_read(s->blk, (int64_t)s->lba << 2, - s->io_buffer, 4); +ret = blk_pread(s->blk, (int64_t)s->lba << ATAPI_SECTOR_BITS, +s->io_buffer, ATAPI_SECTOR_SIZE); break; case 2352: -ret = blk_read(s->blk, (int64_t)s->lba << 2, - s->io_buffer + 16, 4); +ret = blk_pread(s->blk, (int64_t)s->lba << ATAPI_SECTOR_BITS, +s->io_buffer + 16, ATAPI_SECTOR_SIZE); if (ret >= 0) { cd_data_to_raw(s->io_buffer, s->lba); } @@ -182,7 +185,7 @@ static int cd_read_sector(IDEState *s) s->iov.iov_base = (s->cd_sector_size == 2352) ? s->io_buffer + 16 : s->io_buffer; -s->iov.iov_len = 4 * BDRV_SECTOR_SIZE; +s->iov.iov_len = ATAPI_SECTOR_SIZE; qemu_iovec_init_external(>qiov, >iov, 1); #ifdef DEBUG_IDE_ATAPI @@ -190,7 +193,7 @@ static int cd_read_sector(IDEState *s) #endif block_acct_start(blk_get_stats(s->blk), >acct, - 4 * BDRV_SECTOR_SIZE, BLOCK_ACCT_READ); + ATAPI_SECTOR_SIZE, BLOCK_ACCT_READ); ide_buffered_readv(s, (int64_t)s->lba << 2, >qiov, 4, cd_read_sector_cb, s); @@ -435,7 +438,7 @@ static void ide_atapi_cmd_read_dma_cb(void *opaque, int ret) #endif s->bus->dma->iov.iov_base = (void *)(s->io_buffer + data_offset); -s->bus->dma->iov.iov_len = n * 4 * 512; +s->bus->dma->iov.iov_len = n * ATAPI_SECTOR_SIZE; qemu_iovec_init_external(>bus->dma->qiov, >bus->dma->iov, 1); s->bus->dma->aiocb = ide_buffered_readv(s, (int64_t)s->lba << 2, -- 2.5.5
[Qemu-block] [PATCH v6 15/20] m25p80: Switch to byte-based block access
Sector-based blk_read() should die; switch to byte-based blk_pread() instead. Likewise for blk_aio_readv() and blk_aio_writev(). Signed-off-by: Eric Blake--- hw/block/m25p80.c | 23 +++ 1 file changed, 7 insertions(+), 16 deletions(-) diff --git a/hw/block/m25p80.c b/hw/block/m25p80.c index 906b712..5d30863 100644 --- a/hw/block/m25p80.c +++ b/hw/block/m25p80.c @@ -358,25 +358,21 @@ static void blk_sync_complete(void *opaque, int ret) static void flash_sync_page(Flash *s, int page) { -int blk_sector, nb_sectors; QEMUIOVector iov; if (!s->blk || blk_is_read_only(s->blk)) { return; } -blk_sector = (page * s->pi->page_size) / BDRV_SECTOR_SIZE; -nb_sectors = DIV_ROUND_UP(s->pi->page_size, BDRV_SECTOR_SIZE); qemu_iovec_init(, 1); -qemu_iovec_add(, s->storage + blk_sector * BDRV_SECTOR_SIZE, - nb_sectors * BDRV_SECTOR_SIZE); -blk_aio_writev(s->blk, blk_sector, , nb_sectors, blk_sync_complete, - NULL); +qemu_iovec_add(, s->storage + page * s->pi->page_size, + s->pi->page_size); +blk_aio_pwritev(s->blk, page * s->pi->page_size, , 0, +blk_sync_complete, NULL); } static inline void flash_sync_area(Flash *s, int64_t off, int64_t len) { -int64_t start, end, nb_sectors; QEMUIOVector iov; if (!s->blk || blk_is_read_only(s->blk)) { @@ -384,13 +380,9 @@ static inline void flash_sync_area(Flash *s, int64_t off, int64_t len) } assert(!(len % BDRV_SECTOR_SIZE)); -start = off / BDRV_SECTOR_SIZE; -end = (off + len) / BDRV_SECTOR_SIZE; -nb_sectors = end - start; qemu_iovec_init(, 1); -qemu_iovec_add(, s->storage + (start * BDRV_SECTOR_SIZE), -nb_sectors * BDRV_SECTOR_SIZE); -blk_aio_writev(s->blk, start, , nb_sectors, blk_sync_complete, NULL); +qemu_iovec_add(, s->storage + off, len); +blk_aio_pwritev(s->blk, off, , 0, blk_sync_complete, NULL); } static void flash_erase(Flash *s, int offset, FlashCMD cmd) @@ -907,8 +899,7 @@ static int m25p80_init(SSISlave *ss) s->storage = blk_blockalign(s->blk, s->size); /* FIXME: Move to late init */ -if (blk_read(s->blk, 0, s->storage, - DIV_ROUND_UP(s->size, BDRV_SECTOR_SIZE))) { +if (blk_pread(s->blk, 0, s->storage, s->size)) { fprintf(stderr, "Failed to initialize SPI flash!\n"); return 1; } -- 2.5.5
[Qemu-block] [PATCH v6 14/20] sd: Switch to byte-based block access
Sector-based blk_write() should die; switch to byte-based blk_pwrite() instead. Likewise for blk_read(). Greatly simplifies the code, now that we let the block layer take care of alignment and read-modify-write on our behalf :) In fact, we no longer need to include 'buf' in the migration stream (although we do have to ensure that the stream remains compatible). Signed-off-by: Eric Blake--- v5: fix bug in sd_blk_write, drop sd->buf --- hw/sd/sd.c | 51 --- 1 file changed, 4 insertions(+), 47 deletions(-) diff --git a/hw/sd/sd.c b/hw/sd/sd.c index b66e5d2..87e3d23 100644 --- a/hw/sd/sd.c +++ b/hw/sd/sd.c @@ -123,7 +123,6 @@ struct SDState { qemu_irq readonly_cb; qemu_irq inserted_cb; BlockBackend *blk; -uint8_t *buf; bool enable; }; @@ -551,7 +550,7 @@ static const VMStateDescription sd_vmstate = { VMSTATE_UINT64(data_start, SDState), VMSTATE_UINT32(data_offset, SDState), VMSTATE_UINT8_ARRAY(data, SDState, 512), -VMSTATE_BUFFER_POINTER_UNSAFE(buf, SDState, 1, 512), +VMSTATE_UNUSED_V(1, 512), VMSTATE_BOOL(enable, SDState), VMSTATE_END_OF_LIST() }, @@ -1577,57 +1576,17 @@ send_response: static void sd_blk_read(SDState *sd, uint64_t addr, uint32_t len) { -uint64_t end = addr + len; - DPRINTF("sd_blk_read: addr = 0x%08llx, len = %d\n", (unsigned long long) addr, len); -if (!sd->blk || blk_read(sd->blk, addr >> 9, sd->buf, 1) < 0) { +if (!sd->blk || blk_pread(sd->blk, addr, sd->data, len) < 0) { fprintf(stderr, "sd_blk_read: read error on host side\n"); -return; } - -if (end > (addr & ~511) + 512) { -memcpy(sd->data, sd->buf + (addr & 511), 512 - (addr & 511)); - -if (blk_read(sd->blk, end >> 9, sd->buf, 1) < 0) { -fprintf(stderr, "sd_blk_read: read error on host side\n"); -return; -} -memcpy(sd->data + 512 - (addr & 511), sd->buf, end & 511); -} else -memcpy(sd->data, sd->buf + (addr & 511), len); } static void sd_blk_write(SDState *sd, uint64_t addr, uint32_t len) { -uint64_t end = addr + len; - -if ((addr & 511) || len < 512) -if (!sd->blk || blk_read(sd->blk, addr >> 9, sd->buf, 1) < 0) { -fprintf(stderr, "sd_blk_write: read error on host side\n"); -return; -} - -if (end > (addr & ~511) + 512) { -memcpy(sd->buf + (addr & 511), sd->data, 512 - (addr & 511)); -if (blk_write(sd->blk, addr >> 9, sd->buf, 1) < 0) { -fprintf(stderr, "sd_blk_write: write error on host side\n"); -return; -} - -if (blk_read(sd->blk, end >> 9, sd->buf, 1) < 0) { -fprintf(stderr, "sd_blk_write: read error on host side\n"); -return; -} -memcpy(sd->buf, sd->data + 512 - (addr & 511), end & 511); -if (blk_write(sd->blk, end >> 9, sd->buf, 1) < 0) { -fprintf(stderr, "sd_blk_write: write error on host side\n"); -} -} else { -memcpy(sd->buf + (addr & 511), sd->data, len); -if (!sd->blk || blk_write(sd->blk, addr >> 9, sd->buf, 1) < 0) { -fprintf(stderr, "sd_blk_write: write error on host side\n"); -} +if (!sd->blk || blk_pwrite(sd->blk, addr, sd->data, len, 0) < 0) { +fprintf(stderr, "sd_blk_write: write error on host side\n"); } } @@ -1925,8 +1884,6 @@ static void sd_realize(DeviceState *dev, Error **errp) return; } -sd->buf = blk_blockalign(sd->blk, 512); - if (sd->blk) { blk_set_dev_ops(sd->blk, _block_ops, sd); } -- 2.5.5
[Qemu-block] [PATCH v6 08/20] virtio: Switch to byte-based aio block access
Sector-based blk_aio_readv() and blk_aio_writev() should die; switch to byte-based blk_aio_preadv() and blk_aio_pwritev() instead. The trace is modified at the same time, and nb_sectors is now unused. Fix a comment typo while in the vicinity. Signed-off-by: Eric Blake--- hw/block/virtio-blk.c | 18 -- trace-events | 2 +- 2 files changed, 9 insertions(+), 11 deletions(-) diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c index 3f88f8c..284e646 100644 --- a/hw/block/virtio-blk.c +++ b/hw/block/virtio-blk.c @@ -322,7 +322,6 @@ static inline void submit_requests(BlockBackend *blk, MultiReqBuffer *mrb, { QEMUIOVector *qiov = >reqs[start]->qiov; int64_t sector_num = mrb->reqs[start]->sector_num; -int nb_sectors = mrb->reqs[start]->qiov.size / BDRV_SECTOR_SIZE; bool is_write = mrb->is_write; if (num_reqs > 1) { @@ -331,7 +330,7 @@ static inline void submit_requests(BlockBackend *blk, MultiReqBuffer *mrb, int tmp_niov = qiov->niov; /* mrb->reqs[start]->qiov was initialized from external so we can't - * modifiy it here. We need to initialize it locally and then add the + * modify it here. We need to initialize it locally and then add the * external iovecs. */ qemu_iovec_init(qiov, niov); @@ -343,23 +342,22 @@ static inline void submit_requests(BlockBackend *blk, MultiReqBuffer *mrb, qemu_iovec_concat(qiov, >reqs[i]->qiov, 0, mrb->reqs[i]->qiov.size); mrb->reqs[i - 1]->mr_next = mrb->reqs[i]; -nb_sectors += mrb->reqs[i]->qiov.size / BDRV_SECTOR_SIZE; } -assert(nb_sectors == qiov->size / BDRV_SECTOR_SIZE); -trace_virtio_blk_submit_multireq(mrb, start, num_reqs, sector_num, - nb_sectors, is_write); +trace_virtio_blk_submit_multireq(mrb, start, num_reqs, + sector_num << BDRV_SECTOR_BITS, + qiov->size, is_write); block_acct_merge_done(blk_get_stats(blk), is_write ? BLOCK_ACCT_WRITE : BLOCK_ACCT_READ, num_reqs - 1); } if (is_write) { -blk_aio_writev(blk, sector_num, qiov, nb_sectors, +blk_aio_pwritev(blk, sector_num << BDRV_SECTOR_BITS, qiov, 0, +virtio_blk_rw_complete, mrb->reqs[start]); +} else { +blk_aio_preadv(blk, sector_num << BDRV_SECTOR_BITS, qiov, 0, virtio_blk_rw_complete, mrb->reqs[start]); -} else { -blk_aio_readv(blk, sector_num, qiov, nb_sectors, - virtio_blk_rw_complete, mrb->reqs[start]); } } diff --git a/trace-events b/trace-events index b4acd2a..b588091 100644 --- a/trace-events +++ b/trace-events @@ -118,7 +118,7 @@ virtio_blk_req_complete(void *req, int status) "req %p status %d" virtio_blk_rw_complete(void *req, int ret) "req %p ret %d" virtio_blk_handle_write(void *req, uint64_t sector, size_t nsectors) "req %p sector %"PRIu64" nsectors %zu" virtio_blk_handle_read(void *req, uint64_t sector, size_t nsectors) "req %p sector %"PRIu64" nsectors %zu" -virtio_blk_submit_multireq(void *mrb, int start, int num_reqs, uint64_t sector, size_t nsectors, bool is_write) "mrb %p start %d num_reqs %d sector %"PRIu64" nsectors %zu is_write %d" +virtio_blk_submit_multireq(void *mrb, int start, int num_reqs, uint64_t offset, size_t size, bool is_write) "mrb %p start %d num_reqs %d offset %"PRIu64" size %zu is_write %d" # hw/block/dataplane/virtio-blk.c virtio_blk_data_plane_start(void *s) "dataplane %p" -- 2.5.5
[Qemu-block] [PATCH v6 01/20] block: Allow BDRV_REQ_FUA through blk_pwrite()
We have several block drivers that understand BDRV_REQ_FUA, and emulate it in the block layer for the rest by a full flush. But without a way to actually request BDRV_REQ_FUA during a pass-through blk_pwrite(), FUA-aware block drivers like NBD are forced to repeat the emulation logic of a full flush regardless of whether the backend they are writing to could do it more efficiently. This patch just wires up a flags argument; followup patches will actually make use of it in the NBD driver and in qemu-io. Signed-off-by: Eric BlakeAcked-by: Denis V. Lunev --- include/sysemu/block-backend.h | 3 ++- block/block-backend.c | 6 -- block/crypto.c | 2 +- block/parallels.c | 2 +- block/qcow.c | 8 block/qcow2.c | 4 ++-- block/qed.c| 6 +++--- block/sheepdog.c | 2 +- block/vdi.c| 4 ++-- block/vhdx.c | 5 +++-- block/vmdk.c | 10 +- block/vpc.c| 10 +- hw/nvram/spapr_nvram.c | 4 ++-- nbd/server.c | 2 +- qemu-io-cmds.c | 2 +- 15 files changed, 37 insertions(+), 33 deletions(-) diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h index c62b6fe..6991b26 100644 --- a/include/sysemu/block-backend.h +++ b/include/sysemu/block-backend.h @@ -102,7 +102,8 @@ BlockAIOCB *blk_aio_write_zeroes(BlockBackend *blk, int64_t sector_num, int nb_sectors, BdrvRequestFlags flags, BlockCompletionFunc *cb, void *opaque); int blk_pread(BlockBackend *blk, int64_t offset, void *buf, int count); -int blk_pwrite(BlockBackend *blk, int64_t offset, const void *buf, int count); +int blk_pwrite(BlockBackend *blk, int64_t offset, const void *buf, int count, + BdrvRequestFlags flags); int64_t blk_getlength(BlockBackend *blk); void blk_get_geometry(BlockBackend *blk, uint64_t *nb_sectors_ptr); int64_t blk_nb_sectors(BlockBackend *blk); diff --git a/block/block-backend.c b/block/block-backend.c index a7623e8..96c1d7c 100644 --- a/block/block-backend.c +++ b/block/block-backend.c @@ -953,9 +953,11 @@ int blk_pread(BlockBackend *blk, int64_t offset, void *buf, int count) return count; } -int blk_pwrite(BlockBackend *blk, int64_t offset, const void *buf, int count) +int blk_pwrite(BlockBackend *blk, int64_t offset, const void *buf, int count, + BdrvRequestFlags flags) { -int ret = blk_prw(blk, offset, (void*) buf, count, blk_write_entry, 0); +int ret = blk_prw(blk, offset, (void *) buf, count, blk_write_entry, + flags); if (ret < 0) { return ret; } diff --git a/block/crypto.c b/block/crypto.c index 1903e84..32ba17c 100644 --- a/block/crypto.c +++ b/block/crypto.c @@ -91,7 +91,7 @@ static ssize_t block_crypto_write_func(QCryptoBlock *block, struct BlockCryptoCreateData *data = opaque; ssize_t ret; -ret = blk_pwrite(data->blk, offset, buf, buflen); +ret = blk_pwrite(data->blk, offset, buf, buflen, 0); if (ret < 0) { error_setg_errno(errp, -ret, "Could not write encryption header"); return ret; diff --git a/block/parallels.c b/block/parallels.c index 324ed43..2d8bc87 100644 --- a/block/parallels.c +++ b/block/parallels.c @@ -512,7 +512,7 @@ static int parallels_create(const char *filename, QemuOpts *opts, Error **errp) memset(tmp, 0, sizeof(tmp)); memcpy(tmp, , sizeof(header)); -ret = blk_pwrite(file, 0, tmp, BDRV_SECTOR_SIZE); +ret = blk_pwrite(file, 0, tmp, BDRV_SECTOR_SIZE, 0); if (ret < 0) { goto exit; } diff --git a/block/qcow.c b/block/qcow.c index 60ddb12..d6dc1b0 100644 --- a/block/qcow.c +++ b/block/qcow.c @@ -853,14 +853,14 @@ static int qcow_create(const char *filename, QemuOpts *opts, Error **errp) } /* write all the data */ -ret = blk_pwrite(qcow_blk, 0, , sizeof(header)); +ret = blk_pwrite(qcow_blk, 0, , sizeof(header), 0); if (ret != sizeof(header)) { goto exit; } if (backing_file) { ret = blk_pwrite(qcow_blk, sizeof(header), -backing_file, backing_filename_len); + backing_file, backing_filename_len, 0); if (ret != backing_filename_len) { goto exit; } @@ -869,8 +869,8 @@ static int qcow_create(const char *filename, QemuOpts *opts, Error **errp) tmp = g_malloc0(BDRV_SECTOR_SIZE); for (i = 0; i < ((sizeof(uint64_t)*l1_size + BDRV_SECTOR_SIZE - 1)/ BDRV_SECTOR_SIZE); i++) { -ret = blk_pwrite(qcow_blk, header_size + -BDRV_SECTOR_SIZE*i, tmp, BDRV_SECTOR_SIZE); +ret = blk_pwrite(qcow_blk, header_size + BDRV_SECTOR_SIZE * i, + tmp, BDRV_SECTOR_SIZE, 0); if (ret != BDRV_SECTOR_SIZE)
[Qemu-block] [PATCH v6 03/20] block: Switch blk_read_unthrottled() to byte interface
Sector-based blk_read() should die; convert the one-off variant blk_read_unthrottled(). Signed-off-by: Eric Blake--- include/sysemu/block-backend.h | 4 ++-- block/block-backend.c | 8 hw/block/hd-geometry.c | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h index 6991b26..662a106 100644 --- a/include/sysemu/block-backend.h +++ b/include/sysemu/block-backend.h @@ -92,8 +92,8 @@ void *blk_get_attached_dev(BlockBackend *blk); void blk_set_dev_ops(BlockBackend *blk, const BlockDevOps *ops, void *opaque); int blk_read(BlockBackend *blk, int64_t sector_num, uint8_t *buf, int nb_sectors); -int blk_read_unthrottled(BlockBackend *blk, int64_t sector_num, uint8_t *buf, - int nb_sectors); +int blk_pread_unthrottled(BlockBackend *blk, int64_t offset, uint8_t *buf, + int count); int blk_write(BlockBackend *blk, int64_t sector_num, const uint8_t *buf, int nb_sectors); int blk_write_zeroes(BlockBackend *blk, int64_t sector_num, diff --git a/block/block-backend.c b/block/block-backend.c index 96c1d7c..e5a8a07 100644 --- a/block/block-backend.c +++ b/block/block-backend.c @@ -790,19 +790,19 @@ int blk_read(BlockBackend *blk, int64_t sector_num, uint8_t *buf, return blk_rw(blk, sector_num, buf, nb_sectors, blk_read_entry, 0); } -int blk_read_unthrottled(BlockBackend *blk, int64_t sector_num, uint8_t *buf, - int nb_sectors) +int blk_pread_unthrottled(BlockBackend *blk, int64_t offset, uint8_t *buf, + int count) { BlockDriverState *bs = blk_bs(blk); int ret; -ret = blk_check_request(blk, sector_num, nb_sectors); +ret = blk_check_byte_request(blk, offset, count); if (ret < 0) { return ret; } bdrv_no_throttling_begin(bs); -ret = blk_read(blk, sector_num, buf, nb_sectors); +ret = blk_pread(blk, offset, buf, count); bdrv_no_throttling_end(bs); return ret; } diff --git a/hw/block/hd-geometry.c b/hw/block/hd-geometry.c index 6d02192..d388f13 100644 --- a/hw/block/hd-geometry.c +++ b/hw/block/hd-geometry.c @@ -66,7 +66,7 @@ static int guess_disk_lchs(BlockBackend *blk, * but also in async I/O mode. So the I/O throttling function has to * be disabled temporarily here, not permanently. */ -if (blk_read_unthrottled(blk, 0, buf, 1) < 0) { +if (blk_pread_unthrottled(blk, 0, buf, BDRV_SECTOR_SIZE) < 0) { return -1; } /* test msdos magic */ -- 2.5.5
[Qemu-block] [PATCH v6 04/20] block: Switch blk_*write_zeroes() to byte interface
Sector-based blk_write() should die; convert the one-off variant blk_write_zeroes() to use an offset/count interface instead. Likewise for blk_co_write_zeroes() and blk_aio_write_zeroes(). Signed-off-by: Eric Blake--- include/sysemu/block-backend.h | 12 ++-- block/block-backend.c | 33 +++-- block/parallels.c | 3 ++- hw/scsi/scsi-disk.c| 4 ++-- qemu-img.c | 3 ++- qemu-io-cmds.c | 6 ++ 6 files changed, 25 insertions(+), 36 deletions(-) diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h index 662a106..851376b 100644 --- a/include/sysemu/block-backend.h +++ b/include/sysemu/block-backend.h @@ -96,10 +96,10 @@ int blk_pread_unthrottled(BlockBackend *blk, int64_t offset, uint8_t *buf, int count); int blk_write(BlockBackend *blk, int64_t sector_num, const uint8_t *buf, int nb_sectors); -int blk_write_zeroes(BlockBackend *blk, int64_t sector_num, - int nb_sectors, BdrvRequestFlags flags); -BlockAIOCB *blk_aio_write_zeroes(BlockBackend *blk, int64_t sector_num, - int nb_sectors, BdrvRequestFlags flags, +int blk_write_zeroes(BlockBackend *blk, int64_t offset, + int count, BdrvRequestFlags flags); +BlockAIOCB *blk_aio_write_zeroes(BlockBackend *blk, int64_t offset, + int count, BdrvRequestFlags flags, BlockCompletionFunc *cb, void *opaque); int blk_pread(BlockBackend *blk, int64_t offset, void *buf, int count); int blk_pwrite(BlockBackend *blk, int64_t offset, const void *buf, int count, @@ -179,8 +179,8 @@ int blk_get_open_flags_from_root_state(BlockBackend *blk); void *blk_aio_get(const AIOCBInfo *aiocb_info, BlockBackend *blk, BlockCompletionFunc *cb, void *opaque); -int coroutine_fn blk_co_write_zeroes(BlockBackend *blk, int64_t sector_num, - int nb_sectors, BdrvRequestFlags flags); +int coroutine_fn blk_co_write_zeroes(BlockBackend *blk, int64_t offset, + int count, BdrvRequestFlags flags); int blk_write_compressed(BlockBackend *blk, int64_t sector_num, const uint8_t *buf, int nb_sectors); int blk_truncate(BlockBackend *blk, int64_t offset); diff --git a/block/block-backend.c b/block/block-backend.c index e5a8a07..f8f88a6 100644 --- a/block/block-backend.c +++ b/block/block-backend.c @@ -814,11 +814,11 @@ int blk_write(BlockBackend *blk, int64_t sector_num, const uint8_t *buf, blk_write_entry, 0); } -int blk_write_zeroes(BlockBackend *blk, int64_t sector_num, - int nb_sectors, BdrvRequestFlags flags) +int blk_write_zeroes(BlockBackend *blk, int64_t offset, + int count, BdrvRequestFlags flags) { -return blk_rw(blk, sector_num, NULL, nb_sectors, blk_write_entry, - flags | BDRV_REQ_ZERO_WRITE); +return blk_prw(blk, offset, NULL, count, blk_write_entry, + flags | BDRV_REQ_ZERO_WRITE); } static void error_callback_bh(void *opaque) @@ -930,18 +930,12 @@ static void blk_aio_write_entry(void *opaque) blk_aio_complete(acb); } -BlockAIOCB *blk_aio_write_zeroes(BlockBackend *blk, int64_t sector_num, - int nb_sectors, BdrvRequestFlags flags, +BlockAIOCB *blk_aio_write_zeroes(BlockBackend *blk, int64_t offset, + int count, BdrvRequestFlags flags, BlockCompletionFunc *cb, void *opaque) { -if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) { -return blk_abort_aio_request(blk, cb, opaque, -EINVAL); -} - -return blk_aio_prwv(blk, sector_num << BDRV_SECTOR_BITS, -nb_sectors << BDRV_SECTOR_BITS, NULL, -blk_aio_write_entry, flags | BDRV_REQ_ZERO_WRITE, -cb, opaque); +return blk_aio_prwv(blk, offset, count, NULL, blk_aio_write_entry, +flags | BDRV_REQ_ZERO_WRITE, cb, opaque); } int blk_pread(BlockBackend *blk, int64_t offset, void *buf, int count) @@ -1444,15 +1438,10 @@ void *blk_aio_get(const AIOCBInfo *aiocb_info, BlockBackend *blk, return qemu_aio_get(aiocb_info, blk_bs(blk), cb, opaque); } -int coroutine_fn blk_co_write_zeroes(BlockBackend *blk, int64_t sector_num, - int nb_sectors, BdrvRequestFlags flags) +int coroutine_fn blk_co_write_zeroes(BlockBackend *blk, int64_t offset, + int count, BdrvRequestFlags flags) { -if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) { -return -EINVAL; -} - -return blk_co_pwritev(blk, sector_num << BDRV_SECTOR_BITS, - nb_sectors << BDRV_SECTOR_BITS,
[Qemu-block] [PATCH v6 07/20] scsi-disk: Switch to byte-based aio block access
Sector-based blk_aio_readv() and blk_aio_writev() should die; switch to byte-based blk_aio_preadv() and blk_aio_pwritev() instead. Signed-off-by: Eric Blake--- hw/scsi/scsi-disk.c | 31 +++ 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c index 1335392..5d98f7b 100644 --- a/hw/scsi/scsi-disk.c +++ b/hw/scsi/scsi-disk.c @@ -343,8 +343,9 @@ static void scsi_do_read(SCSIDiskReq *r, int ret) n = scsi_init_iovec(r, SCSI_DMA_BUF_SIZE); block_acct_start(blk_get_stats(s->qdev.conf.blk), >acct, n * BDRV_SECTOR_SIZE, BLOCK_ACCT_READ); -r->req.aiocb = blk_aio_readv(s->qdev.conf.blk, r->sector, >qiov, n, - scsi_read_complete, r); +r->req.aiocb = blk_aio_preadv(s->qdev.conf.blk, + r->sector << BDRV_SECTOR_BITS, >qiov, + 0, scsi_read_complete, r); } done: @@ -504,7 +505,6 @@ static void scsi_write_data(SCSIRequest *req) { SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req); SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); -uint32_t n; /* No data transfer may already be in progress */ assert(r->req.aiocb == NULL); @@ -544,11 +544,11 @@ static void scsi_write_data(SCSIRequest *req) r->req.aiocb = dma_blk_write(s->qdev.conf.blk, r->req.sg, r->sector, scsi_dma_complete, r); } else { -n = r->qiov.size / 512; block_acct_start(blk_get_stats(s->qdev.conf.blk), >acct, - n * BDRV_SECTOR_SIZE, BLOCK_ACCT_WRITE); -r->req.aiocb = blk_aio_writev(s->qdev.conf.blk, r->sector, >qiov, n, - scsi_write_complete, r); + r->qiov.size, BLOCK_ACCT_WRITE); +r->req.aiocb = blk_aio_pwritev(s->qdev.conf.blk, + r->sector << BDRV_SECTOR_BITS, >qiov, + 0, scsi_write_complete, r); } } @@ -1730,13 +1730,11 @@ static void scsi_write_same_complete(void *opaque, int ret) if (data->iov.iov_len) { block_acct_start(blk_get_stats(s->qdev.conf.blk), >acct, data->iov.iov_len, BLOCK_ACCT_WRITE); -/* blk_aio_write doesn't like the qiov size being different from - * nb_sectors, make sure they match. - */ qemu_iovec_init_external(>qiov, >iov, 1); -r->req.aiocb = blk_aio_writev(s->qdev.conf.blk, data->sector, - >qiov, data->iov.iov_len / 512, - scsi_write_same_complete, data); +r->req.aiocb = blk_aio_pwritev(s->qdev.conf.blk, + data->sector << BDRV_SECTOR_BITS, + >qiov, 0, + scsi_write_same_complete, data); return; } @@ -1803,9 +1801,10 @@ static void scsi_disk_emulate_write_same(SCSIDiskReq *r, uint8_t *inbuf) scsi_req_ref(>req); block_acct_start(blk_get_stats(s->qdev.conf.blk), >acct, data->iov.iov_len, BLOCK_ACCT_WRITE); -r->req.aiocb = blk_aio_writev(s->qdev.conf.blk, data->sector, - >qiov, data->iov.iov_len / 512, - scsi_write_same_complete, data); +r->req.aiocb = blk_aio_pwritev(s->qdev.conf.blk, + data->sector << BDRV_SECTOR_BITS, + >qiov, 0, + scsi_write_same_complete, data); } static void scsi_disk_emulate_write_data(SCSIRequest *req) -- 2.5.5
[Qemu-block] [PATCH v6 02/20] block: Drop private ioctl-only members of BlockRequest
I was thrown by the fact that the public type BlockRequest had an anonymous union, but no obvious discriminator. Turns out that the only client of the second branch of the union was code internal to io.c, and that with a slight abuse of QEMUIOVector* to pass a void* pointer, we can make the public interface less confusing. (Yes, I know that strict C doesn't guarantee that you can cast void* to the wrong type and then back to void* - it only guarantees the reverse direction of the original pointer to void* and back to the original type - but we already have other assumptions throughout the qemu code base that assume that all pointers are interchangeable in representation). Signed-off-by: Eric Blake--- include/block/block.h | 16 block/io.c| 8 +--- 2 files changed, 9 insertions(+), 15 deletions(-) diff --git a/include/block/block.h b/include/block/block.h index 0e8b4d1..754aae3 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -334,18 +334,10 @@ void bdrv_aio_cancel_async(BlockAIOCB *acb); typedef struct BlockRequest { /* Fields to be filled by multiwrite caller */ -union { -struct { -int64_t sector; -int nb_sectors; -int flags; -QEMUIOVector *qiov; -}; -struct { -int req; -void *buf; -}; -}; +int64_t sector; +int nb_sectors; +int flags; +QEMUIOVector *qiov; BlockCompletionFunc *cb; void *opaque; diff --git a/block/io.c b/block/io.c index 0db1146..f15c0f4 100644 --- a/block/io.c +++ b/block/io.c @@ -2614,7 +2614,7 @@ static void coroutine_fn bdrv_co_aio_ioctl_entry(void *opaque) { BlockAIOCBCoroutine *acb = opaque; acb->req.error = bdrv_co_do_ioctl(acb->common.bs, - acb->req.req, acb->req.buf); + acb->req.flags, acb->req.qiov); bdrv_co_complete(acb); } @@ -2628,8 +2628,10 @@ BlockAIOCB *bdrv_aio_ioctl(BlockDriverState *bs, acb->need_bh = true; acb->req.error = -EINPROGRESS; -acb->req.req = req; -acb->req.buf = buf; +/* Slight type abuse here, so we don't have to expose extra fields + * in the public struct BlockRequest */ +acb->req.flags = req; +acb->req.qiov = buf; co = qemu_coroutine_create(bdrv_co_aio_ioctl_entry); qemu_coroutine_enter(co, acb); -- 2.5.5
[Qemu-block] [PATCH v6 05/20] block: Introduce byte-based aio read/write
blk_aio_readv() and blk_aio_writev() are annoying in that they can't access sub-sector granularity, and cannot pass flags. Also, they require the caller to pass redundant information about the size of the I/O. Add new blk_aio_preadv() and blk_aio_pwritev() functions to fix the flaws. The next few patches will upgrade callers, then finally delete the old interfaces. Signed-off-by: Eric Blake--- include/sysemu/block-backend.h | 6 ++ block/block-backend.c | 16 2 files changed, 22 insertions(+) diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h index 851376b..8d7839c 100644 --- a/include/sysemu/block-backend.h +++ b/include/sysemu/block-backend.h @@ -110,9 +110,15 @@ int64_t blk_nb_sectors(BlockBackend *blk); BlockAIOCB *blk_aio_readv(BlockBackend *blk, int64_t sector_num, QEMUIOVector *iov, int nb_sectors, BlockCompletionFunc *cb, void *opaque); +BlockAIOCB *blk_aio_preadv(BlockBackend *blk, int64_t offset, + QEMUIOVector *iov, BdrvRequestFlags flags, + BlockCompletionFunc *cb, void *opaque); BlockAIOCB *blk_aio_writev(BlockBackend *blk, int64_t sector_num, QEMUIOVector *iov, int nb_sectors, BlockCompletionFunc *cb, void *opaque); +BlockAIOCB *blk_aio_pwritev(BlockBackend *blk, int64_t offset, +QEMUIOVector *iov, BdrvRequestFlags flags, +BlockCompletionFunc *cb, void *opaque); BlockAIOCB *blk_aio_flush(BlockBackend *blk, BlockCompletionFunc *cb, void *opaque); BlockAIOCB *blk_aio_discard(BlockBackend *blk, diff --git a/block/block-backend.c b/block/block-backend.c index f8f88a6..104852d 100644 --- a/block/block-backend.c +++ b/block/block-backend.c @@ -998,6 +998,14 @@ BlockAIOCB *blk_aio_readv(BlockBackend *blk, int64_t sector_num, blk_aio_read_entry, 0, cb, opaque); } +BlockAIOCB *blk_aio_preadv(BlockBackend *blk, int64_t offset, + QEMUIOVector *iov, BdrvRequestFlags flags, + BlockCompletionFunc *cb, void *opaque) +{ +return blk_aio_prwv(blk, offset, iov->size, iov, +blk_aio_read_entry, flags, cb, opaque); +} + BlockAIOCB *blk_aio_writev(BlockBackend *blk, int64_t sector_num, QEMUIOVector *iov, int nb_sectors, BlockCompletionFunc *cb, void *opaque) @@ -1011,6 +1019,14 @@ BlockAIOCB *blk_aio_writev(BlockBackend *blk, int64_t sector_num, blk_aio_write_entry, 0, cb, opaque); } +BlockAIOCB *blk_aio_pwritev(BlockBackend *blk, int64_t offset, +QEMUIOVector *iov, BdrvRequestFlags flags, +BlockCompletionFunc *cb, void *opaque) +{ +return blk_aio_prwv(blk, offset, iov->size, iov, +blk_aio_write_entry, flags, cb, opaque); +} + BlockAIOCB *blk_aio_flush(BlockBackend *blk, BlockCompletionFunc *cb, void *opaque) { -- 2.5.5
[Qemu-block] [PATCH v6 00/20] block: kill sector-based blk_write/read
2.7 material, depends on Kevin's block-next: git://repo.or.cz/qemu/kevin.git block-next Previously posted as part of a larger NBD series [1] and as v5 [2]. Mostly orthogonal to Kevin's recent work to also kill sector interfaces from the driver. [1] https://lists.gnu.org/archive/html/qemu-devel/2016-04/msg03526.html [2] https://lists.gnu.org/archive/html/qemu-devel/2016-05/msg00235.html Also available as a tag at this location: git fetch git://repo.or.cz/qemu/ericb.git nbd-block-v6 Changes since then: - patch 2: new patch for something that confused me - patch 3 (was 12/14): moved earlier - patch 4 (was 13/14): moved earlier, retitled, and touches more interfaces - patches 5-9: new, to cover blk_aio_{read,write}v - patch 15: also cover blk_aio_{read,write}v - patch 19: also cover blk_aio_{read,write}v, don't break qemu-iotests 23 [kwolf] - patch 20 (was 14/14): retitle, kill more dead code I still plan to make qemu-io 'read' accept unaligned access without requiring -p, but that will now come in a later series that does other qemu-io UI improvements (besides, 20 patches is big enough to review for now). For qemu-iotests 23 to work with unaligned 'read', I also have to fix 'readv' to be unaligned; which is why this series grew to include blk_aio_*. Note that there are still a few sector-aligned blk_* interfaces left even after this series, but we are progressively getting closer to using byte interfaces throughout the code base. 001/20:[] [--] 'block: Allow BDRV_REQ_FUA through blk_pwrite()' 002/20:[down] 'block: Drop private ioctl-only members of BlockRequest' 003/20:[] [--] 'block: Switch blk_read_unthrottled() to byte interface' 004/20:[down] 'block: Switch blk_*write_zeroes() to byte interface' 005/20:[down] 'block: Introduce byte-based aio read/write' 006/20:[down] 'ide: Switch to byte-based aio block access' 007/20:[down] 'scsi-disk: Switch to byte-based aio block access' 008/20:[down] 'virtio: Switch to byte-based aio block access' 009/20:[down] 'xen_disk: Switch to byte-based aio block access' 010/20:[] [--] 'fdc: Switch to byte-based block access' 011/20:[] [--] 'nand: Switch to byte-based block access' 012/20:[] [--] 'onenand: Switch to byte-based block access' 013/20:[] [--] 'pflash: Switch to byte-based block access' 014/20:[] [--] 'sd: Switch to byte-based block access' 015/20:[0020] [FC] 'm25p80: Switch to byte-based block access' 016/20:[] [--] 'atapi: Switch to byte-based block access' 017/20:[] [--] 'nbd: Switch to byte-based block access' 018/20:[] [--] 'qemu-img: Switch to byte-based block access' 019/20:[0041] [FC] 'qemu-io: Switch to byte-based block access' 020/20:[down] 'block: Kill unused sector-based blk_* functions' Eric Blake (20): block: Allow BDRV_REQ_FUA through blk_pwrite() block: Drop private ioctl-only members of BlockRequest block: Switch blk_read_unthrottled() to byte interface block: Switch blk_*write_zeroes() to byte interface block: Introduce byte-based aio read/write ide: Switch to byte-based aio block access scsi-disk: Switch to byte-based aio block access virtio: Switch to byte-based aio block access xen_disk: Switch to byte-based aio block access fdc: Switch to byte-based block access nand: Switch to byte-based block access onenand: Switch to byte-based block access pflash: Switch to byte-based block access sd: Switch to byte-based block access m25p80: Switch to byte-based block access atapi: Switch to byte-based block access nbd: Switch to byte-based block access qemu-img: Switch to byte-based block access qemu-io: Switch to byte-based block access block: Kill unused sector-based blk_* functions hw/ide/internal.h | 2 +- include/block/block.h | 16 ++- include/sysemu/block-backend.h | 33 ++--- include/sysemu/dma.h | 4 +- block/block-backend.c | 104 - block/crypto.c | 2 +- block/io.c | 8 ++-- block/parallels.c | 5 +- block/qcow.c | 8 ++-- block/qcow2.c | 4 +- block/qed.c| 6 +-- block/sheepdog.c | 2 +- block/vdi.c| 4 +- block/vhdx.c | 5 +- block/vmdk.c | 10 ++-- block/vpc.c| 10 ++-- dma-helpers.c | 14 +++--- hw/block/fdc.c | 25 ++ hw/block/hd-geometry.c | 2 +- hw/block/m25p80.c | 23 +++-- hw/block/nand.c| 36 -- hw/block/onenand.c | 41 ++-- hw/block/pflash_cfi01.c| 12 ++--- hw/block/pflash_cfi02.c| 12 ++--- hw/block/virtio-blk.c | 18 --- hw/block/xen_disk.c| 10 ++-- hw/ide/atapi.c | 19 hw/ide/core.c | 10 ++-- hw/ide/macio.c
[Qemu-block] [PATCH v6 06/20] ide: Switch to byte-based aio block access
Sector-based blk_aio_readv() and blk_aio_writev() should die; switch to byte-based blk_aio_preadv() and blk_aio_pwritev() instead. The patch had to touch multiple files at once, because dma_blk_io() takes pointers to the functions, and ide_issue_trim() piggybacks on the same interface (while ignoring offset under the hood). Signed-off-by: Eric Blake--- hw/ide/internal.h| 2 +- include/sysemu/dma.h | 4 ++-- dma-helpers.c| 14 +++--- hw/ide/core.c| 10 +- hw/ide/macio.c | 9 +++-- 5 files changed, 18 insertions(+), 21 deletions(-) diff --git a/hw/ide/internal.h b/hw/ide/internal.h index d2c458f..ceb9e59 100644 --- a/hw/ide/internal.h +++ b/hw/ide/internal.h @@ -614,7 +614,7 @@ void ide_transfer_start(IDEState *s, uint8_t *buf, int size, void ide_transfer_stop(IDEState *s); void ide_set_inactive(IDEState *s, bool more); BlockAIOCB *ide_issue_trim(BlockBackend *blk, -int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, +int64_t offset, QEMUIOVector *qiov, BdrvRequestFlags flags, BlockCompletionFunc *cb, void *opaque); BlockAIOCB *ide_buffered_readv(IDEState *s, int64_t sector_num, QEMUIOVector *iov, int nb_sectors, diff --git a/include/sysemu/dma.h b/include/sysemu/dma.h index b0fbb9b..0f7cd4d 100644 --- a/include/sysemu/dma.h +++ b/include/sysemu/dma.h @@ -197,8 +197,8 @@ void qemu_sglist_add(QEMUSGList *qsg, dma_addr_t base, dma_addr_t len); void qemu_sglist_destroy(QEMUSGList *qsg); #endif -typedef BlockAIOCB *DMAIOFunc(BlockBackend *blk, int64_t sector_num, - QEMUIOVector *iov, int nb_sectors, +typedef BlockAIOCB *DMAIOFunc(BlockBackend *blk, int64_t offset, + QEMUIOVector *iov, BdrvRequestFlags flags, BlockCompletionFunc *cb, void *opaque); BlockAIOCB *dma_blk_io(BlockBackend *blk, diff --git a/dma-helpers.c b/dma-helpers.c index 4ad0bca..a6cc15f 100644 --- a/dma-helpers.c +++ b/dma-helpers.c @@ -73,7 +73,7 @@ typedef struct { BlockBackend *blk; BlockAIOCB *acb; QEMUSGList *sg; -uint64_t sector_num; +uint64_t offset; DMADirection dir; int sg_cur_index; dma_addr_t sg_cur_byte; @@ -130,7 +130,7 @@ static void dma_blk_cb(void *opaque, int ret) trace_dma_blk_cb(dbs, ret); dbs->acb = NULL; -dbs->sector_num += dbs->iov.size / 512; +dbs->offset += dbs->iov.size; if (dbs->sg_cur_index == dbs->sg->nsg || ret < 0) { dma_complete(dbs, ret); @@ -164,8 +164,8 @@ static void dma_blk_cb(void *opaque, int ret) qemu_iovec_discard_back(>iov, dbs->iov.size & ~BDRV_SECTOR_MASK); } -dbs->acb = dbs->io_func(dbs->blk, dbs->sector_num, >iov, -dbs->iov.size / 512, dma_blk_cb, dbs); +dbs->acb = dbs->io_func(dbs->blk, dbs->offset, >iov, 0, +dma_blk_cb, dbs); assert(dbs->acb); } @@ -203,7 +203,7 @@ BlockAIOCB *dma_blk_io( dbs->acb = NULL; dbs->blk = blk; dbs->sg = sg; -dbs->sector_num = sector_num; +dbs->offset = sector_num << BDRV_SECTOR_BITS; dbs->sg_cur_index = 0; dbs->sg_cur_byte = 0; dbs->dir = dir; @@ -219,7 +219,7 @@ BlockAIOCB *dma_blk_read(BlockBackend *blk, QEMUSGList *sg, uint64_t sector, void (*cb)(void *opaque, int ret), void *opaque) { -return dma_blk_io(blk, sg, sector, blk_aio_readv, cb, opaque, +return dma_blk_io(blk, sg, sector, blk_aio_preadv, cb, opaque, DMA_DIRECTION_FROM_DEVICE); } @@ -227,7 +227,7 @@ BlockAIOCB *dma_blk_write(BlockBackend *blk, QEMUSGList *sg, uint64_t sector, void (*cb)(void *opaque, int ret), void *opaque) { -return dma_blk_io(blk, sg, sector, blk_aio_writev, cb, opaque, +return dma_blk_io(blk, sg, sector, blk_aio_pwritev, cb, opaque, DMA_DIRECTION_TO_DEVICE); } diff --git a/hw/ide/core.c b/hw/ide/core.c index 41e6a2d..fe2bfba 100644 --- a/hw/ide/core.c +++ b/hw/ide/core.c @@ -442,7 +442,7 @@ static void ide_issue_trim_cb(void *opaque, int ret) } BlockAIOCB *ide_issue_trim(BlockBackend *blk, -int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, +int64_t offset, QEMUIOVector *qiov, BdrvRequestFlags flags, BlockCompletionFunc *cb, void *opaque) { TrimAIOCB *iocb; @@ -616,8 +616,8 @@ BlockAIOCB *ide_buffered_readv(IDEState *s, int64_t sector_num, req->iov.iov_len = iov->size; qemu_iovec_init_external(>qiov, >iov, 1); -aioreq = blk_aio_readv(s->blk, sector_num, >qiov, nb_sectors, - ide_buffered_readv_cb, req); +aioreq = blk_aio_preadv(s->blk, sector_num << BDRV_SECTOR_BITS, +>qiov, 0, ide_buffered_readv_cb, req); QLIST_INSERT_HEAD(>buffered_requests, req, list); return aioreq; @@
Re: [Qemu-block] [PATCH v2 02/13] block: Introduce BlockBackendPublic
On 04/22/2016 11:42 AM, Kevin Wolf wrote: > Some features, like I/O throttling, are implemented outside > block-backend.c, but still want to keep information in BlockBackend, > e.g. list entries that allow keeping a list of BlockBackends. > > In order to avoid exposing the whole struct layout in the public header > file, this patch introduces an embedded public struct where such > information can be added and a pair of functions to convert between > BlockBackend and BlockBackendPublic. > > Signed-off-by: Kevin Wolf> --- > +/* This struct is embedded in (the private) BlockBackend struct and contains > + * fields that must be public. This is in particular for QLIST_ENTRY() and > + * friends so that BlockBackends can be kept in lists outside > block-backend.c */ > +typedef struct BlockBackendPublic { > +int dummy; /* empty structs are illegal */ Nit - it's not breaking any laws, so s/illegal/invalid/ sounds slightly better. But not enough to stop me from reviewing. Reviewed-by: Eric Blake -- Eric Blake eblake redhat com+1-919-301-3266 Libvirt virtualization library http://libvirt.org signature.asc Description: OpenPGP digital signature
Re: [Qemu-block] [PATCH v2 11/13] block: Remove bdrv_move_feature_fields()
On Fri 22 Apr 2016 07:42:40 PM CEST, Kevin Wolf wrote: > bdrv_move_feature_fields() and swap_feature_fields() are empty now, they > can be removed. > > Signed-off-by: Kevin Wolf> Reviewed-by: Max Reitz Reviewed-by: Alberto Garcia Berto
Re: [Qemu-block] [PATCH v5 11/14] qemu-io: Switch to byte-based block access
On 05/04/2016 07:10 AM, Kevin Wolf wrote: > Am 03.05.2016 um 17:42 hat Eric Blake geschrieben: >> blk_write() and blk_read() are now very simple wrappers around >> blk_pwrite() and blk_pread(). There's no reason to require >> the user to pass in aligned numbers. Keep 'read -p' and >> 'write -p' so that I don't have to hunt down and update all >> users of qemu-io, but make the default 'read' and 'write' now >> do the same behavior that used to require -p. >> >> Signed-off-by: Eric Blake> > This breaks at least qemu-iotests 023. Whoops - shows how often I run qemu-iotests. Will fix. -- Eric Blake eblake redhat com+1-919-301-3266 Libvirt virtualization library http://libvirt.org signature.asc Description: OpenPGP digital signature
Re: [Qemu-block] [PATCH v2 07/13] block: Move I/O throttling configuration functions to BlockBackend
On Fri 22 Apr 2016 07:42:36 PM CEST, Kevin Wolf wrote: > +void blk_set_io_limits(BlockBackend *blk, ThrottleConfig *cfg) > +{ > +throttle_group_config(blk, cfg); > +} > + > +void blk_io_limits_disable(BlockBackend *blk) > +{ > +assert(blk->public.throttle_state); > +bdrv_no_throttling_begin(blk_bs(blk)); > +throttle_group_unregister_blk(blk); > +bdrv_no_throttling_end(blk_bs(blk)); > +} > + > +/* should be called before bdrv_set_io_limits if a limit is set */ > +void blk_io_limits_enable(BlockBackend *blk, const char *group) s/bdrv_set_io_limits/blk_set_io_limits/ in the comment above. Otherwise the patch looks good to me. Reviewed-by: Alberto GarciaBerto
Re: [Qemu-block] [Qemu-devel] [PATCH v3 08/27] osdep: Add qemu_lock_fd and qemu_unlock_fd
On Thu, Apr 28, 2016 at 08:57:27PM +0800, Fam Zheng wrote: > They are wrappers of POSIX fcntl file locking, with the additional > interception of open/close (through qemu_open and qemu_close) to offer a > better semantics that preserves the locks across multiple life cycles of > different fds on the same file. The reason to make this semantics > change over the fcntl locks is to make the API cleaner for QEMU > subsystems. > > More precisely, we remove this "feature" of fcntl lock: > > If a process closes any file descriptor referring to a file, then > all of the process's locks on that file are released, regardless of > the file descriptor(s) on which the locks were obtained. > > as long as the fd is always open/closed through qemu_open and > qemu_close. > +ret = qemu_lock_do(fd, start, len, readonly ? F_RDLCK : F_WRLCK); > +if (ret == -1) { > +return -errno; > +} It still appears this patch would break libguestfs's valid use case. How about addressing what Dan wrote here: https://lists.gnu.org/archive/html/qemu-devel/2016-04/msg02927.html ? Rich. -- Richard Jones, Virtualization Group, Red Hat http://people.redhat.com/~rjones Read my programming and virtualization blog: http://rwmj.wordpress.com virt-p2v converts physical machines to virtual machines. Boot with a live CD or over the network (PXE) and turn machines into KVM guests. http://libguestfs.org/virt-v2v
Re: [Qemu-block] [PATCH v2 05/13] block: Move throttling fields from BDS to BB
On Fri 22 Apr 2016 07:42:34 PM CEST, Kevin Wolf wrote: > This patch changes where the throttling state is stored (used to be the > BlockDriverState, now it is the BlockBackend), but it doesn't actually > make it a BB level feature yet. For example, throttling is still > disabled when the BDS is detached from the BB. > > Signed-off-by: Kevin WolfReviewed-by: Alberto Garcia Berto
Re: [Qemu-block] [PATCH v5 11/14] qemu-io: Switch to byte-based block access
Am 03.05.2016 um 17:42 hat Eric Blake geschrieben: > blk_write() and blk_read() are now very simple wrappers around > blk_pwrite() and blk_pread(). There's no reason to require > the user to pass in aligned numbers. Keep 'read -p' and > 'write -p' so that I don't have to hunt down and update all > users of qemu-io, but make the default 'read' and 'write' now > do the same behavior that used to require -p. > > Signed-off-by: Eric BlakeThis breaks at least qemu-iotests 023. Kevin
Re: [Qemu-block] [PATCH v2 06/13] block: Move actual I/O throttling to BlockBackend
On Fri 22 Apr 2016 07:42:35 PM CEST, Kevin Wolf wrote: > Signed-off-by: Kevin WolfReviewed-by: Alberto Garcia Berto
Re: [Qemu-block] [PATCH v2 04/13] block: Convert throttle_group_get_name() to BlockBackend
On Fri 22 Apr 2016 07:42:33 PM CEST, Kevin Wolf wrote: > Signed-off-by: Kevin WolfReviewed-by: Alberto Garcia Berto
Re: [Qemu-block] [PATCH v2 03/13] block: throttle-groups: Use BlockBackend pointers internally
On Fri 22 Apr 2016 07:42:32 PM CEST, Kevin Wolf wrote: > As a first step towards moving I/O throttling to the BlockBackend level, > this patch changes all pointers in struct ThrottleGroup from referencing > a BlockDriverState to referencing a BlockBackend. > > This change is valid because we made sure that throttling can only be > enabled on BDSes which have a BB attached. > > Signed-off-by: Kevin WolfReviewed-by: Alberto Garcia Berto
Re: [Qemu-block] [PATCH v2 01/13] block: Make sure throttled BDSes always have a BB
On Fri 22 Apr 2016 07:42:30 PM CEST, Kevin Wolf wrote: > It was already true in principle that a throttled BDS always has a BB > attached, except that the order of operations while attaching or > detaching a BDS to/from a BB wasn't careful enough. > > This commit breaks graph manipulations while I/O throttling is enabled. > It would have been possible to keep things working with some temporary > hacks, but quite cumbersome, so it's not worth the hassle. We'll fix > things again in a minute. > > Signed-off-by: Kevin Wolf> Reviewed-by: Eric Blake Reviewed-by: Alberto Garcia Berto
Re: [Qemu-block] [PATCH v2 02/13] block: Introduce BlockBackendPublic
On Fri 22 Apr 2016 07:42:31 PM CEST, Kevin Wolf wrote: > Some features, like I/O throttling, are implemented outside > block-backend.c, but still want to keep information in BlockBackend, > e.g. list entries that allow keeping a list of BlockBackends. > > In order to avoid exposing the whole struct layout in the public header > file, this patch introduces an embedded public struct where such > information can be added and a pair of functions to convert between > BlockBackend and BlockBackendPublic. > > Signed-off-by: Kevin WolfReviewed-by: Alberto Garcia Berto
Re: [Qemu-block] [Nbd] question on ioctl NBD_SET_FLAGS
On 20 Apr 2016, at 17:18, Wouter Verhelstwrote: > Hi Eric, > > On Wed, Apr 20, 2016 at 09:42:02AM -0600, Eric Blake wrote: > [...] >> But in 3.9, the overlap bug was still present, and the set of global >> flags had grown to include NBD_FLAG_NO_ZEROS (commit 038e066), which >> overlaps with NBD_FLAG_READ_ONLY. Ouch. This means that a client >> talking to a server that advertises NO_ZEROES means that the client will >> mistakenly tell the kernel to treat EVERY export as read-only, even if >> the client doesn't respond with NBD_FLAG_C_NO_ZEROES. >> >> 3.10 fixed things; negotiate() now uses uint16_t *flags (instead of u32 >> *), and no longer tries to merge global flags with transmission flags; >> only the transmission flags are ever passed to the kernel via >> NBD_SET_FLAGS. Maybe it's good that there was only 2 weeks between 3.9 >> and 3.10, so hopefully few distros are trying to ship that broken version. > > Well, yeah, since 3.10 was an "oops" release when 3.9 exposed that bug > (which indeed had existed for a while) and which was reported quite > quickly on the list. Released versions of nbd which have the bug exist > though, and trying to have a 3.8 (or below) client talk to a 3.9 (or > above) server has the same issue. > > I decided that there was no way in which I could fix it, and that "the > export is readonly" is bad but not a "critical data loss" kind of bug, > so releasing 3.10 was pretty much the only sane thing I could do (other > than delaying NO_ZEROES, which might have worked too). For what it's worth, Ubuntu 14.04 (still under long term support) ships with nbd-client 3.7 and I've just had a bug report filed against gonbdserver where all exports go readonly. https://github.com/abligh/gonbdserver/issues/4 I've added an option to disable NBD_FLAG_NO_ZEROS on nbd-server but that's pretty horrible. And I think this will affect all pre-3.10 clients talking to 3.9 and later servers. I'm going to find a minimal patch to nbd-client and offer that to Ubuntu / Debian. This message is here in part so I have something to point them at on the mailing list :-) -- Alex Bligh
Re: [Qemu-block] [PATCH 1/2] block: Invalidate all children
Am 19.04.2016 um 03:42 hat Fam Zheng geschrieben: > Currently we only recurse to bs->file, which will miss the children in quorum > and VMDK. > > Recurse into the whole subtree to avoid that. > > Signed-off-by: Fam Zheng> --- > block.c | 20 ++-- > 1 file changed, 14 insertions(+), 6 deletions(-) > > diff --git a/block.c b/block.c > index d4939b4..fa8b38f 100644 > --- a/block.c > +++ b/block.c > @@ -3201,6 +3201,7 @@ void bdrv_init_with_whitelist(void) > > void bdrv_invalidate_cache(BlockDriverState *bs, Error **errp) > { > +BdrvChild *child; > Error *local_err = NULL; > int ret; > > @@ -3215,13 +3216,20 @@ void bdrv_invalidate_cache(BlockDriverState *bs, > Error **errp) > > if (bs->drv->bdrv_invalidate_cache) { > bs->drv->bdrv_invalidate_cache(bs, _err); > -} else if (bs->file) { > -bdrv_invalidate_cache(bs->file->bs, _err); The old behaviour was that we only recurse for bs->file if the block driver doesn't have its own implementation. This means that in qcow2, for example, we call bdrv_invalidate_cache() explicitly for bs->file. If we can already invalidate it here, the call inside qcow2 and probably other drivers could go away. Kevin
Re: [Qemu-block] [PATCH v4] block: always compile-check debug prints
Am 29.04.2016 um 07:12 hat Zhou Jie geschrieben: > Files with conditional debug statements should ensure that the printf is > always compiled. This prevents bitrot of the format string of the debug > statement. And switch debug output to stderr. > > Signed-off-by: Zhou Jie> Reviewed-by: Eric Blake Thanks, applied to block-next. Kevin
[Qemu-block] [PATCH 12/14] backup: Use BlockBackend for I/O
This changes the backup block job to use the job's BlockBackend for performing its I/O. job->bs isn't used by the backup code any more afterwards. Signed-off-by: Kevin Wolf--- block/backup.c | 42 ++ block/block-backend.c | 14 ++ block/io.c | 9 - blockdev.c | 4 +--- include/block/block.h | 2 -- include/sysemu/block-backend.h | 2 ++ trace-events | 2 +- 7 files changed, 40 insertions(+), 35 deletions(-) diff --git a/block/backup.c b/block/backup.c index 670ba50..ba9eb42 100644 --- a/block/backup.c +++ b/block/backup.c @@ -36,7 +36,7 @@ typedef struct CowRequest { typedef struct BackupBlockJob { BlockJob common; -BlockDriverState *target; +BlockBackend *target; /* bitmap for sync=incremental */ BdrvDirtyBitmap *sync_bitmap; MirrorSyncMode sync_mode; @@ -99,7 +99,7 @@ static int coroutine_fn backup_do_cow(BackupBlockJob *job, bool *error_is_read, bool is_write_notifier) { -BlockDriverState *bs = job->common.bs; +BlockBackend *blk = job->common.blk; CowRequest cow_request; struct iovec iov; QEMUIOVector bounce_qiov; @@ -132,19 +132,18 @@ static int coroutine_fn backup_do_cow(BackupBlockJob *job, start * sectors_per_cluster); if (!bounce_buffer) { -bounce_buffer = qemu_blockalign(bs, job->cluster_size); +bounce_buffer = blk_blockalign(blk, job->cluster_size); } iov.iov_base = bounce_buffer; iov.iov_len = n * BDRV_SECTOR_SIZE; qemu_iovec_init_external(_qiov, , 1); if (is_write_notifier) { -ret = bdrv_co_readv_no_serialising(bs, - start * sectors_per_cluster, - n, _qiov); +ret = blk_co_readv_no_serialising(blk, start * sectors_per_cluster, + n, _qiov); } else { -ret = bdrv_co_readv(bs, start * sectors_per_cluster, n, -_qiov); +ret = blk_co_readv(blk, start * sectors_per_cluster, n, + _qiov); } if (ret < 0) { trace_backup_do_cow_read_fail(job, start, ret); @@ -155,13 +154,13 @@ static int coroutine_fn backup_do_cow(BackupBlockJob *job, } if (buffer_is_zero(iov.iov_base, iov.iov_len)) { -ret = bdrv_co_write_zeroes(job->target, +ret = blk_co_write_zeroes(job->target, start * sectors_per_cluster, n, BDRV_REQ_MAY_UNMAP); } else { -ret = bdrv_co_writev(job->target, - start * sectors_per_cluster, n, - _qiov); +ret = blk_co_writev(job->target, +start * sectors_per_cluster, n, +_qiov); } if (ret < 0) { trace_backup_do_cow_write_fail(job, start, ret); @@ -203,7 +202,7 @@ static int coroutine_fn backup_before_write_notify( int64_t sector_num = req->offset >> BDRV_SECTOR_BITS; int nb_sectors = req->bytes >> BDRV_SECTOR_BITS; -assert(req->bs == job->common.bs); +assert(req->bs == blk_bs(job->common.blk)); assert((req->offset & (BDRV_SECTOR_SIZE - 1)) == 0); assert((req->bytes & (BDRV_SECTOR_SIZE - 1)) == 0); @@ -224,7 +223,7 @@ static void backup_set_speed(BlockJob *job, int64_t speed, Error **errp) static void backup_cleanup_sync_bitmap(BackupBlockJob *job, int ret) { BdrvDirtyBitmap *bm; -BlockDriverState *bs = job->common.bs; +BlockDriverState *bs = blk_bs(job->common.blk); if (ret < 0 || block_job_is_cancelled(>common)) { /* Merge the successor back into the parent, delete nothing. */ @@ -282,7 +281,7 @@ static void backup_complete(BlockJob *job, void *opaque) BackupBlockJob *s = container_of(job, BackupBlockJob, common); BackupCompleteData *data = opaque; -bdrv_unref(s->target); +blk_unref(s->target); block_job_completed(job, data->ret); g_free(data); @@ -378,8 +377,8 @@ static void coroutine_fn backup_run(void *opaque) { BackupBlockJob *job = opaque; BackupCompleteData *data; -BlockDriverState *bs = job->common.bs; -BlockDriverState *target = job->target; +BlockDriverState *bs = blk_bs(job->common.blk); +BlockBackend *target = job->target; int64_t start, end; int64_t sectors_per_cluster = cluster_size_sectors(job); int ret = 0; @@ -468,7 +467,7 @@ static void coroutine_fn backup_run(void *opaque) qemu_co_rwlock_unlock(>flush_rwlock); g_free(job->done_bitmap); -
[Qemu-block] [PATCH 14/14] blockjob: Remove BlockJob.bs
There is a single remaining user in qemu-img, which can be trivially converted to using BlockJob.blk instead. Signed-off-by: Kevin Wolf--- blockjob.c | 1 - include/block/blockjob.h | 1 - qemu-img.c | 2 +- 3 files changed, 1 insertion(+), 3 deletions(-) diff --git a/blockjob.c b/blockjob.c index 01b58c7..c0d8ee9 100644 --- a/blockjob.c +++ b/blockjob.c @@ -83,7 +83,6 @@ void *block_job_create(const BlockJobDriver *driver, BlockDriverState *bs, job->driver= driver; job->id= g_strdup(bdrv_get_device_name(bs)); -job->bs= bs; job->blk = blk; job->cb= cb; job->opaque= opaque; diff --git a/include/block/blockjob.h b/include/block/blockjob.h index 32012af..86d2807 100644 --- a/include/block/blockjob.h +++ b/include/block/blockjob.h @@ -82,7 +82,6 @@ struct BlockJob { const BlockJobDriver *driver; /** The block device on which the job is operating. */ -BlockDriverState *bs; /* TODO Remove */ BlockBackend *blk; /** diff --git a/qemu-img.c b/qemu-img.c index 46f2a6d..77148c2 100644 --- a/qemu-img.c +++ b/qemu-img.c @@ -775,7 +775,7 @@ static void common_block_job_cb(void *opaque, int ret) static void run_block_job(BlockJob *job, Error **errp) { -AioContext *aio_context = bdrv_get_aio_context(job->bs); +AioContext *aio_context = blk_get_aio_context(job->blk); do { aio_poll(aio_context, true); -- 1.8.3.1
[Qemu-block] [PATCH 09/14] backup: Pack Notifier within BackupBlockJob
From: John SnowInstead of relying on peeking at bs->job, we want to explicitly get a reference to the job that was involved in this notifier callback. Pack the Notifier inside of the BackupBlockJob so we can use container_of to get a reference back to the BackupBlockJob object. This cuts out one more case where we rely unnecessarily on bs->job. Suggested-by: Paolo Bonzini Signed-off-by: John Snow Signed-off-by: Kevin Wolf --- block/backup.c | 19 ++- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/block/backup.c b/block/backup.c index a990cf1..a57288f 100644 --- a/block/backup.c +++ b/block/backup.c @@ -47,6 +47,7 @@ typedef struct BackupBlockJob { uint64_t sectors_read; unsigned long *done_bitmap; int64_t cluster_size; +NotifierWithReturn before_write; QLIST_HEAD(, CowRequest) inflight_reqs; } BackupBlockJob; @@ -94,11 +95,11 @@ static void cow_request_end(CowRequest *req) } static int coroutine_fn backup_do_cow(BlockDriverState *bs, + BackupBlockJob *job, int64_t sector_num, int nb_sectors, bool *error_is_read, bool is_write_notifier) { -BackupBlockJob *job = (BackupBlockJob *)bs->job; CowRequest cow_request; struct iovec iov; QEMUIOVector bounce_qiov; @@ -197,6 +198,7 @@ static int coroutine_fn backup_before_write_notify( NotifierWithReturn *notifier, void *opaque) { +BackupBlockJob *job = container_of(notifier, BackupBlockJob, before_write); BdrvTrackedRequest *req = opaque; int64_t sector_num = req->offset >> BDRV_SECTOR_BITS; int nb_sectors = req->bytes >> BDRV_SECTOR_BITS; @@ -204,7 +206,8 @@ static int coroutine_fn backup_before_write_notify( assert((req->offset & (BDRV_SECTOR_SIZE - 1)) == 0); assert((req->bytes & (BDRV_SECTOR_SIZE - 1)) == 0); -return backup_do_cow(req->bs, sector_num, nb_sectors, NULL, true); +return backup_do_cow(req->bs, job, sector_num, + nb_sectors, NULL, true); } static void backup_set_speed(BlockJob *job, int64_t speed, Error **errp) @@ -343,7 +346,7 @@ static int coroutine_fn backup_run_incremental(BackupBlockJob *job) if (yield_and_check(job)) { return ret; } -ret = backup_do_cow(bs, cluster * sectors_per_cluster, +ret = backup_do_cow(bs, job, cluster * sectors_per_cluster, sectors_per_cluster, _is_read, false); if ((ret < 0) && @@ -378,9 +381,6 @@ static void coroutine_fn backup_run(void *opaque) BackupCompleteData *data; BlockDriverState *bs = job->common.bs; BlockDriverState *target = job->target; -NotifierWithReturn before_write = { -.notify = backup_before_write_notify, -}; int64_t start, end; int64_t sectors_per_cluster = cluster_size_sectors(job); int ret = 0; @@ -393,7 +393,8 @@ static void coroutine_fn backup_run(void *opaque) job->done_bitmap = bitmap_new(end); -bdrv_add_before_write_notifier(bs, _write); +job->before_write.notify = backup_before_write_notify; +bdrv_add_before_write_notifier(bs, >before_write); if (job->sync_mode == MIRROR_SYNC_MODE_NONE) { while (!block_job_is_cancelled(>common)) { @@ -445,7 +446,7 @@ static void coroutine_fn backup_run(void *opaque) } } /* FULL sync mode we copy the whole drive. */ -ret = backup_do_cow(bs, start * sectors_per_cluster, +ret = backup_do_cow(bs, job, start * sectors_per_cluster, sectors_per_cluster, _is_read, false); if (ret < 0) { /* Depending on error action, fail now or retry cluster */ @@ -461,7 +462,7 @@ static void coroutine_fn backup_run(void *opaque) } } -notifier_with_return_remove(_write); +notifier_with_return_remove(>before_write); /* wait until pending backup_do_cow() calls have completed */ qemu_co_rwlock_wrlock(>flush_rwlock); -- 1.8.3.1
[Qemu-block] [PATCH 08/14] backup: Don't leak BackupBlockJob in error path
Signed-off-by: Kevin Wolf--- block/backup.c | 7 +-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/block/backup.c b/block/backup.c index fec45e8..a990cf1 100644 --- a/block/backup.c +++ b/block/backup.c @@ -485,6 +485,7 @@ void backup_start(BlockDriverState *bs, BlockDriverState *target, { int64_t len; BlockDriverInfo bdi; +BackupBlockJob *job = NULL; int ret; assert(bs); @@ -542,8 +543,7 @@ void backup_start(BlockDriverState *bs, BlockDriverState *target, goto error; } -BackupBlockJob *job = block_job_create(_job_driver, bs, speed, - cb, opaque, errp); +job = block_job_create(_job_driver, bs, speed, cb, opaque, errp); if (!job) { goto error; } @@ -584,4 +584,7 @@ void backup_start(BlockDriverState *bs, BlockDriverState *target, if (sync_bitmap) { bdrv_reclaim_dirty_bitmap(bs, sync_bitmap, NULL); } +if (job) { +block_job_unref(>common); +} } -- 1.8.3.1
[Qemu-block] [PATCH 13/14] commit: Use BlockBackend for I/O
This changes the commit block job to use the job's BlockBackend for performing its I/O. job->bs isn't used by the commit code any more afterwards. Signed-off-by: Kevin Wolf--- block/commit.c | 42 +++--- 1 file changed, 27 insertions(+), 15 deletions(-) diff --git a/block/commit.c b/block/commit.c index f308c8c..863e059 100644 --- a/block/commit.c +++ b/block/commit.c @@ -36,28 +36,34 @@ typedef struct CommitBlockJob { BlockJob common; RateLimit limit; BlockDriverState *active; -BlockDriverState *top; -BlockDriverState *base; +BlockBackend *top; +BlockBackend *base; BlockdevOnError on_error; int base_flags; int orig_overlay_flags; char *backing_file_str; } CommitBlockJob; -static int coroutine_fn commit_populate(BlockDriverState *bs, -BlockDriverState *base, +static int coroutine_fn commit_populate(BlockBackend *bs, BlockBackend *base, int64_t sector_num, int nb_sectors, void *buf) { int ret = 0; +QEMUIOVector qiov; +struct iovec iov = { +.iov_base = (void *)buf, +.iov_len = nb_sectors * BDRV_SECTOR_SIZE, +}; -ret = bdrv_read(bs, sector_num, buf, nb_sectors); -if (ret) { +qemu_iovec_init_external(, , 1); + +ret = blk_co_readv(bs, sector_num, nb_sectors, ); +if (ret < 0) { return ret; } -ret = bdrv_write(base, sector_num, buf, nb_sectors); -if (ret) { +ret = blk_co_writev(base, sector_num, nb_sectors, ); +if (ret < 0) { return ret; } @@ -73,8 +79,8 @@ static void commit_complete(BlockJob *job, void *opaque) CommitBlockJob *s = container_of(job, CommitBlockJob, common); CommitCompleteData *data = opaque; BlockDriverState *active = s->active; -BlockDriverState *top = s->top; -BlockDriverState *base = s->base; +BlockDriverState *top = blk_bs(s->top); +BlockDriverState *base = blk_bs(s->base); BlockDriverState *overlay_bs; int ret = data->ret; @@ -94,6 +100,8 @@ static void commit_complete(BlockJob *job, void *opaque) bdrv_reopen(overlay_bs, s->orig_overlay_flags, NULL); } g_free(s->backing_file_str); +blk_unref(s->top); +blk_unref(s->base); block_job_completed(>common, ret); g_free(data); } @@ -102,8 +110,8 @@ static void coroutine_fn commit_run(void *opaque) { CommitBlockJob *s = opaque; CommitCompleteData *data; -BlockDriverState *top = s->top; -BlockDriverState *base = s->base; +BlockDriverState *top = blk_bs(s->top); +BlockDriverState *base = blk_bs(s->base); int64_t sector_num, end; int ret = 0; int n = 0; @@ -158,7 +166,7 @@ wait: goto wait; } } -ret = commit_populate(top, base, sector_num, n, buf); +ret = commit_populate(s->top, s->base, sector_num, n, buf); bytes_written += n * BDRV_SECTOR_SIZE; } if (ret < 0) { @@ -253,8 +261,12 @@ void commit_start(BlockDriverState *bs, BlockDriverState *base, return; } -s->base = base; -s->top= top; +s->base = blk_new(_abort); +blk_insert_bs(s->base, base); + +s->top = blk_new(_abort); +blk_insert_bs(s->top, top); + s->active = bs; s->base_flags = orig_base_flags; -- 1.8.3.1
[Qemu-block] [PATCH 02/14] block: Cancel jobs first in bdrv_close_all()
So far, bdrv_close_all() first removed all root BlockDriverStates of BlockBackends and monitor owned BDSes, and then assumed that the remaining BDSes must be related to jobs and cancelled these jobs. This order doesn't work that well any more when block jobs use BlockBackends internally because then they will lose their BDS before being cancelled. This patch changes bdrv_close_all() to first cancel all jobs and then remove all root BDSes from the remaining BBs. Signed-off-by: Kevin Wolf--- block.c | 23 +-- blockjob.c | 13 + include/block/blockjob.h | 7 +++ 3 files changed, 21 insertions(+), 22 deletions(-) diff --git a/block.c b/block.c index 37aa0a3..bf6c204 100644 --- a/block.c +++ b/block.c @@ -2165,8 +2165,7 @@ static void bdrv_close(BlockDriverState *bs) void bdrv_close_all(void) { -BlockDriverState *bs; -AioContext *aio_context; +block_job_cancel_sync_all(); /* Drop references from requests still in flight, such as canceled block * jobs whose AIO context has not been polled yet */ @@ -2174,26 +2173,6 @@ void bdrv_close_all(void) blk_remove_all_bs(); blockdev_close_all_bdrv_states(); - -/* Cancel all block jobs */ -while (!QTAILQ_EMPTY(_bdrv_states)) { -QTAILQ_FOREACH(bs, _bdrv_states, bs_list) { -aio_context = bdrv_get_aio_context(bs); - -aio_context_acquire(aio_context); -if (bs->job) { -block_job_cancel_sync(bs->job); -aio_context_release(aio_context); -break; -} -aio_context_release(aio_context); -} - -/* All the remaining BlockDriverStates are referenced directly or - * indirectly from block jobs, so there needs to be at least one BDS - * directly used by a block job */ -assert(bs); -} } static void change_parent_backing_link(BlockDriverState *from, diff --git a/blockjob.c b/blockjob.c index 0f1bc77..e916b41 100644 --- a/blockjob.c +++ b/blockjob.c @@ -331,6 +331,19 @@ int block_job_cancel_sync(BlockJob *job) return block_job_finish_sync(job, _job_cancel_err, NULL); } +void block_job_cancel_sync_all(void) +{ +BlockJob *job; +AioContext *aio_context; + +while ((job = QLIST_FIRST(_jobs))) { +aio_context = bdrv_get_aio_context(job->bs); +aio_context_acquire(aio_context); +block_job_cancel_sync(job); +aio_context_release(aio_context); +} +} + int block_job_complete_sync(BlockJob *job, Error **errp) { return block_job_finish_sync(job, _job_complete, errp); diff --git a/include/block/blockjob.h b/include/block/blockjob.h index 30bb2c6..4ac6831 100644 --- a/include/block/blockjob.h +++ b/include/block/blockjob.h @@ -371,6 +371,13 @@ bool block_job_is_paused(BlockJob *job); int block_job_cancel_sync(BlockJob *job); /** + * block_job_cancel_sync_all: + * + * Synchronously cancels all jobs using block_job_cancel_sync(). + */ +void block_job_cancel_sync_all(void); + +/** * block_job_complete_sync: * @job: The job to be completed. * @errp: Error object which may be set by block_job_complete(); this is not -- 1.8.3.1
[Qemu-block] [PATCH 01/14] block: keep a list of block jobs
From: Alberto GarciaThe current way to obtain the list of existing block jobs is to iterate over all root nodes and check which ones own a job. Since we want to be able to support block jobs in other nodes as well, this patch keeps a list of jobs that is updated every time one is created or destroyed. Signed-off-by: Alberto Garcia Signed-off-by: Kevin Wolf --- blockjob.c | 13 + include/block/blockjob.h | 14 ++ 2 files changed, 27 insertions(+) diff --git a/blockjob.c b/blockjob.c index 5b840a7..0f1bc77 100644 --- a/blockjob.c +++ b/blockjob.c @@ -50,6 +50,16 @@ struct BlockJobTxn { int refcnt; }; +static QLIST_HEAD(, BlockJob) block_jobs = QLIST_HEAD_INITIALIZER(block_jobs); + +BlockJob *block_job_next(BlockJob *job) +{ +if (!job) { +return QLIST_FIRST(_jobs); +} +return QLIST_NEXT(job, job_list); +} + void *block_job_create(const BlockJobDriver *driver, BlockDriverState *bs, int64_t speed, BlockCompletionFunc *cb, void *opaque, Error **errp) @@ -76,6 +86,8 @@ void *block_job_create(const BlockJobDriver *driver, BlockDriverState *bs, job->refcnt= 1; bs->job = job; +QLIST_INSERT_HEAD(_jobs, job, job_list); + /* Only set speed when necessary to avoid NotSupported error */ if (speed != 0) { Error *local_err = NULL; @@ -103,6 +115,7 @@ void block_job_unref(BlockJob *job) bdrv_unref(job->bs); error_free(job->blocker); g_free(job->id); +QLIST_REMOVE(job, job_list); g_free(job); } } diff --git a/include/block/blockjob.h b/include/block/blockjob.h index 073a433..30bb2c6 100644 --- a/include/block/blockjob.h +++ b/include/block/blockjob.h @@ -135,6 +135,9 @@ struct BlockJob { */ bool deferred_to_main_loop; +/** Element of the list of block jobs */ +QLIST_ENTRY(BlockJob) job_list; + /** Status that is published by the query-block-jobs QMP API */ BlockDeviceIoStatus iostatus; @@ -173,6 +176,17 @@ struct BlockJob { }; /** + * block_job_next: + * @job: A block job, or %NULL. + * + * Get the next element from the list of block jobs after @job, or the + * first one if @job is %NULL. + * + * Returns the requested job, or %NULL if there are no more jobs left. + */ +BlockJob *block_job_next(BlockJob *job); + +/** * block_job_create: * @job_type: The class object for the newly-created job. * @bs: The block -- 1.8.3.1
[Qemu-block] [PATCH 11/14] block: Add blk_co_readv/writev()
Signed-off-by: Kevin Wolf--- block/block-backend.c | 26 ++ include/sysemu/block-backend.h | 4 trace-events | 2 ++ 3 files changed, 32 insertions(+) diff --git a/block/block-backend.c b/block/block-backend.c index 25bb5ca..8dc3aa5 100644 --- a/block/block-backend.c +++ b/block/block-backend.c @@ -794,6 +794,19 @@ static int coroutine_fn blk_co_pwritev(BlockBackend *blk, int64_t offset, return bdrv_co_pwritev(blk_bs(blk), offset, bytes, qiov, flags); } +int coroutine_fn blk_co_readv(BlockBackend *blk, int64_t sector_num, + int nb_sectors, QEMUIOVector *qiov) +{ +trace_blk_co_readv(blk, blk_bs(blk), sector_num, nb_sectors); + +if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) { +return -EINVAL; +} + +return blk_co_preadv(blk, sector_num << BDRV_SECTOR_BITS, + nb_sectors << BDRV_SECTOR_BITS, qiov, 0); +} + int coroutine_fn blk_co_copy_on_readv(BlockBackend *blk, int64_t sector_num, int nb_sectors, QEMUIOVector *qiov) { @@ -808,6 +821,19 @@ int coroutine_fn blk_co_copy_on_readv(BlockBackend *blk, BDRV_REQ_COPY_ON_READ); } +int coroutine_fn blk_co_writev(BlockBackend *blk, int64_t sector_num, + int nb_sectors, QEMUIOVector *qiov) +{ +trace_blk_co_writev(blk, blk_bs(blk), sector_num, nb_sectors); + +if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) { +return -EINVAL; +} + +return blk_co_pwritev(blk, sector_num << BDRV_SECTOR_BITS, + nb_sectors << BDRV_SECTOR_BITS, qiov, 0); +} + typedef struct BlkRwCo { BlockBackend *blk; int64_t offset; diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h index eac4d88..d98552c 100644 --- a/include/sysemu/block-backend.h +++ b/include/sysemu/block-backend.h @@ -116,8 +116,12 @@ int blk_read(BlockBackend *blk, int64_t sector_num, uint8_t *buf, int nb_sectors); int blk_read_unthrottled(BlockBackend *blk, int64_t sector_num, uint8_t *buf, int nb_sectors); +int coroutine_fn blk_co_readv(BlockBackend *blk, int64_t sector_num, + int nb_sectors, QEMUIOVector *qiov); int coroutine_fn blk_co_copy_on_readv(BlockBackend *blk, int64_t sector_num, int nb_sectors, QEMUIOVector *qiov); +int coroutine_fn blk_co_writev(BlockBackend *blk, int64_t sector_num, + int nb_sectors, QEMUIOVector *qiov); int blk_write(BlockBackend *blk, int64_t sector_num, const uint8_t *buf, int nb_sectors); int blk_write_zeroes(BlockBackend *blk, int64_t sector_num, diff --git a/trace-events b/trace-events index 9beb588..3b5cce3 100644 --- a/trace-events +++ b/trace-events @@ -62,6 +62,8 @@ bdrv_open_common(void *bs, const char *filename, int flags, const char *format_n bdrv_lock_medium(void *bs, bool locked) "bs %p locked %d" # block/block-backend.c +blk_co_readv(void *blk, void *bs, int64_t sector_num, int nb_sector) "blk %p bs %p sector_num %"PRId64" nb_sectors %d" +blk_co_writev(void *blk, void *bs, int64_t sector_num, int nb_sector) "blk %p bs %p sector_num %"PRId64" nb_sectors %d" blk_co_copy_on_readv(void *blk, void *bs, int64_t sector_num, int nb_sector) "blk %p bs %p sector_num %"PRId64" nb_sectors %d" # block/io.c -- 1.8.3.1
[Qemu-block] [PATCH 06/14] mirror: Allow target that already has a BlockBackend
We had to forbid mirroring to a target BDS that already had a BB attached because the node swapping at job completion would add a second BB and we didn't support multiple BBs on a single BDS at the time. Now we do, so we can lift the restriction. As we allow additional BlockBackends for the target, we must expect other users to be sending requests. There may no requests be in flight during the graph modification, so we have to drain those users now. The core part of this patch is a revert of commit 40365552. Signed-off-by: Kevin Wolf--- block/mirror.c | 33 ++--- blockdev.c | 4 tests/qemu-iotests/041 | 27 --- tests/qemu-iotests/041.out | 4 ++-- 4 files changed, 8 insertions(+), 60 deletions(-) diff --git a/block/mirror.c b/block/mirror.c index efca8fc..23aa10e 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -20,7 +20,6 @@ #include "qapi/qmp/qerror.h" #include "qemu/ratelimit.h" #include "qemu/bitmap.h" -#include "qemu/error-report.h" #define SLICE_TIME1ULL /* ns */ #define MAX_IN_FLIGHT 16 @@ -466,24 +465,20 @@ static void mirror_exit(BlockJob *job, void *opaque) to_replace = s->to_replace; } -/* This was checked in mirror_start_job(), but meanwhile one of the - * nodes could have been newly attached to a BlockBackend. */ -if (bdrv_has_blk(to_replace) && bdrv_has_blk(s->target)) { -error_report("block job: Can't create node with two BlockBackends"); -data->ret = -EINVAL; -goto out; -} - if (bdrv_get_flags(s->target) != bdrv_get_flags(to_replace)) { bdrv_reopen(s->target, bdrv_get_flags(to_replace), NULL); } + +/* The mirror job has no requests in flight any more, but we need to + * drain potential other users of the BDS before changing the graph. */ +bdrv_drained_begin(s->target); bdrv_replace_in_backing_chain(to_replace, s->target); +bdrv_drained_end(s->target); + /* We just changed the BDS the job BB refers to */ blk_remove_bs(job->blk); blk_insert_bs(job->blk, src); } - -out: if (s->to_replace) { bdrv_op_unblock_all(s->to_replace, s->replace_blocker); error_free(s->replace_blocker); @@ -807,7 +802,6 @@ static void mirror_start_job(BlockDriverState *bs, BlockDriverState *target, bool is_none_mode, BlockDriverState *base) { MirrorBlockJob *s; -BlockDriverState *replaced_bs; if (granularity == 0) { granularity = bdrv_get_default_bitmap_granularity(target); @@ -824,21 +818,6 @@ static void mirror_start_job(BlockDriverState *bs, BlockDriverState *target, buf_size = DEFAULT_MIRROR_BUF_SIZE; } -/* We can't support this case as long as the block layer can't handle - * multiple BlockBackends per BlockDriverState. */ -if (replaces) { -replaced_bs = bdrv_lookup_bs(replaces, replaces, errp); -if (replaced_bs == NULL) { -return; -} -} else { -replaced_bs = bs; -} -if (bdrv_has_blk(replaced_bs) && bdrv_has_blk(target)) { -error_setg(errp, "Can't create node with two BlockBackends"); -return; -} - s = block_job_create(driver, bs, speed, cb, opaque, errp); if (!s) { return; diff --git a/blockdev.c b/blockdev.c index 516c6cb..da9eeff 100644 --- a/blockdev.c +++ b/blockdev.c @@ -3434,10 +3434,6 @@ static void blockdev_mirror_common(BlockDriverState *bs, if (bdrv_op_is_blocked(target, BLOCK_OP_TYPE_MIRROR_TARGET, errp)) { return; } -if (bdrv_has_blk(target)) { -error_setg(errp, "Cannot mirror to an attached block device"); -return; -} if (!bs->backing && sync == MIRROR_SYNC_MODE_TOP) { sync = MIRROR_SYNC_MODE_FULL; diff --git a/tests/qemu-iotests/041 b/tests/qemu-iotests/041 index b1c542f..ed1d9d4 100755 --- a/tests/qemu-iotests/041 +++ b/tests/qemu-iotests/041 @@ -207,33 +207,6 @@ class TestSingleBlockdev(TestSingleDrive): test_image_not_found = None test_small_buffer2 = None -class TestBlockdevAttached(iotests.QMPTestCase): -image_len = 1 * 1024 * 1024 # MB - -def setUp(self): -iotests.create_image(backing_img, self.image_len) -qemu_img('create', '-f', iotests.imgfmt, '-o', 'backing_file=%s' % backing_img, test_img) -qemu_img('create', '-f', iotests.imgfmt, '-o', 'backing_file=%s' % backing_img, target_img) -self.vm = iotests.VM().add_drive(test_img) -self.vm.launch() - -def tearDown(self): -self.vm.shutdown() -os.remove(test_img) -os.remove(target_img) - -def test_blockdev_attached(self): -self.assert_no_active_block_jobs() -args = {'options': -{'driver': iotests.imgfmt, - 'id':
[Qemu-block] [PATCH 05/14] stream: Use BlockBackend for I/O
This changes the streaming block job to use the job's BlockBackend for performing the COR reads. job->bs isn't used by the streaming code any more afterwards. Signed-off-by: Kevin Wolf--- block/block-backend.c | 15 +++ block/io.c | 9 - block/stream.c | 14 -- include/block/block.h | 2 -- include/sysemu/block-backend.h | 2 ++ trace-events | 4 +++- 6 files changed, 28 insertions(+), 18 deletions(-) diff --git a/block/block-backend.c b/block/block-backend.c index 2932a58..25bb5ca 100644 --- a/block/block-backend.c +++ b/block/block-backend.c @@ -19,6 +19,7 @@ #include "sysemu/sysemu.h" #include "qapi-event.h" #include "qemu/id.h" +#include "trace.h" /* Number of coroutines to reserve per attached device model */ #define COROUTINE_POOL_RESERVATION 64 @@ -793,6 +794,20 @@ static int coroutine_fn blk_co_pwritev(BlockBackend *blk, int64_t offset, return bdrv_co_pwritev(blk_bs(blk), offset, bytes, qiov, flags); } +int coroutine_fn blk_co_copy_on_readv(BlockBackend *blk, +int64_t sector_num, int nb_sectors, QEMUIOVector *qiov) +{ +trace_blk_co_copy_on_readv(blk, blk_bs(blk), sector_num, nb_sectors); + +if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) { +return -EINVAL; +} + +return blk_co_preadv(blk, sector_num << BDRV_SECTOR_BITS, + nb_sectors << BDRV_SECTOR_BITS, qiov, + BDRV_REQ_COPY_ON_READ); +} + typedef struct BlkRwCo { BlockBackend *blk; int64_t offset; diff --git a/block/io.c b/block/io.c index 9aae030..8280e14 100644 --- a/block/io.c +++ b/block/io.c @@ -1102,15 +1102,6 @@ int coroutine_fn bdrv_co_readv_no_serialising(BlockDriverState *bs, BDRV_REQ_NO_SERIALISING); } -int coroutine_fn bdrv_co_copy_on_readv(BlockDriverState *bs, -int64_t sector_num, int nb_sectors, QEMUIOVector *qiov) -{ -trace_bdrv_co_copy_on_readv(bs, sector_num, nb_sectors); - -return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov, -BDRV_REQ_COPY_ON_READ); -} - #define MAX_WRITE_ZEROES_BOUNCE_BUFFER 32768 static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs, diff --git a/block/stream.c b/block/stream.c index 40aa322..70ed9be 100644 --- a/block/stream.c +++ b/block/stream.c @@ -39,7 +39,7 @@ typedef struct StreamBlockJob { char *backing_file_str; } StreamBlockJob; -static int coroutine_fn stream_populate(BlockDriverState *bs, +static int coroutine_fn stream_populate(BlockBackend *blk, int64_t sector_num, int nb_sectors, void *buf) { @@ -52,7 +52,7 @@ static int coroutine_fn stream_populate(BlockDriverState *bs, qemu_iovec_init_external(, , 1); /* Copy-on-read the unallocated clusters */ -return bdrv_co_copy_on_readv(bs, sector_num, nb_sectors, ); +return blk_co_copy_on_readv(blk, sector_num, nb_sectors, ); } typedef struct { @@ -64,6 +64,7 @@ static void stream_complete(BlockJob *job, void *opaque) { StreamBlockJob *s = container_of(job, StreamBlockJob, common); StreamCompleteData *data = opaque; +BlockDriverState *bs = blk_bs(job->blk); BlockDriverState *base = s->base; if (!block_job_is_cancelled(>common) && data->reached_end && @@ -75,8 +76,8 @@ static void stream_complete(BlockJob *job, void *opaque) base_fmt = base->drv->format_name; } } -data->ret = bdrv_change_backing_file(job->bs, base_id, base_fmt); -bdrv_set_backing_hd(job->bs, base); +data->ret = bdrv_change_backing_file(bs, base_id, base_fmt); +bdrv_set_backing_hd(bs, base); } g_free(s->backing_file_str); @@ -88,7 +89,8 @@ static void coroutine_fn stream_run(void *opaque) { StreamBlockJob *s = opaque; StreamCompleteData *data; -BlockDriverState *bs = s->common.bs; +BlockBackend *blk = s->common.blk; +BlockDriverState *bs = blk_bs(blk); BlockDriverState *base = s->base; int64_t sector_num = 0; int64_t end = -1; @@ -159,7 +161,7 @@ wait: goto wait; } } -ret = stream_populate(bs, sector_num, n, buf); +ret = stream_populate(blk, sector_num, n, buf); } if (ret < 0) { BlockErrorAction action = diff --git a/include/block/block.h b/include/block/block.h index a4eb5be..39b5812 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -243,8 +243,6 @@ int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset, const void *buf, int count); int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num, int nb_sectors, QEMUIOVector *qiov); -int coroutine_fn bdrv_co_copy_on_readv(BlockDriverState *bs, -int64_t sector_num, int nb_sectors, QEMUIOVector
[Qemu-block] [PATCH 04/14] block: Convert block job core to BlockBackend
This adds a new BlockBackend field to the BlockJob struct, which coexists with the BlockDriverState while converting the individual jobs. When creating a block job, a new BlockBackend is created on top of the given BlockDriverState, and it is destroyed when the BlockJob ends. The reference to the BDS is now held by the BlockBackend instead of calling bdrv_ref/unref manually. We have to be careful when we use bdrv_replace_in_backing_chain() in block jobs because this changes the BDS that job->blk points to. At the moment block jobs are too tightly coupled with their BDS, so that moving a job to another BDS isn't easily possible; therefore, we need to just manually undo this change afterwards. Signed-off-by: Kevin Wolf--- block/mirror.c | 3 +++ blockjob.c | 37 - include/block/blockjob.h | 3 ++- 3 files changed, 25 insertions(+), 18 deletions(-) diff --git a/block/mirror.c b/block/mirror.c index b9986d8..efca8fc 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -478,6 +478,9 @@ static void mirror_exit(BlockJob *job, void *opaque) bdrv_reopen(s->target, bdrv_get_flags(to_replace), NULL); } bdrv_replace_in_backing_chain(to_replace, s->target); +/* We just changed the BDS the job BB refers to */ +blk_remove_bs(job->blk); +blk_insert_bs(job->blk, src); } out: diff --git a/blockjob.c b/blockjob.c index e916b41..01b58c7 100644 --- a/blockjob.c +++ b/blockjob.c @@ -64,13 +64,17 @@ void *block_job_create(const BlockJobDriver *driver, BlockDriverState *bs, int64_t speed, BlockCompletionFunc *cb, void *opaque, Error **errp) { +BlockBackend *blk; BlockJob *job; if (bs->job) { error_setg(errp, QERR_DEVICE_IN_USE, bdrv_get_device_name(bs)); return NULL; } -bdrv_ref(bs); + +blk = blk_new(_abort); +blk_insert_bs(blk, bs); + job = g_malloc0(driver->instance_size); error_setg(>blocker, "block device is in use by block job: %s", BlockJobType_lookup[driver->job_type]); @@ -80,6 +84,7 @@ void *block_job_create(const BlockJobDriver *driver, BlockDriverState *bs, job->driver= driver; job->id= g_strdup(bdrv_get_device_name(bs)); job->bs= bs; +job->blk = blk; job->cb= cb; job->opaque= opaque; job->busy = true; @@ -110,9 +115,10 @@ void block_job_ref(BlockJob *job) void block_job_unref(BlockJob *job) { if (--job->refcnt == 0) { -job->bs->job = NULL; -bdrv_op_unblock_all(job->bs, job->blocker); -bdrv_unref(job->bs); +BlockDriverState *bs = blk_bs(job->blk); +bs->job = NULL; +bdrv_op_unblock_all(bs, job->blocker); +blk_unref(job->blk); error_free(job->blocker); g_free(job->id); QLIST_REMOVE(job, job_list); @@ -153,7 +159,7 @@ static void block_job_completed_txn_abort(BlockJob *job) txn->aborting = true; /* We are the first failed job. Cancel other jobs. */ QLIST_FOREACH(other_job, >jobs, txn_list) { -ctx = bdrv_get_aio_context(other_job->bs); +ctx = blk_get_aio_context(other_job->blk); aio_context_acquire(ctx); } QLIST_FOREACH(other_job, >jobs, txn_list) { @@ -170,7 +176,7 @@ static void block_job_completed_txn_abort(BlockJob *job) assert(other_job->completed); } QLIST_FOREACH_SAFE(other_job, >jobs, txn_list, next) { -ctx = bdrv_get_aio_context(other_job->bs); +ctx = blk_get_aio_context(other_job->blk); block_job_completed_single(other_job); aio_context_release(ctx); } @@ -192,7 +198,7 @@ static void block_job_completed_txn_success(BlockJob *job) } /* We are the last completed job, commit the transaction. */ QLIST_FOREACH_SAFE(other_job, >jobs, txn_list, next) { -ctx = bdrv_get_aio_context(other_job->bs); +ctx = blk_get_aio_context(other_job->blk); aio_context_acquire(ctx); assert(other_job->ret == 0); block_job_completed_single(other_job); @@ -202,9 +208,7 @@ static void block_job_completed_txn_success(BlockJob *job) void block_job_completed(BlockJob *job, int ret) { -BlockDriverState *bs = job->bs; - -assert(bs->job == job); +assert(blk_bs(job->blk)->job == job); assert(!job->completed); job->completed = true; job->ret = ret; @@ -295,11 +299,10 @@ static int block_job_finish_sync(BlockJob *job, void (*finish)(BlockJob *, Error **errp), Error **errp) { -BlockDriverState *bs = job->bs; Error *local_err = NULL; int ret; -assert(bs->job == job); +assert(blk_bs(job->blk)->job == job); block_job_ref(job); finish(job, _err); @@ -310,7 +313,7 @@ static int
[Qemu-block] [PATCH 00/14] block jobs: Convert I/O to BlockBackend
This series changes the block jobs so that they have a separate BlockBackend for every node on which they perform I/O. This doesn't only get us closer to the goal of only doing I/O through blk_*() from external sources (and block jobs are considered external), but can also later be used to use BlockBackend features for jobs: One could imagine replacing the very basic job throttling by the already existing and more advanced BlockBackend throttling, or using the BlockBackend error handling for block jobs. This depends on the following queue of series on top of block-next: * [PATCH v2 00/13] block: Move I/O throttling to BlockBackend * [PATCH v2 0/9] block: Remove BlockDriverState.blk * [PATCH v3 for-2.7 0/8] blockdev: (Nearly) free clean-up work Alberto Garcia (1): block: keep a list of block jobs John Snow (1): backup: Pack Notifier within BackupBlockJob Kevin Wolf (12): block: Cancel jobs first in bdrv_close_all() block: Default to enabled write cache in blk_new() block: Convert block job core to BlockBackend stream: Use BlockBackend for I/O mirror: Allow target that already has a BlockBackend mirror: Use BlockBackend for I/O backup: Don't leak BackupBlockJob in error path backup: Remove bs parameter from backup_do_cow() block: Add blk_co_readv/writev() backup: Use BlockBackend for I/O commit: Use BlockBackend for I/O blockjob: Remove BlockJob.bs block.c| 23 + block/backup.c | 67 ++ block/block-backend.c | 58 ++- block/commit.c | 42 +++-- block/io.c | 18 --- block/mirror.c | 105 ++--- block/stream.c | 14 +++--- blockdev.c | 12 + blockjob.c | 62 +--- include/block/block.h | 4 -- include/block/blockjob.h | 23 - include/sysemu/block-backend.h | 8 qemu-img.c | 2 +- tests/qemu-iotests/041 | 27 --- tests/qemu-iotests/041.out | 4 +- trace-events | 8 +++- 16 files changed, 261 insertions(+), 216 deletions(-) -- 1.8.3.1
[Qemu-block] [PATCH 03/14] block: Default to enabled write cache in blk_new()
The existing users of the function are: 1. blk_new_open(), which already enabled the write cache 2. Some test cases that don't care about the setting 3. blockdev_init() for empty drives, where the cache mode is overridden with the value from the options when a medium is inserted Therefore, this patch doesn't change the current behaviour. It will be convenient, however, for additional users of blk_new() (like block jobs) if the most sensible WCE setting is the default. Signed-off-by: Kevin Wolf--- block/block-backend.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/block/block-backend.c b/block/block-backend.c index 6732fb4..2932a58 100644 --- a/block/block-backend.c +++ b/block/block-backend.c @@ -125,6 +125,8 @@ BlockBackend *blk_new(Error **errp) blk = g_new0(BlockBackend, 1); blk->refcnt = 1; +blk_set_enable_write_cache(blk, true); + qemu_co_queue_init(>public.throttled_reqs[0]); qemu_co_queue_init(>public.throttled_reqs[1]); @@ -165,7 +167,6 @@ BlockBackend *blk_new_open(const char *filename, const char *reference, return NULL; } -blk_set_enable_write_cache(blk, true); blk->root = bdrv_root_attach_child(bs, "root", _root); blk->root->opaque = blk; -- 1.8.3.1
[Qemu-block] [PATCH 07/14] mirror: Use BlockBackend for I/O
This changes the mirror block job to use the job's BlockBackend for performing its I/O. job->bs isn't used by the mirroring code any more afterwards. Signed-off-by: Kevin Wolf--- block/mirror.c | 75 +++--- blockdev.c | 4 +--- 2 files changed, 41 insertions(+), 38 deletions(-) diff --git a/block/mirror.c b/block/mirror.c index 23aa10e..dc66340 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -35,7 +35,7 @@ typedef struct MirrorBuffer { typedef struct MirrorBlockJob { BlockJob common; RateLimit limit; -BlockDriverState *target; +BlockBackend *target; BlockDriverState *base; /* The name of the graph node to replace */ char *replaces; @@ -156,8 +156,8 @@ static void mirror_read_complete(void *opaque, int ret) mirror_iteration_done(op, ret); return; } -bdrv_aio_writev(s->target, op->sector_num, >qiov, op->nb_sectors, -mirror_write_complete, op); +blk_aio_writev(s->target, op->sector_num, >qiov, op->nb_sectors, + mirror_write_complete, op); } static inline void mirror_clip_sectors(MirrorBlockJob *s, @@ -185,7 +185,7 @@ static int mirror_cow_align(MirrorBlockJob *s, need_cow |= !test_bit((*sector_num + *nb_sectors - 1) / chunk_sectors, s->cow_bitmap); if (need_cow) { -bdrv_round_to_clusters(s->target, *sector_num, *nb_sectors, +bdrv_round_to_clusters(blk_bs(s->target), *sector_num, *nb_sectors, _sector_num, _nb_sectors); } @@ -223,7 +223,7 @@ static inline void mirror_wait_for_io(MirrorBlockJob *s) static int mirror_do_read(MirrorBlockJob *s, int64_t sector_num, int nb_sectors) { -BlockDriverState *source = s->common.bs; +BlockBackend *source = s->common.blk; int sectors_per_chunk, nb_chunks; int ret = nb_sectors; MirrorOp *op; @@ -273,8 +273,8 @@ static int mirror_do_read(MirrorBlockJob *s, int64_t sector_num, s->sectors_in_flight += nb_sectors; trace_mirror_one_iteration(s, sector_num, nb_sectors); -bdrv_aio_readv(source, sector_num, >qiov, nb_sectors, - mirror_read_complete, op); +blk_aio_readv(source, sector_num, >qiov, nb_sectors, + mirror_read_complete, op); return ret; } @@ -295,18 +295,18 @@ static void mirror_do_zero_or_discard(MirrorBlockJob *s, s->in_flight++; s->sectors_in_flight += nb_sectors; if (is_discard) { -bdrv_aio_discard(s->target, sector_num, op->nb_sectors, - mirror_write_complete, op); +blk_aio_discard(s->target, sector_num, op->nb_sectors, +mirror_write_complete, op); } else { -bdrv_aio_write_zeroes(s->target, sector_num, op->nb_sectors, - s->unmap ? BDRV_REQ_MAY_UNMAP : 0, - mirror_write_complete, op); +blk_aio_write_zeroes(s->target, sector_num, op->nb_sectors, + s->unmap ? BDRV_REQ_MAY_UNMAP : 0, + mirror_write_complete, op); } } static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s) { -BlockDriverState *source = s->common.bs; +BlockDriverState *source = blk_bs(s->common.blk); int64_t sector_num, first_chunk; uint64_t delay_ns = 0; /* At least the first dirty chunk is mirrored in one iteration. */ @@ -383,7 +383,7 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s) } else if (ret >= 0 && !(ret & BDRV_BLOCK_DATA)) { int64_t target_sector_num; int target_nb_sectors; -bdrv_round_to_clusters(s->target, sector_num, io_sectors, +bdrv_round_to_clusters(blk_bs(s->target), sector_num, io_sectors, _sector_num, _nb_sectors); if (target_sector_num == sector_num && target_nb_sectors == io_sectors) { @@ -448,7 +448,8 @@ static void mirror_exit(BlockJob *job, void *opaque) MirrorBlockJob *s = container_of(job, MirrorBlockJob, common); MirrorExitData *data = opaque; AioContext *replace_aio_context = NULL; -BlockDriverState *src = s->common.bs; +BlockDriverState *src = blk_bs(s->common.blk); +BlockDriverState *target_bs = blk_bs(s->target); /* Make sure that the source BDS doesn't go away before we called * block_job_completed(). */ @@ -460,20 +461,20 @@ static void mirror_exit(BlockJob *job, void *opaque) } if (s->should_complete && data->ret == 0) { -BlockDriverState *to_replace = s->common.bs; +BlockDriverState *to_replace = src; if (s->to_replace) { to_replace = s->to_replace; } -if (bdrv_get_flags(s->target) != bdrv_get_flags(to_replace)) { -bdrv_reopen(s->target, bdrv_get_flags(to_replace),