[Qemu-block] [PATCH v4 3/6] qemu-io: Allow unaligned access by default

2016-05-04 Thread Eric Blake
There's no reason to require the user to specify a flag just so
they can pass in unaligned numbers.  Keep 'read -p' and 'write -p'
as no-ops so that I don't have to hunt down and update all users
of qemu-io, but otherwise make their behavior default as 'read' and
'write'.  Also fix 'write -z', 'readv', 'writev', 'aio_read',
'aio_write', and 'aio_write -z'.  For now, 'read -b',
'multiwrite', 'write -b', and 'write -c' still require alignment.

qemu-iotest 23 is updated to match; it was the only test that
explicitly expected an error on an unaligned request.
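For reference, what "sector aligned" means in the checks this patch drops:
masking with 0x1ff is just a test for a multiple of the 512-byte block-layer
sector size (a sketch, not part of the patch):

    static bool is_sector_aligned(int64_t x)
    {
        /* same test as !(x & 0x1ff) */
        return (x & (BDRV_SECTOR_SIZE - 1)) == 0;
    }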

Signed-off-by: Eric Blake 
---
 qemu-io-cmds.c |   63 +-
 tests/qemu-iotests/023.out | 2160 +---
 2 files changed, 1452 insertions(+), 771 deletions(-)

diff --git a/qemu-io-cmds.c b/qemu-io-cmds.c
index dc6b0dc..8bcf742 100644
--- a/qemu-io-cmds.c
+++ b/qemu-io-cmds.c
@@ -395,12 +395,6 @@ create_iovec(BlockBackend *blk, QEMUIOVector *qiov, char 
**argv, int nr_iov,
 goto fail;
 }

-if (len & 0x1ff) {
-printf("length argument %" PRId64
-   " is not sector aligned\n", len);
-goto fail;
-}
-
 sizes[i] = len;
 count += len;
 }
@@ -634,7 +628,7 @@ static void read_help(void)
 " -b, -- read from the VM state rather than the virtual disk\n"
 " -C, -- report statistics in a machine parsable format\n"
 " -l, -- length for pattern verification (only with -P)\n"
-" -p, -- allow unaligned access\n"
+" -p, -- ignored for back-compat\n"
 " -P, -- use a pattern to verify read data\n"
 " -q, -- quiet mode, do not show I/O statistics\n"
 " -s, -- start offset for pattern verification (only with -P)\n"
@@ -650,7 +644,7 @@ static const cmdinfo_t read_cmd = {
 .cfunc  = read_f,
 .argmin = 2,
 .argmax = -1,
-.args   = "[-abCpqv] [-P pattern [-s off] [-l len]] off len",
+.args   = "[-abCqv] [-P pattern [-s off] [-l len]] off len",
 .oneline= "reads a number of bytes at a specified offset",
 .help   = read_help,
 };
@@ -658,7 +652,7 @@ static const cmdinfo_t read_cmd = {
 static int read_f(BlockBackend *blk, int argc, char **argv)
 {
 struct timeval t1, t2;
-bool Cflag = false, pflag = false, qflag = false, vflag = false;
+bool Cflag = false, qflag = false, vflag = false;
 bool Pflag = false, sflag = false, lflag = false, bflag = false;
 int c, cnt;
 char *buf;
@@ -686,7 +680,7 @@ static int read_f(BlockBackend *blk, int argc, char **argv)
 }
 break;
 case 'p':
-pflag = true;
+/* Ignored for back-compat */
 break;
 case 'P':
 Pflag = true;
@@ -718,11 +712,6 @@ static int read_f(BlockBackend *blk, int argc, char **argv)
 return qemuio_command_usage(&read_cmd);
 }

-if (bflag && pflag) {
-printf("-b and -p cannot be specified at the same time\n");
-return 0;
-}
-
 offset = cvtnum(argv[optind]);
 if (offset < 0) {
 print_cvtnum_err(offset, argv[optind]);
@@ -753,7 +742,7 @@ static int read_f(BlockBackend *blk, int argc, char **argv)
 return 0;
 }

-if (!pflag) {
+if (bflag) {
 if (offset & 0x1ff) {
 printf("offset %" PRId64 " is not sector aligned\n",
offset);
@@ -890,12 +879,6 @@ static int readv_f(BlockBackend *blk, int argc, char 
**argv)
 }
 optind++;

-if (offset & 0x1ff) {
-printf("offset %" PRId64 " is not sector aligned\n",
-   offset);
-return 0;
-}
-
 nr_iov = argc - optind;
 buf = create_iovec(blk, &qiov, &argv[optind], nr_iov, 0xab);
 if (buf == NULL) {
@@ -952,7 +935,7 @@ static void write_help(void)
 " filled with a set pattern (0xcdcdcdcd).\n"
 " -b, -- write to the VM state rather than the virtual disk\n"
 " -c, -- write compressed data with blk_write_compressed\n"
-" -p, -- allow unaligned access\n"
+" -p, -- ignored for back-compat\n"
 " -P, -- use different pattern to fill file\n"
 " -C, -- report statistics in a machine parsable format\n"
 " -q, -- quiet mode, do not show I/O statistics\n"
@@ -968,7 +951,7 @@ static const cmdinfo_t write_cmd = {
 .cfunc  = write_f,
 .argmin = 2,
 .argmax = -1,
-.args   = "[-bcCpqz] [-P pattern ] off len",
+.args   = "[-bcCqz] [-P pattern ] off len",
 .oneline= "writes a number of bytes at a specified offset",
 .help   = write_help,
 };
@@ -976,7 +959,7 @@ static const cmdinfo_t write_cmd = {
 static int write_f(BlockBackend *blk, int argc, char **argv)
 {
 struct timeval t1, t2;
-bool Cflag = false, pflag = false, qflag = false, bflag = false;
+bool Cflag = false, qflag = false, bflag = false;
 bool Pflag = false, zflag = false, cflag = false;
 int c, cnt;
 char *buf = NULL;
@@ -998,7 +981,7 @@ static int write_f(BlockBackend *blk, int argc, 

[Qemu-block] [PATCH v4 4/6] qemu-io: Add 'write -f' to test FUA flag

2016-05-04 Thread Eric Blake
Make it easier to test block drivers with BDRV_REQ_FUA in
.supported_write_flags, by adding the '-f' flag to qemu-io to
conditionally pass the flag through to specific writes ('write',
'write -z', 'writev', 'aio_write', 'aio_write -z'). You'll want
to use 'qemu-io -t none' to actually make -f useful (as
otherwise, the default writethrough mode automatically sets the
FUA bit on every write).
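As a sketch of the driver side this flag exercises (the driver name is
hypothetical; the field is the .supported_write_flags member mentioned
above): a driver that can honor FUA natively advertises it at open time and
then sees BDRV_REQ_FUA on individual write requests instead of an extra
flush:

    static int hypothetical_drv_open(BlockDriverState *bs, QDict *options,
                                     int open_flags, Error **errp)
    {
        /* advertise native FUA handling; the block layer then passes
         * BDRV_REQ_FUA through instead of emulating it with a flush */
        bs->supported_write_flags = BDRV_REQ_FUA;
        return 0;
    }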

Signed-off-by: Eric Blake 
---
 qemu-io-cmds.c | 57 +
 1 file changed, 41 insertions(+), 16 deletions(-)

diff --git a/qemu-io-cmds.c b/qemu-io-cmds.c
index 8bcf742..ba811fe 100644
--- a/qemu-io-cmds.c
+++ b/qemu-io-cmds.c
@@ -428,13 +428,13 @@ static int do_pread(BlockBackend *blk, char *buf, int64_t 
offset,
 }

 static int do_pwrite(BlockBackend *blk, char *buf, int64_t offset,
- int64_t count, int64_t *total)
+ int64_t count, int flags, int64_t *total)
 {
 if (count > INT_MAX) {
 return -ERANGE;
 }

-*total = blk_pwrite(blk, offset, (uint8_t *)buf, count, 0);
+*total = blk_pwrite(blk, offset, (uint8_t *)buf, count, flags);
 if (*total < 0) {
 return *total;
 }
@@ -446,6 +446,7 @@ typedef struct {
 int64_t offset;
 int64_t count;
 int64_t *total;
+int flags;
 int ret;
 bool done;
 } CoWriteZeroes;
@@ -454,7 +455,8 @@ static void coroutine_fn co_write_zeroes_entry(void *opaque)
 {
 CoWriteZeroes *data = opaque;

-data->ret = blk_co_write_zeroes(data->blk, data->offset, data->count, 0);
+data->ret = blk_co_write_zeroes(data->blk, data->offset, data->count,
+data->flags);
 data->done = true;
 if (data->ret < 0) {
 *data->total = data->ret;
@@ -465,7 +467,7 @@ static void coroutine_fn co_write_zeroes_entry(void *opaque)
 }

 static int do_co_write_zeroes(BlockBackend *blk, int64_t offset, int64_t count,
-  int64_t *total)
+  int flags, int64_t *total)
 {
 Coroutine *co;
 CoWriteZeroes data = {
@@ -473,6 +475,7 @@ static int do_co_write_zeroes(BlockBackend *blk, int64_t 
offset, int64_t count,
 .offset = offset,
 .count  = count,
 .total  = total,
+.flags  = flags,
 .done   = false,
 };

@@ -558,11 +561,11 @@ static int do_aio_readv(BlockBackend *blk, QEMUIOVector 
*qiov,
 }

 static int do_aio_writev(BlockBackend *blk, QEMUIOVector *qiov,
- int64_t offset, int *total)
+ int64_t offset, int flags, int *total)
 {
 int async_ret = NOT_DONE;

-blk_aio_pwritev(blk, offset, qiov, 0, aio_rw_done, &async_ret);
+blk_aio_pwritev(blk, offset, qiov, flags, aio_rw_done, &async_ret);
 while (async_ret == NOT_DONE) {
 main_loop_wait(false);
 }
@@ -935,6 +938,7 @@ static void write_help(void)
 " filled with a set pattern (0xcdcdcdcd).\n"
 " -b, -- write to the VM state rather than the virtual disk\n"
 " -c, -- write compressed data with blk_write_compressed\n"
+" -f, -- use Force Unit Access semantics\n"
 " -p, -- ignored for back-compat\n"
 " -P, -- use different pattern to fill file\n"
 " -C, -- report statistics in a machine parsable format\n"
@@ -951,7 +955,7 @@ static const cmdinfo_t write_cmd = {
 .cfunc  = write_f,
 .argmin = 2,
 .argmax = -1,
-.args   = "[-bcCqz] [-P pattern ] off len",
+.args   = "[-bcCfqz] [-P pattern ] off len",
 .oneline= "writes a number of bytes at a specified offset",
 .help   = write_help,
 };
@@ -961,6 +965,7 @@ static int write_f(BlockBackend *blk, int argc, char **argv)
 struct timeval t1, t2;
 bool Cflag = false, qflag = false, bflag = false;
 bool Pflag = false, zflag = false, cflag = false;
+int flags = 0;
 int c, cnt;
 char *buf = NULL;
 int64_t offset;
@@ -969,7 +974,7 @@ static int write_f(BlockBackend *blk, int argc, char **argv)
 int64_t total = 0;
 int pattern = 0xcd;

-while ((c = getopt(argc, argv, "bcCpP:qz")) != -1) {
+while ((c = getopt(argc, argv, "bcCfpP:qz")) != -1) {
 switch (c) {
 case 'b':
 bflag = true;
@@ -980,6 +985,9 @@ static int write_f(BlockBackend *blk, int argc, char **argv)
 case 'C':
 Cflag = true;
 break;
+case 'f':
+flags |= BDRV_REQ_FUA;
+break;
 case 'p':
 /* Ignored for back-compat */
 break;
@@ -1010,6 +1018,11 @@ static int write_f(BlockBackend *blk, int argc, char 
**argv)
 return 0;
 }

+if ((flags & BDRV_REQ_FUA) && (bflag + cflag)) {
+printf("-f and -b or -c cannot be specified at the same time\n");
+return 0;
+}
+
 if (zflag && Pflag) {
 printf("-z and -P cannot be specified at the same time\n");
 return 0;
@@ -1054,11 +1067,11 @@ static int write_f(BlockBackend *blk, int 

[Qemu-block] [PATCH v4 5/6] qemu-io: Add 'open -u' to set BDRV_O_UNMAP after the fact

2016-05-04 Thread Eric Blake
When opening a file from the command line, qemu-io defaults
to BDRV_O_UNMAP but allows -d to give full control to disable
unmaps. But when opening via the 'open' command, qemu-io did
not set BDRV_O_UNMAP, and had no way to allow it.

Make it at least possible to symmetrically test things:
'qemu-io -d ignore' at the CLI now matches 'qemu-io> open'
in batch mode, and 'qemu-io' or 'qemu-io -d unmap' at
the CLI matches 'qemu-io> open -u'.
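A rough sketch of the asymmetry being fixed (based on how qemu-io builds
its open flags in this era; the helper is hypothetical): the command line
starts with unmap allowed and '-d ignore' clears it again, while the
interactive 'open' command built its flags from zero, with no way to add
BDRV_O_UNMAP back:

    static int open_flags_sketch(bool from_cli, bool open_u)
    {
        int flags = from_cli ? BDRV_O_UNMAP : 0;  /* CLI default vs 'open' */
        if (open_u) {                             /* the new 'open -u' */
            flags |= BDRV_O_UNMAP;
        }
        return flags;
    }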

Signed-off-by: Eric Blake 
---
 qemu-io.c | 8 ++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/qemu-io.c b/qemu-io.c
index 4aba7e0..2196159 100644
--- a/qemu-io.c
+++ b/qemu-io.c
@@ -107,6 +107,7 @@ static void open_help(void)
 " -r, -- open file read-only\n"
 " -s, -- use snapshot file\n"
 " -n, -- disable host cache\n"
+" -u, -- allow discard and zero operations to unmap\n"
 " -o, -- options to be given to the block driver"
 "\n");
 }
@@ -120,7 +121,7 @@ static const cmdinfo_t open_cmd = {
 .argmin = 1,
 .argmax = -1,
 .flags  = CMD_NOFILE_OK,
-.args   = "[-Crsn] [-o options] [path]",
+.args   = "[-Crsnu] [-o options] [path]",
 .oneline= "open the file specified by path",
 .help   = open_help,
 };
@@ -144,7 +145,7 @@ static int open_f(BlockBackend *blk, int argc, char **argv)
 QemuOpts *qopts;
 QDict *opts;

-while ((c = getopt(argc, argv, "snrgo:")) != -1) {
+while ((c = getopt(argc, argv, "snrguo:")) != -1) {
 switch (c) {
 case 's':
 flags |= BDRV_O_SNAPSHOT;
@@ -156,6 +157,9 @@ static int open_f(BlockBackend *blk, int argc, char **argv)
 case 'r':
 readonly = 1;
 break;
+case 'u':
+flags |= BDRV_O_UNMAP;
+break;
 case 'o':
 if (imageOpts) {
 printf("--image-opts and 'open -o' are mutually exclusive\n");
-- 
2.5.5




[Qemu-block] [PATCH v4 0/6] qemu-io: UI enhancements

2016-05-04 Thread Eric Blake
While working on NBD, I found myself cursing the qemu-io UI for
not letting me test various scenarios, particularly after fixing
NBD to serve at byte granularity [1].  And in the process of
writing these qemu-io enhancements, I also managed to flush out
several other bugs in the block layer proper, with fixes posted
separately, such as loss of BDRV_REQ_FUA during write_zeroes [2].

2.7 material, depends on Kevin's block-next:
git://repo.or.cz/qemu/kevin.git block-next
and on my pending "block: kill sector-based blk_write/read"
https://lists.gnu.org/archive/html/qemu-devel/2016-05/msg00557.html

Previously posted as part of a larger NBD series [3] at v3, hence
this series starts life at v4.

[1] commit df7b97ff
[2] https://lists.gnu.org/archive/html/qemu-devel/2016-05/msg00285.html
[3] https://lists.gnu.org/archive/html/qemu-devel/2016-04/msg03526.html

Also available as a tag at this location:
git fetch git://repo.or.cz/qemu/ericb.git nbd-qemu-io-v4

Changes since then:
More cleanups
Include readv/writev and aio_read/aio_write.
Update qemu-iotests 23 to match new 'write' semantics [kwolf]

001/6:[] [--] 'qemu-io: Add missing option documentation'
002/6:[down] 'qemu-io: Use bool for command line flags'
003/6:[down] 'qemu-io: Allow unaligned access by default'
004/6:[0056] [FC] 'qemu-io: Add 'write -f' to test FUA flag'
005/6:[] [--] 'qemu-io: Add 'open -u' to set BDRV_O_UNMAP after the fact'
006/6:[0026] [FC] 'qemu-io: Add 'write -z -u' to test MAY_UNMAP flag'

Eric Blake (6):
  qemu-io: Add missing option documentation
  qemu-io: Use bool for command line flags
  qemu-io: Allow unaligned access by default
  qemu-io: Add 'write -f' to test FUA flag
  qemu-io: Add 'open -u' to set BDRV_O_UNMAP after the fact
  qemu-io: Add 'write -z -u' to test MAY_UNMAP flag

 qemu-io-cmds.c |  222 ++---
 qemu-io.c  |   12 +-
 tests/qemu-iotests/023.out | 2160 +---
 3 files changed, 1562 insertions(+), 832 deletions(-)

-- 
2.5.5




[Qemu-block] [PATCH v4 6/6] qemu-io: Add 'write -z -u' to test MAY_UNMAP flag

2016-05-04 Thread Eric Blake
Make it easier to control whether the BDRV_REQ_MAY_UNMAP flag
can be passed through a write_zeroes command, by adding the '-u'
flag to qemu-io 'write -z' and 'aio_write -z'.  To be useful,
the device has to be opened with 'qemu-io -d unmap' (or the
just-added 'open -u' subcommand).
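The reason the image must be opened with unmap enabled is the filter the
block layer applies to write_zeroes requests; roughly (a sketch, the exact
location in the block layer may differ in this era):

    static int write_zeroes_flags_sketch(BlockDriverState *bs, int flags)
    {
        /* the MAY_UNMAP hint is silently dropped unless the image was
         * opened with BDRV_O_UNMAP ('qemu-io -d unmap' or 'open -u') */
        if (!(bs->open_flags & BDRV_O_UNMAP)) {
            flags &= ~BDRV_REQ_MAY_UNMAP;
        }
        return flags;
    }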

Signed-off-by: Eric Blake 
---
 qemu-io-cmds.c | 24 +---
 1 file changed, 21 insertions(+), 3 deletions(-)

diff --git a/qemu-io-cmds.c b/qemu-io-cmds.c
index ba811fe..e71bc5c 100644
--- a/qemu-io-cmds.c
+++ b/qemu-io-cmds.c
@@ -943,6 +943,7 @@ static void write_help(void)
 " -P, -- use different pattern to fill file\n"
 " -C, -- report statistics in a machine parsable format\n"
 " -q, -- quiet mode, do not show I/O statistics\n"
+" -u, -- with -z, allow unmapping\n"
 " -z, -- write zeroes using blk_co_write_zeroes\n"
 "\n");
 }
@@ -955,7 +956,7 @@ static const cmdinfo_t write_cmd = {
 .cfunc  = write_f,
 .argmin = 2,
 .argmax = -1,
-.args   = "[-bcCfqz] [-P pattern ] off len",
+.args   = "[-bcCfquz] [-P pattern ] off len",
 .oneline= "writes a number of bytes at a specified offset",
 .help   = write_help,
 };
@@ -974,7 +975,7 @@ static int write_f(BlockBackend *blk, int argc, char **argv)
 int64_t total = 0;
 int pattern = 0xcd;

-while ((c = getopt(argc, argv, "bcCfpP:qz")) != -1) {
+while ((c = getopt(argc, argv, "bcCfpP:quz")) != -1) {
 switch (c) {
 case 'b':
 bflag = true;
@@ -1001,6 +1002,9 @@ static int write_f(BlockBackend *blk, int argc, char 
**argv)
 case 'q':
 qflag = true;
 break;
+case 'u':
+flags |= BDRV_REQ_MAY_UNMAP;
+break;
 case 'z':
 zflag = true;
 break;
@@ -1023,6 +1027,11 @@ static int write_f(BlockBackend *blk, int argc, char 
**argv)
 return 0;
 }

+if ((flags & BDRV_REQ_MAY_UNMAP) && !zflag) {
+printf("-u requires -z to be specified\n");
+return 0;
+}
+
 if (zflag && Pflag) {
 printf("-z and -P cannot be specified at the same time\n");
 return 0;
@@ -1561,6 +1570,7 @@ static void aio_write_help(void)
 " -C, -- report statistics in a machine parsable format\n"
 " -f, -- use Force Unit Access semantics\n"
 " -q, -- quiet mode, do not show I/O statistics\n"
+" -u, -- with -z, allow unmapping\n"
 " -z, -- write zeroes using blk_aio_write_zeroes\n"
 "\n");
 }
@@ -1572,7 +1582,7 @@ static const cmdinfo_t aio_write_cmd = {
 .cfunc  = aio_write_f,
 .argmin = 2,
 .argmax = -1,
-.args   = "[-Cfqz] [-P pattern ] off len [len..]",
+.args   = "[-Cfquz] [-P pattern ] off len [len..]",
 .oneline= "asynchronously writes a number of bytes",
 .help   = aio_write_help,
 };
@@ -1596,6 +1606,9 @@ static int aio_write_f(BlockBackend *blk, int argc, char 
**argv)
 case 'q':
 ctx->qflag = true;
 break;
+case 'u':
+flags |= BDRV_REQ_MAY_UNMAP;
+break;
 case 'P':
 pattern = parse_pattern(optarg);
 if (pattern < 0) {
@@ -1623,6 +1636,11 @@ static int aio_write_f(BlockBackend *blk, int argc, char 
**argv)
 return 0;
 }

+if ((flags & BDRV_REQ_MAY_UNMAP) && !ctx->zflag) {
+printf("-u requires -z to be specified\n");
+return 0;
+}
+
 if (ctx->zflag && ctx->Pflag) {
 printf("-z and -P cannot be specified at the same time\n");
 g_free(ctx);
-- 
2.5.5




[Qemu-block] [PATCH v4 1/6] qemu-io: Add missing option documentation

2016-05-04 Thread Eric Blake
Commit 499afa2 added --image-opts, but forgot to document it in
--help.  Likewise for commit 9e8f183 and -d/--discard.

Finally, commit 10d9d75 removed -g/--growable, but forgot to
cull it from the valid short options.

Signed-off-by: Eric Blake 
---
 qemu-io.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/qemu-io.c b/qemu-io.c
index 0598251..4aba7e0 100644
--- a/qemu-io.c
+++ b/qemu-io.c
@@ -221,6 +221,7 @@ static void usage(const char *name)
 "\n"
 "  --object OBJECTDEF   define an object such as 'secret' for\n"
 "   passwords and/or encryption keys\n"
+"  --image-opts treat file as option string\n"
 "  -c, --cmd STRING execute command with its arguments\n"
 "   from the given string\n"
 "  -f, --format FMT specifies the block driver to use\n"
@@ -230,6 +231,7 @@ static void usage(const char *name)
 "  -m, --misalign   misalign allocations for O_DIRECT\n"
 "  -k, --native-aio use kernel AIO implementation (on Linux only)\n"
 "  -t, --cache=MODE use the given cache mode for the image\n"
+"  -d, --discard=MODE   use the given discard mode for the image\n"
 "  -T, --trace FILE enable trace events listed in the given file\n"
 "  -h, --help   display this help and exit\n"
 "  -V, --versionoutput version information and exit\n"
@@ -410,7 +412,7 @@ static QemuOptsList file_opts = {
 int main(int argc, char **argv)
 {
 int readonly = 0;
-const char *sopt = "hVc:d:f:rsnmgkt:T:";
+const char *sopt = "hVc:d:f:rsnmkt:T:";
 const struct option lopt[] = {
 { "help", no_argument, NULL, 'h' },
 { "version", no_argument, NULL, 'V' },
-- 
2.5.5




[Qemu-block] [PATCH v4 2/6] qemu-io: Use bool for command line flags

2016-05-04 Thread Eric Blake
We require a C99 compiler; let's use it to express what we
really mean.

Signed-off-by: Eric Blake 
---
 qemu-io-cmds.c | 94 +-
 1 file changed, 47 insertions(+), 47 deletions(-)

diff --git a/qemu-io-cmds.c b/qemu-io-cmds.c
index 0bbbc72..dc6b0dc 100644
--- a/qemu-io-cmds.c
+++ b/qemu-io-cmds.c
@@ -345,7 +345,7 @@ static void dump_buffer(const void *buffer, int64_t offset, 
int64_t len)
 }

 static void print_report(const char *op, struct timeval *t, int64_t offset,
- int64_t count, int64_t total, int cnt, int Cflag)
+ int64_t count, int64_t total, int cnt, bool Cflag)
 {
 char s1[64], s2[64], ts[64];

@@ -658,8 +658,8 @@ static const cmdinfo_t read_cmd = {
 static int read_f(BlockBackend *blk, int argc, char **argv)
 {
 struct timeval t1, t2;
-int Cflag = 0, pflag = 0, qflag = 0, vflag = 0;
-int Pflag = 0, sflag = 0, lflag = 0, bflag = 0;
+bool Cflag = false, pflag = false, qflag = false, vflag = false;
+bool Pflag = false, sflag = false, lflag = false, bflag = false;
 int c, cnt;
 char *buf;
 int64_t offset;
@@ -672,13 +672,13 @@ static int read_f(BlockBackend *blk, int argc, char 
**argv)
 while ((c = getopt(argc, argv, "bCl:pP:qs:v")) != -1) {
 switch (c) {
 case 'b':
-bflag = 1;
+bflag = true;
 break;
 case 'C':
-Cflag = 1;
+Cflag = true;
 break;
 case 'l':
-lflag = 1;
+lflag = true;
 pattern_count = cvtnum(optarg);
 if (pattern_count < 0) {
 print_cvtnum_err(pattern_count, optarg);
@@ -686,20 +686,20 @@ static int read_f(BlockBackend *blk, int argc, char 
**argv)
 }
 break;
 case 'p':
-pflag = 1;
+pflag = true;
 break;
 case 'P':
-Pflag = 1;
+Pflag = true;
 pattern = parse_pattern(optarg);
 if (pattern < 0) {
 return 0;
 }
 break;
 case 'q':
-qflag = 1;
+qflag = true;
 break;
 case 's':
-sflag = 1;
+sflag = true;
 pattern_offset = cvtnum(optarg);
 if (pattern_offset < 0) {
 print_cvtnum_err(pattern_offset, optarg);
@@ -707,7 +707,7 @@ static int read_f(BlockBackend *blk, int argc, char **argv)
 }
 break;
 case 'v':
-vflag = 1;
+vflag = true;
 break;
 default:
 return qemuio_command_usage(&read_cmd);
@@ -844,7 +844,7 @@ static const cmdinfo_t readv_cmd = {
 static int readv_f(BlockBackend *blk, int argc, char **argv)
 {
 struct timeval t1, t2;
-int Cflag = 0, qflag = 0, vflag = 0;
+bool Cflag = false, qflag = false, vflag = false;
 int c, cnt;
 char *buf;
 int64_t offset;
@@ -853,25 +853,25 @@ static int readv_f(BlockBackend *blk, int argc, char 
**argv)
 int nr_iov;
 QEMUIOVector qiov;
 int pattern = 0;
-int Pflag = 0;
+bool Pflag = false;

 while ((c = getopt(argc, argv, "CP:qv")) != -1) {
 switch (c) {
 case 'C':
-Cflag = 1;
+Cflag = true;
 break;
 case 'P':
-Pflag = 1;
+Pflag = true;
 pattern = parse_pattern(optarg);
 if (pattern < 0) {
 return 0;
 }
 break;
 case 'q':
-qflag = 1;
+qflag = true;
 break;
 case 'v':
-vflag = 1;
+vflag = true;
 break;
 default:
 return qemuio_command_usage(&readv_cmd);
@@ -976,8 +976,8 @@ static const cmdinfo_t write_cmd = {
 static int write_f(BlockBackend *blk, int argc, char **argv)
 {
 struct timeval t1, t2;
-int Cflag = 0, pflag = 0, qflag = 0, bflag = 0, Pflag = 0, zflag = 0;
-int cflag = 0;
+bool Cflag = false, pflag = false, qflag = false, bflag = false;
+bool Pflag = false, zflag = false, cflag = false;
 int c, cnt;
 char *buf = NULL;
 int64_t offset;
@@ -989,29 +989,29 @@ static int write_f(BlockBackend *blk, int argc, char 
**argv)
 while ((c = getopt(argc, argv, "bcCpP:qz")) != -1) {
 switch (c) {
 case 'b':
-bflag = 1;
+bflag = true;
 break;
 case 'c':
-cflag = 1;
+cflag = true;
 break;
 case 'C':
-Cflag = 1;
+Cflag = true;
 break;
 case 'p':
-pflag = 1;
+pflag = true;
 break;
 case 'P':
-Pflag = 1;
+Pflag = true;
 pattern = parse_pattern(optarg);
 if (pattern < 0) {
 return 0;
 }
 

[Qemu-block] [PATCH v6 17/20] nbd: Switch to byte-based block access

2016-05-04 Thread Eric Blake
Sector-based blk_read() should die; switch to byte-based
blk_pread() instead.
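The conversion pattern is the same throughout this series (a sketch, not
taken from the patch): scale the sector number by the 512-byte sector size
to get a byte offset, and pass a byte count instead of a sector count:

    static int read_one_sector(BlockBackend *blk, int64_t sector_num,
                               uint8_t *buf /* BDRV_SECTOR_SIZE bytes */)
    {
        /* before: return blk_read(blk, sector_num, buf, 1); */
        return blk_pread(blk, sector_num << BDRV_SECTOR_BITS, buf,
                         BDRV_SECTOR_SIZE);
    }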

Signed-off-by: Eric Blake 
---
 qemu-nbd.c | 11 +++
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/qemu-nbd.c b/qemu-nbd.c
index c55b40f..c07ceef 100644
--- a/qemu-nbd.c
+++ b/qemu-nbd.c
@@ -159,12 +159,13 @@ static int find_partition(BlockBackend *blk, int 
partition,
   off_t *offset, off_t *size)
 {
 struct partition_record mbr[4];
-uint8_t data[512];
+uint8_t data[BDRV_SECTOR_SIZE];
 int i;
 int ext_partnum = 4;
 int ret;

-if ((ret = blk_read(blk, 0, data, 1)) < 0) {
+ret = blk_pread(blk, 0, data, sizeof(data));
+if (ret < 0) {
 error_report("error while reading: %s", strerror(-ret));
 exit(EXIT_FAILURE);
 }
@@ -182,10 +183,12 @@ static int find_partition(BlockBackend *blk, int 
partition,

 if (mbr[i].system == 0xF || mbr[i].system == 0x5) {
 struct partition_record ext[4];
-uint8_t data1[512];
+uint8_t data1[BDRV_SECTOR_SIZE];
 int j;

-if ((ret = blk_read(blk, mbr[i].start_sector_abs, data1, 1)) < 0) {
+ret = blk_pread(blk, mbr[i].start_sector_abs << BDRV_SECTOR_BITS,
+data1, sizeof(data1));
+if (ret < 0) {
 error_report("error while reading: %s", strerror(-ret));
 exit(EXIT_FAILURE);
 }
-- 
2.5.5




Re: [Qemu-block] [Qemu-devel] [PATCH 1/2] block: Invalidate all children

2016-05-04 Thread Fam Zheng
On Wed, 05/04 12:10, Kevin Wolf wrote:
> Am 19.04.2016 um 03:42 hat Fam Zheng geschrieben:
> > Currently we only recurse to bs->file, which will miss the children in 
> > quorum
> > and VMDK.
> > 
> > Recurse into the whole subtree to avoid that.
> > 
> > Signed-off-by: Fam Zheng 
> > ---
> >  block.c | 20 ++--
> >  1 file changed, 14 insertions(+), 6 deletions(-)
> > 
> > diff --git a/block.c b/block.c
> > index d4939b4..fa8b38f 100644
> > --- a/block.c
> > +++ b/block.c
> > @@ -3201,6 +3201,7 @@ void bdrv_init_with_whitelist(void)
> >  
> >  void bdrv_invalidate_cache(BlockDriverState *bs, Error **errp)
> >  {
> > +BdrvChild *child;
> >  Error *local_err = NULL;
> >  int ret;
> >  
> > @@ -3215,13 +3216,20 @@ void bdrv_invalidate_cache(BlockDriverState *bs, 
> > Error **errp)
> >  
> >  if (bs->drv->bdrv_invalidate_cache) {
> >  bs->drv->bdrv_invalidate_cache(bs, &local_err);
> > -} else if (bs->file) {
> > -bdrv_invalidate_cache(bs->file->bs, &local_err);
> 
> The old behaviour was that we only recurse for bs->file if the block
> driver doesn't have its own implementation.
> 
> This means that in qcow2, for example, we call bdrv_invalidate_cache()
> explicitly for bs->file. If we can already invalidate it here, the call
> inside qcow2 and probably other drivers could go away.

Yes, will update. Thanks.

Fam
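For reference, a minimal sketch of the recursion being discussed, assuming
the BdrvChild list used by block.c in this period (the respin may differ):

    static void invalidate_children_sketch(BlockDriverState *bs, Error **errp)
    {
        BdrvChild *child;
        Error *local_err = NULL;

        QLIST_FOREACH(child, &bs->children, next) {
            bdrv_invalidate_cache(child->bs, &local_err);
            if (local_err) {
                error_propagate(errp, local_err);
                return;
            }
        }
    }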



Re: [Qemu-block] [PATCH 2/2] block: Inactivate all children

2016-05-04 Thread Fam Zheng
On Wed, 05/04 12:12, Kevin Wolf wrote:
> Am 19.04.2016 um 03:42 hat Fam Zheng geschrieben:
> > Currently we only inactivate the top BDS. Actually bdrv_inactivate
> > should be the opposite of bdrv_invalidate_cache.
> > 
> > Recurse into the whole subtree instead.
> > 
> > Signed-off-by: Fam Zheng 
> 
> Did you actually test this?
> 
> I would expect that bs->drv->bdrv_inactivate() fails now (as in
> assertion failure) if it has anything to flush to the image because
> bs->file has already been inactivated before. I think children need to be
> inactivated after their parents.

OK, my test apparently failed to trigger that bdrv_pwritev() path. Good catch!

> 
> Nodes with multiple parents could actually become even more
> interesting...

I'll make it a two-pass recursion: one pass for calling drv->bdrv_inactivate and the
other for setting BDRV_O_INACTIVATE.

Fam
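A rough sketch of the two-pass idea (hypothetical helper; assuming the
BDRV_O_INACTIVE open flag is the one referred to above): pass one calls
drv->bdrv_inactivate parents-first while the children are still writable,
pass two marks the whole subtree inactive:

    static int inactivate_recurse_sketch(BlockDriverState *bs,
                                         bool setting_flag)
    {
        BdrvChild *child;
        int ret;

        if (!setting_flag && bs->drv && bs->drv->bdrv_inactivate) {
            ret = bs->drv->bdrv_inactivate(bs);   /* pass 1: flush data */
            if (ret < 0) {
                return ret;
            }
        }

        QLIST_FOREACH(child, &bs->children, next) {
            ret = inactivate_recurse_sketch(child->bs, setting_flag);
            if (ret < 0) {
                return ret;
            }
        }

        if (setting_flag) {
            bs->open_flags |= BDRV_O_INACTIVE;    /* pass 2: set the flag */
        }
        return 0;
    }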



[Qemu-block] [PATCH v6 13/20] pflash: Switch to byte-based block access

2016-05-04 Thread Eric Blake
Sector-based blk_write() should die; switch to byte-based
blk_pwrite() instead.  Likewise for blk_read().
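A worked example of the widening done in pflash_update() (a sketch; the
numbers are made up): a dirty byte range [100, 800) becomes the covering
whole-sector range [0, 1024) before being written back:

    static void widen_example(void)
    {
        int offset = 100, offset_end = 800;   /* dirty byte range */

        offset = QEMU_ALIGN_DOWN(offset, BDRV_SECTOR_SIZE);        /* 0    */
        offset_end = QEMU_ALIGN_UP(offset_end, BDRV_SECTOR_SIZE);  /* 1024 */
        /* the old code computed the same bounds with ">> 9" and
         * "(x + 511) >> 9", but in sector units */
    }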

Signed-off-by: Eric Blake 
---
 hw/block/pflash_cfi01.c | 12 ++--
 hw/block/pflash_cfi02.c | 12 ++--
 2 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/hw/block/pflash_cfi01.c b/hw/block/pflash_cfi01.c
index 106a775..3a1f85d 100644
--- a/hw/block/pflash_cfi01.c
+++ b/hw/block/pflash_cfi01.c
@@ -413,11 +413,11 @@ static void pflash_update(pflash_t *pfl, int offset,
 int offset_end;
 if (pfl->blk) {
 offset_end = offset + size;
-/* round to sectors */
-offset = offset >> 9;
-offset_end = (offset_end + 511) >> 9;
-blk_write(pfl->blk, offset, pfl->storage + (offset << 9),
-  offset_end - offset);
+/* widen to sector boundaries */
+offset = QEMU_ALIGN_DOWN(offset, BDRV_SECTOR_SIZE);
+offset_end = QEMU_ALIGN_UP(offset_end, BDRV_SECTOR_SIZE);
+blk_pwrite(pfl->blk, offset, pfl->storage + offset,
+   offset_end - offset, 0);
 }
 }

@@ -739,7 +739,7 @@ static void pflash_cfi01_realize(DeviceState *dev, Error 
**errp)

 if (pfl->blk) {
 /* read the initial flash content */
-ret = blk_read(pfl->blk, 0, pfl->storage, total_len >> 9);
+ret = blk_pread(pfl->blk, 0, pfl->storage, total_len);

 if (ret < 0) {
 vmstate_unregister_ram(&pfl->mem, DEVICE(pfl));
diff --git a/hw/block/pflash_cfi02.c b/hw/block/pflash_cfi02.c
index b13172c..5f10610 100644
--- a/hw/block/pflash_cfi02.c
+++ b/hw/block/pflash_cfi02.c
@@ -253,11 +253,11 @@ static void pflash_update(pflash_t *pfl, int offset,
 int offset_end;
 if (pfl->blk) {
 offset_end = offset + size;
-/* round to sectors */
-offset = offset >> 9;
-offset_end = (offset_end + 511) >> 9;
-blk_write(pfl->blk, offset, pfl->storage + (offset << 9),
-  offset_end - offset);
+/* widen to sector boundaries */
+offset = QEMU_ALIGN_DOWN(offset, BDRV_SECTOR_SIZE);
+offset_end = QEMU_ALIGN_UP(offset_end, BDRV_SECTOR_SIZE);
+blk_pwrite(pfl->blk, offset, pfl->storage + offset,
+   offset_end - offset, 0);
 }
 }

@@ -622,7 +622,7 @@ static void pflash_cfi02_realize(DeviceState *dev, Error 
**errp)
 pfl->chip_len = chip_len;
 if (pfl->blk) {
 /* read the initial flash content */
-ret = blk_read(pfl->blk, 0, pfl->storage, chip_len >> 9);
+ret = blk_pread(pfl->blk, 0, pfl->storage, chip_len);
 if (ret < 0) {
 vmstate_unregister_ram(&pfl->orig_mem, DEVICE(pfl));
 error_setg(errp, "failed to read the initial flash content");
-- 
2.5.5




[Qemu-block] [PATCH v6 20/20] block: Kill unused sector-based blk_* functions

2016-05-04 Thread Eric Blake
Now that there are no remaining clients, we can drop the
sector-based blk_read(), blk_write(), blk_aio_readv(), and
blk_aio_writev().  Sadly, there are still remaining
sector-based interfaces, such as blk_*discard(), or
blk_write_compressed(); those will have to wait for another
day.

Signed-off-by: Eric Blake 
---
 include/sysemu/block-backend.h | 10 -
 block/block-backend.c  | 51 --
 2 files changed, 61 deletions(-)

diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h
index 8d7839c..457efd9 100644
--- a/include/sysemu/block-backend.h
+++ b/include/sysemu/block-backend.h
@@ -90,12 +90,8 @@ void blk_attach_dev_nofail(BlockBackend *blk, void *dev);
 void blk_detach_dev(BlockBackend *blk, void *dev);
 void *blk_get_attached_dev(BlockBackend *blk);
 void blk_set_dev_ops(BlockBackend *blk, const BlockDevOps *ops, void *opaque);
-int blk_read(BlockBackend *blk, int64_t sector_num, uint8_t *buf,
- int nb_sectors);
 int blk_pread_unthrottled(BlockBackend *blk, int64_t offset, uint8_t *buf,
   int count);
-int blk_write(BlockBackend *blk, int64_t sector_num, const uint8_t *buf,
-  int nb_sectors);
 int blk_write_zeroes(BlockBackend *blk, int64_t offset,
  int count, BdrvRequestFlags flags);
 BlockAIOCB *blk_aio_write_zeroes(BlockBackend *blk, int64_t offset,
@@ -107,15 +103,9 @@ int blk_pwrite(BlockBackend *blk, int64_t offset, const 
void *buf, int count,
 int64_t blk_getlength(BlockBackend *blk);
 void blk_get_geometry(BlockBackend *blk, uint64_t *nb_sectors_ptr);
 int64_t blk_nb_sectors(BlockBackend *blk);
-BlockAIOCB *blk_aio_readv(BlockBackend *blk, int64_t sector_num,
-  QEMUIOVector *iov, int nb_sectors,
-  BlockCompletionFunc *cb, void *opaque);
 BlockAIOCB *blk_aio_preadv(BlockBackend *blk, int64_t offset,
QEMUIOVector *iov, BdrvRequestFlags flags,
BlockCompletionFunc *cb, void *opaque);
-BlockAIOCB *blk_aio_writev(BlockBackend *blk, int64_t sector_num,
-   QEMUIOVector *iov, int nb_sectors,
-   BlockCompletionFunc *cb, void *opaque);
 BlockAIOCB *blk_aio_pwritev(BlockBackend *blk, int64_t offset,
 QEMUIOVector *iov, BdrvRequestFlags flags,
 BlockCompletionFunc *cb, void *opaque);
diff --git a/block/block-backend.c b/block/block-backend.c
index 104852d..0551be4 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -772,24 +772,6 @@ static int blk_prw(BlockBackend *blk, int64_t offset, 
uint8_t *buf,
 return rwco.ret;
 }

-static int blk_rw(BlockBackend *blk, int64_t sector_num, uint8_t *buf,
-  int nb_sectors, CoroutineEntry co_entry,
-  BdrvRequestFlags flags)
-{
-if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) {
-return -EINVAL;
-}
-
-return blk_prw(blk, sector_num << BDRV_SECTOR_BITS, buf,
-   nb_sectors << BDRV_SECTOR_BITS, co_entry, flags);
-}
-
-int blk_read(BlockBackend *blk, int64_t sector_num, uint8_t *buf,
- int nb_sectors)
-{
-return blk_rw(blk, sector_num, buf, nb_sectors, blk_read_entry, 0);
-}
-
 int blk_pread_unthrottled(BlockBackend *blk, int64_t offset, uint8_t *buf,
   int count)
 {
@@ -807,13 +789,6 @@ int blk_pread_unthrottled(BlockBackend *blk, int64_t 
offset, uint8_t *buf,
 return ret;
 }

-int blk_write(BlockBackend *blk, int64_t sector_num, const uint8_t *buf,
-  int nb_sectors)
-{
-return blk_rw(blk, sector_num, (uint8_t*) buf, nb_sectors,
-  blk_write_entry, 0);
-}
-
 int blk_write_zeroes(BlockBackend *blk, int64_t offset,
  int count, BdrvRequestFlags flags)
 {
@@ -985,19 +960,6 @@ int64_t blk_nb_sectors(BlockBackend *blk)
 return bdrv_nb_sectors(blk_bs(blk));
 }

-BlockAIOCB *blk_aio_readv(BlockBackend *blk, int64_t sector_num,
-  QEMUIOVector *iov, int nb_sectors,
-  BlockCompletionFunc *cb, void *opaque)
-{
-if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) {
-return blk_abort_aio_request(blk, cb, opaque, -EINVAL);
-}
-
-assert(nb_sectors << BDRV_SECTOR_BITS == iov->size);
-return blk_aio_prwv(blk, sector_num << BDRV_SECTOR_BITS, iov->size, iov,
-blk_aio_read_entry, 0, cb, opaque);
-}
-
 BlockAIOCB *blk_aio_preadv(BlockBackend *blk, int64_t offset,
QEMUIOVector *iov, BdrvRequestFlags flags,
BlockCompletionFunc *cb, void *opaque)
@@ -1006,19 +968,6 @@ BlockAIOCB *blk_aio_preadv(BlockBackend *blk, int64_t 
offset,
 blk_aio_read_entry, flags, cb, opaque);
 }

-BlockAIOCB *blk_aio_writev(BlockBackend *blk, int64_t sector_num,
-  

[Qemu-block] [PATCH v6 19/20] qemu-io: Switch to byte-based block access

2016-05-04 Thread Eric Blake
qemu-io is the last user of several sector-based interfaces.
This patch upgrades to the new interfaces under the hood,
then deletes the resulting dead code.  Note that for maximum
back-compat, while the -p option is no longer required to get
blk_pread(), it is still needed to allow for unaligned access;
this is because qemu-iotest 23 relies on qemu-io rejecting
unaligned accesses without -p.  A later patch may clean up the
interface to be more user-friendly, but it's better to separate
what's done under the hood from what the user sees.

Signed-off-by: Eric Blake 
---
 qemu-io-cmds.c | 62 ++
 1 file changed, 10 insertions(+), 52 deletions(-)

diff --git a/qemu-io-cmds.c b/qemu-io-cmds.c
index a3e3982..0bbbc72 100644
--- a/qemu-io-cmds.c
+++ b/qemu-io-cmds.c
@@ -419,40 +419,6 @@ fail:
 return buf;
 }

-static int do_read(BlockBackend *blk, char *buf, int64_t offset, int64_t count,
-   int64_t *total)
-{
-int ret;
-
-if (count >> 9 > INT_MAX) {
-return -ERANGE;
-}
-
-ret = blk_read(blk, offset >> 9, (uint8_t *)buf, count >> 9);
-if (ret < 0) {
-return ret;
-}
-*total = count;
-return 1;
-}
-
-static int do_write(BlockBackend *blk, char *buf, int64_t offset, int64_t 
count,
-int64_t *total)
-{
-int ret;
-
-if (count >> 9 > INT_MAX) {
-return -ERANGE;
-}
-
-ret = blk_write(blk, offset >> 9, (uint8_t *)buf, count >> 9);
-if (ret < 0) {
-return ret;
-}
-*total = count;
-return 1;
-}
-
 static int do_pread(BlockBackend *blk, char *buf, int64_t offset,
 int64_t count, int64_t *total)
 {
@@ -588,8 +554,7 @@ static int do_aio_readv(BlockBackend *blk, QEMUIOVector 
*qiov,
 {
 int async_ret = NOT_DONE;

-blk_aio_readv(blk, offset >> 9, qiov, qiov->size >> 9,
-  aio_rw_done, &async_ret);
+blk_aio_preadv(blk, offset, qiov, 0, aio_rw_done, &async_ret);
 while (async_ret == NOT_DONE) {
 main_loop_wait(false);
 }
@@ -603,8 +568,7 @@ static int do_aio_writev(BlockBackend *blk, QEMUIOVector 
*qiov,
 {
 int async_ret = NOT_DONE;

-blk_aio_writev(blk, offset >> 9, qiov, qiov->size >> 9,
-   aio_rw_done, &async_ret);
+blk_aio_pwritev(blk, offset, qiov, 0, aio_rw_done, &async_ret);
 while (async_ret == NOT_DONE) {
 main_loop_wait(false);
 }
@@ -670,7 +634,7 @@ static void read_help(void)
 " -b, -- read from the VM state rather than the virtual disk\n"
 " -C, -- report statistics in a machine parsable format\n"
 " -l, -- length for pattern verification (only with -P)\n"
-" -p, -- use blk_pread to read the file\n"
+" -p, -- allow unaligned access\n"
 " -P, -- use a pattern to verify read data\n"
 " -q, -- quiet mode, do not show I/O statistics\n"
 " -s, -- start offset for pattern verification (only with -P)\n"
@@ -805,12 +769,10 @@ static int read_f(BlockBackend *blk, int argc, char 
**argv)
 buf = qemu_io_alloc(blk, count, 0xab);

gettimeofday(&t1, NULL);
-if (pflag) {
-cnt = do_pread(blk, buf, offset, count, &total);
-} else if (bflag) {
+if (bflag) {
 cnt = do_load_vmstate(blk, buf, offset, count, &total);
 } else {
-cnt = do_read(blk, buf, offset, count, &total);
+cnt = do_pread(blk, buf, offset, count, &total);
 }
 gettimeofday(&t2, NULL);

@@ -990,7 +952,7 @@ static void write_help(void)
 " filled with a set pattern (0xcdcdcdcd).\n"
 " -b, -- write to the VM state rather than the virtual disk\n"
 " -c, -- write compressed data with blk_write_compressed\n"
-" -p, -- use blk_pwrite to write the file\n"
+" -p, -- allow unaligned access\n"
 " -P, -- use different pattern to fill file\n"
 " -C, -- report statistics in a machine parsable format\n"
 " -q, -- quiet mode, do not show I/O statistics\n"
@@ -1106,16 +1068,14 @@ static int write_f(BlockBackend *blk, int argc, char 
**argv)
 }

gettimeofday(&t1, NULL);
-if (pflag) {
-cnt = do_pwrite(blk, buf, offset, count, &total);
-} else if (bflag) {
+if (bflag) {
 cnt = do_save_vmstate(blk, buf, offset, count, &total);
 } else if (zflag) {
 cnt = do_co_write_zeroes(blk, offset, count, &total);
 } else if (cflag) {
 cnt = do_write_compressed(blk, buf, offset, count, &total);
 } else {
-cnt = do_write(blk, buf, offset, count, &total);
+cnt = do_pwrite(blk, buf, offset, count, &total);
 }
 gettimeofday(&t2, NULL);

@@ -1592,8 +1552,7 @@ static int aio_read_f(BlockBackend *blk, int argc, char 
**argv)
gettimeofday(&ctx->t1, NULL);
 block_acct_start(blk_get_stats(blk), &ctx->acct, ctx->qiov.size,
 BLOCK_ACCT_READ);
-blk_aio_readv(blk, ctx->offset >> 9, &ctx->qiov,
-  ctx->qiov.size >> 9, aio_read_done, ctx);
+blk_aio_preadv(blk, ctx->offset, &ctx->qiov, 0, aio_read_done, ctx);
 return 0;
 }

@@ -1717,8 +1676,7 @@ static int aio_write_f(BlockBackend *blk, int argc, char 
**argv)
   

[Qemu-block] [PATCH v6 11/20] nand: Switch to byte-based block access

2016-05-04 Thread Eric Blake
Sector-based blk_write() should die; switch to byte-based
blk_pwrite() instead.  Likewise for blk_read().

This file is doing some complex computations to map various
flash page sizes (256, 512, and 2048) atop generic uses of
512-byte sector operations.  Perhaps someone will want to tidy
up the file for fewer gymnastics in managing addresses and
offsets, and less wasteful visits of 256-byte pages, but it
was out of scope for this series, where I just went with the
mechanical conversion.

Signed-off-by: Eric Blake 

---
v5: fix missing edit
---
 hw/block/nand.c | 36 +++-
 1 file changed, 23 insertions(+), 13 deletions(-)

diff --git a/hw/block/nand.c b/hw/block/nand.c
index 29c6596..c69e675 100644
--- a/hw/block/nand.c
+++ b/hw/block/nand.c
@@ -663,7 +663,8 @@ static void glue(nand_blk_write_, PAGE_SIZE)(NANDFlashState 
*s)
 sector = SECTOR(s->addr);
 off = (s->addr & PAGE_MASK) + s->offset;
 soff = SECTOR_OFFSET(s->addr);
-if (blk_read(s->blk, sector, iobuf, PAGE_SECTORS) < 0) {
+if (blk_pread(s->blk, sector << BDRV_SECTOR_BITS, iobuf,
+  PAGE_SECTORS << BDRV_SECTOR_BITS) < 0) {
 printf("%s: read error in sector %" PRIu64 "\n", __func__, sector);
 return;
 }
@@ -675,21 +676,24 @@ static void glue(nand_blk_write_, 
PAGE_SIZE)(NANDFlashState *s)
 MIN(OOB_SIZE, off + s->iolen - PAGE_SIZE));
 }

-if (blk_write(s->blk, sector, iobuf, PAGE_SECTORS) < 0) {
+if (blk_pwrite(s->blk, sector << BDRV_SECTOR_BITS, iobuf,
+   PAGE_SECTORS << BDRV_SECTOR_BITS, 0) < 0) {
 printf("%s: write error in sector %" PRIu64 "\n", __func__, 
sector);
 }
 } else {
 off = PAGE_START(s->addr) + (s->addr & PAGE_MASK) + s->offset;
 sector = off >> 9;
 soff = off & 0x1ff;
-if (blk_read(s->blk, sector, iobuf, PAGE_SECTORS + 2) < 0) {
+if (blk_pread(s->blk, sector << BDRV_SECTOR_BITS, iobuf,
+  (PAGE_SECTORS + 2) << BDRV_SECTOR_BITS) < 0) {
 printf("%s: read error in sector %" PRIu64 "\n", __func__, sector);
 return;
 }

 mem_and(iobuf + soff, s->io, s->iolen);

-if (blk_write(s->blk, sector, iobuf, PAGE_SECTORS + 2) < 0) {
+if (blk_pwrite(s->blk, sector << BDRV_SECTOR_BITS, iobuf,
+   (PAGE_SECTORS + 2) << BDRV_SECTOR_BITS, 0) < 0) {
 printf("%s: write error in sector %" PRIu64 "\n", __func__, 
sector);
 }
 }
@@ -716,17 +720,20 @@ static void glue(nand_blk_erase_, 
PAGE_SIZE)(NANDFlashState *s)
 i = SECTOR(addr);
 page = SECTOR(addr + (1 << (ADDR_SHIFT + s->erase_shift)));
 for (; i < page; i ++)
-if (blk_write(s->blk, i, iobuf, 1) < 0) {
+if (blk_pwrite(s->blk, i << BDRV_SECTOR_BITS, iobuf,
+   BDRV_SECTOR_SIZE, 0) < 0) {
 printf("%s: write error in sector %" PRIu64 "\n", __func__, i);
 }
 } else {
 addr = PAGE_START(addr);
 page = addr >> 9;
-if (blk_read(s->blk, page, iobuf, 1) < 0) {
+if (blk_pread(s->blk, page << BDRV_SECTOR_BITS, iobuf,
+  BDRV_SECTOR_SIZE) < 0) {
 printf("%s: read error in sector %" PRIu64 "\n", __func__, page);
 }
 memset(iobuf + (addr & 0x1ff), 0xff, (~addr & 0x1ff) + 1);
-if (blk_write(s->blk, page, iobuf, 1) < 0) {
+if (blk_pwrite(s->blk, page << BDRV_SECTOR_BITS, iobuf,
+   BDRV_SECTOR_SIZE, 0) < 0) {
 printf("%s: write error in sector %" PRIu64 "\n", __func__, page);
 }

@@ -734,18 +741,20 @@ static void glue(nand_blk_erase_, 
PAGE_SIZE)(NANDFlashState *s)
 i = (addr & ~0x1ff) + 0x200;
 for (addr += ((PAGE_SIZE + OOB_SIZE) << s->erase_shift) - 0x200;
 i < addr; i += 0x200) {
-if (blk_write(s->blk, i >> 9, iobuf, 1) < 0) {
+if (blk_pwrite(s->blk, i, iobuf, BDRV_SECTOR_SIZE, 0) < 0) {
 printf("%s: write error in sector %" PRIu64 "\n",
__func__, i >> 9);
 }
 }

 page = i >> 9;
-if (blk_read(s->blk, page, iobuf, 1) < 0) {
+if (blk_pread(s->blk, page << BDRV_SECTOR_BITS, iobuf,
+  BDRV_SECTOR_SIZE) < 0) {
 printf("%s: read error in sector %" PRIu64 "\n", __func__, page);
 }
 memset(iobuf, 0xff, ((addr - 1) & 0x1ff) + 1);
-if (blk_write(s->blk, page, iobuf, 1) < 0) {
+if (blk_pwrite(s->blk, page << BDRV_SECTOR_BITS, iobuf,
+   BDRV_SECTOR_SIZE, 0) < 0) {
 printf("%s: write error in sector %" PRIu64 "\n", __func__, page);
 }
 }
@@ -760,7 +769,8 @@ static void glue(nand_blk_load_, PAGE_SIZE)(NANDFlashState 
*s,

 if (s->blk) {
 

[Qemu-block] [PATCH v6 18/20] qemu-img: Switch to byte-based block access

2016-05-04 Thread Eric Blake
Sector-based blk_write() should die; switch to byte-based
blk_pwrite() instead.  Likewise for blk_read().

Signed-off-by: Eric Blake 
---
 qemu-img.c | 28 +++-
 1 file changed, 19 insertions(+), 9 deletions(-)

diff --git a/qemu-img.c b/qemu-img.c
index 76430a8..491a460 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -1088,7 +1088,8 @@ static int check_empty_sectors(BlockBackend *blk, int64_t 
sect_num,
uint8_t *buffer, bool quiet)
 {
 int pnum, ret = 0;
-ret = blk_read(blk, sect_num, buffer, sect_count);
+ret = blk_pread(blk, sect_num << BDRV_SECTOR_BITS, buffer,
+sect_count << BDRV_SECTOR_BITS);
 if (ret < 0) {
 error_report("Error while reading offset %" PRId64 " of %s: %s",
  sectors_to_bytes(sect_num), filename, strerror(-ret));
@@ -1301,7 +1302,8 @@ static int img_compare(int argc, char **argv)
 nb_sectors = MIN(pnum1, pnum2);
 } else if (allocated1 == allocated2) {
 if (allocated1) {
-ret = blk_read(blk1, sector_num, buf1, nb_sectors);
+ret = blk_pread(blk1, sector_num << BDRV_SECTOR_BITS, buf1,
+nb_sectors << BDRV_SECTOR_BITS);
 if (ret < 0) {
 error_report("Error while reading offset %" PRId64 " of 
%s:"
  " %s", sectors_to_bytes(sector_num), 
filename1,
@@ -1309,7 +1311,8 @@ static int img_compare(int argc, char **argv)
 ret = 4;
 goto out;
 }
-ret = blk_read(blk2, sector_num, buf2, nb_sectors);
+ret = blk_pread(blk2, sector_num << BDRV_SECTOR_BITS, buf2,
+nb_sectors << BDRV_SECTOR_BITS);
 if (ret < 0) {
 error_report("Error while reading offset %" PRId64
  " of %s: %s", sectors_to_bytes(sector_num),
@@ -1522,7 +1525,9 @@ static int convert_read(ImgConvertState *s, int64_t 
sector_num, int nb_sectors,
 bs_sectors = s->src_sectors[s->src_cur];

 n = MIN(nb_sectors, bs_sectors - (sector_num - s->src_cur_offset));
-ret = blk_read(blk, sector_num - s->src_cur_offset, buf, n);
+ret = blk_pread(blk,
+(sector_num - s->src_cur_offset) << BDRV_SECTOR_BITS,
+buf, n << BDRV_SECTOR_BITS);
 if (ret < 0) {
 return ret;
 }
@@ -1577,7 +1582,8 @@ static int convert_write(ImgConvertState *s, int64_t 
sector_num, int nb_sectors,
 if (!s->min_sparse ||
 is_allocated_sectors_min(buf, n, &n, s->min_sparse))
 {
-ret = blk_write(s->target, sector_num, buf, n);
+ret = blk_pwrite(s->target, sector_num << BDRV_SECTOR_BITS,
+ buf, n << BDRV_SECTOR_BITS, 0);
 if (ret < 0) {
 return ret;
 }
@@ -3024,7 +3030,8 @@ static int img_rebase(int argc, char **argv)
 n = old_backing_num_sectors - sector;
 }

-ret = blk_read(blk_old_backing, sector, buf_old, n);
+ret = blk_pread(blk_old_backing, sector << BDRV_SECTOR_BITS,
+buf_old, n << BDRV_SECTOR_BITS);
 if (ret < 0) {
 error_report("error while reading from old backing file");
 goto out;
@@ -3038,7 +3045,8 @@ static int img_rebase(int argc, char **argv)
 n = new_backing_num_sectors - sector;
 }

-ret = blk_read(blk_new_backing, sector, buf_new, n);
+ret = blk_pread(blk_new_backing, sector << BDRV_SECTOR_BITS,
+buf_new, n << BDRV_SECTOR_BITS);
 if (ret < 0) {
 error_report("error while reading from new backing file");
 goto out;
@@ -3054,8 +3062,10 @@ static int img_rebase(int argc, char **argv)
 if (compare_sectors(buf_old + written * 512,
 buf_new + written * 512, n - written, &pnum))
 {
-ret = blk_write(blk, sector + written,
-buf_old + written * 512, pnum);
+ret = blk_pwrite(blk,
+ (sector + written) << BDRV_SECTOR_BITS,
+ buf_old + written * 512,
+ pnum << BDRV_SECTOR_BITS, 0);
 if (ret < 0) {
 error_report("Error while writing to COW image: %s",
 strerror(-ret));
-- 
2.5.5




[Qemu-block] [PATCH v6 10/20] fdc: Switch to byte-based block access

2016-05-04 Thread Eric Blake
Sector-based blk_write() should die; switch to byte-based
blk_pwrite() instead.  Likewise for blk_read().

Signed-off-by: Eric Blake 
---
 hw/block/fdc.c | 25 +
 1 file changed, 17 insertions(+), 8 deletions(-)

diff --git a/hw/block/fdc.c b/hw/block/fdc.c
index 3722275..f73af7d 100644
--- a/hw/block/fdc.c
+++ b/hw/block/fdc.c
@@ -223,6 +223,13 @@ static int fd_sector(FDrive *drv)
   NUM_SIDES(drv));
 }

+/* Returns current position, in bytes, for given drive */
+static int fd_offset(FDrive *drv)
+{
+g_assert(fd_sector(drv) < INT_MAX >> BDRV_SECTOR_BITS);
+return fd_sector(drv) << BDRV_SECTOR_BITS;
+}
+
 /* Seek to a new position:
  * returns 0 if already on right track
  * returns 1 if track changed
@@ -1629,8 +1636,8 @@ static int fdctrl_transfer_handler (void *opaque, int 
nchan,
 if (fdctrl->data_dir != FD_DIR_WRITE ||
 len < FD_SECTOR_LEN || rel_pos != 0) {
 /* READ & SCAN commands and realign to a sector for WRITE */
-if (blk_read(cur_drv->blk, fd_sector(cur_drv),
- fdctrl->fifo, 1) < 0) {
+if (blk_pread(cur_drv->blk, fd_offset(cur_drv),
+  fdctrl->fifo, BDRV_SECTOR_SIZE) < 0) {
 FLOPPY_DPRINTF("Floppy: error getting sector %d\n",
fd_sector(cur_drv));
 /* Sure, image size is too small... */
@@ -1657,8 +1664,8 @@ static int fdctrl_transfer_handler (void *opaque, int 
nchan,

 k->read_memory(fdctrl->dma, nchan, fdctrl->fifo + rel_pos,
fdctrl->data_pos, len);
-if (blk_write(cur_drv->blk, fd_sector(cur_drv),
-  fdctrl->fifo, 1) < 0) {
+if (blk_pwrite(cur_drv->blk, fd_offset(cur_drv),
+   fdctrl->fifo, BDRV_SECTOR_SIZE, 0) < 0) {
 FLOPPY_DPRINTF("error writing sector %d\n",
fd_sector(cur_drv));
 fdctrl_stop_transfer(fdctrl, FD_SR0_ABNTERM | FD_SR0_SEEK, 
0x00, 0x00);
@@ -1741,7 +1748,8 @@ static uint32_t fdctrl_read_data(FDCtrl *fdctrl)
fd_sector(cur_drv));
 return 0;
 }
-if (blk_read(cur_drv->blk, fd_sector(cur_drv), fdctrl->fifo, 1)
+if (blk_pread(cur_drv->blk, fd_offset(cur_drv), fdctrl->fifo,
+  BDRV_SECTOR_SIZE)
 < 0) {
 FLOPPY_DPRINTF("error getting sector %d\n",
fd_sector(cur_drv));
@@ -1820,7 +1828,8 @@ static void fdctrl_format_sector(FDCtrl *fdctrl)
 }
 memset(fdctrl->fifo, 0, FD_SECTOR_LEN);
 if (cur_drv->blk == NULL ||
-blk_write(cur_drv->blk, fd_sector(cur_drv), fdctrl->fifo, 1) < 0) {
+blk_pwrite(cur_drv->blk, fd_offset(cur_drv), fdctrl->fifo,
+   BDRV_SECTOR_SIZE, 0) < 0) {
 FLOPPY_DPRINTF("error formatting sector %d\n", fd_sector(cur_drv));
 fdctrl_stop_transfer(fdctrl, FD_SR0_ABNTERM | FD_SR0_SEEK, 0x00, 0x00);
 } else {
@@ -2243,8 +2252,8 @@ static void fdctrl_write_data(FDCtrl *fdctrl, uint32_t 
value)
 if (pos == FD_SECTOR_LEN - 1 ||
 fdctrl->data_pos == fdctrl->data_len) {
 cur_drv = get_cur_drv(fdctrl);
-if (blk_write(cur_drv->blk, fd_sector(cur_drv), fdctrl->fifo, 1)
-< 0) {
+if (blk_pwrite(cur_drv->blk, fd_offset(cur_drv), fdctrl->fifo,
+   BDRV_SECTOR_SIZE, 0) < 0) {
 FLOPPY_DPRINTF("error writing sector %d\n",
fd_sector(cur_drv));
 break;
-- 
2.5.5




[Qemu-block] [PATCH v6 12/20] onenand: Switch to byte-based block access

2016-05-04 Thread Eric Blake
Sector-based blk_write() should die; switch to byte-based
blk_pwrite() instead.  Likewise for blk_read().

This particular device picks its size during onenand_initfn(),
and can be at most 0x8000 bytes; therefore, shifting an
'int sec' request to get back to a byte offset should never
overflow 32 bits.  But adding assertions to document that point
should not hurt.
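The overflow being guarded against, spelled out (a sketch, not part of the
patch): with 512-byte sectors, sec << BDRV_SECTOR_BITS only stays within 32
bits while sec is below UINT32_MAX / BDRV_SECTOR_SIZE (roughly 8M sectors,
i.e. 4GB of flash):

    static uint32_t sec_to_byte_offset(int sec)
    {
        assert(UINT32_MAX >> BDRV_SECTOR_BITS > sec);
        return (uint32_t)sec << BDRV_SECTOR_BITS;
    }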

Signed-off-by: Eric Blake 

---
v5: compile-tested, add asserts that size never exceeds 0x8000,
so that we can consistently use uint32_t
---
 hw/block/onenand.c | 41 +++--
 1 file changed, 27 insertions(+), 14 deletions(-)

diff --git a/hw/block/onenand.c b/hw/block/onenand.c
index 883f4b1..8d84227 100644
--- a/hw/block/onenand.c
+++ b/hw/block/onenand.c
@@ -224,7 +224,8 @@ static void onenand_reset(OneNANDState *s, int cold)
 /* Lock the whole flash */
 memset(s->blockwp, ONEN_LOCK_LOCKED, s->blocks);

-if (s->blk_cur && blk_read(s->blk_cur, 0, s->boot[0], 8) < 0) {
+if (s->blk_cur && blk_pread(s->blk_cur, 0, s->boot[0],
+8 << BDRV_SECTOR_BITS) < 0) {
 hw_error("%s: Loading the BootRAM failed.\n", __func__);
 }
 }
@@ -240,8 +241,11 @@ static void onenand_system_reset(DeviceState *dev)
 static inline int onenand_load_main(OneNANDState *s, int sec, int secn,
 void *dest)
 {
+assert(UINT32_MAX >> BDRV_SECTOR_BITS > sec);
+assert(UINT32_MAX >> BDRV_SECTOR_BITS > secn);
 if (s->blk_cur) {
-return blk_read(s->blk_cur, sec, dest, secn) < 0;
+return blk_pread(s->blk_cur, sec << BDRV_SECTOR_BITS, dest,
+ secn << BDRV_SECTOR_BITS) < 0;
 } else if (sec + secn > s->secs_cur) {
 return 1;
 }
@@ -257,19 +261,22 @@ static inline int onenand_prog_main(OneNANDState *s, int 
sec, int secn,
 int result = 0;

 if (secn > 0) {
-uint32_t size = (uint32_t)secn * 512;
+uint32_t size = secn << BDRV_SECTOR_BITS;
+uint32_t offset = sec << BDRV_SECTOR_BITS;
+assert(UINT32_MAX >> BDRV_SECTOR_BITS > sec);
+assert(UINT32_MAX >> BDRV_SECTOR_BITS > secn);
 const uint8_t *sp = (const uint8_t *)src;
 uint8_t *dp = 0;
 if (s->blk_cur) {
 dp = g_malloc(size);
-if (!dp || blk_read(s->blk_cur, sec, dp, secn) < 0) {
+if (!dp || blk_pread(s->blk_cur, offset, dp, size) < 0) {
 result = 1;
 }
 } else {
 if (sec + secn > s->secs_cur) {
 result = 1;
 } else {
-dp = (uint8_t *)s->current + (sec << 9);
+dp = (uint8_t *)s->current + offset;
 }
 }
 if (!result) {
@@ -278,7 +285,7 @@ static inline int onenand_prog_main(OneNANDState *s, int 
sec, int secn,
 dp[i] &= sp[i];
 }
 if (s->blk_cur) {
-result = blk_write(s->blk_cur, sec, dp, secn) < 0;
+result = blk_pwrite(s->blk_cur, offset, dp, size, 0) < 0;
 }
 }
 if (dp && s->blk_cur) {
@@ -295,7 +302,8 @@ static inline int onenand_load_spare(OneNANDState *s, int 
sec, int secn,
 uint8_t buf[512];

 if (s->blk_cur) {
-if (blk_read(s->blk_cur, s->secs_cur + (sec >> 5), buf, 1) < 0) {
+uint32_t offset = (s->secs_cur + (sec >> 5)) << BDRV_SECTOR_BITS;
+if (blk_pread(s->blk_cur, offset, buf, BDRV_SECTOR_SIZE) < 0) {
 return 1;
 }
 memcpy(dest, buf + ((sec & 31) << 4), secn << 4);
@@ -304,7 +312,7 @@ static inline int onenand_load_spare(OneNANDState *s, int 
sec, int secn,
 } else {
 memcpy(dest, s->current + (s->secs_cur << 9) + (sec << 4), secn << 4);
 }
- 
+
 return 0;
 }

@@ -315,10 +323,12 @@ static inline int onenand_prog_spare(OneNANDState *s, int 
sec, int secn,
 if (secn > 0) {
 const uint8_t *sp = (const uint8_t *)src;
 uint8_t *dp = 0, *dpp = 0;
+uint32_t offset = (s->secs_cur + (sec >> 5)) << BDRV_SECTOR_BITS;
+assert(UINT32_MAX >> BDRV_SECTOR_BITS > s->secs_cur + (sec >> 5));
 if (s->blk_cur) {
 dp = g_malloc(512);
 if (!dp
-|| blk_read(s->blk_cur, s->secs_cur + (sec >> 5), dp, 1) < 0) {
+|| blk_pread(s->blk_cur, offset, dp, BDRV_SECTOR_SIZE) < 0) {
 result = 1;
 } else {
 dpp = dp + ((sec & 31) << 4);
@@ -336,8 +346,8 @@ static inline int onenand_prog_spare(OneNANDState *s, int 
sec, int secn,
 dpp[i] &= sp[i];
 }
 if (s->blk_cur) {
-result = blk_write(s->blk_cur, s->secs_cur + (sec >> 5),
-   dp, 1) < 0;
+result = blk_pwrite(s->blk_cur, offset, dp,
+BDRV_SECTOR_SIZE, 0) < 0;
 }
 }

[Qemu-block] [PATCH v6 16/20] atapi: Switch to byte-based block access

2016-05-04 Thread Eric Blake
Sector-based blk_read() should die; switch to byte-based
blk_pread() instead.

Add new defines ATAPI_SECTOR_BITS and ATAPI_SECTOR_SIZE to
use anywhere we were previously scaling BDRV_SECTOR_* by 4,
for better legibility.
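The arithmetic behind the new names (a sketch, not part of the patch): one
2048-byte CD sector is four 512-byte block-layer sectors, so
ATAPI_SECTOR_BITS is 2 + BDRV_SECTOR_BITS = 11, and an LBA shifted by 2
(block sectors) corresponds to the same LBA shifted by ATAPI_SECTOR_BITS
as a byte offset:

    static int64_t lba_to_byte_offset(int lba)
    {
        /* == ((int64_t)lba << 2) * BDRV_SECTOR_SIZE */
        return (int64_t)lba << ATAPI_SECTOR_BITS;
    }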

Signed-off-by: Eric Blake 

---
v4: add new defines for use in more places [jsnow]
---
 hw/ide/atapi.c | 19 +++
 1 file changed, 11 insertions(+), 8 deletions(-)

diff --git a/hw/ide/atapi.c b/hw/ide/atapi.c
index 2bb606c..95056d9 100644
--- a/hw/ide/atapi.c
+++ b/hw/ide/atapi.c
@@ -28,6 +28,9 @@
 #include "hw/scsi/scsi.h"
 #include "sysemu/block-backend.h"

+#define ATAPI_SECTOR_BITS (2 + BDRV_SECTOR_BITS)
+#define ATAPI_SECTOR_SIZE (1 << ATAPI_SECTOR_BITS)
+
 static void ide_atapi_cmd_read_dma_cb(void *opaque, int ret);

 static void padstr8(uint8_t *buf, int buf_size, const char *src)
@@ -111,7 +114,7 @@ cd_read_sector_sync(IDEState *s)
 {
 int ret;
 block_acct_start(blk_get_stats(s->blk), &s->acct,
- 4 * BDRV_SECTOR_SIZE, BLOCK_ACCT_READ);
+ ATAPI_SECTOR_SIZE, BLOCK_ACCT_READ);

 #ifdef DEBUG_IDE_ATAPI
 printf("cd_read_sector_sync: lba=%d\n", s->lba);
@@ -119,12 +122,12 @@ cd_read_sector_sync(IDEState *s)

 switch (s->cd_sector_size) {
 case 2048:
-ret = blk_read(s->blk, (int64_t)s->lba << 2,
-   s->io_buffer, 4);
+ret = blk_pread(s->blk, (int64_t)s->lba << ATAPI_SECTOR_BITS,
+s->io_buffer, ATAPI_SECTOR_SIZE);
 break;
 case 2352:
-ret = blk_read(s->blk, (int64_t)s->lba << 2,
-   s->io_buffer + 16, 4);
+ret = blk_pread(s->blk, (int64_t)s->lba << ATAPI_SECTOR_BITS,
+s->io_buffer + 16, ATAPI_SECTOR_SIZE);
 if (ret >= 0) {
 cd_data_to_raw(s->io_buffer, s->lba);
 }
@@ -182,7 +185,7 @@ static int cd_read_sector(IDEState *s)
 s->iov.iov_base = (s->cd_sector_size == 2352) ?
   s->io_buffer + 16 : s->io_buffer;

-s->iov.iov_len = 4 * BDRV_SECTOR_SIZE;
+s->iov.iov_len = ATAPI_SECTOR_SIZE;
 qemu_iovec_init_external(&s->qiov, &s->iov, 1);

 #ifdef DEBUG_IDE_ATAPI
@@ -190,7 +193,7 @@ static int cd_read_sector(IDEState *s)
 #endif

 block_acct_start(blk_get_stats(s->blk), &s->acct,
- 4 * BDRV_SECTOR_SIZE, BLOCK_ACCT_READ);
+ ATAPI_SECTOR_SIZE, BLOCK_ACCT_READ);

 ide_buffered_readv(s, (int64_t)s->lba << 2, &s->qiov, 4,
cd_read_sector_cb, s);
@@ -435,7 +438,7 @@ static void ide_atapi_cmd_read_dma_cb(void *opaque, int ret)
 #endif

 s->bus->dma->iov.iov_base = (void *)(s->io_buffer + data_offset);
-s->bus->dma->iov.iov_len = n * 4 * 512;
+s->bus->dma->iov.iov_len = n * ATAPI_SECTOR_SIZE;
 qemu_iovec_init_external(&s->bus->dma->qiov, &s->bus->dma->iov, 1);

 s->bus->dma->aiocb = ide_buffered_readv(s, (int64_t)s->lba << 2,
-- 
2.5.5




[Qemu-block] [PATCH v6 15/20] m25p80: Switch to byte-based block access

2016-05-04 Thread Eric Blake
Sector-based blk_read() should die; switch to byte-based
blk_pread() instead.

Likewise for blk_aio_readv() and blk_aio_writev().
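The asynchronous variant of the conversion looks like this (a sketch; the
helper and its callback arguments are hypothetical): the offset moves from
sectors to bytes and the length now comes from the iovec itself, so no
sector count is passed:

    static void aio_flush_sketch(BlockBackend *blk, void *data,
                                 int64_t off, int64_t len,
                                 BlockCompletionFunc *cb, void *opaque)
    {
        QEMUIOVector iov;

        qemu_iovec_init(&iov, 1);
        qemu_iovec_add(&iov, data, len);
        /* before: blk_aio_writev(blk, off / BDRV_SECTOR_SIZE, &iov,
         *                        len / BDRV_SECTOR_SIZE, cb, opaque); */
        blk_aio_pwritev(blk, off, &iov, 0, cb, opaque);
    }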

Signed-off-by: Eric Blake 
---
 hw/block/m25p80.c | 23 +++
 1 file changed, 7 insertions(+), 16 deletions(-)

diff --git a/hw/block/m25p80.c b/hw/block/m25p80.c
index 906b712..5d30863 100644
--- a/hw/block/m25p80.c
+++ b/hw/block/m25p80.c
@@ -358,25 +358,21 @@ static void blk_sync_complete(void *opaque, int ret)

 static void flash_sync_page(Flash *s, int page)
 {
-int blk_sector, nb_sectors;
 QEMUIOVector iov;

 if (!s->blk || blk_is_read_only(s->blk)) {
 return;
 }

-blk_sector = (page * s->pi->page_size) / BDRV_SECTOR_SIZE;
-nb_sectors = DIV_ROUND_UP(s->pi->page_size, BDRV_SECTOR_SIZE);
qemu_iovec_init(&iov, 1);
-qemu_iovec_add(&iov, s->storage + blk_sector * BDRV_SECTOR_SIZE,
-   nb_sectors * BDRV_SECTOR_SIZE);
-blk_aio_writev(s->blk, blk_sector, &iov, nb_sectors, blk_sync_complete,
-   NULL);
+qemu_iovec_add(&iov, s->storage + page * s->pi->page_size,
+   s->pi->page_size);
+blk_aio_pwritev(s->blk, page * s->pi->page_size, &iov, 0,
+blk_sync_complete, NULL);
 }

 static inline void flash_sync_area(Flash *s, int64_t off, int64_t len)
 {
-int64_t start, end, nb_sectors;
 QEMUIOVector iov;

 if (!s->blk || blk_is_read_only(s->blk)) {
@@ -384,13 +380,9 @@ static inline void flash_sync_area(Flash *s, int64_t off, 
int64_t len)
 }

 assert(!(len % BDRV_SECTOR_SIZE));
-start = off / BDRV_SECTOR_SIZE;
-end = (off + len) / BDRV_SECTOR_SIZE;
-nb_sectors = end - start;
qemu_iovec_init(&iov, 1);
-qemu_iovec_add(&iov, s->storage + (start * BDRV_SECTOR_SIZE),
-nb_sectors * BDRV_SECTOR_SIZE);
-blk_aio_writev(s->blk, start, &iov, nb_sectors, blk_sync_complete, NULL);
+qemu_iovec_add(&iov, s->storage + off, len);
+blk_aio_pwritev(s->blk, off, &iov, 0, blk_sync_complete, NULL);
 }

 static void flash_erase(Flash *s, int offset, FlashCMD cmd)
@@ -907,8 +899,7 @@ static int m25p80_init(SSISlave *ss)
 s->storage = blk_blockalign(s->blk, s->size);

 /* FIXME: Move to late init */
-if (blk_read(s->blk, 0, s->storage,
- DIV_ROUND_UP(s->size, BDRV_SECTOR_SIZE))) {
+if (blk_pread(s->blk, 0, s->storage, s->size)) {
 fprintf(stderr, "Failed to initialize SPI flash!\n");
 return 1;
 }
-- 
2.5.5




[Qemu-block] [PATCH v6 14/20] sd: Switch to byte-based block access

2016-05-04 Thread Eric Blake
Sector-based blk_write() should die; switch to byte-based
blk_pwrite() instead.  Likewise for blk_read().

Greatly simplifies the code, now that we let the block layer
take care of alignment and read-modify-write on our behalf :)
In fact, we no longer need to include 'buf' in the migration
stream (although we do have to ensure that the stream remains
compatible).

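As a rough sketch of the simplification (illustrative only, not part of
this patch; 'blk', 'addr', 'data' and 'len' are placeholders, and the
sub-sector update is assumed to stay within one 512-byte sector):

    /* Old style: the device model does its own read-modify-write. */
    uint8_t sector[512];
    blk_read(blk, addr >> 9, sector, 1);        /* fetch the whole sector */
    memcpy(sector + (addr & 511), data, len);   /* patch the bytes we own */
    blk_write(blk, addr >> 9, sector, 1);       /* write it back          */

    /* New style: the block layer handles alignment and RMW for us. */
    blk_pwrite(blk, addr, data, len, 0);
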
Signed-off-by: Eric Blake 

---
v5: fix bug in sd_blk_write, drop sd->buf
---
 hw/sd/sd.c | 51 ---
 1 file changed, 4 insertions(+), 47 deletions(-)

diff --git a/hw/sd/sd.c b/hw/sd/sd.c
index b66e5d2..87e3d23 100644
--- a/hw/sd/sd.c
+++ b/hw/sd/sd.c
@@ -123,7 +123,6 @@ struct SDState {
 qemu_irq readonly_cb;
 qemu_irq inserted_cb;
 BlockBackend *blk;
-uint8_t *buf;

 bool enable;
 };
@@ -551,7 +550,7 @@ static const VMStateDescription sd_vmstate = {
 VMSTATE_UINT64(data_start, SDState),
 VMSTATE_UINT32(data_offset, SDState),
 VMSTATE_UINT8_ARRAY(data, SDState, 512),
-VMSTATE_BUFFER_POINTER_UNSAFE(buf, SDState, 1, 512),
+VMSTATE_UNUSED_V(1, 512),
 VMSTATE_BOOL(enable, SDState),
 VMSTATE_END_OF_LIST()
 },
@@ -1577,57 +1576,17 @@ send_response:

 static void sd_blk_read(SDState *sd, uint64_t addr, uint32_t len)
 {
-uint64_t end = addr + len;
-
 DPRINTF("sd_blk_read: addr = 0x%08llx, len = %d\n",
 (unsigned long long) addr, len);
-if (!sd->blk || blk_read(sd->blk, addr >> 9, sd->buf, 1) < 0) {
+if (!sd->blk || blk_pread(sd->blk, addr, sd->data, len) < 0) {
 fprintf(stderr, "sd_blk_read: read error on host side\n");
-return;
 }
-
-if (end > (addr & ~511) + 512) {
-memcpy(sd->data, sd->buf + (addr & 511), 512 - (addr & 511));
-
-if (blk_read(sd->blk, end >> 9, sd->buf, 1) < 0) {
-fprintf(stderr, "sd_blk_read: read error on host side\n");
-return;
-}
-memcpy(sd->data + 512 - (addr & 511), sd->buf, end & 511);
-} else
-memcpy(sd->data, sd->buf + (addr & 511), len);
 }

 static void sd_blk_write(SDState *sd, uint64_t addr, uint32_t len)
 {
-uint64_t end = addr + len;
-
-if ((addr & 511) || len < 512)
-if (!sd->blk || blk_read(sd->blk, addr >> 9, sd->buf, 1) < 0) {
-fprintf(stderr, "sd_blk_write: read error on host side\n");
-return;
-}
-
-if (end > (addr & ~511) + 512) {
-memcpy(sd->buf + (addr & 511), sd->data, 512 - (addr & 511));
-if (blk_write(sd->blk, addr >> 9, sd->buf, 1) < 0) {
-fprintf(stderr, "sd_blk_write: write error on host side\n");
-return;
-}
-
-if (blk_read(sd->blk, end >> 9, sd->buf, 1) < 0) {
-fprintf(stderr, "sd_blk_write: read error on host side\n");
-return;
-}
-memcpy(sd->buf, sd->data + 512 - (addr & 511), end & 511);
-if (blk_write(sd->blk, end >> 9, sd->buf, 1) < 0) {
-fprintf(stderr, "sd_blk_write: write error on host side\n");
-}
-} else {
-memcpy(sd->buf + (addr & 511), sd->data, len);
-if (!sd->blk || blk_write(sd->blk, addr >> 9, sd->buf, 1) < 0) {
-fprintf(stderr, "sd_blk_write: write error on host side\n");
-}
+if (!sd->blk || blk_pwrite(sd->blk, addr, sd->data, len, 0) < 0) {
+fprintf(stderr, "sd_blk_write: write error on host side\n");
 }
 }

@@ -1925,8 +1884,6 @@ static void sd_realize(DeviceState *dev, Error **errp)
 return;
 }

-sd->buf = blk_blockalign(sd->blk, 512);
-
 if (sd->blk) {
 blk_set_dev_ops(sd->blk, &sd_block_ops, sd);
 }
-- 
2.5.5




[Qemu-block] [PATCH v6 08/20] virtio: Switch to byte-based aio block access

2016-05-04 Thread Eric Blake
Sector-based blk_aio_readv() and blk_aio_writev() should die; switch
to byte-based blk_aio_preadv() and blk_aio_pwritev() instead.

The trace is modified at the same time, and nb_sectors is now
unused.  Fix a comment typo while in the vicinity.

Signed-off-by: Eric Blake 
---
 hw/block/virtio-blk.c | 18 --
 trace-events  |  2 +-
 2 files changed, 9 insertions(+), 11 deletions(-)

diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
index 3f88f8c..284e646 100644
--- a/hw/block/virtio-blk.c
+++ b/hw/block/virtio-blk.c
@@ -322,7 +322,6 @@ static inline void submit_requests(BlockBackend *blk, 
MultiReqBuffer *mrb,
 {
 QEMUIOVector *qiov = &mrb->reqs[start]->qiov;
 int64_t sector_num = mrb->reqs[start]->sector_num;
-int nb_sectors = mrb->reqs[start]->qiov.size / BDRV_SECTOR_SIZE;
 bool is_write = mrb->is_write;

 if (num_reqs > 1) {
@@ -331,7 +330,7 @@ static inline void submit_requests(BlockBackend *blk, 
MultiReqBuffer *mrb,
 int tmp_niov = qiov->niov;

 /* mrb->reqs[start]->qiov was initialized from external so we can't
- * modifiy it here. We need to initialize it locally and then add the
+ * modify it here. We need to initialize it locally and then add the
  * external iovecs. */
 qemu_iovec_init(qiov, niov);

@@ -343,23 +342,22 @@ static inline void submit_requests(BlockBackend *blk, 
MultiReqBuffer *mrb,
 qemu_iovec_concat(qiov, &mrb->reqs[i]->qiov, 0,
   mrb->reqs[i]->qiov.size);
 mrb->reqs[i - 1]->mr_next = mrb->reqs[i];
-nb_sectors += mrb->reqs[i]->qiov.size / BDRV_SECTOR_SIZE;
 }
-assert(nb_sectors == qiov->size / BDRV_SECTOR_SIZE);

-trace_virtio_blk_submit_multireq(mrb, start, num_reqs, sector_num,
- nb_sectors, is_write);
+trace_virtio_blk_submit_multireq(mrb, start, num_reqs,
+ sector_num << BDRV_SECTOR_BITS,
+ qiov->size, is_write);
 block_acct_merge_done(blk_get_stats(blk),
   is_write ? BLOCK_ACCT_WRITE : BLOCK_ACCT_READ,
   num_reqs - 1);
 }

 if (is_write) {
-blk_aio_writev(blk, sector_num, qiov, nb_sectors,
+blk_aio_pwritev(blk, sector_num << BDRV_SECTOR_BITS, qiov, 0,
+virtio_blk_rw_complete, mrb->reqs[start]);
+} else {
+blk_aio_preadv(blk, sector_num << BDRV_SECTOR_BITS, qiov, 0,
virtio_blk_rw_complete, mrb->reqs[start]);
-} else {
-blk_aio_readv(blk, sector_num, qiov, nb_sectors,
-  virtio_blk_rw_complete, mrb->reqs[start]);
 }
 }

diff --git a/trace-events b/trace-events
index b4acd2a..b588091 100644
--- a/trace-events
+++ b/trace-events
@@ -118,7 +118,7 @@ virtio_blk_req_complete(void *req, int status) "req %p 
status %d"
 virtio_blk_rw_complete(void *req, int ret) "req %p ret %d"
 virtio_blk_handle_write(void *req, uint64_t sector, size_t nsectors) "req %p 
sector %"PRIu64" nsectors %zu"
 virtio_blk_handle_read(void *req, uint64_t sector, size_t nsectors) "req %p 
sector %"PRIu64" nsectors %zu"
-virtio_blk_submit_multireq(void *mrb, int start, int num_reqs, uint64_t 
sector, size_t nsectors, bool is_write) "mrb %p start %d num_reqs %d sector 
%"PRIu64" nsectors %zu is_write %d"
+virtio_blk_submit_multireq(void *mrb, int start, int num_reqs, uint64_t 
offset, size_t size, bool is_write) "mrb %p start %d num_reqs %d offset 
%"PRIu64" size %zu is_write %d"

 # hw/block/dataplane/virtio-blk.c
 virtio_blk_data_plane_start(void *s) "dataplane %p"
-- 
2.5.5




[Qemu-block] [PATCH v6 01/20] block: Allow BDRV_REQ_FUA through blk_pwrite()

2016-05-04 Thread Eric Blake
We have several block drivers that understand BDRV_REQ_FUA,
and emulate it in the block layer for the rest by a full flush.
But without a way to actually request BDRV_REQ_FUA during a
pass-through blk_pwrite(), FUA-aware block drivers like NBD are
forced to repeat the emulation logic of a full flush regardless
of whether the backend they are writing to could do it more
efficiently.

This patch just wires up a flags argument; followup patches
will actually make use of it in the NBD driver and in qemu-io.

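As an illustration of what the new parameter enables (a hypothetical
caller, not part of this patch; 'blk', 'offset', 'buf' and 'count' are
placeholders), a driver that needs one specific write to reach stable
storage before completion can now write:

    /* Request FUA semantics for this write; the block layer uses native
     * FUA support where available and falls back to a flush otherwise. */
    ret = blk_pwrite(blk, offset, buf, count, BDRV_REQ_FUA);
    if (ret < 0) {
        return ret;
    }
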
Signed-off-by: Eric Blake 
Acked-by: Denis V. Lunev 
---
 include/sysemu/block-backend.h |  3 ++-
 block/block-backend.c  |  6 --
 block/crypto.c |  2 +-
 block/parallels.c  |  2 +-
 block/qcow.c   |  8 
 block/qcow2.c  |  4 ++--
 block/qed.c|  6 +++---
 block/sheepdog.c   |  2 +-
 block/vdi.c|  4 ++--
 block/vhdx.c   |  5 +++--
 block/vmdk.c   | 10 +-
 block/vpc.c| 10 +-
 hw/nvram/spapr_nvram.c |  4 ++--
 nbd/server.c   |  2 +-
 qemu-io-cmds.c |  2 +-
 15 files changed, 37 insertions(+), 33 deletions(-)

diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h
index c62b6fe..6991b26 100644
--- a/include/sysemu/block-backend.h
+++ b/include/sysemu/block-backend.h
@@ -102,7 +102,8 @@ BlockAIOCB *blk_aio_write_zeroes(BlockBackend *blk, int64_t 
sector_num,
  int nb_sectors, BdrvRequestFlags flags,
  BlockCompletionFunc *cb, void *opaque);
 int blk_pread(BlockBackend *blk, int64_t offset, void *buf, int count);
-int blk_pwrite(BlockBackend *blk, int64_t offset, const void *buf, int count);
+int blk_pwrite(BlockBackend *blk, int64_t offset, const void *buf, int count,
+   BdrvRequestFlags flags);
 int64_t blk_getlength(BlockBackend *blk);
 void blk_get_geometry(BlockBackend *blk, uint64_t *nb_sectors_ptr);
 int64_t blk_nb_sectors(BlockBackend *blk);
diff --git a/block/block-backend.c b/block/block-backend.c
index a7623e8..96c1d7c 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -953,9 +953,11 @@ int blk_pread(BlockBackend *blk, int64_t offset, void 
*buf, int count)
 return count;
 }

-int blk_pwrite(BlockBackend *blk, int64_t offset, const void *buf, int count)
+int blk_pwrite(BlockBackend *blk, int64_t offset, const void *buf, int count,
+   BdrvRequestFlags flags)
 {
-int ret = blk_prw(blk, offset, (void*) buf, count, blk_write_entry, 0);
+int ret = blk_prw(blk, offset, (void *) buf, count, blk_write_entry,
+  flags);
 if (ret < 0) {
 return ret;
 }
diff --git a/block/crypto.c b/block/crypto.c
index 1903e84..32ba17c 100644
--- a/block/crypto.c
+++ b/block/crypto.c
@@ -91,7 +91,7 @@ static ssize_t block_crypto_write_func(QCryptoBlock *block,
 struct BlockCryptoCreateData *data = opaque;
 ssize_t ret;

-ret = blk_pwrite(data->blk, offset, buf, buflen);
+ret = blk_pwrite(data->blk, offset, buf, buflen, 0);
 if (ret < 0) {
 error_setg_errno(errp, -ret, "Could not write encryption header");
 return ret;
diff --git a/block/parallels.c b/block/parallels.c
index 324ed43..2d8bc87 100644
--- a/block/parallels.c
+++ b/block/parallels.c
@@ -512,7 +512,7 @@ static int parallels_create(const char *filename, QemuOpts 
*opts, Error **errp)
 memset(tmp, 0, sizeof(tmp));
 memcpy(tmp, &header, sizeof(header));

-ret = blk_pwrite(file, 0, tmp, BDRV_SECTOR_SIZE);
+ret = blk_pwrite(file, 0, tmp, BDRV_SECTOR_SIZE, 0);
 if (ret < 0) {
 goto exit;
 }
diff --git a/block/qcow.c b/block/qcow.c
index 60ddb12..d6dc1b0 100644
--- a/block/qcow.c
+++ b/block/qcow.c
@@ -853,14 +853,14 @@ static int qcow_create(const char *filename, QemuOpts 
*opts, Error **errp)
 }

 /* write all the data */
-ret = blk_pwrite(qcow_blk, 0, &header, sizeof(header));
+ret = blk_pwrite(qcow_blk, 0, &header, sizeof(header), 0);
 if (ret != sizeof(header)) {
 goto exit;
 }

 if (backing_file) {
 ret = blk_pwrite(qcow_blk, sizeof(header),
-backing_file, backing_filename_len);
+ backing_file, backing_filename_len, 0);
 if (ret != backing_filename_len) {
 goto exit;
 }
@@ -869,8 +869,8 @@ static int qcow_create(const char *filename, QemuOpts 
*opts, Error **errp)
 tmp = g_malloc0(BDRV_SECTOR_SIZE);
 for (i = 0; i < ((sizeof(uint64_t)*l1_size + BDRV_SECTOR_SIZE - 1)/
 BDRV_SECTOR_SIZE); i++) {
-ret = blk_pwrite(qcow_blk, header_size +
-BDRV_SECTOR_SIZE*i, tmp, BDRV_SECTOR_SIZE);
+ret = blk_pwrite(qcow_blk, header_size + BDRV_SECTOR_SIZE * i,
+ tmp, BDRV_SECTOR_SIZE, 0);
 if (ret != BDRV_SECTOR_SIZE) 

[Qemu-block] [PATCH v6 03/20] block: Switch blk_read_unthrottled() to byte interface

2016-05-04 Thread Eric Blake
Sector-based blk_read() should die; convert the one-off
variant blk_read_unthrottled().

Signed-off-by: Eric Blake 
---
 include/sysemu/block-backend.h | 4 ++--
 block/block-backend.c  | 8 
 hw/block/hd-geometry.c | 2 +-
 3 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h
index 6991b26..662a106 100644
--- a/include/sysemu/block-backend.h
+++ b/include/sysemu/block-backend.h
@@ -92,8 +92,8 @@ void *blk_get_attached_dev(BlockBackend *blk);
 void blk_set_dev_ops(BlockBackend *blk, const BlockDevOps *ops, void *opaque);
 int blk_read(BlockBackend *blk, int64_t sector_num, uint8_t *buf,
  int nb_sectors);
-int blk_read_unthrottled(BlockBackend *blk, int64_t sector_num, uint8_t *buf,
- int nb_sectors);
+int blk_pread_unthrottled(BlockBackend *blk, int64_t offset, uint8_t *buf,
+  int count);
 int blk_write(BlockBackend *blk, int64_t sector_num, const uint8_t *buf,
   int nb_sectors);
 int blk_write_zeroes(BlockBackend *blk, int64_t sector_num,
diff --git a/block/block-backend.c b/block/block-backend.c
index 96c1d7c..e5a8a07 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -790,19 +790,19 @@ int blk_read(BlockBackend *blk, int64_t sector_num, 
uint8_t *buf,
 return blk_rw(blk, sector_num, buf, nb_sectors, blk_read_entry, 0);
 }

-int blk_read_unthrottled(BlockBackend *blk, int64_t sector_num, uint8_t *buf,
- int nb_sectors)
+int blk_pread_unthrottled(BlockBackend *blk, int64_t offset, uint8_t *buf,
+  int count)
 {
 BlockDriverState *bs = blk_bs(blk);
 int ret;

-ret = blk_check_request(blk, sector_num, nb_sectors);
+ret = blk_check_byte_request(blk, offset, count);
 if (ret < 0) {
 return ret;
 }

 bdrv_no_throttling_begin(bs);
-ret = blk_read(blk, sector_num, buf, nb_sectors);
+ret = blk_pread(blk, offset, buf, count);
 bdrv_no_throttling_end(bs);
 return ret;
 }
diff --git a/hw/block/hd-geometry.c b/hw/block/hd-geometry.c
index 6d02192..d388f13 100644
--- a/hw/block/hd-geometry.c
+++ b/hw/block/hd-geometry.c
@@ -66,7 +66,7 @@ static int guess_disk_lchs(BlockBackend *blk,
  * but also in async I/O mode. So the I/O throttling function has to
  * be disabled temporarily here, not permanently.
  */
-if (blk_read_unthrottled(blk, 0, buf, 1) < 0) {
+if (blk_pread_unthrottled(blk, 0, buf, BDRV_SECTOR_SIZE) < 0) {
 return -1;
 }
 /* test msdos magic */
-- 
2.5.5




[Qemu-block] [PATCH v6 04/20] block: Switch blk_*write_zeroes() to byte interface

2016-05-04 Thread Eric Blake
Sector-based blk_write() should die; convert the one-off
variant blk_write_zeroes() to use an offset/count interface
instead.  Likewise for blk_co_write_zeroes() and
blk_aio_write_zeroes().

Signed-off-by: Eric Blake 
---
 include/sysemu/block-backend.h | 12 ++--
 block/block-backend.c  | 33 +++--
 block/parallels.c  |  3 ++-
 hw/scsi/scsi-disk.c|  4 ++--
 qemu-img.c |  3 ++-
 qemu-io-cmds.c |  6 ++
 6 files changed, 25 insertions(+), 36 deletions(-)

diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h
index 662a106..851376b 100644
--- a/include/sysemu/block-backend.h
+++ b/include/sysemu/block-backend.h
@@ -96,10 +96,10 @@ int blk_pread_unthrottled(BlockBackend *blk, int64_t 
offset, uint8_t *buf,
   int count);
 int blk_write(BlockBackend *blk, int64_t sector_num, const uint8_t *buf,
   int nb_sectors);
-int blk_write_zeroes(BlockBackend *blk, int64_t sector_num,
- int nb_sectors, BdrvRequestFlags flags);
-BlockAIOCB *blk_aio_write_zeroes(BlockBackend *blk, int64_t sector_num,
- int nb_sectors, BdrvRequestFlags flags,
+int blk_write_zeroes(BlockBackend *blk, int64_t offset,
+ int count, BdrvRequestFlags flags);
+BlockAIOCB *blk_aio_write_zeroes(BlockBackend *blk, int64_t offset,
+ int count, BdrvRequestFlags flags,
  BlockCompletionFunc *cb, void *opaque);
 int blk_pread(BlockBackend *blk, int64_t offset, void *buf, int count);
 int blk_pwrite(BlockBackend *blk, int64_t offset, const void *buf, int count,
@@ -179,8 +179,8 @@ int blk_get_open_flags_from_root_state(BlockBackend *blk);

 void *blk_aio_get(const AIOCBInfo *aiocb_info, BlockBackend *blk,
   BlockCompletionFunc *cb, void *opaque);
-int coroutine_fn blk_co_write_zeroes(BlockBackend *blk, int64_t sector_num,
- int nb_sectors, BdrvRequestFlags flags);
+int coroutine_fn blk_co_write_zeroes(BlockBackend *blk, int64_t offset,
+ int count, BdrvRequestFlags flags);
 int blk_write_compressed(BlockBackend *blk, int64_t sector_num,
  const uint8_t *buf, int nb_sectors);
 int blk_truncate(BlockBackend *blk, int64_t offset);
diff --git a/block/block-backend.c b/block/block-backend.c
index e5a8a07..f8f88a6 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -814,11 +814,11 @@ int blk_write(BlockBackend *blk, int64_t sector_num, 
const uint8_t *buf,
   blk_write_entry, 0);
 }

-int blk_write_zeroes(BlockBackend *blk, int64_t sector_num,
- int nb_sectors, BdrvRequestFlags flags)
+int blk_write_zeroes(BlockBackend *blk, int64_t offset,
+ int count, BdrvRequestFlags flags)
 {
-return blk_rw(blk, sector_num, NULL, nb_sectors, blk_write_entry,
-  flags | BDRV_REQ_ZERO_WRITE);
+return blk_prw(blk, offset, NULL, count, blk_write_entry,
+   flags | BDRV_REQ_ZERO_WRITE);
 }

 static void error_callback_bh(void *opaque)
@@ -930,18 +930,12 @@ static void blk_aio_write_entry(void *opaque)
 blk_aio_complete(acb);
 }

-BlockAIOCB *blk_aio_write_zeroes(BlockBackend *blk, int64_t sector_num,
- int nb_sectors, BdrvRequestFlags flags,
+BlockAIOCB *blk_aio_write_zeroes(BlockBackend *blk, int64_t offset,
+ int count, BdrvRequestFlags flags,
  BlockCompletionFunc *cb, void *opaque)
 {
-if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) {
-return blk_abort_aio_request(blk, cb, opaque, -EINVAL);
-}
-
-return blk_aio_prwv(blk, sector_num << BDRV_SECTOR_BITS,
-nb_sectors << BDRV_SECTOR_BITS, NULL,
-blk_aio_write_entry, flags | BDRV_REQ_ZERO_WRITE,
-cb, opaque);
+return blk_aio_prwv(blk, offset, count, NULL, blk_aio_write_entry,
+flags | BDRV_REQ_ZERO_WRITE, cb, opaque);
 }

 int blk_pread(BlockBackend *blk, int64_t offset, void *buf, int count)
@@ -1444,15 +1438,10 @@ void *blk_aio_get(const AIOCBInfo *aiocb_info, 
BlockBackend *blk,
 return qemu_aio_get(aiocb_info, blk_bs(blk), cb, opaque);
 }

-int coroutine_fn blk_co_write_zeroes(BlockBackend *blk, int64_t sector_num,
- int nb_sectors, BdrvRequestFlags flags)
+int coroutine_fn blk_co_write_zeroes(BlockBackend *blk, int64_t offset,
+ int count, BdrvRequestFlags flags)
 {
-if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) {
-return -EINVAL;
-}
-
-return blk_co_pwritev(blk, sector_num << BDRV_SECTOR_BITS,
-  nb_sectors << BDRV_SECTOR_BITS, 

[Qemu-block] [PATCH v6 07/20] scsi-disk: Switch to byte-based aio block access

2016-05-04 Thread Eric Blake
Sector-based blk_aio_readv() and blk_aio_writev() should die; switch
to byte-based blk_aio_preadv() and blk_aio_pwritev() instead.

Signed-off-by: Eric Blake 
---
 hw/scsi/scsi-disk.c | 31 +++
 1 file changed, 15 insertions(+), 16 deletions(-)

diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c
index 1335392..5d98f7b 100644
--- a/hw/scsi/scsi-disk.c
+++ b/hw/scsi/scsi-disk.c
@@ -343,8 +343,9 @@ static void scsi_do_read(SCSIDiskReq *r, int ret)
 n = scsi_init_iovec(r, SCSI_DMA_BUF_SIZE);
 block_acct_start(blk_get_stats(s->qdev.conf.blk), &r->acct,
  n * BDRV_SECTOR_SIZE, BLOCK_ACCT_READ);
-r->req.aiocb = blk_aio_readv(s->qdev.conf.blk, r->sector, &r->qiov, n,
- scsi_read_complete, r);
+r->req.aiocb = blk_aio_preadv(s->qdev.conf.blk,
+  r->sector << BDRV_SECTOR_BITS, &r->qiov,
+  0, scsi_read_complete, r);
 }

 done:
@@ -504,7 +505,6 @@ static void scsi_write_data(SCSIRequest *req)
 {
 SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req);
 SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
-uint32_t n;

 /* No data transfer may already be in progress */
 assert(r->req.aiocb == NULL);
@@ -544,11 +544,11 @@ static void scsi_write_data(SCSIRequest *req)
 r->req.aiocb = dma_blk_write(s->qdev.conf.blk, r->req.sg, r->sector,
  scsi_dma_complete, r);
 } else {
-n = r->qiov.size / 512;
 block_acct_start(blk_get_stats(s->qdev.conf.blk), &r->acct,
- n * BDRV_SECTOR_SIZE, BLOCK_ACCT_WRITE);
-r->req.aiocb = blk_aio_writev(s->qdev.conf.blk, r->sector, &r->qiov, n,
-  scsi_write_complete, r);
+ r->qiov.size, BLOCK_ACCT_WRITE);
+r->req.aiocb = blk_aio_pwritev(s->qdev.conf.blk,
+   r->sector << BDRV_SECTOR_BITS, &r->qiov,
+   0, scsi_write_complete, r);
 }
 }

@@ -1730,13 +1730,11 @@ static void scsi_write_same_complete(void *opaque, int 
ret)
 if (data->iov.iov_len) {
 block_acct_start(blk_get_stats(s->qdev.conf.blk), &r->acct,
  data->iov.iov_len, BLOCK_ACCT_WRITE);
-/* blk_aio_write doesn't like the qiov size being different from
- * nb_sectors, make sure they match.
- */
 qemu_iovec_init_external(&data->qiov, &data->iov, 1);
-r->req.aiocb = blk_aio_writev(s->qdev.conf.blk, data->sector,
-  &data->qiov, data->iov.iov_len / 512,
-  scsi_write_same_complete, data);
+r->req.aiocb = blk_aio_pwritev(s->qdev.conf.blk,
+   data->sector << BDRV_SECTOR_BITS,
+   &data->qiov, 0,
+   scsi_write_same_complete, data);
 return;
 }

@@ -1803,9 +1801,10 @@ static void scsi_disk_emulate_write_same(SCSIDiskReq *r, 
uint8_t *inbuf)
 scsi_req_ref(&r->req);
 block_acct_start(blk_get_stats(s->qdev.conf.blk), &r->acct,
  data->iov.iov_len, BLOCK_ACCT_WRITE);
-r->req.aiocb = blk_aio_writev(s->qdev.conf.blk, data->sector,
-  &data->qiov, data->iov.iov_len / 512,
-  scsi_write_same_complete, data);
+r->req.aiocb = blk_aio_pwritev(s->qdev.conf.blk,
+   data->sector << BDRV_SECTOR_BITS,
+   &data->qiov, 0,
+   scsi_write_same_complete, data);
 }

 static void scsi_disk_emulate_write_data(SCSIRequest *req)
-- 
2.5.5




[Qemu-block] [PATCH v6 02/20] block: Drop private ioctl-only members of BlockRequest

2016-05-04 Thread Eric Blake
I was thrown by the fact that the public type BlockRequest had
an anonymous union, but no obvious discriminator.  Turns out
that the only client of the second branch of the union was code
internal to io.c, and that with a slight abuse of QEMUIOVector*
to pass a void* pointer, we can make the public interface less
confusing.

(Yes, I know that strict C doesn't guarantee that you can cast
void* to the wrong type and then back to void* - it only
guarantees the reverse direction of the original pointer to
void* and back to the original type - but we already have other
assumptions throughout the qemu code base that assume that all
pointers are interchangeable in representation).

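Reduced to a standalone sketch (illustrative only), the assumption being
relied on is just this round-trip:

    /* Sketch: a void* stored through a differently-typed pointer field
     * and read back as void*, as bdrv_aio_ioctl() now does. */
    void *buf = g_malloc(64);
    QEMUIOVector *disguised = (QEMUIOVector *) buf;  /* store under the "wrong" type */
    void *back = (void *) disguised;                 /* recover it as void* again    */
    assert(back == buf);                             /* holds on all supported hosts */
    g_free(buf);
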
Signed-off-by: Eric Blake 
---
 include/block/block.h | 16 
 block/io.c|  8 +---
 2 files changed, 9 insertions(+), 15 deletions(-)

diff --git a/include/block/block.h b/include/block/block.h
index 0e8b4d1..754aae3 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -334,18 +334,10 @@ void bdrv_aio_cancel_async(BlockAIOCB *acb);

 typedef struct BlockRequest {
 /* Fields to be filled by multiwrite caller */
-union {
-struct {
-int64_t sector;
-int nb_sectors;
-int flags;
-QEMUIOVector *qiov;
-};
-struct {
-int req;
-void *buf;
-};
-};
+int64_t sector;
+int nb_sectors;
+int flags;
+QEMUIOVector *qiov;
 BlockCompletionFunc *cb;
 void *opaque;

diff --git a/block/io.c b/block/io.c
index 0db1146..f15c0f4 100644
--- a/block/io.c
+++ b/block/io.c
@@ -2614,7 +2614,7 @@ static void coroutine_fn bdrv_co_aio_ioctl_entry(void 
*opaque)
 {
 BlockAIOCBCoroutine *acb = opaque;
 acb->req.error = bdrv_co_do_ioctl(acb->common.bs,
-  acb->req.req, acb->req.buf);
+  acb->req.flags, acb->req.qiov);
 bdrv_co_complete(acb);
 }

@@ -2628,8 +2628,10 @@ BlockAIOCB *bdrv_aio_ioctl(BlockDriverState *bs,

 acb->need_bh = true;
 acb->req.error = -EINPROGRESS;
-acb->req.req = req;
-acb->req.buf = buf;
+/* Slight type abuse here, so we don't have to expose extra fields
+ * in the public struct BlockRequest */
+acb->req.flags = req;
+acb->req.qiov = buf;
 co = qemu_coroutine_create(bdrv_co_aio_ioctl_entry);
 qemu_coroutine_enter(co, acb);

-- 
2.5.5




[Qemu-block] [PATCH v6 05/20] block: Introduce byte-based aio read/write

2016-05-04 Thread Eric Blake
blk_aio_readv() and blk_aio_writev() are annoying in that they
can't access sub-sector granularity, and cannot pass flags.
Also, they require the caller to pass redundant information
about the size of the I/O.

Add new blk_aio_preadv() and blk_aio_pwritev() functions to fix
the flaws. The next few patches will upgrade callers, then
finally delete the old interfaces.

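A sketch of what a converted caller looks like (only the blk_aio_preadv()
call itself is the real API; 'blk', 'offset', 'buf', 'len', 'my_read_done'
and 'opaque' are placeholders): the request length is taken from
qiov->size, so it no longer has to be passed separately, and byte offsets
plus BdrvRequestFlags are accepted directly.

    QEMUIOVector qiov;
    struct iovec iov = { .iov_base = buf, .iov_len = len };

    qemu_iovec_init_external(&qiov, &iov, 1);
    blk_aio_preadv(blk, offset, &qiov, 0, my_read_done, opaque);
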
Signed-off-by: Eric Blake 
---
 include/sysemu/block-backend.h |  6 ++
 block/block-backend.c  | 16 
 2 files changed, 22 insertions(+)

diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h
index 851376b..8d7839c 100644
--- a/include/sysemu/block-backend.h
+++ b/include/sysemu/block-backend.h
@@ -110,9 +110,15 @@ int64_t blk_nb_sectors(BlockBackend *blk);
 BlockAIOCB *blk_aio_readv(BlockBackend *blk, int64_t sector_num,
   QEMUIOVector *iov, int nb_sectors,
   BlockCompletionFunc *cb, void *opaque);
+BlockAIOCB *blk_aio_preadv(BlockBackend *blk, int64_t offset,
+   QEMUIOVector *iov, BdrvRequestFlags flags,
+   BlockCompletionFunc *cb, void *opaque);
 BlockAIOCB *blk_aio_writev(BlockBackend *blk, int64_t sector_num,
QEMUIOVector *iov, int nb_sectors,
BlockCompletionFunc *cb, void *opaque);
+BlockAIOCB *blk_aio_pwritev(BlockBackend *blk, int64_t offset,
+QEMUIOVector *iov, BdrvRequestFlags flags,
+BlockCompletionFunc *cb, void *opaque);
 BlockAIOCB *blk_aio_flush(BlockBackend *blk,
   BlockCompletionFunc *cb, void *opaque);
 BlockAIOCB *blk_aio_discard(BlockBackend *blk,
diff --git a/block/block-backend.c b/block/block-backend.c
index f8f88a6..104852d 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -998,6 +998,14 @@ BlockAIOCB *blk_aio_readv(BlockBackend *blk, int64_t 
sector_num,
 blk_aio_read_entry, 0, cb, opaque);
 }

+BlockAIOCB *blk_aio_preadv(BlockBackend *blk, int64_t offset,
+   QEMUIOVector *iov, BdrvRequestFlags flags,
+   BlockCompletionFunc *cb, void *opaque)
+{
+return blk_aio_prwv(blk, offset, iov->size, iov,
+blk_aio_read_entry, flags, cb, opaque);
+}
+
 BlockAIOCB *blk_aio_writev(BlockBackend *blk, int64_t sector_num,
QEMUIOVector *iov, int nb_sectors,
BlockCompletionFunc *cb, void *opaque)
@@ -1011,6 +1019,14 @@ BlockAIOCB *blk_aio_writev(BlockBackend *blk, int64_t 
sector_num,
 blk_aio_write_entry, 0, cb, opaque);
 }

+BlockAIOCB *blk_aio_pwritev(BlockBackend *blk, int64_t offset,
+QEMUIOVector *iov, BdrvRequestFlags flags,
+BlockCompletionFunc *cb, void *opaque)
+{
+return blk_aio_prwv(blk, offset, iov->size, iov,
+blk_aio_write_entry, flags, cb, opaque);
+}
+
 BlockAIOCB *blk_aio_flush(BlockBackend *blk,
   BlockCompletionFunc *cb, void *opaque)
 {
-- 
2.5.5




[Qemu-block] [PATCH v6 00/20] block: kill sector-based blk_write/read

2016-05-04 Thread Eric Blake
2.7 material, depends on Kevin's block-next:
git://repo.or.cz/qemu/kevin.git block-next

Previously posted as part of a larger NBD series [1] and as v5 [2].
Mostly orthogonal to Kevin's recent work to also kill sector
interfaces from the driver.

[1] https://lists.gnu.org/archive/html/qemu-devel/2016-04/msg03526.html
[2] https://lists.gnu.org/archive/html/qemu-devel/2016-05/msg00235.html

Also available as a tag at this location:
git fetch git://repo.or.cz/qemu/ericb.git nbd-block-v6

Changes since then:
- patch 2: new patch for something that confused me
- patch 3 (was 12/14): moved earlier
- patch 4 (was 13/14): moved earlier, retitled, and touches more
interfaces
- patches 5-9: new, to cover blk_aio_{read,write}v
- patch 15: also cover blk_aio_{read,write}v
- patch 19: also cover blk_aio_{read,write}v, don't break
qemu-iotests 23 [kwolf]
- patch 20 (was 14/14): retitle, kill more dead code

I still plan to make qemu-io 'read' accept unaligned access
without requiring -p, but that will now come in a later series
that does other qemu-io UI improvements (besides, 20 patches is
big enough to review for now).  For qemu-iotests 23 to work
with unaligned 'read', I also have to fix 'readv' to be
unaligned; which is why this series grew to include blk_aio_*.
Note that there are still a few sector-aligned blk_* interfaces
left even after this series, but we are progressively getting
closer to using byte interfaces throughout the code base.

001/20:[] [--] 'block: Allow BDRV_REQ_FUA through blk_pwrite()'
002/20:[down] 'block: Drop private ioctl-only members of BlockRequest'
003/20:[] [--] 'block: Switch blk_read_unthrottled() to byte interface'
004/20:[down] 'block: Switch blk_*write_zeroes() to byte interface'
005/20:[down] 'block: Introduce byte-based aio read/write'
006/20:[down] 'ide: Switch to byte-based aio block access'
007/20:[down] 'scsi-disk: Switch to byte-based aio block access'
008/20:[down] 'virtio: Switch to byte-based aio block access'
009/20:[down] 'xen_disk: Switch to byte-based aio block access'
010/20:[] [--] 'fdc: Switch to byte-based block access'
011/20:[] [--] 'nand: Switch to byte-based block access'
012/20:[] [--] 'onenand: Switch to byte-based block access'
013/20:[] [--] 'pflash: Switch to byte-based block access'
014/20:[] [--] 'sd: Switch to byte-based block access'
015/20:[0020] [FC] 'm25p80: Switch to byte-based block access'
016/20:[] [--] 'atapi: Switch to byte-based block access'
017/20:[] [--] 'nbd: Switch to byte-based block access'
018/20:[] [--] 'qemu-img: Switch to byte-based block access'
019/20:[0041] [FC] 'qemu-io: Switch to byte-based block access'
020/20:[down] 'block: Kill unused sector-based blk_* functions'

Eric Blake (20):
  block: Allow BDRV_REQ_FUA through blk_pwrite()
  block: Drop private ioctl-only members of BlockRequest
  block: Switch blk_read_unthrottled() to byte interface
  block: Switch blk_*write_zeroes() to byte interface
  block: Introduce byte-based aio read/write
  ide: Switch to byte-based aio block access
  scsi-disk: Switch to byte-based aio block access
  virtio: Switch to byte-based aio block access
  xen_disk: Switch to byte-based aio block access
  fdc: Switch to byte-based block access
  nand: Switch to byte-based block access
  onenand: Switch to byte-based block access
  pflash: Switch to byte-based block access
  sd: Switch to byte-based block access
  m25p80: Switch to byte-based block access
  atapi: Switch to byte-based block access
  nbd: Switch to byte-based block access
  qemu-img: Switch to byte-based block access
  qemu-io: Switch to byte-based block access
  block: Kill unused sector-based blk_* functions

 hw/ide/internal.h  |   2 +-
 include/block/block.h  |  16 ++-
 include/sysemu/block-backend.h |  33 ++---
 include/sysemu/dma.h   |   4 +-
 block/block-backend.c  | 104 -
 block/crypto.c |   2 +-
 block/io.c |   8 ++--
 block/parallels.c  |   5 +-
 block/qcow.c   |   8 ++--
 block/qcow2.c  |   4 +-
 block/qed.c|   6 +--
 block/sheepdog.c   |   2 +-
 block/vdi.c|   4 +-
 block/vhdx.c   |   5 +-
 block/vmdk.c   |  10 ++--
 block/vpc.c|  10 ++--
 dma-helpers.c  |  14 +++---
 hw/block/fdc.c |  25 ++
 hw/block/hd-geometry.c |   2 +-
 hw/block/m25p80.c  |  23 +++--
 hw/block/nand.c|  36 --
 hw/block/onenand.c |  41 ++--
 hw/block/pflash_cfi01.c|  12 ++---
 hw/block/pflash_cfi02.c|  12 ++---
 hw/block/virtio-blk.c  |  18 ---
 hw/block/xen_disk.c|  10 ++--
 hw/ide/atapi.c |  19 
 hw/ide/core.c  |  10 ++--
 hw/ide/macio.c   

[Qemu-block] [PATCH v6 06/20] ide: Switch to byte-based aio block access

2016-05-04 Thread Eric Blake
Sector-based blk_aio_readv() and blk_aio_writev() should die; switch
to byte-based blk_aio_preadv() and blk_aio_pwritev() instead.

The patch had to touch multiple files at once, because dma_blk_io()
takes pointers to the functions, and ide_issue_trim() piggybacks on
the same interface (while ignoring offset under the hood).

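The constraint that forces the cross-file change is the DMAIOFunc
typedef: anything handed to dma_blk_io() has to match it exactly, so
ide_issue_trim() grows the same byte-based signature even though it
ignores the offset internally. A rough sketch of the resulting shape
(only the typedef is real; the other names are illustrative):

    typedef BlockAIOCB *DMAIOFunc(BlockBackend *blk, int64_t offset,
                                  QEMUIOVector *iov, BdrvRequestFlags flags,
                                  BlockCompletionFunc *cb, void *opaque);

    /* Any of blk_aio_preadv, blk_aio_pwritev or ide_issue_trim now fits. */
    DMAIOFunc *fn = is_write ? blk_aio_pwritev : blk_aio_preadv;
    fn(blk, sector_num << BDRV_SECTOR_BITS, &qiov, 0, done_cb, opaque);
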
Signed-off-by: Eric Blake 
---
 hw/ide/internal.h|  2 +-
 include/sysemu/dma.h |  4 ++--
 dma-helpers.c| 14 +++---
 hw/ide/core.c| 10 +-
 hw/ide/macio.c   |  9 +++--
 5 files changed, 18 insertions(+), 21 deletions(-)

diff --git a/hw/ide/internal.h b/hw/ide/internal.h
index d2c458f..ceb9e59 100644
--- a/hw/ide/internal.h
+++ b/hw/ide/internal.h
@@ -614,7 +614,7 @@ void ide_transfer_start(IDEState *s, uint8_t *buf, int size,
 void ide_transfer_stop(IDEState *s);
 void ide_set_inactive(IDEState *s, bool more);
 BlockAIOCB *ide_issue_trim(BlockBackend *blk,
-int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+int64_t offset, QEMUIOVector *qiov, BdrvRequestFlags flags,
 BlockCompletionFunc *cb, void *opaque);
 BlockAIOCB *ide_buffered_readv(IDEState *s, int64_t sector_num,
QEMUIOVector *iov, int nb_sectors,
diff --git a/include/sysemu/dma.h b/include/sysemu/dma.h
index b0fbb9b..0f7cd4d 100644
--- a/include/sysemu/dma.h
+++ b/include/sysemu/dma.h
@@ -197,8 +197,8 @@ void qemu_sglist_add(QEMUSGList *qsg, dma_addr_t base, 
dma_addr_t len);
 void qemu_sglist_destroy(QEMUSGList *qsg);
 #endif

-typedef BlockAIOCB *DMAIOFunc(BlockBackend *blk, int64_t sector_num,
-  QEMUIOVector *iov, int nb_sectors,
+typedef BlockAIOCB *DMAIOFunc(BlockBackend *blk, int64_t offset,
+  QEMUIOVector *iov, BdrvRequestFlags flags,
   BlockCompletionFunc *cb, void *opaque);

 BlockAIOCB *dma_blk_io(BlockBackend *blk,
diff --git a/dma-helpers.c b/dma-helpers.c
index 4ad0bca..a6cc15f 100644
--- a/dma-helpers.c
+++ b/dma-helpers.c
@@ -73,7 +73,7 @@ typedef struct {
 BlockBackend *blk;
 BlockAIOCB *acb;
 QEMUSGList *sg;
-uint64_t sector_num;
+uint64_t offset;
 DMADirection dir;
 int sg_cur_index;
 dma_addr_t sg_cur_byte;
@@ -130,7 +130,7 @@ static void dma_blk_cb(void *opaque, int ret)
 trace_dma_blk_cb(dbs, ret);

 dbs->acb = NULL;
-dbs->sector_num += dbs->iov.size / 512;
+dbs->offset += dbs->iov.size;

 if (dbs->sg_cur_index == dbs->sg->nsg || ret < 0) {
 dma_complete(dbs, ret);
@@ -164,8 +164,8 @@ static void dma_blk_cb(void *opaque, int ret)
 qemu_iovec_discard_back(&dbs->iov, dbs->iov.size & ~BDRV_SECTOR_MASK);
 }

-dbs->acb = dbs->io_func(dbs->blk, dbs->sector_num, &dbs->iov,
-dbs->iov.size / 512, dma_blk_cb, dbs);
+dbs->acb = dbs->io_func(dbs->blk, dbs->offset, &dbs->iov, 0,
+dma_blk_cb, dbs);
 assert(dbs->acb);
 }

@@ -203,7 +203,7 @@ BlockAIOCB *dma_blk_io(
 dbs->acb = NULL;
 dbs->blk = blk;
 dbs->sg = sg;
-dbs->sector_num = sector_num;
+dbs->offset = sector_num << BDRV_SECTOR_BITS;
 dbs->sg_cur_index = 0;
 dbs->sg_cur_byte = 0;
 dbs->dir = dir;
@@ -219,7 +219,7 @@ BlockAIOCB *dma_blk_read(BlockBackend *blk,
  QEMUSGList *sg, uint64_t sector,
  void (*cb)(void *opaque, int ret), void *opaque)
 {
-return dma_blk_io(blk, sg, sector, blk_aio_readv, cb, opaque,
+return dma_blk_io(blk, sg, sector, blk_aio_preadv, cb, opaque,
   DMA_DIRECTION_FROM_DEVICE);
 }

@@ -227,7 +227,7 @@ BlockAIOCB *dma_blk_write(BlockBackend *blk,
   QEMUSGList *sg, uint64_t sector,
   void (*cb)(void *opaque, int ret), void *opaque)
 {
-return dma_blk_io(blk, sg, sector, blk_aio_writev, cb, opaque,
+return dma_blk_io(blk, sg, sector, blk_aio_pwritev, cb, opaque,
   DMA_DIRECTION_TO_DEVICE);
 }

diff --git a/hw/ide/core.c b/hw/ide/core.c
index 41e6a2d..fe2bfba 100644
--- a/hw/ide/core.c
+++ b/hw/ide/core.c
@@ -442,7 +442,7 @@ static void ide_issue_trim_cb(void *opaque, int ret)
 }

 BlockAIOCB *ide_issue_trim(BlockBackend *blk,
-int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+int64_t offset, QEMUIOVector *qiov, BdrvRequestFlags flags,
 BlockCompletionFunc *cb, void *opaque)
 {
 TrimAIOCB *iocb;
@@ -616,8 +616,8 @@ BlockAIOCB *ide_buffered_readv(IDEState *s, int64_t 
sector_num,
 req->iov.iov_len = iov->size;
 qemu_iovec_init_external(&req->qiov, &req->iov, 1);

-aioreq = blk_aio_readv(s->blk, sector_num, &req->qiov, nb_sectors,
-   ide_buffered_readv_cb, req);
+aioreq = blk_aio_preadv(s->blk, sector_num << BDRV_SECTOR_BITS,
+&req->qiov, 0, ide_buffered_readv_cb, req);

 QLIST_INSERT_HEAD(&s->buffered_requests, req, list);
 return aioreq;
@@ 

Re: [Qemu-block] [PATCH v2 02/13] block: Introduce BlockBackendPublic

2016-05-04 Thread Eric Blake
On 04/22/2016 11:42 AM, Kevin Wolf wrote:
> Some features, like I/O throttling, are implemented outside
> block-backend.c, but still want to keep information in BlockBackend,
> e.g. list entries that allow keeping a list of BlockBackends.
> 
> In order to avoid exposing the whole struct layout in the public header
> file, this patch introduces an embedded public struct where such
> information can be added and a pair of functions to convert between
> BlockBackend and BlockBackendPublic.
> 
> Signed-off-by: Kevin Wolf 
> ---

> +/* This struct is embedded in (the private) BlockBackend struct and contains
> + * fields that must be public. This is in particular for QLIST_ENTRY() and
> + * friends so that BlockBackends can be kept in lists outside 
> block-backend.c */
> +typedef struct BlockBackendPublic {
> +int dummy; /* empty structs are illegal */

Nit - it's not breaking any laws, so s/illegal/invalid/ sounds slightly
better.  But not enough to stop me from reviewing.

Reviewed-by: Eric Blake 

-- 
Eric Blake   eblake redhat com    +1-919-301-3266
Libvirt virtualization library http://libvirt.org



signature.asc
Description: OpenPGP digital signature


Re: [Qemu-block] [PATCH v2 11/13] block: Remove bdrv_move_feature_fields()

2016-05-04 Thread Alberto Garcia
On Fri 22 Apr 2016 07:42:40 PM CEST, Kevin Wolf wrote:
> bdrv_move_feature_fields() and swap_feature_fields() are empty now, they
> can be removed.
>
> Signed-off-by: Kevin Wolf 
> Reviewed-by: Max Reitz 

Reviewed-by: Alberto Garcia 

Berto



Re: [Qemu-block] [PATCH v5 11/14] qemu-io: Switch to byte-based block access

2016-05-04 Thread Eric Blake
On 05/04/2016 07:10 AM, Kevin Wolf wrote:
> Am 03.05.2016 um 17:42 hat Eric Blake geschrieben:
>> blk_write() and blk_read() are now very simple wrappers around
>> blk_pwrite() and blk_pread().  There's no reason to require
>> the user to pass in aligned numbers.  Keep 'read -p' and
>> 'write -p' so that I don't have to hunt down and update all
>> users of qemu-io, but make the default 'read' and 'write' now
>> do the same behavior that used to require -p.
>>
>> Signed-off-by: Eric Blake 
> 
> This breaks at least qemu-iotests 023.

Whoops - shows how often I run qemu-iotests.  Will fix.

-- 
Eric Blake   eblake redhat com    +1-919-301-3266
Libvirt virtualization library http://libvirt.org



signature.asc
Description: OpenPGP digital signature


Re: [Qemu-block] [PATCH v2 07/13] block: Move I/O throttling configuration functions to BlockBackend

2016-05-04 Thread Alberto Garcia
On Fri 22 Apr 2016 07:42:36 PM CEST, Kevin Wolf wrote:

> +void blk_set_io_limits(BlockBackend *blk, ThrottleConfig *cfg)
> +{
> +throttle_group_config(blk, cfg);
> +}
> +
> +void blk_io_limits_disable(BlockBackend *blk)
> +{
> +assert(blk->public.throttle_state);
> +bdrv_no_throttling_begin(blk_bs(blk));
> +throttle_group_unregister_blk(blk);
> +bdrv_no_throttling_end(blk_bs(blk));
> +}
> +
> +/* should be called before bdrv_set_io_limits if a limit is set */
> +void blk_io_limits_enable(BlockBackend *blk, const char *group)

s/bdrv_set_io_limits/blk_set_io_limits/ in the comment above.

Otherwise the patch looks good to me.

Reviewed-by: Alberto Garcia 

Berto



Re: [Qemu-block] [Qemu-devel] [PATCH v3 08/27] osdep: Add qemu_lock_fd and qemu_unlock_fd

2016-05-04 Thread Richard W.M. Jones
On Thu, Apr 28, 2016 at 08:57:27PM +0800, Fam Zheng wrote:
> They are wrappers of POSIX fcntl file locking, with the additional
> interception of open/close (through qemu_open and qemu_close) to offer a
> better semantics that preserves the locks across multiple life cycles of
> different fds on the same file.  The reason to make this semantics
> change over the fcntl locks is to make the API cleaner for QEMU
> subsystems.
> 
> More precisely, we remove this "feature" of fcntl lock:
> 
> If a process closes any file descriptor referring to a file, then
> all of the process's locks on that file are released, regardless of
> the file descriptor(s) on which the locks were obtained.
> 
> as long as the fd is always open/closed through qemu_open and
> qemu_close.

> +ret = qemu_lock_do(fd, start, len, readonly ? F_RDLCK : F_WRLCK);
> +if (ret == -1) {
> +return -errno;
> +}

It still appears this patch would break libguestfs's valid use case.

How about addressing what Dan wrote here:

  https://lists.gnu.org/archive/html/qemu-devel/2016-04/msg02927.html

?

Rich.

-- 
Richard Jones, Virtualization Group, Red Hat http://people.redhat.com/~rjones
Read my programming and virtualization blog: http://rwmj.wordpress.com
virt-p2v converts physical machines to virtual machines.  Boot with a
live CD or over the network (PXE) and turn machines into KVM guests.
http://libguestfs.org/virt-v2v



Re: [Qemu-block] [PATCH v2 05/13] block: Move throttling fields from BDS to BB

2016-05-04 Thread Alberto Garcia
On Fri 22 Apr 2016 07:42:34 PM CEST, Kevin Wolf wrote:
> This patch changes where the throttling state is stored (used to be the
> BlockDriverState, now it is the BlockBackend), but it doesn't actually
> make it a BB level feature yet. For example, throttling is still
> disabled when the BDS is detached from the BB.
>
> Signed-off-by: Kevin Wolf 

Reviewed-by: Alberto Garcia 

Berto



Re: [Qemu-block] [PATCH v5 11/14] qemu-io: Switch to byte-based block access

2016-05-04 Thread Kevin Wolf
Am 03.05.2016 um 17:42 hat Eric Blake geschrieben:
> blk_write() and blk_read() are now very simple wrappers around
> blk_pwrite() and blk_pread().  There's no reason to require
> the user to pass in aligned numbers.  Keep 'read -p' and
> 'write -p' so that I don't have to hunt down and update all
> users of qemu-io, but make the default 'read' and 'write' now
> do the same behavior that used to require -p.
> 
> Signed-off-by: Eric Blake 

This breaks at least qemu-iotests 023.

Kevin



Re: [Qemu-block] [PATCH v2 06/13] block: Move actual I/O throttling to BlockBackend

2016-05-04 Thread Alberto Garcia
On Fri 22 Apr 2016 07:42:35 PM CEST, Kevin Wolf wrote:
> Signed-off-by: Kevin Wolf 

Reviewed-by: Alberto Garcia 

Berto



Re: [Qemu-block] [PATCH v2 04/13] block: Convert throttle_group_get_name() to BlockBackend

2016-05-04 Thread Alberto Garcia
On Fri 22 Apr 2016 07:42:33 PM CEST, Kevin Wolf wrote:
> Signed-off-by: Kevin Wolf 

Reviewed-by: Alberto Garcia 

Berto



Re: [Qemu-block] [PATCH v2 03/13] block: throttle-groups: Use BlockBackend pointers internally

2016-05-04 Thread Alberto Garcia
On Fri 22 Apr 2016 07:42:32 PM CEST, Kevin Wolf wrote:
> As a first step towards moving I/O throttling to the BlockBackend level,
> this patch changes all pointers in struct ThrottleGroup from referencing
> a BlockDriverState to referencing a BlockBackend.
>
> This change is valid because we made sure that throttling can only be
> enabled on BDSes which have a BB attached.
>
> Signed-off-by: Kevin Wolf 

Reviewed-by: Alberto Garcia 

Berto



Re: [Qemu-block] [PATCH v2 01/13] block: Make sure throttled BDSes always have a BB

2016-05-04 Thread Alberto Garcia
On Fri 22 Apr 2016 07:42:30 PM CEST, Kevin Wolf wrote:
> It was already true in principle that a throttled BDS always has a BB
> attached, except that the order of operations while attaching or
> detaching a BDS to/from a BB wasn't careful enough.
>
> This commit breaks graph manipulations while I/O throttling is enabled.
> It would have been possible to keep things working with some temporary
> hacks, but quite cumbersome, so it's not worth the hassle. We'll fix
> things again in a minute.
>
> Signed-off-by: Kevin Wolf 
> Reviewed-by: Eric Blake 

Reviewed-by: Alberto Garcia 

Berto



Re: [Qemu-block] [PATCH v2 02/13] block: Introduce BlockBackendPublic

2016-05-04 Thread Alberto Garcia
On Fri 22 Apr 2016 07:42:31 PM CEST, Kevin Wolf wrote:
> Some features, like I/O throttling, are implemented outside
> block-backend.c, but still want to keep information in BlockBackend,
> e.g. list entries that allow keeping a list of BlockBackends.
>
> In order to avoid exposing the whole struct layout in the public header
> file, this patch introduces an embedded public struct where such
> information can be added and a pair of functions to convert between
> BlockBackend and BlockBackendPublic.
>
> Signed-off-by: Kevin Wolf 
Reviewed-by: Alberto Garcia 

Berto



Re: [Qemu-block] [Nbd] question on ioctl NBD_SET_FLAGS

2016-05-04 Thread Alex Bligh

On 20 Apr 2016, at 17:18, Wouter Verhelst  wrote:

> Hi Eric,
> 
> On Wed, Apr 20, 2016 at 09:42:02AM -0600, Eric Blake wrote:
> [...]
>> But in 3.9, the overlap bug was still present, and the set of global
>> flags had grown to include NBD_FLAG_NO_ZEROS (commit 038e066), which
>> overlaps with NBD_FLAG_READ_ONLY.  Ouch.  This means that a client
>> talking to a server that advertises NO_ZEROES means that the client will
>> mistakenly tell the kernel to treat EVERY export as read-only, even if
>> the client doesn't respond with NBD_FLAG_C_NO_ZEROES.
>> 
>> 3.10 fixed things; negotiate() now uses uint16_t *flags (instead of u32
>> *), and no longer tries to merge global flags with transmission flags;
>> only the transmission flags are ever passed to the kernel via
>> NBD_SET_FLAGS.  Maybe it's good that there was only 2 weeks between 3.9
>> and 3.10, so hopefully few distros are trying to ship that broken version.
> 
> Well, yeah, since 3.10 was an "oops" release when 3.9 exposed that bug
> (which indeed had existed for a while) and which was reported quite
> quickly on the list. Released versions of nbd which have the bug exist
> though, and trying to have a 3.8 (or below) client talk to a 3.9 (or
> above) server has the same issue.
> 
> I decided that there was no way in which I could fix it, and that "the
> export is readonly" is bad but not a "critical data loss" kind of bug,
> so releasing 3.10 was pretty much the only sane thing I could do (other
> than delaying NO_ZEROES, which might have worked too).

For what it's worth, Ubuntu 14.04 (still under long term
support) ships with nbd-client 3.7 and I've just had a bug
report filed against gonbdserver where all exports go readonly.
   https://github.com/abligh/gonbdserver/issues/4

I've added an option to disable NBD_FLAG_NO_ZEROS on nbd-server
but that's pretty horrible. And I think this will affect all
pre-3.10 clients talking to 3.9 and later servers.

I'm going to find a minimal patch to nbd-client and offer that
to Ubuntu / Debian.

This message is here in part so I have something to point them at
on the mailing list :-)

-- 
Alex Bligh







Re: [Qemu-block] [PATCH 1/2] block: Invalidate all children

2016-05-04 Thread Kevin Wolf
Am 19.04.2016 um 03:42 hat Fam Zheng geschrieben:
> Currently we only recurse to bs->file, which will miss the children in quorum
> and VMDK.
> 
> Recurse into the whole subtree to avoid that.
> 
> Signed-off-by: Fam Zheng 
> ---
>  block.c | 20 ++--
>  1 file changed, 14 insertions(+), 6 deletions(-)
> 
> diff --git a/block.c b/block.c
> index d4939b4..fa8b38f 100644
> --- a/block.c
> +++ b/block.c
> @@ -3201,6 +3201,7 @@ void bdrv_init_with_whitelist(void)
>  
>  void bdrv_invalidate_cache(BlockDriverState *bs, Error **errp)
>  {
> +BdrvChild *child;
>  Error *local_err = NULL;
>  int ret;
>  
> @@ -3215,13 +3216,20 @@ void bdrv_invalidate_cache(BlockDriverState *bs, 
> Error **errp)
>  
>  if (bs->drv->bdrv_invalidate_cache) {
>  bs->drv->bdrv_invalidate_cache(bs, &local_err);
> -} else if (bs->file) {
> -bdrv_invalidate_cache(bs->file->bs, &local_err);

The old behaviour was that we only recurse for bs->file if the block
driver doesn't have its own implementation.

This means that in qcow2, for example, we call bdrv_invalidate_cache()
explicitly for bs->file. If we can already invalidate it here, the call
inside qcow2 and probably other drivers could go away.

Kevin



Re: [Qemu-block] [PATCH v4] block: always compile-check debug prints

2016-05-04 Thread Kevin Wolf
Am 29.04.2016 um 07:12 hat Zhou Jie geschrieben:
> Files with conditional debug statements should ensure that the printf is
> always compiled. This prevents bitrot of the format string of the debug
> statement. And switch debug output to stderr.
> 
> Signed-off-by: Zhou Jie 
> Reviewed-by: Eric Blake 

Thanks, applied to block-next.

Kevin



[Qemu-block] [PATCH 12/14] backup: Use BlockBackend for I/O

2016-05-04 Thread Kevin Wolf
This changes the backup block job to use the job's BlockBackend for
performing its I/O. job->bs isn't used by the backup code any more
afterwards.

Signed-off-by: Kevin Wolf 
---
 block/backup.c | 42 ++
 block/block-backend.c  | 14 ++
 block/io.c |  9 -
 blockdev.c |  4 +---
 include/block/block.h  |  2 --
 include/sysemu/block-backend.h |  2 ++
 trace-events   |  2 +-
 7 files changed, 40 insertions(+), 35 deletions(-)

diff --git a/block/backup.c b/block/backup.c
index 670ba50..ba9eb42 100644
--- a/block/backup.c
+++ b/block/backup.c
@@ -36,7 +36,7 @@ typedef struct CowRequest {
 
 typedef struct BackupBlockJob {
 BlockJob common;
-BlockDriverState *target;
+BlockBackend *target;
 /* bitmap for sync=incremental */
 BdrvDirtyBitmap *sync_bitmap;
 MirrorSyncMode sync_mode;
@@ -99,7 +99,7 @@ static int coroutine_fn backup_do_cow(BackupBlockJob *job,
   bool *error_is_read,
   bool is_write_notifier)
 {
-BlockDriverState *bs = job->common.bs;
+BlockBackend *blk = job->common.blk;
 CowRequest cow_request;
 struct iovec iov;
 QEMUIOVector bounce_qiov;
@@ -132,19 +132,18 @@ static int coroutine_fn backup_do_cow(BackupBlockJob *job,
 start * sectors_per_cluster);
 
 if (!bounce_buffer) {
-bounce_buffer = qemu_blockalign(bs, job->cluster_size);
+bounce_buffer = blk_blockalign(blk, job->cluster_size);
 }
 iov.iov_base = bounce_buffer;
 iov.iov_len = n * BDRV_SECTOR_SIZE;
 qemu_iovec_init_external(&bounce_qiov, &iov, 1);
 
 if (is_write_notifier) {
-ret = bdrv_co_readv_no_serialising(bs,
-   start * sectors_per_cluster,
-   n, &bounce_qiov);
+ret = blk_co_readv_no_serialising(blk, start * sectors_per_cluster,
+  n, &bounce_qiov);
 } else {
-ret = bdrv_co_readv(bs, start * sectors_per_cluster, n,
-&bounce_qiov);
+ret = blk_co_readv(blk, start * sectors_per_cluster, n,
+   &bounce_qiov);
 }
 if (ret < 0) {
 trace_backup_do_cow_read_fail(job, start, ret);
@@ -155,13 +154,13 @@ static int coroutine_fn backup_do_cow(BackupBlockJob *job,
 }
 
 if (buffer_is_zero(iov.iov_base, iov.iov_len)) {
-ret = bdrv_co_write_zeroes(job->target,
+ret = blk_co_write_zeroes(job->target,
start * sectors_per_cluster,
n, BDRV_REQ_MAY_UNMAP);
 } else {
-ret = bdrv_co_writev(job->target,
- start * sectors_per_cluster, n,
- &bounce_qiov);
+ret = blk_co_writev(job->target,
+start * sectors_per_cluster, n,
+&bounce_qiov);
 }
 if (ret < 0) {
 trace_backup_do_cow_write_fail(job, start, ret);
@@ -203,7 +202,7 @@ static int coroutine_fn backup_before_write_notify(
 int64_t sector_num = req->offset >> BDRV_SECTOR_BITS;
 int nb_sectors = req->bytes >> BDRV_SECTOR_BITS;
 
-assert(req->bs == job->common.bs);
+assert(req->bs == blk_bs(job->common.blk));
 assert((req->offset & (BDRV_SECTOR_SIZE - 1)) == 0);
 assert((req->bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
 
@@ -224,7 +223,7 @@ static void backup_set_speed(BlockJob *job, int64_t speed, 
Error **errp)
 static void backup_cleanup_sync_bitmap(BackupBlockJob *job, int ret)
 {
 BdrvDirtyBitmap *bm;
-BlockDriverState *bs = job->common.bs;
+BlockDriverState *bs = blk_bs(job->common.blk);
 
 if (ret < 0 || block_job_is_cancelled(&job->common)) {
 /* Merge the successor back into the parent, delete nothing. */
@@ -282,7 +281,7 @@ static void backup_complete(BlockJob *job, void *opaque)
 BackupBlockJob *s = container_of(job, BackupBlockJob, common);
 BackupCompleteData *data = opaque;
 
-bdrv_unref(s->target);
+blk_unref(s->target);
 
 block_job_completed(job, data->ret);
 g_free(data);
@@ -378,8 +377,8 @@ static void coroutine_fn backup_run(void *opaque)
 {
 BackupBlockJob *job = opaque;
 BackupCompleteData *data;
-BlockDriverState *bs = job->common.bs;
-BlockDriverState *target = job->target;
+BlockDriverState *bs = blk_bs(job->common.blk);
+BlockBackend *target = job->target;
 int64_t start, end;
 int64_t sectors_per_cluster = cluster_size_sectors(job);
 int ret = 0;
@@ -468,7 +467,7 @@ static void coroutine_fn backup_run(void *opaque)
 qemu_co_rwlock_unlock(&job->flush_rwlock);
 g_free(job->done_bitmap);
 
-

[Qemu-block] [PATCH 14/14] blockjob: Remove BlockJob.bs

2016-05-04 Thread Kevin Wolf
There is a single remaining user in qemu-img, which can be trivially
converted to using BlockJob.blk instead.

Signed-off-by: Kevin Wolf 
---
 blockjob.c   | 1 -
 include/block/blockjob.h | 1 -
 qemu-img.c   | 2 +-
 3 files changed, 1 insertion(+), 3 deletions(-)

diff --git a/blockjob.c b/blockjob.c
index 01b58c7..c0d8ee9 100644
--- a/blockjob.c
+++ b/blockjob.c
@@ -83,7 +83,6 @@ void *block_job_create(const BlockJobDriver *driver, 
BlockDriverState *bs,
 
 job->driver= driver;
 job->id= g_strdup(bdrv_get_device_name(bs));
-job->bs= bs;
 job->blk   = blk;
 job->cb= cb;
 job->opaque= opaque;
diff --git a/include/block/blockjob.h b/include/block/blockjob.h
index 32012af..86d2807 100644
--- a/include/block/blockjob.h
+++ b/include/block/blockjob.h
@@ -82,7 +82,6 @@ struct BlockJob {
 const BlockJobDriver *driver;
 
 /** The block device on which the job is operating.  */
-BlockDriverState *bs; /* TODO Remove */
 BlockBackend *blk;
 
 /**
diff --git a/qemu-img.c b/qemu-img.c
index 46f2a6d..77148c2 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -775,7 +775,7 @@ static void common_block_job_cb(void *opaque, int ret)
 
 static void run_block_job(BlockJob *job, Error **errp)
 {
-AioContext *aio_context = bdrv_get_aio_context(job->bs);
+AioContext *aio_context = blk_get_aio_context(job->blk);
 
 do {
 aio_poll(aio_context, true);
-- 
1.8.3.1




[Qemu-block] [PATCH 09/14] backup: Pack Notifier within BackupBlockJob

2016-05-04 Thread Kevin Wolf
From: John Snow 

Instead of relying on peeking at bs->job, we want to explicitly get
a reference to the job that was involved in this notifier callback.

Pack the Notifier inside of the BackupBlockJob so we can use
container_of to get a reference back to the BackupBlockJob object.

This cuts out one more case where we rely unnecessarily on bs->job.

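The pattern in use, reduced to a standalone sketch (struct and field
names here are illustrative, not the actual backup code):

    /* Embed the notifier, then recover the enclosing object in the
     * callback with container_of() instead of peeking at bs->job. */
    typedef struct MyJob {
        int something;
        NotifierWithReturn before_write;    /* embedded member */
    } MyJob;

    static int my_before_write(NotifierWithReturn *notifier, void *opaque)
    {
        MyJob *job = container_of(notifier, MyJob, before_write);
        /* 'job' is the MyJob whose before_write field just fired */
        (void) opaque;
        return job->something;
    }
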
Suggested-by: Paolo Bonzini 
Signed-off-by: John Snow 
Signed-off-by: Kevin Wolf 
---
 block/backup.c | 19 ++-
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/block/backup.c b/block/backup.c
index a990cf1..a57288f 100644
--- a/block/backup.c
+++ b/block/backup.c
@@ -47,6 +47,7 @@ typedef struct BackupBlockJob {
 uint64_t sectors_read;
 unsigned long *done_bitmap;
 int64_t cluster_size;
+NotifierWithReturn before_write;
 QLIST_HEAD(, CowRequest) inflight_reqs;
 } BackupBlockJob;
 
@@ -94,11 +95,11 @@ static void cow_request_end(CowRequest *req)
 }
 
 static int coroutine_fn backup_do_cow(BlockDriverState *bs,
+  BackupBlockJob *job,
   int64_t sector_num, int nb_sectors,
   bool *error_is_read,
   bool is_write_notifier)
 {
-BackupBlockJob *job = (BackupBlockJob *)bs->job;
 CowRequest cow_request;
 struct iovec iov;
 QEMUIOVector bounce_qiov;
@@ -197,6 +198,7 @@ static int coroutine_fn backup_before_write_notify(
 NotifierWithReturn *notifier,
 void *opaque)
 {
+BackupBlockJob *job = container_of(notifier, BackupBlockJob, before_write);
 BdrvTrackedRequest *req = opaque;
 int64_t sector_num = req->offset >> BDRV_SECTOR_BITS;
 int nb_sectors = req->bytes >> BDRV_SECTOR_BITS;
@@ -204,7 +206,8 @@ static int coroutine_fn backup_before_write_notify(
 assert((req->offset & (BDRV_SECTOR_SIZE - 1)) == 0);
 assert((req->bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
 
-return backup_do_cow(req->bs, sector_num, nb_sectors, NULL, true);
+return backup_do_cow(req->bs, job, sector_num,
+ nb_sectors, NULL, true);
 }
 
 static void backup_set_speed(BlockJob *job, int64_t speed, Error **errp)
@@ -343,7 +346,7 @@ static int coroutine_fn 
backup_run_incremental(BackupBlockJob *job)
 if (yield_and_check(job)) {
 return ret;
 }
-ret = backup_do_cow(bs, cluster * sectors_per_cluster,
+ret = backup_do_cow(bs, job, cluster * sectors_per_cluster,
 sectors_per_cluster, &error_is_read,
 false);
 if ((ret < 0) &&
@@ -378,9 +381,6 @@ static void coroutine_fn backup_run(void *opaque)
 BackupCompleteData *data;
 BlockDriverState *bs = job->common.bs;
 BlockDriverState *target = job->target;
-NotifierWithReturn before_write = {
-.notify = backup_before_write_notify,
-};
 int64_t start, end;
 int64_t sectors_per_cluster = cluster_size_sectors(job);
 int ret = 0;
@@ -393,7 +393,8 @@ static void coroutine_fn backup_run(void *opaque)
 
 job->done_bitmap = bitmap_new(end);
 
-bdrv_add_before_write_notifier(bs, &before_write);
+job->before_write.notify = backup_before_write_notify;
+bdrv_add_before_write_notifier(bs, &job->before_write);
 
 if (job->sync_mode == MIRROR_SYNC_MODE_NONE) {
 while (!block_job_is_cancelled(&job->common)) {
@@ -445,7 +446,7 @@ static void coroutine_fn backup_run(void *opaque)
 }
 }
 /* FULL sync mode we copy the whole drive. */
-ret = backup_do_cow(bs, start * sectors_per_cluster,
+ret = backup_do_cow(bs, job, start * sectors_per_cluster,
 sectors_per_cluster, &error_is_read, false);
 if (ret < 0) {
 /* Depending on error action, fail now or retry cluster */
@@ -461,7 +462,7 @@ static void coroutine_fn backup_run(void *opaque)
 }
 }
 
-notifier_with_return_remove(&before_write);
+notifier_with_return_remove(&job->before_write);
 
 /* wait until pending backup_do_cow() calls have completed */
 qemu_co_rwlock_wrlock(&job->flush_rwlock);
-- 
1.8.3.1




[Qemu-block] [PATCH 08/14] backup: Don't leak BackupBlockJob in error path

2016-05-04 Thread Kevin Wolf
Signed-off-by: Kevin Wolf 
---
 block/backup.c | 7 +--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/block/backup.c b/block/backup.c
index fec45e8..a990cf1 100644
--- a/block/backup.c
+++ b/block/backup.c
@@ -485,6 +485,7 @@ void backup_start(BlockDriverState *bs, BlockDriverState 
*target,
 {
 int64_t len;
 BlockDriverInfo bdi;
+BackupBlockJob *job = NULL;
 int ret;
 
 assert(bs);
@@ -542,8 +543,7 @@ void backup_start(BlockDriverState *bs, BlockDriverState 
*target,
 goto error;
 }
 
-BackupBlockJob *job = block_job_create(&backup_job_driver, bs, speed,
-   cb, opaque, errp);
+job = block_job_create(&backup_job_driver, bs, speed, cb, opaque, errp);
 if (!job) {
 goto error;
 }
@@ -584,4 +584,7 @@ void backup_start(BlockDriverState *bs, BlockDriverState 
*target,
 if (sync_bitmap) {
 bdrv_reclaim_dirty_bitmap(bs, sync_bitmap, NULL);
 }
+if (job) {
+block_job_unref(&job->common);
+}
 }
-- 
1.8.3.1




[Qemu-block] [PATCH 13/14] commit: Use BlockBackend for I/O

2016-05-04 Thread Kevin Wolf
This changes the commit block job to use the job's BlockBackend for
performing its I/O. job->bs isn't used by the commit code any more
afterwards.

Signed-off-by: Kevin Wolf 
---
 block/commit.c | 42 +++---
 1 file changed, 27 insertions(+), 15 deletions(-)

diff --git a/block/commit.c b/block/commit.c
index f308c8c..863e059 100644
--- a/block/commit.c
+++ b/block/commit.c
@@ -36,28 +36,34 @@ typedef struct CommitBlockJob {
 BlockJob common;
 RateLimit limit;
 BlockDriverState *active;
-BlockDriverState *top;
-BlockDriverState *base;
+BlockBackend *top;
+BlockBackend *base;
 BlockdevOnError on_error;
 int base_flags;
 int orig_overlay_flags;
 char *backing_file_str;
 } CommitBlockJob;
 
-static int coroutine_fn commit_populate(BlockDriverState *bs,
-BlockDriverState *base,
+static int coroutine_fn commit_populate(BlockBackend *bs, BlockBackend *base,
 int64_t sector_num, int nb_sectors,
 void *buf)
 {
 int ret = 0;
+QEMUIOVector qiov;
+struct iovec iov = {
+.iov_base = (void *)buf,
+.iov_len = nb_sectors * BDRV_SECTOR_SIZE,
+};
 
-ret = bdrv_read(bs, sector_num, buf, nb_sectors);
-if (ret) {
+qemu_iovec_init_external(&qiov, &iov, 1);
+
+ret = blk_co_readv(bs, sector_num, nb_sectors, &qiov);
+if (ret < 0) {
 return ret;
 }
 
-ret = bdrv_write(base, sector_num, buf, nb_sectors);
-if (ret) {
+ret = blk_co_writev(base, sector_num, nb_sectors, &qiov);
+if (ret < 0) {
 return ret;
 }
 
@@ -73,8 +79,8 @@ static void commit_complete(BlockJob *job, void *opaque)
 CommitBlockJob *s = container_of(job, CommitBlockJob, common);
 CommitCompleteData *data = opaque;
 BlockDriverState *active = s->active;
-BlockDriverState *top = s->top;
-BlockDriverState *base = s->base;
+BlockDriverState *top = blk_bs(s->top);
+BlockDriverState *base = blk_bs(s->base);
 BlockDriverState *overlay_bs;
 int ret = data->ret;
 
@@ -94,6 +100,8 @@ static void commit_complete(BlockJob *job, void *opaque)
 bdrv_reopen(overlay_bs, s->orig_overlay_flags, NULL);
 }
 g_free(s->backing_file_str);
+blk_unref(s->top);
+blk_unref(s->base);
 block_job_completed(&s->common, ret);
 g_free(data);
 }
@@ -102,8 +110,8 @@ static void coroutine_fn commit_run(void *opaque)
 {
 CommitBlockJob *s = opaque;
 CommitCompleteData *data;
-BlockDriverState *top = s->top;
-BlockDriverState *base = s->base;
+BlockDriverState *top = blk_bs(s->top);
+BlockDriverState *base = blk_bs(s->base);
 int64_t sector_num, end;
 int ret = 0;
 int n = 0;
@@ -158,7 +166,7 @@ wait:
 goto wait;
 }
 }
-ret = commit_populate(top, base, sector_num, n, buf);
+ret = commit_populate(s->top, s->base, sector_num, n, buf);
 bytes_written += n * BDRV_SECTOR_SIZE;
 }
 if (ret < 0) {
@@ -253,8 +261,12 @@ void commit_start(BlockDriverState *bs, BlockDriverState 
*base,
 return;
 }
 
-s->base   = base;
-s->top= top;
+s->base = blk_new(&error_abort);
+blk_insert_bs(s->base, base);
+
+s->top = blk_new(&error_abort);
+blk_insert_bs(s->top, top);
+
 s->active = bs;
 
 s->base_flags  = orig_base_flags;
-- 
1.8.3.1




[Qemu-block] [PATCH 02/14] block: Cancel jobs first in bdrv_close_all()

2016-05-04 Thread Kevin Wolf
So far, bdrv_close_all() first removed all root BlockDriverStates of
BlockBackends and monitor-owned BDSes, and then assumed that the
remaining BDSes must belong to block jobs and cancelled those jobs.

This order doesn't work that well any more when block jobs use
BlockBackends internally because then they will lose their BDS before
being cancelled.

This patch changes bdrv_close_all() to first cancel all jobs and then
remove all root BDSes from the remaining BBs.
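
The resulting order, as a rough sketch of the function body after this
patch (the unchanged drain step in between is elided):

    void bdrv_close_all(void)
    {
        /* Cancel all jobs first, while their BDSes and BBs still exist */
        block_job_cancel_sync_all();

        /* ... drain requests still in flight (unchanged, elided) ... */

        /* Only now tear down the root BDSes of the remaining BBs */
        blk_remove_all_bs();
        blockdev_close_all_bdrv_states();
    }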

Signed-off-by: Kevin Wolf 
---
 block.c  | 23 +--
 blockjob.c   | 13 +
 include/block/blockjob.h |  7 +++
 3 files changed, 21 insertions(+), 22 deletions(-)

diff --git a/block.c b/block.c
index 37aa0a3..bf6c204 100644
--- a/block.c
+++ b/block.c
@@ -2165,8 +2165,7 @@ static void bdrv_close(BlockDriverState *bs)
 
 void bdrv_close_all(void)
 {
-BlockDriverState *bs;
-AioContext *aio_context;
+block_job_cancel_sync_all();
 
 /* Drop references from requests still in flight, such as canceled block
  * jobs whose AIO context has not been polled yet */
@@ -2174,26 +2173,6 @@ void bdrv_close_all(void)
 
 blk_remove_all_bs();
 blockdev_close_all_bdrv_states();
-
-/* Cancel all block jobs */
-while (!QTAILQ_EMPTY(&all_bdrv_states)) {
-QTAILQ_FOREACH(bs, &all_bdrv_states, bs_list) {
-aio_context = bdrv_get_aio_context(bs);
-
-aio_context_acquire(aio_context);
-if (bs->job) {
-block_job_cancel_sync(bs->job);
-aio_context_release(aio_context);
-break;
-}
-aio_context_release(aio_context);
-}
-
-/* All the remaining BlockDriverStates are referenced directly or
- * indirectly from block jobs, so there needs to be at least one BDS
- * directly used by a block job */
-assert(bs);
-}
 }
 
 static void change_parent_backing_link(BlockDriverState *from,
diff --git a/blockjob.c b/blockjob.c
index 0f1bc77..e916b41 100644
--- a/blockjob.c
+++ b/blockjob.c
@@ -331,6 +331,19 @@ int block_job_cancel_sync(BlockJob *job)
 return block_job_finish_sync(job, &block_job_cancel_err, NULL);
 }
 
+void block_job_cancel_sync_all(void)
+{
+BlockJob *job;
+AioContext *aio_context;
+
+while ((job = QLIST_FIRST(&block_jobs))) {
+aio_context = bdrv_get_aio_context(job->bs);
+aio_context_acquire(aio_context);
+block_job_cancel_sync(job);
+aio_context_release(aio_context);
+}
+}
+
 int block_job_complete_sync(BlockJob *job, Error **errp)
 {
 return block_job_finish_sync(job, &block_job_complete, errp);
diff --git a/include/block/blockjob.h b/include/block/blockjob.h
index 30bb2c6..4ac6831 100644
--- a/include/block/blockjob.h
+++ b/include/block/blockjob.h
@@ -371,6 +371,13 @@ bool block_job_is_paused(BlockJob *job);
 int block_job_cancel_sync(BlockJob *job);
 
 /**
+ * block_job_cancel_sync_all:
+ *
+ * Synchronously cancels all jobs using block_job_cancel_sync().
+ */
+void block_job_cancel_sync_all(void);
+
+/**
  * block_job_complete_sync:
  * @job: The job to be completed.
  * @errp: Error object which may be set by block_job_complete(); this is not
-- 
1.8.3.1




[Qemu-block] [PATCH 01/14] block: keep a list of block jobs

2016-05-04 Thread Kevin Wolf
From: Alberto Garcia 

The current way to obtain the list of existing block jobs is to
iterate over all root nodes and check which ones own a job.

Since we want to be able to support block jobs in other nodes as well,
this patch keeps a list of jobs that is updated every time one is
created or destroyed.
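
As a hedged illustration (not part of this patch), a caller could then
enumerate all jobs with the new block_job_next() helper instead of walking
the root nodes:

    /* Illustrative only: walk the global job list added by this patch. */
    BlockJob *job;

    for (job = block_job_next(NULL); job; job = block_job_next(job)) {
        /* e.g. inspect job->id or job->busy for a QMP reply */
    }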

Signed-off-by: Alberto Garcia 
Signed-off-by: Kevin Wolf 
---
 blockjob.c   | 13 +
 include/block/blockjob.h | 14 ++
 2 files changed, 27 insertions(+)

diff --git a/blockjob.c b/blockjob.c
index 5b840a7..0f1bc77 100644
--- a/blockjob.c
+++ b/blockjob.c
@@ -50,6 +50,16 @@ struct BlockJobTxn {
 int refcnt;
 };
 
+static QLIST_HEAD(, BlockJob) block_jobs = QLIST_HEAD_INITIALIZER(block_jobs);
+
+BlockJob *block_job_next(BlockJob *job)
+{
+if (!job) {
+return QLIST_FIRST(&block_jobs);
+}
+return QLIST_NEXT(job, job_list);
+}
+
 void *block_job_create(const BlockJobDriver *driver, BlockDriverState *bs,
int64_t speed, BlockCompletionFunc *cb,
void *opaque, Error **errp)
@@ -76,6 +86,8 @@ void *block_job_create(const BlockJobDriver *driver, 
BlockDriverState *bs,
 job->refcnt= 1;
 bs->job = job;
 
+QLIST_INSERT_HEAD(&block_jobs, job, job_list);
+
 /* Only set speed when necessary to avoid NotSupported error */
 if (speed != 0) {
 Error *local_err = NULL;
@@ -103,6 +115,7 @@ void block_job_unref(BlockJob *job)
 bdrv_unref(job->bs);
 error_free(job->blocker);
 g_free(job->id);
+QLIST_REMOVE(job, job_list);
 g_free(job);
 }
 }
diff --git a/include/block/blockjob.h b/include/block/blockjob.h
index 073a433..30bb2c6 100644
--- a/include/block/blockjob.h
+++ b/include/block/blockjob.h
@@ -135,6 +135,9 @@ struct BlockJob {
  */
 bool deferred_to_main_loop;
 
+/** Element of the list of block jobs */
+QLIST_ENTRY(BlockJob) job_list;
+
 /** Status that is published by the query-block-jobs QMP API */
 BlockDeviceIoStatus iostatus;
 
@@ -173,6 +176,17 @@ struct BlockJob {
 };
 
 /**
+ * block_job_next:
+ * @job: A block job, or %NULL.
+ *
+ * Get the next element from the list of block jobs after @job, or the
+ * first one if @job is %NULL.
+ *
+ * Returns the requested job, or %NULL if there are no more jobs left.
+ */
+BlockJob *block_job_next(BlockJob *job);
+
+/**
  * block_job_create:
  * @job_type: The class object for the newly-created job.
  * @bs: The block
-- 
1.8.3.1




[Qemu-block] [PATCH 11/14] block: Add blk_co_readv/writev()

2016-05-04 Thread Kevin Wolf
Signed-off-by: Kevin Wolf 
---
 block/block-backend.c  | 26 ++
 include/sysemu/block-backend.h |  4 
 trace-events   |  2 ++
 3 files changed, 32 insertions(+)

diff --git a/block/block-backend.c b/block/block-backend.c
index 25bb5ca..8dc3aa5 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -794,6 +794,19 @@ static int coroutine_fn blk_co_pwritev(BlockBackend *blk, 
int64_t offset,
 return bdrv_co_pwritev(blk_bs(blk), offset, bytes, qiov, flags);
 }
 
+int coroutine_fn blk_co_readv(BlockBackend *blk, int64_t sector_num,
+  int nb_sectors, QEMUIOVector *qiov)
+{
+trace_blk_co_readv(blk, blk_bs(blk), sector_num, nb_sectors);
+
+if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) {
+return -EINVAL;
+}
+
+return blk_co_preadv(blk, sector_num << BDRV_SECTOR_BITS,
+ nb_sectors << BDRV_SECTOR_BITS, qiov, 0);
+}
+
 int coroutine_fn blk_co_copy_on_readv(BlockBackend *blk,
 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
 {
@@ -808,6 +821,19 @@ int coroutine_fn blk_co_copy_on_readv(BlockBackend *blk,
  BDRV_REQ_COPY_ON_READ);
 }
 
+int coroutine_fn blk_co_writev(BlockBackend *blk, int64_t sector_num,
+   int nb_sectors, QEMUIOVector *qiov)
+{
+trace_blk_co_writev(blk, blk_bs(blk), sector_num, nb_sectors);
+
+if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) {
+return -EINVAL;
+}
+
+return blk_co_pwritev(blk, sector_num << BDRV_SECTOR_BITS,
+  nb_sectors << BDRV_SECTOR_BITS, qiov, 0);
+}
+
 typedef struct BlkRwCo {
 BlockBackend *blk;
 int64_t offset;
diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h
index eac4d88..d98552c 100644
--- a/include/sysemu/block-backend.h
+++ b/include/sysemu/block-backend.h
@@ -116,8 +116,12 @@ int blk_read(BlockBackend *blk, int64_t sector_num, 
uint8_t *buf,
  int nb_sectors);
 int blk_read_unthrottled(BlockBackend *blk, int64_t sector_num, uint8_t *buf,
  int nb_sectors);
+int coroutine_fn blk_co_readv(BlockBackend *blk, int64_t sector_num,
+  int nb_sectors, QEMUIOVector *qiov);
 int coroutine_fn blk_co_copy_on_readv(BlockBackend *blk,
 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
+int coroutine_fn blk_co_writev(BlockBackend *blk, int64_t sector_num,
+   int nb_sectors, QEMUIOVector *qiov);
 int blk_write(BlockBackend *blk, int64_t sector_num, const uint8_t *buf,
   int nb_sectors);
 int blk_write_zeroes(BlockBackend *blk, int64_t sector_num,
diff --git a/trace-events b/trace-events
index 9beb588..3b5cce3 100644
--- a/trace-events
+++ b/trace-events
@@ -62,6 +62,8 @@ bdrv_open_common(void *bs, const char *filename, int flags, 
const char *format_n
 bdrv_lock_medium(void *bs, bool locked) "bs %p locked %d"
 
 # block/block-backend.c
+blk_co_readv(void *blk, void *bs, int64_t sector_num, int nb_sector) "blk %p 
bs %p sector_num %"PRId64" nb_sectors %d"
+blk_co_writev(void *blk, void *bs, int64_t sector_num, int nb_sector) "blk %p 
bs %p sector_num %"PRId64" nb_sectors %d"
 blk_co_copy_on_readv(void *blk, void *bs, int64_t sector_num, int nb_sector) 
"blk %p bs %p sector_num %"PRId64" nb_sectors %d"
 
 # block/io.c
-- 
1.8.3.1




[Qemu-block] [PATCH 06/14] mirror: Allow target that already has a BlockBackend

2016-05-04 Thread Kevin Wolf
We had to forbid mirroring to a target BDS that already had a BB
attached because the node swapping at job completion would add a second
BB and we didn't support multiple BBs on a single BDS at the time. Now
we do, so we can lift the restriction.

As we allow additional BlockBackends for the target, we must expect
other users to be sending requests. No requests may be in flight during
the graph modification, so we have to drain those other users first.

The core part of this patch is a revert of commit 40365552.
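
The pattern this introduces in mirror_exit() is, schematically (a sketch of
the hunk below, not new code):

    /* Quiesce any other users of the target BDS before rewiring the graph */
    bdrv_drained_begin(s->target);
    bdrv_replace_in_backing_chain(to_replace, s->target);
    bdrv_drained_end(s->target);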

Signed-off-by: Kevin Wolf 
---
 block/mirror.c | 33 ++---
 blockdev.c |  4 
 tests/qemu-iotests/041 | 27 ---
 tests/qemu-iotests/041.out |  4 ++--
 4 files changed, 8 insertions(+), 60 deletions(-)

diff --git a/block/mirror.c b/block/mirror.c
index efca8fc..23aa10e 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -20,7 +20,6 @@
 #include "qapi/qmp/qerror.h"
 #include "qemu/ratelimit.h"
 #include "qemu/bitmap.h"
-#include "qemu/error-report.h"
 
 #define SLICE_TIME1ULL /* ns */
 #define MAX_IN_FLIGHT 16
@@ -466,24 +465,20 @@ static void mirror_exit(BlockJob *job, void *opaque)
 to_replace = s->to_replace;
 }
 
-/* This was checked in mirror_start_job(), but meanwhile one of the
- * nodes could have been newly attached to a BlockBackend. */
-if (bdrv_has_blk(to_replace) && bdrv_has_blk(s->target)) {
-error_report("block job: Can't create node with two 
BlockBackends");
-data->ret = -EINVAL;
-goto out;
-}
-
 if (bdrv_get_flags(s->target) != bdrv_get_flags(to_replace)) {
 bdrv_reopen(s->target, bdrv_get_flags(to_replace), NULL);
 }
+
+/* The mirror job has no requests in flight any more, but we need to
+ * drain potential other users of the BDS before changing the graph. */
+bdrv_drained_begin(s->target);
 bdrv_replace_in_backing_chain(to_replace, s->target);
+bdrv_drained_end(s->target);
+
 /* We just changed the BDS the job BB refers to */
 blk_remove_bs(job->blk);
 blk_insert_bs(job->blk, src);
 }
-
-out:
 if (s->to_replace) {
 bdrv_op_unblock_all(s->to_replace, s->replace_blocker);
 error_free(s->replace_blocker);
@@ -807,7 +802,6 @@ static void mirror_start_job(BlockDriverState *bs, 
BlockDriverState *target,
  bool is_none_mode, BlockDriverState *base)
 {
 MirrorBlockJob *s;
-BlockDriverState *replaced_bs;
 
 if (granularity == 0) {
 granularity = bdrv_get_default_bitmap_granularity(target);
@@ -824,21 +818,6 @@ static void mirror_start_job(BlockDriverState *bs, 
BlockDriverState *target,
 buf_size = DEFAULT_MIRROR_BUF_SIZE;
 }
 
-/* We can't support this case as long as the block layer can't handle
- * multiple BlockBackends per BlockDriverState. */
-if (replaces) {
-replaced_bs = bdrv_lookup_bs(replaces, replaces, errp);
-if (replaced_bs == NULL) {
-return;
-}
-} else {
-replaced_bs = bs;
-}
-if (bdrv_has_blk(replaced_bs) && bdrv_has_blk(target)) {
-error_setg(errp, "Can't create node with two BlockBackends");
-return;
-}
-
 s = block_job_create(driver, bs, speed, cb, opaque, errp);
 if (!s) {
 return;
diff --git a/blockdev.c b/blockdev.c
index 516c6cb..da9eeff 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -3434,10 +3434,6 @@ static void blockdev_mirror_common(BlockDriverState *bs,
 if (bdrv_op_is_blocked(target, BLOCK_OP_TYPE_MIRROR_TARGET, errp)) {
 return;
 }
-if (bdrv_has_blk(target)) {
-error_setg(errp, "Cannot mirror to an attached block device");
-return;
-}
 
 if (!bs->backing && sync == MIRROR_SYNC_MODE_TOP) {
 sync = MIRROR_SYNC_MODE_FULL;
diff --git a/tests/qemu-iotests/041 b/tests/qemu-iotests/041
index b1c542f..ed1d9d4 100755
--- a/tests/qemu-iotests/041
+++ b/tests/qemu-iotests/041
@@ -207,33 +207,6 @@ class TestSingleBlockdev(TestSingleDrive):
 test_image_not_found = None
 test_small_buffer2 = None
 
-class TestBlockdevAttached(iotests.QMPTestCase):
-image_len = 1 * 1024 * 1024 # MB
-
-def setUp(self):
-iotests.create_image(backing_img, self.image_len)
-qemu_img('create', '-f', iotests.imgfmt, '-o', 'backing_file=%s' % 
backing_img, test_img)
-qemu_img('create', '-f', iotests.imgfmt, '-o', 'backing_file=%s' % 
backing_img, target_img)
-self.vm = iotests.VM().add_drive(test_img)
-self.vm.launch()
-
-def tearDown(self):
-self.vm.shutdown()
-os.remove(test_img)
-os.remove(target_img)
-
-def test_blockdev_attached(self):
-self.assert_no_active_block_jobs()
-args = {'options':
-{'driver': iotests.imgfmt,
- 'id': 

[Qemu-block] [PATCH 05/14] stream: Use BlockBackend for I/O

2016-05-04 Thread Kevin Wolf
This changes the streaming block job to use the job's BlockBackend for
performing the COR reads. job->bs isn't used by the streaming code any
more afterwards.

Signed-off-by: Kevin Wolf 
---
 block/block-backend.c  | 15 +++
 block/io.c |  9 -
 block/stream.c | 14 --
 include/block/block.h  |  2 --
 include/sysemu/block-backend.h |  2 ++
 trace-events   |  4 +++-
 6 files changed, 28 insertions(+), 18 deletions(-)

diff --git a/block/block-backend.c b/block/block-backend.c
index 2932a58..25bb5ca 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -19,6 +19,7 @@
 #include "sysemu/sysemu.h"
 #include "qapi-event.h"
 #include "qemu/id.h"
+#include "trace.h"
 
 /* Number of coroutines to reserve per attached device model */
 #define COROUTINE_POOL_RESERVATION 64
@@ -793,6 +794,20 @@ static int coroutine_fn blk_co_pwritev(BlockBackend *blk, 
int64_t offset,
 return bdrv_co_pwritev(blk_bs(blk), offset, bytes, qiov, flags);
 }
 
+int coroutine_fn blk_co_copy_on_readv(BlockBackend *blk,
+int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
+{
+trace_blk_co_copy_on_readv(blk, blk_bs(blk), sector_num, nb_sectors);
+
+if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) {
+return -EINVAL;
+}
+
+return blk_co_preadv(blk, sector_num << BDRV_SECTOR_BITS,
+ nb_sectors << BDRV_SECTOR_BITS, qiov,
+ BDRV_REQ_COPY_ON_READ);
+}
+
 typedef struct BlkRwCo {
 BlockBackend *blk;
 int64_t offset;
diff --git a/block/io.c b/block/io.c
index 9aae030..8280e14 100644
--- a/block/io.c
+++ b/block/io.c
@@ -1102,15 +1102,6 @@ int coroutine_fn 
bdrv_co_readv_no_serialising(BlockDriverState *bs,
 BDRV_REQ_NO_SERIALISING);
 }
 
-int coroutine_fn bdrv_co_copy_on_readv(BlockDriverState *bs,
-int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
-{
-trace_bdrv_co_copy_on_readv(bs, sector_num, nb_sectors);
-
-return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov,
-BDRV_REQ_COPY_ON_READ);
-}
-
 #define MAX_WRITE_ZEROES_BOUNCE_BUFFER 32768
 
 static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
diff --git a/block/stream.c b/block/stream.c
index 40aa322..70ed9be 100644
--- a/block/stream.c
+++ b/block/stream.c
@@ -39,7 +39,7 @@ typedef struct StreamBlockJob {
 char *backing_file_str;
 } StreamBlockJob;
 
-static int coroutine_fn stream_populate(BlockDriverState *bs,
+static int coroutine_fn stream_populate(BlockBackend *blk,
 int64_t sector_num, int nb_sectors,
 void *buf)
 {
@@ -52,7 +52,7 @@ static int coroutine_fn stream_populate(BlockDriverState *bs,
 qemu_iovec_init_external(&qiov, &iov, 1);
 
 /* Copy-on-read the unallocated clusters */
-return bdrv_co_copy_on_readv(bs, sector_num, nb_sectors, &qiov);
+return blk_co_copy_on_readv(blk, sector_num, nb_sectors, &qiov);
 }
 
 typedef struct {
@@ -64,6 +64,7 @@ static void stream_complete(BlockJob *job, void *opaque)
 {
 StreamBlockJob *s = container_of(job, StreamBlockJob, common);
 StreamCompleteData *data = opaque;
+BlockDriverState *bs = blk_bs(job->blk);
 BlockDriverState *base = s->base;
 
 if (!block_job_is_cancelled(&s->common) && data->reached_end &&
@@ -75,8 +76,8 @@ static void stream_complete(BlockJob *job, void *opaque)
 base_fmt = base->drv->format_name;
 }
 }
-data->ret = bdrv_change_backing_file(job->bs, base_id, base_fmt);
-bdrv_set_backing_hd(job->bs, base);
+data->ret = bdrv_change_backing_file(bs, base_id, base_fmt);
+bdrv_set_backing_hd(bs, base);
 }
 
 g_free(s->backing_file_str);
@@ -88,7 +89,8 @@ static void coroutine_fn stream_run(void *opaque)
 {
 StreamBlockJob *s = opaque;
 StreamCompleteData *data;
-BlockDriverState *bs = s->common.bs;
+BlockBackend *blk = s->common.blk;
+BlockDriverState *bs = blk_bs(blk);
 BlockDriverState *base = s->base;
 int64_t sector_num = 0;
 int64_t end = -1;
@@ -159,7 +161,7 @@ wait:
 goto wait;
 }
 }
-ret = stream_populate(bs, sector_num, n, buf);
+ret = stream_populate(blk, sector_num, n, buf);
 }
 if (ret < 0) {
 BlockErrorAction action =
diff --git a/include/block/block.h b/include/block/block.h
index a4eb5be..39b5812 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -243,8 +243,6 @@ int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset,
 const void *buf, int count);
 int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num,
 int nb_sectors, QEMUIOVector *qiov);
-int coroutine_fn bdrv_co_copy_on_readv(BlockDriverState *bs,
-int64_t sector_num, int nb_sectors, QEMUIOVector 

[Qemu-block] [PATCH 04/14] block: Convert block job core to BlockBackend

2016-05-04 Thread Kevin Wolf
This adds a new BlockBackend field to the BlockJob struct, which
coexists with the BlockDriverState while converting the individual jobs.

When creating a block job, a new BlockBackend is created on top of the
given BlockDriverState, and it is destroyed when the BlockJob ends. The
reference to the BDS is now held by the BlockBackend instead of calling
bdrv_ref/unref manually.

We have to be careful when we use bdrv_replace_in_backing_chain() in
block jobs because this changes the BDS that job->blk points to. At the
moment block jobs are too tightly coupled with their BDS, so that moving
a job to another BDS isn't easily possible; therefore, we need to just
manually undo this change afterwards.
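
Schematically, the BlockBackend lifecycle that block jobs get with this
patch looks as follows (a simplified sketch of the hunks below):

    /* block_job_create(): the job gets its own BlockBackend */
    blk = blk_new(&error_abort);
    blk_insert_bs(blk, bs);      /* the BB now holds the reference to bs */
    job->blk = blk;

    /* ... the job performs its I/O through blk_*() on job->blk ... */

    /* block_job_unref(): dropping the BB also drops the bs reference */
    blk_unref(job->blk);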

Signed-off-by: Kevin Wolf 
---
 block/mirror.c   |  3 +++
 blockjob.c   | 37 -
 include/block/blockjob.h |  3 ++-
 3 files changed, 25 insertions(+), 18 deletions(-)

diff --git a/block/mirror.c b/block/mirror.c
index b9986d8..efca8fc 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -478,6 +478,9 @@ static void mirror_exit(BlockJob *job, void *opaque)
 bdrv_reopen(s->target, bdrv_get_flags(to_replace), NULL);
 }
 bdrv_replace_in_backing_chain(to_replace, s->target);
+/* We just changed the BDS the job BB refers to */
+blk_remove_bs(job->blk);
+blk_insert_bs(job->blk, src);
 }
 
 out:
diff --git a/blockjob.c b/blockjob.c
index e916b41..01b58c7 100644
--- a/blockjob.c
+++ b/blockjob.c
@@ -64,13 +64,17 @@ void *block_job_create(const BlockJobDriver *driver, 
BlockDriverState *bs,
int64_t speed, BlockCompletionFunc *cb,
void *opaque, Error **errp)
 {
+BlockBackend *blk;
 BlockJob *job;
 
 if (bs->job) {
 error_setg(errp, QERR_DEVICE_IN_USE, bdrv_get_device_name(bs));
 return NULL;
 }
-bdrv_ref(bs);
+
+blk = blk_new(&error_abort);
+blk_insert_bs(blk, bs);
+
 job = g_malloc0(driver->instance_size);
 error_setg(&job->blocker, "block device is in use by block job: %s",
BlockJobType_lookup[driver->job_type]);
@@ -80,6 +84,7 @@ void *block_job_create(const BlockJobDriver *driver, 
BlockDriverState *bs,
 job->driver= driver;
 job->id= g_strdup(bdrv_get_device_name(bs));
 job->bs= bs;
+job->blk   = blk;
 job->cb= cb;
 job->opaque= opaque;
 job->busy  = true;
@@ -110,9 +115,10 @@ void block_job_ref(BlockJob *job)
 void block_job_unref(BlockJob *job)
 {
 if (--job->refcnt == 0) {
-job->bs->job = NULL;
-bdrv_op_unblock_all(job->bs, job->blocker);
-bdrv_unref(job->bs);
+BlockDriverState *bs = blk_bs(job->blk);
+bs->job = NULL;
+bdrv_op_unblock_all(bs, job->blocker);
+blk_unref(job->blk);
 error_free(job->blocker);
 g_free(job->id);
 QLIST_REMOVE(job, job_list);
@@ -153,7 +159,7 @@ static void block_job_completed_txn_abort(BlockJob *job)
 txn->aborting = true;
 /* We are the first failed job. Cancel other jobs. */
 QLIST_FOREACH(other_job, &txn->jobs, txn_list) {
-ctx = bdrv_get_aio_context(other_job->bs);
+ctx = blk_get_aio_context(other_job->blk);
 aio_context_acquire(ctx);
 }
 QLIST_FOREACH(other_job, &txn->jobs, txn_list) {
@@ -170,7 +176,7 @@ static void block_job_completed_txn_abort(BlockJob *job)
 assert(other_job->completed);
 }
 QLIST_FOREACH_SAFE(other_job, &txn->jobs, txn_list, next) {
-ctx = bdrv_get_aio_context(other_job->bs);
+ctx = blk_get_aio_context(other_job->blk);
 block_job_completed_single(other_job);
 aio_context_release(ctx);
 }
@@ -192,7 +198,7 @@ static void block_job_completed_txn_success(BlockJob *job)
 }
 /* We are the last completed job, commit the transaction. */
 QLIST_FOREACH_SAFE(other_job, &txn->jobs, txn_list, next) {
-ctx = bdrv_get_aio_context(other_job->bs);
+ctx = blk_get_aio_context(other_job->blk);
 aio_context_acquire(ctx);
 assert(other_job->ret == 0);
 block_job_completed_single(other_job);
@@ -202,9 +208,7 @@ static void block_job_completed_txn_success(BlockJob *job)
 
 void block_job_completed(BlockJob *job, int ret)
 {
-BlockDriverState *bs = job->bs;
-
-assert(bs->job == job);
+assert(blk_bs(job->blk)->job == job);
 assert(!job->completed);
 job->completed = true;
 job->ret = ret;
@@ -295,11 +299,10 @@ static int block_job_finish_sync(BlockJob *job,
  void (*finish)(BlockJob *, Error **errp),
  Error **errp)
 {
-BlockDriverState *bs = job->bs;
 Error *local_err = NULL;
 int ret;
 
-assert(bs->job == job);
+assert(blk_bs(job->blk)->job == job);
 
 block_job_ref(job);
 finish(job, &local_err);
@@ -310,7 +313,7 @@ static int 

[Qemu-block] [PATCH 00/14] block jobs: Convert I/O to BlockBackend

2016-05-04 Thread Kevin Wolf
This series changes the block jobs so that they have a separate BlockBackend
for every node on which they perform I/O. This not only gets us closer to
the goal of doing I/O from external sources (and block jobs are considered
external) exclusively through blk_*(), but the jobs can also later use
BlockBackend features: one could imagine replacing the very basic job
throttling with the existing, more advanced BlockBackend throttling, or
using the BlockBackend error handling for block jobs.

This depends on the following queue of series on top of block-next:
* [PATCH v2 00/13] block: Move I/O throttling to BlockBackend
* [PATCH v2 0/9] block: Remove BlockDriverState.blk
* [PATCH v3 for-2.7 0/8] blockdev: (Nearly) free clean-up work

Alberto Garcia (1):
  block: keep a list of block jobs

John Snow (1):
  backup: Pack Notifier within BackupBlockJob

Kevin Wolf (12):
  block: Cancel jobs first in bdrv_close_all()
  block: Default to enabled write cache in blk_new()
  block: Convert block job core to BlockBackend
  stream: Use BlockBackend for I/O
  mirror: Allow target that already has a BlockBackend
  mirror: Use BlockBackend for I/O
  backup: Don't leak BackupBlockJob in error path
  backup: Remove bs parameter from backup_do_cow()
  block: Add blk_co_readv/writev()
  backup: Use BlockBackend for I/O
  commit: Use BlockBackend for I/O
  blockjob: Remove BlockJob.bs

 block.c|  23 +
 block/backup.c |  67 ++
 block/block-backend.c  |  58 ++-
 block/commit.c |  42 +++--
 block/io.c |  18 ---
 block/mirror.c | 105 ++---
 block/stream.c |  14 +++---
 blockdev.c |  12 +
 blockjob.c |  62 +---
 include/block/block.h  |   4 --
 include/block/blockjob.h   |  23 -
 include/sysemu/block-backend.h |   8 
 qemu-img.c |   2 +-
 tests/qemu-iotests/041 |  27 ---
 tests/qemu-iotests/041.out |   4 +-
 trace-events   |   8 +++-
 16 files changed, 261 insertions(+), 216 deletions(-)

-- 
1.8.3.1




[Qemu-block] [PATCH 03/14] block: Default to enabled write cache in blk_new()

2016-05-04 Thread Kevin Wolf
The existing users of the function are:

1. blk_new_open(), which already enabled the write cache
2. Some test cases that don't care about the setting
3. blockdev_init() for empty drives, where the cache mode is overridden
   with the value from the options when a medium is inserted

Therefore, this patch doesn't change the current behaviour. It will be
convenient, however, for additional users of blk_new() (like block
jobs) if the most sensible WCE setting is the default.
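
A hedged sketch of what a hypothetical future caller that really wants
writethrough semantics would do after this change (not part of this patch):

    BlockBackend *blk = blk_new(&error_abort);

    /* blk_new() now defaults to an enabled write cache; opt out if needed */
    blk_set_enable_write_cache(blk, false);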

Signed-off-by: Kevin Wolf 
---
 block/block-backend.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/block/block-backend.c b/block/block-backend.c
index 6732fb4..2932a58 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -125,6 +125,8 @@ BlockBackend *blk_new(Error **errp)
 
 blk = g_new0(BlockBackend, 1);
 blk->refcnt = 1;
+blk_set_enable_write_cache(blk, true);
+
 qemu_co_queue_init(&blk->public.throttled_reqs[0]);
 qemu_co_queue_init(&blk->public.throttled_reqs[1]);
 
@@ -165,7 +167,6 @@ BlockBackend *blk_new_open(const char *filename, const char 
*reference,
 return NULL;
 }
 
-blk_set_enable_write_cache(blk, true);
 blk->root = bdrv_root_attach_child(bs, "root", &child_root);
 blk->root->opaque = blk;
 
-- 
1.8.3.1




[Qemu-block] [PATCH 07/14] mirror: Use BlockBackend for I/O

2016-05-04 Thread Kevin Wolf
This changes the mirror block job to use the job's BlockBackend for
performing its I/O. job->bs isn't used by the mirroring code any more
afterwards.

Signed-off-by: Kevin Wolf 
---
 block/mirror.c | 75 +++---
 blockdev.c |  4 +---
 2 files changed, 41 insertions(+), 38 deletions(-)

diff --git a/block/mirror.c b/block/mirror.c
index 23aa10e..dc66340 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -35,7 +35,7 @@ typedef struct MirrorBuffer {
 typedef struct MirrorBlockJob {
 BlockJob common;
 RateLimit limit;
-BlockDriverState *target;
+BlockBackend *target;
 BlockDriverState *base;
 /* The name of the graph node to replace */
 char *replaces;
@@ -156,8 +156,8 @@ static void mirror_read_complete(void *opaque, int ret)
 mirror_iteration_done(op, ret);
 return;
 }
-bdrv_aio_writev(s->target, op->sector_num, &op->qiov, op->nb_sectors,
-mirror_write_complete, op);
+blk_aio_writev(s->target, op->sector_num, &op->qiov, op->nb_sectors,
+   mirror_write_complete, op);
 }
 
 static inline void mirror_clip_sectors(MirrorBlockJob *s,
@@ -185,7 +185,7 @@ static int mirror_cow_align(MirrorBlockJob *s,
 need_cow |= !test_bit((*sector_num + *nb_sectors - 1) / chunk_sectors,
   s->cow_bitmap);
 if (need_cow) {
-bdrv_round_to_clusters(s->target, *sector_num, *nb_sectors,
+bdrv_round_to_clusters(blk_bs(s->target), *sector_num, *nb_sectors,
 &align_sector_num, &align_nb_sectors);
 }
 
@@ -223,7 +223,7 @@ static inline void mirror_wait_for_io(MirrorBlockJob *s)
 static int mirror_do_read(MirrorBlockJob *s, int64_t sector_num,
   int nb_sectors)
 {
-BlockDriverState *source = s->common.bs;
+BlockBackend *source = s->common.blk;
 int sectors_per_chunk, nb_chunks;
 int ret = nb_sectors;
 MirrorOp *op;
@@ -273,8 +273,8 @@ static int mirror_do_read(MirrorBlockJob *s, int64_t 
sector_num,
 s->sectors_in_flight += nb_sectors;
 trace_mirror_one_iteration(s, sector_num, nb_sectors);
 
-bdrv_aio_readv(source, sector_num, &op->qiov, nb_sectors,
-   mirror_read_complete, op);
+blk_aio_readv(source, sector_num, &op->qiov, nb_sectors,
+  mirror_read_complete, op);
 return ret;
 }
 
@@ -295,18 +295,18 @@ static void mirror_do_zero_or_discard(MirrorBlockJob *s,
 s->in_flight++;
 s->sectors_in_flight += nb_sectors;
 if (is_discard) {
-bdrv_aio_discard(s->target, sector_num, op->nb_sectors,
- mirror_write_complete, op);
+blk_aio_discard(s->target, sector_num, op->nb_sectors,
+mirror_write_complete, op);
 } else {
-bdrv_aio_write_zeroes(s->target, sector_num, op->nb_sectors,
-  s->unmap ? BDRV_REQ_MAY_UNMAP : 0,
-  mirror_write_complete, op);
+blk_aio_write_zeroes(s->target, sector_num, op->nb_sectors,
+ s->unmap ? BDRV_REQ_MAY_UNMAP : 0,
+ mirror_write_complete, op);
 }
 }
 
 static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
 {
-BlockDriverState *source = s->common.bs;
+BlockDriverState *source = blk_bs(s->common.blk);
 int64_t sector_num, first_chunk;
 uint64_t delay_ns = 0;
 /* At least the first dirty chunk is mirrored in one iteration. */
@@ -383,7 +383,7 @@ static uint64_t coroutine_fn 
mirror_iteration(MirrorBlockJob *s)
 } else if (ret >= 0 && !(ret & BDRV_BLOCK_DATA)) {
 int64_t target_sector_num;
 int target_nb_sectors;
-bdrv_round_to_clusters(s->target, sector_num, io_sectors,
+bdrv_round_to_clusters(blk_bs(s->target), sector_num, io_sectors,
 &target_sector_num, &target_nb_sectors);
 if (target_sector_num == sector_num &&
 target_nb_sectors == io_sectors) {
@@ -448,7 +448,8 @@ static void mirror_exit(BlockJob *job, void *opaque)
 MirrorBlockJob *s = container_of(job, MirrorBlockJob, common);
 MirrorExitData *data = opaque;
 AioContext *replace_aio_context = NULL;
-BlockDriverState *src = s->common.bs;
+BlockDriverState *src = blk_bs(s->common.blk);
+BlockDriverState *target_bs = blk_bs(s->target);
 
 /* Make sure that the source BDS doesn't go away before we called
  * block_job_completed(). */
@@ -460,20 +461,20 @@ static void mirror_exit(BlockJob *job, void *opaque)
 }
 
 if (s->should_complete && data->ret == 0) {
-BlockDriverState *to_replace = s->common.bs;
+BlockDriverState *to_replace = src;
 if (s->to_replace) {
 to_replace = s->to_replace;
 }
 
-if (bdrv_get_flags(s->target) != bdrv_get_flags(to_replace)) {
-bdrv_reopen(s->target, bdrv_get_flags(to_replace),