[Qemu-block] [PATCH v8] ssh: switch from libssh2 to libssh

2019-06-12 Thread Pino Toscano
Rewrite the implementation of the ssh block driver to use libssh instead
of libssh2.  The libssh library has various advantages over libssh2:
- easier API for authentication (for example for using ssh-agent)
- easier API for known_hosts handling
- supports newer types of keys in known_hosts

Use APIs/features available in libssh 0.8 conditionally, to support
older versions (which are not recommended though).

Adjust the various Docker/Travis scripts to use libssh when available
instead of libssh2.

Signed-off-by: Pino Toscano 
---

Changes from v7:
- #if HAVE_LIBSSH_0_8 -> #ifdef HAVE_LIBSSH_0_8
- ptrdiff_t -> size_t

Changes from v6:
- fixed a few checkpatch style issues
- detect libssh 0.8 via symbol detection
- adjust travis/docker test material
- remove dead "default" case in a switch
- use variables for storing MIN() results
- adapt a documentation bit

Changes from v5:
- adapt to newer tracing APIs
- disable ssh compression (mimic what libssh2 does by default)
- use build time checks for libssh 0.8, and use newer APIs directly

Changes from v4:
- fix wrong usages of error_setg/session_error_setg/sftp_error_setg
- fix a few return code checks
- remove now-unused parameters in a few internal functions
- allow authentication with "none" method
- switch to unsigned int for the port number
- enable TCP_NODELAY on the socket
- fix one reference error message in iotest 207

Changes from v3:
- fix socket cleanup in connect_to_ssh()
- add comments about the socket cleanup
- improve the error reporting (closer to what was with libssh2)
- improve EOF detection on sftp_read()

Changes from v2:
- used again an own fd
- fixed co_yield() implementation

Changes from v1:
- fixed jumbo packets writing
- fixed missing 'err' assignment
- fixed commit message

 .travis.yml   |   4 +-
 block/Makefile.objs   |   6 +-
 block/ssh.c   | 622 +-
 block/trace-events|  14 +-
 configure |  65 +-
 docs/qemu-block-drivers.texi  |   2 +-
 .../dockerfiles/debian-win32-cross.docker |   1 -
 .../dockerfiles/debian-win64-cross.docker |   1 -
 tests/docker/dockerfiles/fedora.docker|   4 +-
 tests/docker/dockerfiles/ubuntu.docker|   2 +-
 tests/docker/dockerfiles/ubuntu1804.docker|   2 +-
 tests/qemu-iotests/207.out|   2 +-
 12 files changed, 374 insertions(+), 351 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index a08a7b7278..c70dd055ed 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -31,7 +31,7 @@ addons:
   - libseccomp-dev
   - libspice-protocol-dev
   - libspice-server-dev
-  - libssh2-1-dev
+  - libssh-dev
   - liburcu-dev
   - libusb-1.0-0-dev
   - libvte-2.91-dev
@@ -261,7 +261,7 @@ matrix:
 - libseccomp-dev
 - libspice-protocol-dev
 - libspice-server-dev
-- libssh2-1-dev
+- libssh-dev
 - liburcu-dev
 - libusb-1.0-0-dev
 - libvte-2.91-dev
diff --git a/block/Makefile.objs b/block/Makefile.objs
index ae11605c9f..bf01429dd5 100644
--- a/block/Makefile.objs
+++ b/block/Makefile.objs
@@ -31,7 +31,7 @@ block-obj-$(CONFIG_CURL) += curl.o
 block-obj-$(CONFIG_RBD) += rbd.o
 block-obj-$(CONFIG_GLUSTERFS) += gluster.o
 block-obj-$(CONFIG_VXHS) += vxhs.o
-block-obj-$(CONFIG_LIBSSH2) += ssh.o
+block-obj-$(CONFIG_LIBSSH) += ssh.o
 block-obj-y += accounting.o dirty-bitmap.o
 block-obj-y += write-threshold.o
 block-obj-y += backup.o
@@ -52,8 +52,8 @@ rbd.o-libs := $(RBD_LIBS)
 gluster.o-cflags   := $(GLUSTERFS_CFLAGS)
 gluster.o-libs := $(GLUSTERFS_LIBS)
 vxhs.o-libs:= $(VXHS_LIBS)
-ssh.o-cflags   := $(LIBSSH2_CFLAGS)
-ssh.o-libs := $(LIBSSH2_LIBS)
+ssh.o-cflags   := $(LIBSSH_CFLAGS)
+ssh.o-libs := $(LIBSSH_LIBS)
 block-obj-dmg-bz2-$(CONFIG_BZIP2) += dmg-bz2.o
 block-obj-$(if $(CONFIG_DMG),m,n) += $(block-obj-dmg-bz2-y)
 dmg-bz2.o-libs := $(BZIP2_LIBS)
diff --git a/block/ssh.c b/block/ssh.c
index 6da7b9cbfe..fb458d4548 100644
--- a/block/ssh.c
+++ b/block/ssh.c
@@ -24,8 +24,8 @@
 
 #include "qemu/osdep.h"
 
-#include <libssh2.h>
-#include <libssh2_sftp.h>
+#include <libssh/libssh.h>
+#include <libssh/sftp.h>
 
 #include "block/block_int.h"
 #include "block/qdict.h"
@@ -46,13 +46,11 @@
 #include "trace.h"
 
 /*
- * TRACE_LIBSSH2=<bitmask> enables tracing in libssh2 itself.  Note
- * that this requires that libssh2 was specially compiled with the
- * `./configure --enable-debug' option, so most likely you will have
- * to compile it yourself.  The meaning of <bitmask> is described
- * here: http://www.libssh2.org/libssh2_trace.html
+ * TRACE_LIBSSH=<level> enables tracing in libssh itself.
+ * The meaning of <level> is described here:
+ * http://api.libssh.org/master/group__libssh__log.html
  */
-#define TRACE_LIBSSH2 0 /* or try: LIBSSH2_TRACE_SFTP */
+#define TRACE_LIBSSH  0 /* see: SSH_LOG_* */
 
 typedef struct BDRVSSHState {
 /*

Re: [Qemu-block] [Qemu-devel] [PATCH v2 09/11] monitor: Split out monitor/qmp.c

2019-06-12 Thread Markus Armbruster
Kevin Wolf  writes:

> Am 12.06.2019 um 15:11 hat Markus Armbruster geschrieben:
>> Kevin Wolf  writes:
>> 
>> > Move QMP infrastructure from monitor/misc.c to monitor/qmp.c. This is
>> > code that can be shared for all targets, so compile it only once.
>> 
>> Less code compiled per target, yay!
>> 
>> > The number of functions and particularly extern variables in
>> > monitor_int.h is probably a bit larger than it needs to be, but this way
>> > no non-trivial code modifications are needed. The interfaces between QMP
>> > and the monitor core can be cleaned up later.
>> 
>> That's okay.
>> 
>> I have to admit I naively expected the previous patch to move everything
>> we need in a header for splitting up monitor/misc.c to the new header.
>> How did you decide what to move to the header in which patch?
>
> The previous patch moved only the Monitor{HMP,QMP} data structures and
> their dependencies as I was sure these would be shared. Everything else
> was added to address linker complaints as I was going. I'll clarify this
> in the commit message of the previous patch.
>
>> > Signed-off-by: Kevin Wolf 
>> > Reviewed-by: Dr. David Alan Gilbert 
>> > ---
>> >  include/monitor/monitor.h |   1 +
>> >  monitor/monitor_int.h |  30 ++-
>> >  monitor/misc.c| 394 +
>> >  monitor/qmp.c | 404 ++
>> >  Makefile.objs |   1 +
>> >  monitor/Makefile.objs |   1 +
>> >  monitor/trace-events  |   4 +-
>> >  7 files changed, 448 insertions(+), 387 deletions(-)
>> >  create mode 100644 monitor/qmp.c
>> >
>> > diff --git a/include/monitor/monitor.h b/include/monitor/monitor.h
>> > index 1ba354f811..7bbab05320 100644
>> > --- a/include/monitor/monitor.h
>> > +++ b/include/monitor/monitor.h
>> > @@ -21,6 +21,7 @@ bool monitor_cur_is_qmp(void);
>> >  
>> >  void monitor_init_globals(void);
>> >  void monitor_init(Chardev *chr, int flags);
>> > +void monitor_init_qmp(Chardev *chr, int flags);
>> 
>> Why does this one go to the non-internal header?
>
> Most callers already know whether they want QMP or HMP, so they can just
> directly create the right thing instead of going through the
> monitor_init() wrapper.
>
> If you prefer, I can move it to the internal header, though. It's not
> called externally yet.

As is, monitor_init_qmp() and monitor_init_hmp() are awkward interfaces:
what if you pass MONITOR_USE_CONTROL to monitor_init_hmp()?

I can see just one call passing flags that aren't compile-time
constant.  I think a better interface would be

monitor_init_hmp(Chardev *chr);
monitor_init_qmp(Chardev *chr, bool pretty);

replacing monitor_init() entirely.  This is my first preference.

My (somewhat distant) second is hiding the awkward interfaces in the
internal header for now, and cleaning them up later.

Your choice.

>> >  void monitor_cleanup(void);
>> >  
>> >  int monitor_suspend(Monitor *mon);
>> > diff --git a/monitor/monitor_int.h b/monitor/monitor_int.h
>> > index 7122418955..4aabee54e1 100644
>> > --- a/monitor/monitor_int.h
>> > +++ b/monitor/monitor_int.h
>> > @@ -30,10 +30,11 @@
>> >  
>> >  #include "qapi/qmp/qdict.h"
>> >  #include "qapi/qmp/json-parser.h"
>> > -#include "qapi/qapi-commands.h"
>> > +#include "qapi/qmp/dispatch.h"
>> 
>> This part should be squashed into the previous patch.  You'll
>> additionally need qapi/qapi-types-misc.h for QMP_CAPABILITY__MAX there,
>> or keep monitor/monitor.h, even though you need it only here for
>> MONITOR_USE_CONTROL.
>
> Yes, already happened while addressing the comments you had for the
> header.
>
>> >  
>> >  #include "qemu/readline.h"
>> >  #include "chardev/char-fe.h"
>> > +#include "sysemu/iothread.h"
>> 
>> Perhaps IOThread should be typedef'ed in qemu/typedefs.h.  I'm not
>> asking you to do that.
>> 
>> >  
>> >  /*
>> >   * Supported types:
>> > @@ -145,4 +146,31 @@ typedef struct {
>> >  GQueue *qmp_requests;
>> >  } MonitorQMP;
>> >  
>> > +/**
>> > + * Is @mon a QMP monitor?
>> > + */
>> > +static inline bool monitor_is_qmp(const Monitor *mon)
>> > +{
>> > +return (mon->flags & MONITOR_USE_CONTROL);
>> > +}
>> > +
>> > +typedef QTAILQ_HEAD(MonitorList, Monitor) MonitorList;
>> > +extern IOThread *mon_iothread;
>> > +extern QEMUBH *qmp_dispatcher_bh;
>> > +extern QmpCommandList qmp_commands, qmp_cap_negotiation_commands;
>> > +extern QemuMutex monitor_lock;
>> > +extern MonitorList mon_list;
>> > +extern int mon_refcount;
>> > +
>> > +int monitor_puts(Monitor *mon, const char *str);
>> > +void monitor_data_init(Monitor *mon, int flags, bool skip_flush,
>> > +   bool use_io_thread);
>> > +int monitor_can_read(void *opaque);
>> > +void monitor_list_append(Monitor *mon);
>> > +void monitor_fdsets_cleanup(void);
>> > +
>> > +void qmp_send_response(MonitorQMP *mon, const QDict *rsp);
>> > +void monitor_data_destroy_qmp(MonitorQMP *mon);
>> > +void monitor_qmp_bh_dispatcher(void *data);
>> > +
>> >  #endif
>> 
>> I 

Re: [Qemu-block] [Qemu-devel] [PATCH v2 10/11] monitor: Split out monitor/hmp.c

2019-06-12 Thread Markus Armbruster
Kevin Wolf  writes:

> Am 12.06.2019 um 15:17 hat Markus Armbruster geschrieben:
>> Kevin Wolf  writes:
>> 
>> > Move HMP infrastructure from monitor/misc.c to monitor/hmp.c. This is
>> > code that can be shared for all targets, so compile it only once.
>> >
>> > The number of functions and particularly extern variables in
>> > monitor_int.h is probably a bit larger than it needs to be, but this way
>> > no non-trivial code modifications are needed. The interfaces between HMP
>> > and the monitor core can be cleaned up later.
>> >
>> > Signed-off-by: Kevin Wolf 
>> > Reviewed-by: Dr. David Alan Gilbert 
>> > ---
>> >  include/monitor/monitor.h |1 +
>> >  monitor/monitor_int.h |   31 +
>> >  monitor/hmp.c | 1387 +
>> >  monitor/misc.c| 1338 +--
>> >  monitor/Makefile.objs |2 +-
>> >  monitor/trace-events  |4 +-
>> >  6 files changed, 1429 insertions(+), 1334 deletions(-)
>> >  create mode 100644 monitor/hmp.c
>> >
>> > diff --git a/include/monitor/monitor.h b/include/monitor/monitor.h
>> > index 7bbab05320..8547529e49 100644
>> > --- a/include/monitor/monitor.h
>> > +++ b/include/monitor/monitor.h
>> > @@ -22,6 +22,7 @@ bool monitor_cur_is_qmp(void);
>> >  void monitor_init_globals(void);
>> >  void monitor_init(Chardev *chr, int flags);
>> >  void monitor_init_qmp(Chardev *chr, int flags);
>> > +void monitor_init_hmp(Chardev *chr, int flags);
>> >  void monitor_cleanup(void);
>> >  
>> >  int monitor_suspend(Monitor *mon);
>> > diff --git a/monitor/monitor_int.h b/monitor/monitor_int.h
>> > index 4aabee54e1..88eaed9c5c 100644
>> > --- a/monitor/monitor_int.h
>> > +++ b/monitor/monitor_int.h
>> > @@ -27,6 +27,7 @@
>> >  
>> >  #include "qemu-common.h"
>> >  #include "monitor/monitor.h"
>> > +#include "qemu/cutils.h"
>> >  
>> >  #include "qapi/qmp/qdict.h"
>> >  #include "qapi/qmp/json-parser.h"
>> > @@ -154,6 +155,29 @@ static inline bool monitor_is_qmp(const Monitor *mon)
>> >  return (mon->flags & MONITOR_USE_CONTROL);
>> >  }
>> >  
>> > +/**
>> > + * Is @name in the '|' separated list of names @list?
>> > + */
>> > +static inline int compare_cmd(const char *name, const char *list)
>> > +{
>> > +const char *p, *pstart;
>> > +int len;
>> > +len = strlen(name);
>> > +p = list;
>> > +for (;;) {
>> > +pstart = p;
>> > +p = qemu_strchrnul(p, '|');
>> > +if ((p - pstart) == len && !memcmp(pstart, name, len)) {
>> > +return 1;
>> > +}
>> > +if (*p == '\0') {
>> > +break;
>> > +}
>> > +p++;
>> > +}
>> > +return 0;
>> > +}
>> > +
>> 
>> What's the justification for inline?
>
> It seemed small enough, but maybe it isn't (it has also grown by two
> lines after fixing the coding style). I can leave it in misc.c and just
> make it public.

Yes, please.

> I'd just need a more specific name than compare_cmd() to make it public.

Arguably a good idea even for static inlines in an internal header :)

>> >  typedef QTAILQ_HEAD(MonitorList, Monitor) MonitorList;
>> >  extern IOThread *mon_iothread;
>> >  extern QEMUBH *qmp_dispatcher_bh;
>> > @@ -162,6 +186,8 @@ extern QemuMutex monitor_lock;
>> >  extern MonitorList mon_list;
>> >  extern int mon_refcount;
>> >  
>> > +extern mon_cmd_t mon_cmds[];
>> > +
>> 
>> Any particular reason for not moving this one to hmp.c, along with
>> info_cmds?  Question, not demand :)
>
> Yes, it's not part of the core infrastructure, but contains commands
> specific to the system emulator. If a tool were to use HMP, it would
> have to provide its own command tables.
>
> If we ever create a monitor/hmp-sysemu.c or something like it, this
> would be a good place for the tables.

I'm very much in favor of splitting up monitor/misc.c further.  It's
okay to leave that for later, of course.



[Qemu-block] [PATCH] iotests: Fix 129

2019-06-12 Thread Max Reitz
Throttling on the BB has not affected block jobs in a while, so 129
is prone to break (at least on tmpfs with high system load).  We can
fix the problem by running the job from a throttle node.

Signed-off-by: Max Reitz 
---
Depends on "[PATCH v5 00/42] block: Deal with filters".

Based-on: <20190612221004.2317-1-mre...@redhat.com>

Yes, I know, that is a long dependency chain.  For now, I just want to
show that this patch exists.
---
 tests/qemu-iotests/129 | 33 +
 1 file changed, 9 insertions(+), 24 deletions(-)

diff --git a/tests/qemu-iotests/129 b/tests/qemu-iotests/129
index 9e87e1c8d9..c729e2bde5 100755
--- a/tests/qemu-iotests/129
+++ b/tests/qemu-iotests/129
@@ -31,52 +31,37 @@ class TestStopWithBlockJob(iotests.QMPTestCase):
 iotests.qemu_img('create', '-f', iotests.imgfmt, self.base_img, "1G")
 iotests.qemu_img('create', '-f', iotests.imgfmt, self.test_img, "-b", 
self.base_img)
 iotests.qemu_io('-f', iotests.imgfmt, '-c', 'write -P0x5d 1M 128M', 
self.test_img)
-self.vm = iotests.VM().add_drive(self.test_img)
+self.vm = iotests.VM()
+self.vm.add_object('throttle-group,id=tg0,x-bps-total=1024')
+self.vm.add_drive(None, 
'driver=throttle,throttle-group=tg0,file.driver=%s,file.file.filename=%s' % 
(iotests.imgfmt, self.test_img))
 self.vm.launch()
 
 def tearDown(self):
-params = {"device": "drive0",
-  "bps": 0,
-  "bps_rd": 0,
-  "bps_wr": 0,
-  "iops": 0,
-  "iops_rd": 0,
-  "iops_wr": 0,
- }
-result = self.vm.qmp("block_set_io_throttle", conv_keys=False,
- **params)
 self.vm.shutdown()
 
 def do_test_stop(self, cmd, **args):
 """Test 'stop' while block job is running on a throttled drive.
 The 'stop' command shouldn't drain the job"""
-params = {"device": "drive0",
-  "bps": 1024,
-  "bps_rd": 0,
-  "bps_wr": 0,
-  "iops": 0,
-  "iops_rd": 0,
-  "iops_wr": 0,
- }
-result = self.vm.qmp("block_set_io_throttle", conv_keys=False,
- **params)
-self.assert_qmp(result, 'return', {})
 result = self.vm.qmp(cmd, **args)
 self.assert_qmp(result, 'return', {})
+
 result = self.vm.qmp("stop")
 self.assert_qmp(result, 'return', {})
 result = self.vm.qmp("query-block-jobs")
+
 self.assert_qmp(result, 'return[0]/busy', True)
 self.assert_qmp(result, 'return[0]/ready', False)
 
+self.vm.qmp("block-job-cancel", device="drive0", force=True)
+
 def test_drive_mirror(self):
 self.do_test_stop("drive-mirror", device="drive0",
-  target=self.target_img,
+  target=self.target_img, format=iotests.imgfmt,
   sync="full")
 
 def test_drive_backup(self):
 self.do_test_stop("drive-backup", device="drive0",
-  target=self.target_img,
+  target=self.target_img, format=iotests.imgfmt,
   sync="full")
 
 def test_block_commit(self):
-- 
2.21.0




[Qemu-block] [PATCH v5 39/42] iotests: Add filter commit test cases

2019-06-12 Thread Max Reitz
This patch adds some tests on how commit copes with filter nodes.

Signed-off-by: Max Reitz 
---
 tests/qemu-iotests/040 | 177 +
 tests/qemu-iotests/040.out |   4 +-
 2 files changed, 179 insertions(+), 2 deletions(-)

diff --git a/tests/qemu-iotests/040 b/tests/qemu-iotests/040
index aa0b1847e3..31c2a8da3b 100755
--- a/tests/qemu-iotests/040
+++ b/tests/qemu-iotests/040
@@ -432,5 +432,182 @@ class TestReopenOverlay(ImageCommitTestCase):
 def test_reopen_overlay(self):
 self.run_commit_test(self.img1, self.img0)
 
+class TestCommitWithFilters(iotests.QMPTestCase):
+img0 = os.path.join(iotests.test_dir, '0.img')
+img1 = os.path.join(iotests.test_dir, '1.img')
+img2 = os.path.join(iotests.test_dir, '2.img')
+img3 = os.path.join(iotests.test_dir, '3.img')
+
+def setUp(self):
+qemu_img('create', '-f', iotests.imgfmt, self.img0, '64M')
+qemu_img('create', '-f', iotests.imgfmt, self.img1, '64M')
+qemu_img('create', '-f', iotests.imgfmt, self.img2, '64M')
+qemu_img('create', '-f', iotests.imgfmt, self.img3, '64M')
+
+qemu_io('-f', iotests.imgfmt, '-c', 'write -P 1 0M 1M', self.img0)
+qemu_io('-f', iotests.imgfmt, '-c', 'write -P 2 1M 1M', self.img1)
+qemu_io('-f', iotests.imgfmt, '-c', 'write -P 3 2M 1M', self.img2)
+qemu_io('-f', iotests.imgfmt, '-c', 'write -P 4 3M 1M', self.img3)
+
+# Distributions of the patterns in the files; this is checked
+# by tearDown() and should be changed by the test cases as is
+# necessary
+self.pattern_files = [self.img0, self.img1, self.img2, self.img3]
+
+self.vm = iotests.VM()
+self.vm.launch()
+self.has_quit = False
+
+result = self.vm.qmp('object-add', qom_type='throttle-group', id='tg')
+self.assert_qmp(result, 'return', {})
+
+result = self.vm.qmp('blockdev-add', **{
+'node-name': 'top-filter',
+'driver': 'throttle',
+'throttle-group': 'tg',
+'file': {
+'node-name': 'cow-3',
+'driver': iotests.imgfmt,
+'file': {
+'driver': 'file',
+'filename': self.img3
+},
+'backing': {
+'node-name': 'cow-2',
+'driver': iotests.imgfmt,
+'file': {
+'driver': 'file',
+'filename': self.img2
+},
+'backing': {
+'node-name': 'cow-1',
+'driver': iotests.imgfmt,
+'file': {
+'driver': 'file',
+'filename': self.img1
+},
+'backing': {
+'node-name': 'bottom-filter',
+'driver': 'throttle',
+'throttle-group': 'tg',
+'file': {
+'node-name': 'cow-0',
+'driver': iotests.imgfmt,
+'file': {
+'driver': 'file',
+'filename': self.img0
+}
+}
+}
+}
+}
+}
+})
+self.assert_qmp(result, 'return', {})
+
+def tearDown(self):
+self.vm.shutdown(has_quit=self.has_quit)
+
+for index in range(len(self.pattern_files)):
+result = qemu_io('-f', iotests.imgfmt,
+ '-c', 'read -P %i %iM 1M' % (index + 1, index),
+ self.pattern_files[index])
+self.assertFalse('Pattern verification failed' in result)
+
+os.remove(self.img3)
+os.remove(self.img2)
+os.remove(self.img1)
+os.remove(self.img0)
+
+# Filters make for funny filenames, so we cannot just use
+# self.imgX to get them
+def get_filename(self, node):
+return self.vm.node_info(node)['image']['filename']
+
+def test_filterless_commit(self):
+self.assert_no_active_block_jobs()
+result = self.vm.qmp('block-commit',
+ job_id='commit',
+ device='top-filter',
+ top_node='cow-2',
+ base_node='cow-1')
+self.assert_qmp(result, 'return', {})
+self.wait_until_completed(drive='commit')
+
+self.assertIsNotNone(self.vm.node_info('cow-3'))
+self.assertIsNone(self.vm.node_info('cow-2'))
+self.assertIsNo

[Qemu-block] [PATCH v5 36/42] iotests: Add tests for mirror @replaces loops

2019-06-12 Thread Max Reitz
This adds two tests for cases where our old check_to_replace_node()
function failed to detect that executing this job with these parameters
would result in a cyclic graph.

Signed-off-by: Max Reitz 
Reviewed-by: Eric Blake 
---
 tests/qemu-iotests/041 | 124 +
 tests/qemu-iotests/041.out |   4 +-
 2 files changed, 126 insertions(+), 2 deletions(-)

diff --git a/tests/qemu-iotests/041 b/tests/qemu-iotests/041
index 26bf1701eb..0c1432f189 100755
--- a/tests/qemu-iotests/041
+++ b/tests/qemu-iotests/041
@@ -1067,5 +1067,129 @@ class TestOrphanedSource(iotests.QMPTestCase):
  target='dest-ro')
 self.assert_qmp(result, 'error/class', 'GenericError')
 
+# Various tests for the @replaces option (independent of quorum)
+class TestReplaces(iotests.QMPTestCase):
+def setUp(self):
+self.vm = iotests.VM()
+self.vm.launch()
+
+def tearDown(self):
+self.vm.shutdown()
+
+def test_drive_mirror_loop(self):
+qemu_img('create', '-f', iotests.imgfmt, test_img, '1M')
+
+result = self.vm.qmp('object-add', qom_type='throttle-group', id='tg')
+self.assert_qmp(result, 'return', {})
+
+result = self.vm.qmp('blockdev-add', **{
+'node-name': 'source',
+'driver': 'throttle',
+'throttle-group': 'tg',
+'file': {
+'node-name': 'filtered',
+'driver': iotests.imgfmt,
+'file': {
+'driver': 'file',
+'filename': test_img
+}
+}
+})
+self.assert_qmp(result, 'return', {})
+
+# Mirror from @source to @target in sync=none, so that @source
+# will be @target's backing file; but replace @filtered.
+# Then, @target's backing file will be @source, whose backing
+# file is now @target instead of @filtered.  That is a loop.
+# (But apart from the loop, replacing @filtered instead of
+# @source is fine, because both are just filtered versions of
+# each other.)
+result = self.vm.qmp('drive-mirror',
+ job_id='mirror',
+ device='source',
+ target=target_img,
+ format=iotests.imgfmt,
+ node_name='target',
+ sync='none',
+ replaces='filtered')
+if 'error' in result:
+# This is the correct result
+self.assert_qmp(result, 'error/class', 'GenericError')
+else:
+# This is wrong, but let's run it to the bitter conclusion
+self.complete_and_wait(drive='mirror')
+# Fail for good measure, although qemu should have crashed
+# anyway
+self.fail('Loop creation was successful')
+
+os.remove(test_img)
+try:
+os.remove(target_img)
+except OSError:
+pass
+
+def test_blockdev_mirror_loop(self):
+qemu_img('create', '-f', iotests.imgfmt, test_img, '1M')
+qemu_img('create', '-f', iotests.imgfmt, target_img, '1M')
+
+result = self.vm.qmp('object-add', qom_type='throttle-group', id='tg')
+self.assert_qmp(result, 'return', {})
+
+result = self.vm.qmp('blockdev-add', **{
+'node-name': 'source',
+'driver': 'throttle',
+'throttle-group': 'tg',
+'file': {
+'node-name': 'middle',
+'driver': 'throttle',
+'throttle-group': 'tg',
+'file': {
+'node-name': 'bottom',
+'driver': iotests.imgfmt,
+'file': {
+'driver': 'file',
+'filename': test_img
+}
+}
+}
+})
+self.assert_qmp(result, 'return', {})
+
+result = self.vm.qmp('blockdev-add', **{
+'node-name': 'target',
+'driver': iotests.imgfmt,
+'file': {
+'driver': 'file',
+'filename': target_img
+},
+'backing': 'middle'
+})
+
+# Mirror from @source to @target.  With blockdev-mirror, the
+# current (old) backing file is retained (which is @middle).
+# By replacing @bottom, @middle's file will be @target, whose
+# backing file is @middle again.  That is a loop.
+# (But apart from the loop, replacing @bottom instead of
+# @source is fine, because both are just filte

[Qemu-block] [PATCH v5 34/42] block: Inline bdrv_co_block_status_from_*()

2019-06-12 Thread Max Reitz
With bdrv_filtered_rw_bs(), we can easily handle this default filter
behavior in bdrv_co_block_status().

blkdebug wants to have an additional assertion, so it keeps its own
implementation, except bdrv_co_block_status_from_file() needs to be
inlined there.

Suggested-by: Eric Blake 
Signed-off-by: Max Reitz 
---
 include/block/block_int.h | 22 -
 block/blkdebug.c  |  7 --
 block/blklogwrites.c  |  1 -
 block/commit.c|  1 -
 block/copy-on-read.c  |  2 --
 block/io.c| 51 +--
 block/mirror.c|  1 -
 block/throttle.c  |  1 -
 8 files changed, 22 insertions(+), 64 deletions(-)

diff --git a/include/block/block_int.h b/include/block/block_int.h
index cfefb00104..431fa38ea0 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -1203,28 +1203,6 @@ void bdrv_format_default_perms(BlockDriverState *bs, 
BdrvChild *c,
uint64_t perm, uint64_t shared,
uint64_t *nperm, uint64_t *nshared);
 
-/*
- * Default implementation for drivers to pass bdrv_co_block_status() to
- * their file.
- */
-int coroutine_fn bdrv_co_block_status_from_file(BlockDriverState *bs,
-bool want_zero,
-int64_t offset,
-int64_t bytes,
-int64_t *pnum,
-int64_t *map,
-BlockDriverState **file);
-/*
- * Default implementation for drivers to pass bdrv_co_block_status() to
- * their backing file.
- */
-int coroutine_fn bdrv_co_block_status_from_backing(BlockDriverState *bs,
-   bool want_zero,
-   int64_t offset,
-   int64_t bytes,
-   int64_t *pnum,
-   int64_t *map,
-   BlockDriverState **file);
 const char *bdrv_get_parent_name(const BlockDriverState *bs);
 void blk_dev_change_media_cb(BlockBackend *blk, bool load, Error **errp);
 bool blk_dev_has_removable_media(BlockBackend *blk);
diff --git a/block/blkdebug.c b/block/blkdebug.c
index efd9441625..7950ae729c 100644
--- a/block/blkdebug.c
+++ b/block/blkdebug.c
@@ -637,8 +637,11 @@ static int coroutine_fn 
blkdebug_co_block_status(BlockDriverState *bs,
  BlockDriverState **file)
 {
 assert(QEMU_IS_ALIGNED(offset | bytes, bs->bl.request_alignment));
-return bdrv_co_block_status_from_file(bs, want_zero, offset, bytes,
-  pnum, map, file);
+assert(bs->file && bs->file->bs);
+*pnum = bytes;
+*map = offset;
+*file = bs->file->bs;
+return BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID;
 }
 
 static void blkdebug_close(BlockDriverState *bs)
diff --git a/block/blklogwrites.c b/block/blklogwrites.c
index eb2b4901a5..1eb4a5c613 100644
--- a/block/blklogwrites.c
+++ b/block/blklogwrites.c
@@ -518,7 +518,6 @@ static BlockDriver bdrv_blk_log_writes = {
 .bdrv_co_pwrite_zeroes  = blk_log_writes_co_pwrite_zeroes,
 .bdrv_co_flush_to_disk  = blk_log_writes_co_flush_to_disk,
 .bdrv_co_pdiscard   = blk_log_writes_co_pdiscard,
-.bdrv_co_block_status   = bdrv_co_block_status_from_file,
 
 .is_filter  = true,
 .strong_runtime_opts= blk_log_writes_strong_runtime_opts,
diff --git a/block/commit.c b/block/commit.c
index ec5a8c8edf..a5b58eadeb 100644
--- a/block/commit.c
+++ b/block/commit.c
@@ -257,7 +257,6 @@ static void bdrv_commit_top_child_perm(BlockDriverState 
*bs, BdrvChild *c,
 static BlockDriver bdrv_commit_top = {
 .format_name= "commit_top",
 .bdrv_co_preadv = bdrv_commit_top_preadv,
-.bdrv_co_block_status   = bdrv_co_block_status_from_backing,
 .bdrv_refresh_filename  = bdrv_commit_top_refresh_filename,
 .bdrv_child_perm= bdrv_commit_top_child_perm,
 
diff --git a/block/copy-on-read.c b/block/copy-on-read.c
index 88e1c1f538..5a292de000 100644
--- a/block/copy-on-read.c
+++ b/block/copy-on-read.c
@@ -161,8 +161,6 @@ static BlockDriver bdrv_copy_on_read = {
 .bdrv_eject = cor_eject,
 .bdrv_lock_medium   = cor_lock_medium,
 
-.bdrv_co_block_status   = bdrv_co_block_status_from_file,
-
 .bdrv_recurse_is_first_non_filter   = cor_recurse_is_first_non_filter,
 
 .has_variable_length= true,
diff --git a/block/io.c b/block/io.c
index 14f99e1c00..0a832e30a3 100644
--- a/block/io.c
+++ b/block/io.c
@@ -1998,36 +1998,6 @@ typedef struct BdrvCoBlockStatusData {

Re: [Qemu-block] [PATCH 0/2] vl: Drain before (block) job cancel when quitting

2019-06-12 Thread Max Reitz
On 13.06.19 00:08, Max Reitz wrote:
> Quitting qemu should lead to qemu exiting pretty much immediately.  That
> means if you have a block job running on a throttled block node, the
> node should ignore its throttling and the job should be cancelled
> immediately.
> 
> Unfortunately, that is not what happens.  Currently, the node will be
> drained (with a bdrv_drain_all()), and then again unquiesced (because
> bdrv_drain_all() ends).  Then, the block job is cancelled; but at this
> point, the node is no longer drained, so it will block, as it befits a
> throttling node.
> 
> To fix this issue, we have to keep all nodes drained while we cancel all
> block jobs when quitting qemu.  This will make the throttle node ignore
> its throttling and thus let the block job cancel immediately.

I forgot to mention: This series depends on “block: Keep track of parent
quiescing”, specifically patch 3 (“iotests: Add @has_quit to
vm.shutdown()”).

Based-on: <20190605161118.14544-1-mre...@redhat.com>

Max

> Max Reitz (2):
>   vl: Drain before (block) job cancel when quitting
>   iotests: Test quitting with job on throttled node
> 
>  vl.c   | 11 
>  tests/qemu-iotests/218 | 55 --
>  tests/qemu-iotests/218.out |  4 +++
>  3 files changed, 68 insertions(+), 2 deletions(-)
> 




signature.asc
Description: OpenPGP digital signature


[Qemu-block] [PATCH v5 40/42] iotests: Add filter mirror test cases

2019-06-12 Thread Max Reitz
This patch adds some test cases for how mirroring relates to filters.  One
of them tests what happens when you mirror off a filtered COW node, two
others use the mirror filter node as basically our only example of an
implicitly created filter node so far (besides the commit filter).

Signed-off-by: Max Reitz 
---
 tests/qemu-iotests/041 | 146 -
 tests/qemu-iotests/041.out |   4 +-
 2 files changed, 147 insertions(+), 3 deletions(-)

diff --git a/tests/qemu-iotests/041 b/tests/qemu-iotests/041
index 0c1432f189..c2b5299f62 100755
--- a/tests/qemu-iotests/041
+++ b/tests/qemu-iotests/041
@@ -20,8 +20,9 @@
 
 import time
 import os
+import json
 import iotests
-from iotests import qemu_img, qemu_io
+from iotests import qemu_img, qemu_img_pipe, qemu_io
 
 backing_img = os.path.join(iotests.test_dir, 'backing.img')
 target_backing_img = os.path.join(iotests.test_dir, 'target-backing.img')
@@ -1191,5 +1192,148 @@ class TestReplaces(iotests.QMPTestCase):
 os.remove(test_img)
 os.remove(target_img)
 
+# Tests for mirror with filters (and how the mirror filter behaves, as
+# an example for an implicit filter)
+class TestFilters(iotests.QMPTestCase):
+def setUp(self):
+qemu_img('create', '-f', iotests.imgfmt, backing_img, '1M')
+qemu_img('create', '-f', iotests.imgfmt, '-b', backing_img, test_img)
+qemu_img('create', '-f', iotests.imgfmt, '-b', backing_img, target_img)
+
+qemu_io('-c', 'write -P 1 0 512k', backing_img)
+qemu_io('-c', 'write -P 2 512k 512k', test_img)
+
+self.vm = iotests.VM()
+self.vm.launch()
+
+result = self.vm.qmp('blockdev-add', **{
+'node-name': 'target',
+'driver': iotests.imgfmt,
+'file': {
+'driver': 'file',
+'filename': target_img
+},
+'backing': None
+})
+self.assert_qmp(result, 'return', {})
+
+self.filterless_chain = {
+'node-name': 'source',
+'driver': iotests.imgfmt,
+'file': {
+'driver': 'file',
+'filename': test_img
+},
+'backing': {
+'node-name': 'backing',
+'driver': iotests.imgfmt,
+'file': {
+'driver': 'file',
+'filename': backing_img
+}
+}
+}
+
+def tearDown(self):
+self.vm.shutdown()
+
+os.remove(test_img)
+os.remove(target_img)
+os.remove(backing_img)
+
+def test_cor(self):
+result = self.vm.qmp('blockdev-add', **{
+'node-name': 'filter',
+'driver': 'copy-on-read',
+'file': self.filterless_chain
+})
+self.assert_qmp(result, 'return', {})
+
+result = self.vm.qmp('blockdev-mirror',
+ job_id='mirror',
+ device='filter',
+ target='target',
+ sync='top')
+self.assert_qmp(result, 'return', {})
+
+self.complete_and_wait('mirror')
+
+self.vm.qmp('blockdev-del', node_name='target')
+
+target_map = qemu_img_pipe('map', '--output=json', target_img)
+target_map = json.loads(target_map)
+
+assert target_map[0]['start'] == 0
+assert target_map[0]['length'] == 512 * 1024
+assert target_map[0]['depth'] == 1
+
+assert target_map[1]['start'] == 512 * 1024
+assert target_map[1]['length'] == 512 * 1024
+assert target_map[1]['depth'] == 0
+
+def test_implicit_mirror_filter(self):
+result = self.vm.qmp('blockdev-add', **self.filterless_chain)
+self.assert_qmp(result, 'return', {})
+
+# We need this so we can query from above the mirror node
+result = self.vm.qmp('device_add',
+ driver='virtio-blk',
+ id='virtio',
+ bus='pci.0',
+ drive='source')
+self.assert_qmp(result, 'return', {})
+
+result = self.vm.qmp('blockdev-mirror',
+ job_id='mirror',
+ device='source',
+ target='target',
+ sync='top')
+self.assert_qmp(result, 'return', {})
+
+# The mirror filter is now an implicit node, so it should be
+# invisible when querying the backing chain
+device_info = self.vm.qmp('query-block')['return'][0]
+assert device_info['qdev'] == 
'/machine/peripheral/vir

[Qemu-block] [PATCH v5 28/42] stream: Deal with filters

2019-06-12 Thread Max Reitz
Signed-off-by: Max Reitz 
---
 qapi/block-core.json |  4 
 block/stream.c   | 23 +++
 blockdev.c   |  2 +-
 3 files changed, 20 insertions(+), 9 deletions(-)

diff --git a/qapi/block-core.json b/qapi/block-core.json
index df52a90736..a3c5298cf5 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -2518,6 +2518,10 @@
 # On successful completion the image file is updated to drop the backing file
 # and the BLOCK_JOB_COMPLETED event is emitted.
 #
+# In case @device is a filter node, block-stream modifies the first non-filter
+# overlay node below it to point to base's backing node (or NULL if @base was
+# not specified) instead of modifying @device itself.
+#
 # @job-id: identifier for the newly-created block job. If
 #  omitted, the device name will be used. (Since 2.7)
 #
diff --git a/block/stream.c b/block/stream.c
index 1a906fd860..9271e1821a 100644
--- a/block/stream.c
+++ b/block/stream.c
@@ -63,6 +63,7 @@ static int stream_prepare(Job *job)
 StreamBlockJob *s = container_of(job, StreamBlockJob, common.job);
 BlockJob *bjob = &s->common;
 BlockDriverState *bs = blk_bs(bjob->blk);
+BlockDriverState *unfiltered_bs = bdrv_skip_rw_filters(bs);
 BlockDriverState *base = s->base;
 Error *local_err = NULL;
 int ret = 0;
@@ -70,7 +71,7 @@ static int stream_prepare(Job *job)
 bdrv_unfreeze_backing_chain(bs, base);
 s->chain_frozen = false;
 
-if (bs->backing) {
+if (bdrv_filtered_cow_child(unfiltered_bs)) {
 const char *base_id = NULL, *base_fmt = NULL;
 if (base) {
 base_id = s->backing_file_str;
@@ -78,8 +79,8 @@ static int stream_prepare(Job *job)
 base_fmt = base->drv->format_name;
 }
 }
-ret = bdrv_change_backing_file(bs, base_id, base_fmt);
-bdrv_set_backing_hd(bs, base, &local_err);
+ret = bdrv_change_backing_file(unfiltered_bs, base_id, base_fmt);
+bdrv_set_backing_hd(unfiltered_bs, base, &local_err);
 if (local_err) {
 error_report_err(local_err);
 return -EPERM;
@@ -110,7 +111,9 @@ static int coroutine_fn stream_run(Job *job, Error **errp)
 StreamBlockJob *s = container_of(job, StreamBlockJob, common.job);
 BlockBackend *blk = s->common.blk;
 BlockDriverState *bs = blk_bs(blk);
+BlockDriverState *unfiltered_bs = bdrv_skip_rw_filters(bs);
 BlockDriverState *base = s->base;
+BlockDriverState *filtered_base;
 int64_t len;
 int64_t offset = 0;
 uint64_t delay_ns = 0;
@@ -119,10 +122,12 @@ static int coroutine_fn stream_run(Job *job, Error **errp)
 int64_t n = 0; /* bytes */
 void *buf;
 
-if (!bs->backing) {
+if (!bdrv_filtered_cow_child(unfiltered_bs)) {
 goto out;
 }
 
+filtered_base = bdrv_filtered_cow_bs(bdrv_find_overlay(bs, base));
+
 len = bdrv_getlength(bs);
 if (len < 0) {
 ret = len;
@@ -154,14 +159,14 @@ static int coroutine_fn stream_run(Job *job, Error **errp)
 
 copy = false;
 
-ret = bdrv_is_allocated(bs, offset, STREAM_BUFFER_SIZE, &n);
+ret = bdrv_is_allocated(unfiltered_bs, offset, STREAM_BUFFER_SIZE, &n);
 if (ret == 1) {
 /* Allocated in the top, no need to copy.  */
 } else if (ret >= 0) {
 /* Copy if allocated in the intermediate images.  Limit to the
  * known-unallocated area [offset, offset+n*BDRV_SECTOR_SIZE).  */
-ret = bdrv_is_allocated_above(backing_bs(bs), base,
-  offset, n, &n);
+ret = bdrv_is_allocated_above(bdrv_filtered_cow_bs(unfiltered_bs),
+  filtered_base, offset, n, &n);
 
 /* Finish early if end of backing file has been reached */
 if (ret == 0 && n == 0) {
@@ -266,7 +271,9 @@ void stream_start(const char *job_id, BlockDriverState *bs,
  * disappear from the chain after this operation. The streaming job reads
  * every block only once, assuming that it doesn't change, so block writes
  * and resizes. */
-for (iter = backing_bs(bs); iter && iter != base; iter = backing_bs(iter)) 
{
+for (iter = bdrv_filtered_bs(bs); iter && iter != base;
+ iter = bdrv_filtered_bs(iter))
+{
 block_job_add_bdrv(&s->common, "intermediate node", iter, 0,
BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED,
&error_abort);
diff --git a/blockdev.c b/blockdev.c
index 5036d064d4..a464cabf9e 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -3235,7 +3235,7 @@ void qmp_block_stream(bool has_job_id, const char 
*job_id, const char *device,
 }
 
 /* Check for op blockers in the whole chain between bs and base */
-for (iter = bs; iter && iter != base_bs; iter = backing_bs(iter)) {
+for (iter = bs; iter && iter != base_bs; iter = bdrv_filtered_bs(iter)) {
 if (bdrv_op_is_blocked(i

Re: [Qemu-block] [PATCH v5 22/42] block: Use CAFs in bdrv_get_allocated_file_size()

2019-06-12 Thread Max Reitz
On 13.06.19 00:09, Max Reitz wrote:
> Signed-off-by: Max Reitz 
> ---
>  block.c | 26 --
>  1 file changed, 24 insertions(+), 2 deletions(-)
> 
> diff --git a/block.c b/block.c
> index 11b7ba8cf6..856d9b58be 100644
> --- a/block.c
> +++ b/block.c
> @@ -4511,15 +4511,37 @@ exit:
>  int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
>  {
>  BlockDriver *drv = bs->drv;
> +BlockDriverState *storage_bs, *metadata_bs;
> +
>  if (!drv) {
>  return -ENOMEDIUM;
>  }
> +
>  if (drv->bdrv_get_allocated_file_size) {
>  return drv->bdrv_get_allocated_file_size(bs);
>  }
> -if (bs->file) {
> -return bdrv_get_allocated_file_size(bs->file->bs);
> +
> +storage_bs = bdrv_storage_bs(bs);
> +metadata_bs = bdrv_metadata_bs(bs);
> +
> +if (storage_bs) {
> +int64_t data_size, metadata_size = 0;
> +
> +data_size = bdrv_get_allocated_file_size(storage_bs);
> +if (data_size < 0) {
> +return data_size;
> +}
> +
> +if (storage_bs != metadata_bs) {

Let this be a lesson to you: If you run all tests, then prepare to send
the series and just change “a minor thing”, you really should rerun the
tests.  Well, I should have, at least.

That should read “if (metadata_bs && storage_bs != metadata_bs) {”.

(Damn.  Why did I only remember to do so literally five minutes after
sending the series?)

Max

> +metadata_size = bdrv_get_allocated_file_size(metadata_bs);
> +if (metadata_size < 0) {
> +return metadata_size;
> +}
> +}
> +
> +return data_size + metadata_size;
>  }
> +
>  return -ENOTSUP;
>  }
>  
> 




signature.asc
Description: OpenPGP digital signature


[Qemu-block] [PATCH v5 42/42] iotests: Test committing to overridden backing

2019-06-12 Thread Max Reitz
Signed-off-by: Max Reitz 
---
 tests/qemu-iotests/040 | 61 ++
 tests/qemu-iotests/040.out |  4 +--
 2 files changed, 63 insertions(+), 2 deletions(-)

diff --git a/tests/qemu-iotests/040 b/tests/qemu-iotests/040
index 31c2a8da3b..5cbbd30ee3 100755
--- a/tests/qemu-iotests/040
+++ b/tests/qemu-iotests/040
@@ -609,5 +609,66 @@ class TestCommitWithFilters(iotests.QMPTestCase):
 # 3 has been comitted into 2
 self.pattern_files[3] = self.img2
 
+class TestCommitWithOverriddenBacking(iotests.QMPTestCase):
+img_base_a = os.path.join(iotests.test_dir, 'base_a.img')
+img_base_b = os.path.join(iotests.test_dir, 'base_b.img')
+img_top = os.path.join(iotests.test_dir, 'top.img')
+
+def setUp(self):
+qemu_img('create', '-f', iotests.imgfmt, self.img_base_a, '1M')
+qemu_img('create', '-f', iotests.imgfmt, self.img_base_b, '1M')
+qemu_img('create', '-f', iotests.imgfmt, '-b', self.img_base_a, \
+ self.img_top)
+
+self.vm = iotests.VM()
+self.vm.launch()
+
+# Use base_b instead of base_a as the backing of top
+result = self.vm.qmp('blockdev-add', **{
+'node-name': 'top',
+'driver': iotests.imgfmt,
+'file': {
+'driver': 'file',
+'filename': self.img_top
+},
+'backing': {
+'node-name': 'base',
+'driver': iotests.imgfmt,
+'file': {
+'driver': 'file',
+'filename': self.img_base_b
+}
+}
+})
+self.assert_qmp(result, 'return', {})
+
+def tearDown(self):
+self.vm.shutdown()
+os.remove(self.img_top)
+os.remove(self.img_base_a)
+os.remove(self.img_base_b)
+
+def test_commit_to_a(self):
+# Try committing to base_a (which should fail, as top's
+# backing image is base_b instead)
+result = self.vm.qmp('block-commit',
+ job_id='commit',
+ device='top',
+ base=self.img_base_a)
+self.assert_qmp(result, 'error/class', 'GenericError')
+
+def test_commit_to_b(self):
+# Try committing to base_b (which should work, since that is
+# actually top's backing image)
+result = self.vm.qmp('block-commit',
+ job_id='commit',
+ device='top',
+ base=self.img_base_b)
+self.assert_qmp(result, 'return', {})
+
+self.vm.event_wait('BLOCK_JOB_READY')
+self.vm.qmp('block-job-complete', device='commit')
+self.vm.event_wait('BLOCK_JOB_COMPLETED')
+
 if __name__ == '__main__':
 iotests.main(supported_fmts=['qcow2', 'qed'])
diff --git a/tests/qemu-iotests/040.out b/tests/qemu-iotests/040.out
index fe58934d7a..499af0e2ff 100644
--- a/tests/qemu-iotests/040.out
+++ b/tests/qemu-iotests/040.out
@@ -1,5 +1,5 @@
-...
+.
 --
-Ran 51 tests
+Ran 53 tests
 
 OK
-- 
2.21.0




[Qemu-block] [PATCH v5 41/42] iotests: Add test for commit in sub directory

2019-06-12 Thread Max Reitz
Add a test for committing an overlay in a sub directory to one of the
images in its backing chain, using both relative and absolute filenames.

Signed-off-by: Max Reitz 
---
 tests/qemu-iotests/020 | 36 
 tests/qemu-iotests/020.out | 10 ++
 2 files changed, 46 insertions(+)

diff --git a/tests/qemu-iotests/020 b/tests/qemu-iotests/020
index 6b0ebb37d2..94633c3a32 100755
--- a/tests/qemu-iotests/020
+++ b/tests/qemu-iotests/020
@@ -31,6 +31,11 @@ _cleanup()
_cleanup_test_img
 rm -f "$TEST_IMG.base"
 rm -f "$TEST_IMG.orig"
+
+rm -f "$TEST_DIR/subdir/t.$IMGFMT.base"
+rm -f "$TEST_DIR/subdir/t.$IMGFMT.mid"
+rm -f "$TEST_DIR/subdir/t.$IMGFMT"
+rmdir "$TEST_DIR/subdir" &> /dev/null
 }
 trap "_cleanup; exit \$status" 0 1 2 3 15
 
@@ -133,6 +138,37 @@ $QEMU_IO -c 'writev 0 64k' "$TEST_IMG" | _filter_qemu_io
 $QEMU_IMG commit "$TEST_IMG"
 _cleanup
 
+
+echo
+echo 'Testing commit in sub-directory with relative filenames'
+echo
+
+pushd "$TEST_DIR" > /dev/null
+
+mkdir subdir
+
+TEST_IMG="subdir/t.$IMGFMT.base" _make_test_img 1M
+TEST_IMG="subdir/t.$IMGFMT.mid" _make_test_img -b "t.$IMGFMT.base"
+TEST_IMG="subdir/t.$IMGFMT" _make_test_img -b "t.$IMGFMT.mid"
+
+# Should work
+$QEMU_IMG commit -b "t.$IMGFMT.mid" "subdir/t.$IMGFMT"
+
+# Might theoretically work, but does not in practice (we have to
+# decide between this and the above; and since we always represent
+# backing file names as relative to the overlay, we go for the above)
+$QEMU_IMG commit -b "subdir/t.$IMGFMT.mid" "subdir/t.$IMGFMT" 2>&1 | \
+_filter_imgfmt
+
+# This should work as well
+$QEMU_IMG commit -b "$TEST_DIR/subdir/t.$IMGFMT.mid" "subdir/t.$IMGFMT"
+
+popd > /dev/null
+
+# Now let's try with just absolute filenames
+$QEMU_IMG commit -b "$TEST_DIR/subdir/t.$IMGFMT.mid" \
+"$TEST_DIR/subdir/t.$IMGFMT"
+
 # success, all done
 echo "*** done"
 rm -f $seq.full
diff --git a/tests/qemu-iotests/020.out b/tests/qemu-iotests/020.out
index 4b722b2dd0..228c37dded 100644
--- a/tests/qemu-iotests/020.out
+++ b/tests/qemu-iotests/020.out
@@ -1094,4 +1094,14 @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1048576 
backing_file=json:{'driv
 wrote 65536/65536 bytes at offset 0
 64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 qemu-img: Block job failed: No space left on device
+
+Testing commit in sub-directory with relative filenames
+
+Formatting 'subdir/t.IMGFMT.base', fmt=IMGFMT size=1048576
+Formatting 'subdir/t.IMGFMT.mid', fmt=IMGFMT size=1048576 
backing_file=t.IMGFMT.base
+Formatting 'subdir/t.IMGFMT', fmt=IMGFMT size=1048576 backing_file=t.IMGFMT.mid
+Image committed.
+qemu-img: Did not find 'subdir/t.IMGFMT.mid' in the backing chain of 
'subdir/t.IMGFMT'
+Image committed.
+Image committed.
 *** done
-- 
2.21.0




[Qemu-block] [PATCH v5 37/42] block: Leave BDS.backing_file constant

2019-06-12 Thread Max Reitz
Parts of the block layer treat BDS.backing_file as if it were whatever
the image header says (i.e., if it is a relative path, it is relative to
the overlay), other parts treat it like a cache for
bs->backing->bs->filename (relative paths are relative to the CWD).
Considering bs->backing->bs->filename exists, let us make it mean the
former.

Among other things, this now allows the user to specify a base when
using qemu-img to commit an image file in a directory that is not the
CWD (assuming everything uses relative filenames).

Before this patch:

$ ./qemu-img create -f qcow2 foo/bot.qcow2 1M
$ ./qemu-img create -f qcow2 -b bot.qcow2 foo/mid.qcow2
$ ./qemu-img create -f qcow2 -b mid.qcow2 foo/top.qcow2
$ ./qemu-img commit -b mid.qcow2 foo/top.qcow2
qemu-img: Did not find 'mid.qcow2' in the backing chain of 'foo/top.qcow2'
$ ./qemu-img commit -b foo/mid.qcow2 foo/top.qcow2
qemu-img: Did not find 'foo/mid.qcow2' in the backing chain of 'foo/top.qcow2'
$ ./qemu-img commit -b $PWD/foo/mid.qcow2 foo/top.qcow2
qemu-img: Did not find '[...]/foo/mid.qcow2' in the backing chain of 'foo/top.qcow2'

After this patch:

$ ./qemu-img commit -b mid.qcow2 foo/top.qcow2
Image committed.
$ ./qemu-img commit -b foo/mid.qcow2 foo/top.qcow2
qemu-img: Did not find 'foo/mid.qcow2' in the backing chain of 'foo/top.qcow2'
$ ./qemu-img commit -b $PWD/foo/mid.qcow2 foo/top.qcow2
Image committed.

With this change, bdrv_find_backing_image() must look at whether the
user has overridden a BDS's backing file.  If so, it can no longer use
bs->backing_file, but must instead compare the given filename against
the backing node's filename directly.

Note that this changes the QAPI output for a node's backing_file.  We
had very inconsistent output there (sometimes what the image header
said, sometimes the actual filename of the backing image).  This
inconsistent output was effectively useless, so we have to decide one
way or the other.  Considering that bs->backing_file usually at runtime
contained the path to the image relative to qemu's CWD (or absolute),
this patch changes QAPI's backing_file to always report the
bs->backing->bs->filename from now on.  If you want to receive the image
header information, you have to refer to full-backing-filename.

This necessitates a change to iotest 228.  The interesting information
it really wanted is the image header, and it can get that now, but it
has to use full-backing-filename instead of backing_file.  Because of
this patch's changes to bs->backing_file's behavior, we also need some
reference output changes.

Along with the changes to bs->backing_file, stop updating
BDS.backing_format in bdrv_backing_attach() as well.  This necessitates
a change to the reference output of iotest 191.

iotest 245 changes in behavior: With the backing node no longer
overriding the parent node's backing_file string, you can now omit the
@backing option when reopening a node with neither a default nor a
current backing file even if it used to have a backing node at some
point.

Signed-off-by: Max Reitz 
---
 include/block/block_int.h  | 19 ++-
 block.c| 35 ---
 block/qapi.c   |  7 ---
 tests/qemu-iotests/191.out |  1 -
 tests/qemu-iotests/228 |  6 +++---
 tests/qemu-iotests/228.out |  6 +++---
 tests/qemu-iotests/245 |  4 +++-
 7 files changed, 55 insertions(+), 23 deletions(-)

diff --git a/include/block/block_int.h b/include/block/block_int.h
index 431fa38ea0..02b55cff91 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -777,11 +777,20 @@ struct BlockDriverState {
 bool walking_aio_notifiers; /* to make removal during iteration safe */
 
 char filename[PATH_MAX];
-char backing_file[PATH_MAX]; /* if non zero, the image is a diff of
-this file image */
-/* The backing filename indicated by the image header; if we ever
- * open this file, then this is replaced by the resulting BDS's
- * filename (i.e. after a bdrv_refresh_filename() run). */
+/*
+ * If not empty, this image is a diff in relation to backing_file.
+ * Note that this is the name given in the image header and
+ * therefore may or may not be equal to .backing->bs->filename.
+ * If this field contains a relative path, it is to be resolved
+ * relatively to the overlay's location.
+ */
+char backing_file[PATH_MAX];
+/*
+ * The backing filename indicated by the image header.  Contrary
+ * to backing_file, if we ever open this file, auto_backing_file
+ * is replaced by the resulting BDS's filename (i.e. after a
+ * bdrv_refresh_filename() run).
+ */
 char auto_backing_file[PATH_MAX];
 char backing_format[16]; /* if non-zero and backing_file exists */
 
diff --git a/block.c b/block.c
index e129869a7e..a962e346ab 100644
--- a/block.c
+++ b/block.c
@@ -78,6 +78,8 @@ static BlockDriverState *bdrv_open_inherit(const char 
*filena

[Qemu-block] [PATCH v5 35/42] block: Fix check_to_replace_node()

2019-06-12 Thread Max Reitz
Currently, check_to_replace_node() only allows mirror to replace a node
in the chain of the source node, and only if it is the first non-filter
node below the source.  Well, technically, the idea is that you can
exactly replace a quorum child by mirroring from quorum.

There are (probably) two reasons for this:
(1) We do not want to create loops.
(2) @replaces and @device should have exactly the same content so
replacing them does not cause visible data to change.

This has two issues:
(1) It is overly restrictive.  It is completely fine for @replaces to be
a filter.
(2) It is not restrictive enough.  You can create loops with this as
follows:

$ qemu-img create -f qcow2 /tmp/source.qcow2 64M
$ qemu-system-x86_64 -qmp stdio
{"execute": "qmp_capabilities"}
{"execute": "object-add",
 "arguments": {"qom-type": "throttle-group", "id": "tg0"}}
{"execute": "blockdev-add",
 "arguments": {
 "node-name": "source",
 "driver": "throttle",
 "throttle-group": "tg0",
 "file": {
 "node-name": "filtered",
 "driver": "qcow2",
 "file": {
 "driver": "file",
 "filename": "/tmp/source.qcow2"
 } } } }
{"execute": "drive-mirror",
 "arguments": {
 "job-id": "mirror",
 "device": "source",
 "target": "/tmp/target.qcow2",
 "format": "qcow2",
 "node-name": "target",
 "sync" :"none",
 "replaces": "filtered"
 } }
{"execute": "block-job-complete", "arguments": {"device": "mirror"}}

And qemu crashes because of a stack overflow due to the loop being
created (target's backing file is source, so when it replaces filtered,
it points to itself through source).

(blockdev-mirror can be broken similarly.)

So let us make the checks for the two conditions above explicit, which
makes the whole function exactly as restrictive as it needs to be.

Signed-off-by: Max Reitz 
---
 include/block/block.h |  1 +
 block.c   | 83 +++
 blockdev.c| 34 --
 3 files changed, 110 insertions(+), 8 deletions(-)

diff --git a/include/block/block.h b/include/block/block.h
index 7835c5b370..484c0af766 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -404,6 +404,7 @@ bool bdrv_is_first_non_filter(BlockDriverState *candidate);
 
 /* check if a named node can be replaced when doing drive-mirror */
 BlockDriverState *check_to_replace_node(BlockDriverState *parent_bs,
+BlockDriverState *backing_bs,
 const char *node_name, Error **errp);
 
 /* async block I/O */
diff --git a/block.c b/block.c
index 59d1d4b2b1..e129869a7e 100644
--- a/block.c
+++ b/block.c
@@ -6142,7 +6142,59 @@ bool bdrv_is_first_non_filter(BlockDriverState 
*candidate)
 return false;
 }
 
+static bool is_child_of(BlockDriverState *child, BlockDriverState *parent)
+{
+BdrvChild *c;
+
+if (!parent) {
+return false;
+}
+
+QLIST_FOREACH(c, &parent->children, next) {
+if (c->bs == child || is_child_of(child, c->bs)) {
+return true;
+}
+}
+
+return false;
+}
+
+/*
+ * Return true if there are only filters in [@top, @base).  Note that
+ * this may include quorum (which bdrv_chain_contains() cannot
+ * handle).
+ */
+static bool is_filtered_child(BlockDriverState *top, BlockDriverState *base)
+{
+BdrvChild *c;
+
+if (!top) {
+return false;
+}
+
+if (top == base) {
+return true;
+}
+
+if (!top->drv->is_filter) {
+return false;
+}
+
+QLIST_FOREACH(c, &top->children, next) {
+if (is_filtered_child(c->bs, base)) {
+return true;
+}
+}
+
+return false;
+}
+
+/*
+ * @parent_bs is mirror's source BDS, @backing_bs is the BDS which
+ * will be attached to the target when mirror completes.
+ */
 BlockDriverState *check_to_replace_node(BlockDriverState *parent_bs,
+BlockDriverState *backing_bs,
 const char *node_name, Error **errp)
 {
 BlockDriverState *to_replace_bs = bdrv_find_node(node_name);
@@ -6161,13 +6213,32 @@ BlockDriverState 
*check_to_replace_node(BlockDriverState *parent_bs,
 goto out;
 }
 
-/* We don't want arbitrary node of the BDS chain to be replaced only the 
top
- * most non filter in order to prevent data corruption.
- * Another benefit is that this tests exclude backing files which are
- * blocked by the backing blockers.
+/*
+ * If to_replace_bs is (recursively) a child of backing_bs,
+ * replacing it may create a loop.  We cannot allow that.
  */
-if (!bdrv_recurse_is_first_non_filter(parent_bs, to_replace_bs)) {
-error_setg(errp, "Only top most non filter can be replaced");
+if (to_replace_bs == backing_bs || is_child_of(to_replace_bs, backing_bs)) 
{
+error_setg(errp, "Replacing this node would result in a loop");
+t

[Qemu-block] [PATCH v5 38/42] iotests: Let complete_and_wait() work with commit

2019-06-12 Thread Max Reitz
complete_and_wait() and wait_ready() currently only work for mirror
jobs.  Let them work for active commit jobs, too.

Signed-off-by: Max Reitz 
---
 tests/qemu-iotests/iotests.py | 10 +++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py
index dc77d3fba0..55066d62bb 100644
--- a/tests/qemu-iotests/iotests.py
+++ b/tests/qemu-iotests/iotests.py
@@ -697,8 +697,12 @@ class QMPTestCase(unittest.TestCase):
 
 def wait_ready(self, drive='drive0'):
 '''Wait until a block job BLOCK_JOB_READY event'''
-f = {'data': {'type': 'mirror', 'device': drive } }
-event = self.vm.event_wait(name='BLOCK_JOB_READY', match=f)
+event = self.vm.events_wait([
+('BLOCK_JOB_READY',
+ {'data': {'type': 'mirror', 'device': drive } }),
+('BLOCK_JOB_READY',
+ {'data': {'type': 'commit', 'device': drive } })
+])
 
 def wait_ready_and_cancel(self, drive='drive0'):
 self.wait_ready(drive=drive)
@@ -716,7 +720,7 @@ class QMPTestCase(unittest.TestCase):
 self.assert_qmp(result, 'return', {})
 
 event = self.wait_until_completed(drive=drive)
-self.assert_qmp(event, 'data/type', 'mirror')
+self.assertTrue(event['data']['type'] in ['mirror', 'commit'])
 
 def pause_wait(self, job_id='job0'):
 with Timeout(1, "Timeout waiting for job to pause"):
-- 
2.21.0




[Qemu-block] [PATCH v5 32/42] block: Make bdrv_get_cumulative_perm() public

2019-06-12 Thread Max Reitz
This is useful in other files like blockdev.c to determine, for example,
whether a node can be written to or not.

Signed-off-by: Max Reitz 
---
 include/block/block_int.h | 3 +++
 block.c   | 6 ++
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/include/block/block_int.h b/include/block/block_int.h
index c0a05beec3..cfefb00104 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -1181,6 +1181,9 @@ void bdrv_root_unref_child(BdrvChild *child);
 int bdrv_child_try_set_perm(BdrvChild *c, uint64_t perm, uint64_t shared,
 Error **errp);
 
+void bdrv_get_cumulative_perm(BlockDriverState *bs,
+  uint64_t *perm, uint64_t *shared_perm);
+
 /* Default implementation for BlockDriver.bdrv_child_perm() that can be used by
  * block filters: Forward CONSISTENT_READ, WRITE, WRITE_UNCHANGED and RESIZE to
  * all children */
diff --git a/block.c b/block.c
index 856d9b58be..59d1d4b2b1 100644
--- a/block.c
+++ b/block.c
@@ -1711,8 +1711,6 @@ static int bdrv_child_check_perm(BdrvChild *c, 
BlockReopenQueue *q,
  GSList *ignore_children, Error **errp);
 static void bdrv_child_abort_perm_update(BdrvChild *c);
 static void bdrv_child_set_perm(BdrvChild *c, uint64_t perm, uint64_t shared);
-static void bdrv_get_cumulative_perm(BlockDriverState *bs, uint64_t *perm,
- uint64_t *shared_perm);
 
 typedef struct BlockReopenQueueEntry {
  bool prepared;
@@ -1904,8 +1902,8 @@ static void bdrv_set_perm(BlockDriverState *bs, uint64_t 
cumulative_perms,
 }
 }
 
-static void bdrv_get_cumulative_perm(BlockDriverState *bs, uint64_t *perm,
- uint64_t *shared_perm)
+void bdrv_get_cumulative_perm(BlockDriverState *bs,
+  uint64_t *perm, uint64_t *shared_perm)
 {
 BdrvChild *c;
 uint64_t cumulative_perms = 0;
-- 
2.21.0




[Qemu-block] [PATCH v5 33/42] blockdev: Fix active commit choice

2019-06-12 Thread Max Reitz
We have to perform an active commit whenever the top node has a parent
that has taken the WRITE permission on it.

Signed-off-by: Max Reitz 
---
 blockdev.c | 24 +---
 1 file changed, 21 insertions(+), 3 deletions(-)

diff --git a/blockdev.c b/blockdev.c
index a464cabf9e..5370f3b738 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -3294,6 +3294,7 @@ void qmp_block_commit(bool has_job_id, const char 
*job_id, const char *device,
  */
 BlockdevOnError on_error = BLOCKDEV_ON_ERROR_REPORT;
 int job_flags = JOB_DEFAULT;
+uint64_t top_perm, top_shared;
 
 if (!has_speed) {
 speed = 0;
@@ -3406,14 +3407,31 @@ void qmp_block_commit(bool has_job_id, const char 
*job_id, const char *device,
 goto out;
 }
 
-if (top_bs == bs) {
+/*
+ * Active commit is required if and only if someone has taken a
+ * WRITE permission on the top node.  Historically, we have always
+ * used active commit for top nodes, so continue that practice.
+ * (Active commit is never really wrong.)
+ */
+bdrv_get_cumulative_perm(top_bs, &top_perm, &top_shared);
+if (top_perm & BLK_PERM_WRITE ||
+bdrv_skip_rw_filters(top_bs) == bdrv_skip_rw_filters(bs))
+{
 if (has_backing_file) {
 error_setg(errp, "'backing-file' specified,"
  " but 'top' is the active layer");
 goto out;
 }
-commit_active_start(has_job_id ? job_id : NULL, bs, base_bs,
-job_flags, speed, on_error,
+if (!has_job_id) {
+/*
+ * Emulate here what block_job_create() does, because it
+ * is possible that @bs != @top_bs (the block job should
+ * be named after @bs, even if @top_bs is the actual
+ * source)
+ */
+job_id = bdrv_get_device_name(bs);
+}
+commit_active_start(job_id, top_bs, base_bs, job_flags, speed, 
on_error,
 filter_node_name, NULL, NULL, false, &local_err);
 } else {
 BlockDriverState *overlay_bs = bdrv_find_overlay(bs, top_bs);
-- 
2.21.0




[Qemu-block] [PATCH v5 25/42] mirror: Deal with filters

2019-06-12 Thread Max Reitz
This includes some permission limiting (for example, we only need to
take the RESIZE permission for active commits where the base is smaller
than the top).

Signed-off-by: Max Reitz 
---
 block/mirror.c | 110 +
 blockdev.c |  47 +
 2 files changed, 124 insertions(+), 33 deletions(-)

diff --git a/block/mirror.c b/block/mirror.c
index 4fa8f57c80..3d767e3030 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -660,8 +660,10 @@ static int mirror_exit_common(Job *job)
 &error_abort);
 if (!abort && s->backing_mode == MIRROR_SOURCE_BACKING_CHAIN) {
 BlockDriverState *backing = s->is_none_mode ? src : s->base;
-if (backing_bs(target_bs) != backing) {
-bdrv_set_backing_hd(target_bs, backing, &local_err);
+BlockDriverState *unfiltered_target = bdrv_skip_rw_filters(target_bs);
+
+if (bdrv_filtered_cow_bs(unfiltered_target) != backing) {
+bdrv_set_backing_hd(unfiltered_target, backing, &local_err);
 if (local_err) {
 error_report_err(local_err);
 ret = -EPERM;
@@ -711,7 +713,7 @@ static int mirror_exit_common(Job *job)
 block_job_remove_all_bdrv(bjob);
 bdrv_child_try_set_perm(mirror_top_bs->backing, 0, BLK_PERM_ALL,
 &error_abort);
-bdrv_replace_node(mirror_top_bs, backing_bs(mirror_top_bs), &error_abort);
+bdrv_replace_node(mirror_top_bs, mirror_top_bs->backing->bs, &error_abort);
 
 /* We just changed the BDS the job BB refers to (with either or both of the
  * bdrv_replace_node() calls), so switch the BB back so the cleanup does
@@ -757,6 +759,7 @@ static int coroutine_fn mirror_dirty_init(MirrorBlockJob *s)
 {
 int64_t offset;
 BlockDriverState *base = s->base;
+BlockDriverState *filtered_base;
 BlockDriverState *bs = s->mirror_top_bs->backing->bs;
 BlockDriverState *target_bs = blk_bs(s->target);
 int ret;
@@ -795,6 +798,9 @@ static int coroutine_fn mirror_dirty_init(MirrorBlockJob *s)
 s->initial_zeroing_ongoing = false;
 }
 
+/* Will be NULL if @base is not in @bs's chain */
+filtered_base = bdrv_filtered_cow_bs(bdrv_find_overlay(bs, base));
+
 /* First part, loop on the sectors and initialize the dirty bitmap.  */
 for (offset = 0; offset < s->bdev_length; ) {
 /* Just to make sure we are not exceeding int limit. */
@@ -807,7 +813,7 @@ static int coroutine_fn mirror_dirty_init(MirrorBlockJob *s)
 return 0;
 }
 
-ret = bdrv_is_allocated_above(bs, base, offset, bytes, &count);
+ret = bdrv_is_allocated_above(bs, filtered_base, offset, bytes, 
&count);
 if (ret < 0) {
 return ret;
 }
@@ -903,7 +909,7 @@ static int coroutine_fn mirror_run(Job *job, Error **errp)
 } else {
 s->target_cluster_size = BDRV_SECTOR_SIZE;
 }
-if (backing_filename[0] && !target_bs->backing &&
+if (backing_filename[0] && !bdrv_backing_chain_next(target_bs) &&
 s->granularity < s->target_cluster_size) {
 s->buf_size = MAX(s->buf_size, s->target_cluster_size);
 s->cow_bitmap = bitmap_new(length);
@@ -1083,8 +1089,9 @@ static void mirror_complete(Job *job, Error **errp)
 if (s->backing_mode == MIRROR_OPEN_BACKING_CHAIN) {
 int ret;
 
-assert(!target->backing);
-ret = bdrv_open_backing_file(target, NULL, "backing", errp);
+assert(!bdrv_backing_chain_next(target));
+ret = bdrv_open_backing_file(bdrv_skip_rw_filters(target), NULL,
+ "backing", errp);
 if (ret < 0) {
 return;
 }
@@ -1503,8 +1510,8 @@ static void mirror_start_job(const char *job_id, 
BlockDriverState *bs,
 MirrorBlockJob *s;
 MirrorBDSOpaque *bs_opaque;
 BlockDriverState *mirror_top_bs;
-bool target_graph_mod;
 bool target_is_backing;
+uint64_t target_perms, target_shared_perms;
 Error *local_err = NULL;
 int ret;
 
@@ -1523,7 +1530,7 @@ static void mirror_start_job(const char *job_id, 
BlockDriverState *bs,
 buf_size = DEFAULT_MIRROR_BUF_SIZE;
 }
 
-if (bs == target) {
+if (bdrv_skip_rw_filters(bs) == bdrv_skip_rw_filters(target)) {
 error_setg(errp, "Can't mirror node into itself");
 return;
 }
@@ -1583,15 +1590,42 @@ static void mirror_start_job(const char *job_id, 
BlockDriverState *bs,
  * In the case of active commit, things look a bit different, though,
  * because the target is an already populated backing file in active use.
  * We can allow anything except resize there.*/
+
+target_perms = BLK_PERM_WRITE;
+target_shared_perms = BLK_PERM_WRITE_UNCHANGED;
+
 target_is_backing = bdrv_chain_contains(bs, target);
-target_graph_mod = (backing_mode != MIRROR_LEAVE_BACKING_CHAIN);
+if (target_is_backing) {
+int64_t bs_size, target_size;
+   

[Qemu-block] [PATCH v5 24/42] block: Use child access functions for QAPI queries

2019-06-12 Thread Max Reitz
query-block and query-named-block-nodes now return any filtered child
under "backing", not just bs->backing or COW children.  This is so that
filters do not interrupt the reported backing chain.  This changes the
output for iotest 184, as the throttled node now appears as a backing
child.

Signed-off-by: Max Reitz 
---
 block/qapi.c   | 35 ---
 tests/qemu-iotests/184.out |  7 ++-
 2 files changed, 26 insertions(+), 16 deletions(-)

diff --git a/block/qapi.c b/block/qapi.c
index 0c13c86f4e..1fd2937abc 100644
--- a/block/qapi.c
+++ b/block/qapi.c
@@ -150,9 +150,13 @@ BlockDeviceInfo *bdrv_block_device_info(BlockBackend *blk,
 return NULL;
 }
 
-if (bs0->drv && bs0->backing) {
+if (bs0->drv && bdrv_filtered_child(bs0)) {
+/*
+ * Put any filtered child here (for backwards compatibility to when
+ * we put bs0->backing here, which might be any filtered child).
+ */
 info->backing_file_depth++;
-bs0 = bs0->backing->bs;
+bs0 = bdrv_filtered_bs(bs0);
 (*p_image_info)->has_backing_image = true;
 p_image_info = &((*p_image_info)->backing_image);
 } else {
@@ -161,9 +165,8 @@ BlockDeviceInfo *bdrv_block_device_info(BlockBackend *blk,
 
 /* Skip automatically inserted nodes that the user isn't aware of for
  * query-block (blk != NULL), but not for query-named-block-nodes */
-while (blk && bs0->drv && bs0->implicit) {
-bs0 = backing_bs(bs0);
-assert(bs0);
+if (blk) {
+bs0 = bdrv_skip_implicit_filters(bs0);
 }
 }
 
@@ -348,9 +351,9 @@ static void bdrv_query_info(BlockBackend *blk, BlockInfo 
**p_info,
 BlockDriverState *bs = blk_bs(blk);
 char *qdev;
 
-/* Skip automatically inserted nodes that the user isn't aware of */
-while (bs && bs->drv && bs->implicit) {
-bs = backing_bs(bs);
+if (bs) {
+/* Skip automatically inserted nodes that the user isn't aware of */
+bs = bdrv_skip_implicit_filters(bs);
 }
 
 info->device = g_strdup(blk_name(blk));
@@ -507,6 +510,7 @@ static void bdrv_query_blk_stats(BlockDeviceStats *ds, 
BlockBackend *blk)
 static BlockStats *bdrv_query_bds_stats(BlockDriverState *bs,
 bool blk_level)
 {
+BlockDriverState *storage_bs, *cow_bs;
 BlockStats *s = NULL;
 
 s = g_malloc0(sizeof(*s));
@@ -519,9 +523,8 @@ static BlockStats *bdrv_query_bds_stats(BlockDriverState 
*bs,
 /* Skip automatically inserted nodes that the user isn't aware of in
  * a BlockBackend-level command. Stay at the exact node for a node-level
  * command. */
-while (blk_level && bs->drv && bs->implicit) {
-bs = backing_bs(bs);
-assert(bs);
+if (blk_level) {
+bs = bdrv_skip_implicit_filters(bs);
 }
 
 if (bdrv_get_node_name(bs)[0]) {
@@ -531,14 +534,16 @@ static BlockStats *bdrv_query_bds_stats(BlockDriverState 
*bs,
 
 s->stats->wr_highest_offset = stat64_get(&bs->wr_highest_offset);
 
-if (bs->file) {
+storage_bs = bdrv_storage_bs(bs);
+if (storage_bs) {
 s->has_parent = true;
-s->parent = bdrv_query_bds_stats(bs->file->bs, blk_level);
+s->parent = bdrv_query_bds_stats(storage_bs, blk_level);
 }
 
-if (blk_level && bs->backing) {
+cow_bs = bdrv_filtered_cow_bs(bs);
+if (blk_level && cow_bs) {
 s->has_backing = true;
-s->backing = bdrv_query_bds_stats(bs->backing->bs, blk_level);
+s->backing = bdrv_query_bds_stats(cow_bs, blk_level);
 }
 
 return s;
diff --git a/tests/qemu-iotests/184.out b/tests/qemu-iotests/184.out
index 3deb3cfb94..1d61f7e224 100644
--- a/tests/qemu-iotests/184.out
+++ b/tests/qemu-iotests/184.out
@@ -27,6 +27,11 @@ Testing:
 "iops_rd": 0,
 "detect_zeroes": "off",
 "image": {
+"backing-image": {
+"virtual-size": 1073741824,
+"filename": "null-co://",
+"format": "null-co"
+},
 "virtual-size": 1073741824,
 "filename": "json:{\"throttle-group\": \"group0\", \"driver\": 
\"throttle\", \"file\": {\"driver\": \"null-co\"}}",
 "format": "throttle"
@@ -34,7 +39,7 @@ Testing:
 "iops_wr": 0,
 "ro": false,
 "node-name": "throttle0",
-"backing_file_depth": 0,
+"backing_file_depth": 1,
 "drv": "throttle",
 "iops": 0,
 "bps_wr": 0,
-- 
2.21.0




[Qemu-block] [PATCH v5 26/42] backup: Deal with filters

2019-06-12 Thread Max Reitz
Signed-off-by: Max Reitz 
---
 block/backup.c |  9 +
 blockdev.c | 19 +++
 2 files changed, 20 insertions(+), 8 deletions(-)

diff --git a/block/backup.c b/block/backup.c
index 715e1d3be8..88435f883d 100644
--- a/block/backup.c
+++ b/block/backup.c
@@ -502,6 +502,7 @@ static int64_t 
backup_calculate_cluster_size(BlockDriverState *target,
 {
 int ret;
 BlockDriverInfo bdi;
+bool target_does_cow = bdrv_backing_chain_next(target);
 
 /*
  * If there is no backing file on the target, we cannot rely on COW if our
@@ -509,7 +510,7 @@ static int64_t 
backup_calculate_cluster_size(BlockDriverState *target,
  * targets with a backing file, try to avoid COW if possible.
  */
 ret = bdrv_get_info(target, &bdi);
-if (ret == -ENOTSUP && !target->backing) {
+if (ret == -ENOTSUP && !target_does_cow) {
 /* Cluster size is not defined */
 warn_report("The target block device doesn't provide "
 "information about the block size and it doesn't have a "
@@ -518,14 +519,14 @@ static int64_t 
backup_calculate_cluster_size(BlockDriverState *target,
 "this default, the backup may be unusable",
 BACKUP_CLUSTER_SIZE_DEFAULT);
 return BACKUP_CLUSTER_SIZE_DEFAULT;
-} else if (ret < 0 && !target->backing) {
+} else if (ret < 0 && !target_does_cow) {
 error_setg_errno(errp, -ret,
 "Couldn't determine the cluster size of the target image, "
 "which has no backing file");
 error_append_hint(errp,
 "Aborting, since this may create an unusable destination image\n");
 return ret;
-} else if (ret < 0 && target->backing) {
+} else if (ret < 0 && target_does_cow) {
 /* Not fatal; just trudge on ahead. */
 return BACKUP_CLUSTER_SIZE_DEFAULT;
 }
@@ -569,7 +570,7 @@ BlockJob *backup_job_create(const char *job_id, 
BlockDriverState *bs,
 return NULL;
 }
 
-if (compress && target->drv->bdrv_co_pwritev_compressed == NULL) {
+if (compress && !bdrv_supports_compressed_writes(target)) {
 error_setg(errp, "Compression is not supported for this drive %s",
bdrv_get_device_name(target));
 return NULL;
diff --git a/blockdev.c b/blockdev.c
index 68e8d33447..605e7b0994 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -3500,7 +3500,13 @@ static BlockJob *do_drive_backup(DriveBackup *backup, 
JobTxn *txn,
 /* See if we have a backing HD we can use to create our new image
  * on top of. */
 if (backup->sync == MIRROR_SYNC_MODE_TOP) {
-source = backing_bs(bs);
+/*
+ * Backup will not replace the source by the target, so none
+ * of the filters skipped here will be removed (in contrast to
+ * mirror).  Therefore, we can skip all of them when looking
+ * for the first COW relationship.
+ */
+source = bdrv_filtered_cow_bs(bdrv_skip_rw_filters(bs));
 if (!source) {
 backup->sync = MIRROR_SYNC_MODE_FULL;
 }
@@ -3520,9 +3526,14 @@ static BlockJob *do_drive_backup(DriveBackup *backup, 
JobTxn *txn,
 if (backup->mode != NEW_IMAGE_MODE_EXISTING) {
 assert(backup->format);
 if (source) {
-bdrv_refresh_filename(source);
-bdrv_img_create(backup->target, backup->format, source->filename,
-source->drv->format_name, NULL,
+/* Implicit filters should not appear in the filename */
+BlockDriverState *explicit_backing =
+bdrv_skip_implicit_filters(source);
+
+bdrv_refresh_filename(explicit_backing);
+bdrv_img_create(backup->target, backup->format,
+explicit_backing->filename,
+explicit_backing->drv->format_name, NULL,
 size, flags, false, &local_err);
 } else {
 bdrv_img_create(backup->target, backup->format, NULL, NULL, NULL,
-- 
2.21.0




[Qemu-block] [PATCH v5 29/42] nbd: Use CAF when looking for dirty bitmap

2019-06-12 Thread Max Reitz
When looking for a dirty bitmap to share, we should handle filters by
just including them in the search (so they do not break backing chains).

Signed-off-by: Max Reitz 
---
 nbd/server.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/nbd/server.c b/nbd/server.c
index aeca3893fe..0d51d46b81 100644
--- a/nbd/server.c
+++ b/nbd/server.c
@@ -1508,13 +1508,13 @@ NBDExport *nbd_export_new(BlockDriverState *bs, 
uint64_t dev_offset,
 if (bitmap) {
 BdrvDirtyBitmap *bm = NULL;
 
-while (true) {
+while (bs) {
 bm = bdrv_find_dirty_bitmap(bs, bitmap);
-if (bm != NULL || bs->backing == NULL) {
+if (bm != NULL) {
 break;
 }
 
-bs = bs->backing->bs;
+bs = bdrv_filtered_bs(bs);
 }
 
 if (bm == NULL) {
-- 
2.21.0




[Qemu-block] [PATCH v5 30/42] qemu-img: Use child access functions

2019-06-12 Thread Max Reitz
This changes iotest 204's output, because blkdebug on top of a COW node
used to make qemu-img map disregard the rest of the backing chain (the
backing chain was broken by the filter).  With this patch, the
allocation in the base image is reported correctly.

Signed-off-by: Max Reitz 
---
 qemu-img.c | 36 
 tests/qemu-iotests/204.out |  1 +
 2 files changed, 21 insertions(+), 16 deletions(-)

diff --git a/qemu-img.c b/qemu-img.c
index 07b6e2a808..7bfa6e5d40 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -992,7 +992,7 @@ static int img_commit(int argc, char **argv)
 if (!blk) {
 return 1;
 }
-bs = blk_bs(blk);
+bs = bdrv_skip_implicit_filters(blk_bs(blk));
 
 qemu_progress_init(progress, 1.f);
 qemu_progress_print(0.f, 100);
@@ -1009,7 +1009,7 @@ static int img_commit(int argc, char **argv)
 /* This is different from QMP, which by default uses the deepest file 
in
  * the backing chain (i.e., the very base); however, the traditional
  * behavior of qemu-img commit is using the immediate backing file. */
-base_bs = backing_bs(bs);
+base_bs = bdrv_filtered_cow_bs(bs);
 if (!base_bs) {
 error_setg(&local_err, "Image does not have a backing file");
 goto done;
@@ -1626,19 +1626,18 @@ static int convert_iteration_sectors(ImgConvertState 
*s, int64_t sector_num)
 
 if (s->sector_next_status <= sector_num) {
 int64_t count = n * BDRV_SECTOR_SIZE;
+BlockDriverState *src_bs = blk_bs(s->src[src_cur]);
+BlockDriverState *base;
 
 if (s->target_has_backing) {
-
-ret = bdrv_block_status(blk_bs(s->src[src_cur]),
-(sector_num - src_cur_offset) *
-BDRV_SECTOR_SIZE,
-count, &count, NULL, NULL);
+base = bdrv_backing_chain_next(src_bs);
 } else {
-ret = bdrv_block_status_above(blk_bs(s->src[src_cur]), NULL,
-  (sector_num - src_cur_offset) *
-  BDRV_SECTOR_SIZE,
-  count, &count, NULL, NULL);
+base = NULL;
 }
+ret = bdrv_block_status_above(src_bs, base,
+  (sector_num - src_cur_offset) *
+  BDRV_SECTOR_SIZE,
+  count, &count, NULL, NULL);
 if (ret < 0) {
 error_report("error while reading block status of sector %" PRId64
  ": %s", sector_num, strerror(-ret));
@@ -2439,7 +2438,8 @@ static int img_convert(int argc, char **argv)
  * s.target_backing_sectors has to be negative, which it will
  * be automatically).  The backing file length is used only
  * for optimizations, so such a case is not fatal. */
-s.target_backing_sectors = bdrv_nb_sectors(out_bs->backing->bs);
+s.target_backing_sectors =
+bdrv_nb_sectors(bdrv_filtered_cow_bs(out_bs));
 } else {
 s.target_backing_sectors = -1;
 }
@@ -2802,6 +2802,7 @@ static int get_block_status(BlockDriverState *bs, int64_t 
offset,
 
 depth = 0;
 for (;;) {
+bs = bdrv_skip_rw_filters(bs);
 ret = bdrv_block_status(bs, offset, bytes, &bytes, &map, &file);
 if (ret < 0) {
 return ret;
@@ -2810,7 +2811,7 @@ static int get_block_status(BlockDriverState *bs, int64_t 
offset,
 if (ret & (BDRV_BLOCK_ZERO|BDRV_BLOCK_DATA)) {
 break;
 }
-bs = backing_bs(bs);
+bs = bdrv_filtered_cow_bs(bs);
 if (bs == NULL) {
 ret = 0;
 break;
@@ -2949,7 +2950,7 @@ static int img_map(int argc, char **argv)
 if (!blk) {
 return 1;
 }
-bs = blk_bs(blk);
+bs = bdrv_skip_implicit_filters(blk_bs(blk));
 
 if (output_format == OFORMAT_HUMAN) {
 printf("%-16s%-16s%-16s%s\n", "Offset", "Length", "Mapped to", "File");
@@ -3165,6 +3166,7 @@ static int img_rebase(int argc, char **argv)
 uint8_t *buf_old = NULL;
 uint8_t *buf_new = NULL;
 BlockDriverState *bs = NULL, *prefix_chain_bs = NULL;
+BlockDriverState *unfiltered_bs;
 char *filename;
 const char *fmt, *cache, *src_cache, *out_basefmt, *out_baseimg;
 int c, flags, src_flags, ret;
@@ -3299,6 +3301,8 @@ static int img_rebase(int argc, char **argv)
 }
 bs = blk_bs(blk);
 
+unfiltered_bs = bdrv_skip_rw_filters(bs);
+
 if (out_basefmt != NULL) {
 if (bdrv_find_format(out_basefmt) == NULL) {
 error_report("Invalid format name: '%s'", out_basefmt);
@@ -3310,7 +3314,7 @@ static int img_rebase(int argc, char **argv)
 /* For safe rebasing we need to compare old and new backing file */
 if (!unsafe) {
 QDict *options = NULL;
-BlockDriverState *b

[Qemu-block] [PATCH v5 21/42] block: Use CAFs for debug breakpoints

2019-06-12 Thread Max Reitz
When looking for a blkdebug node (which implements debug breakpoints),
use bdrv_primary_bs() to iterate through the graph, because that is
where a blkdebug node would be.

Signed-off-by: Max Reitz 
---
 block.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/block.c b/block.c
index 797bec0326..11b7ba8cf6 100644
--- a/block.c
+++ b/block.c
@@ -5097,7 +5097,7 @@ int bdrv_debug_breakpoint(BlockDriverState *bs, const 
char *event,
   const char *tag)
 {
 while (bs && bs->drv && !bs->drv->bdrv_debug_breakpoint) {
-bs = bs->file ? bs->file->bs : NULL;
+bs = bdrv_primary_bs(bs);
 }
 
 if (bs && bs->drv && bs->drv->bdrv_debug_breakpoint) {
@@ -5110,7 +5110,7 @@ int bdrv_debug_breakpoint(BlockDriverState *bs, const 
char *event,
 int bdrv_debug_remove_breakpoint(BlockDriverState *bs, const char *tag)
 {
 while (bs && bs->drv && !bs->drv->bdrv_debug_remove_breakpoint) {
-bs = bs->file ? bs->file->bs : NULL;
+bs = bdrv_primary_bs(bs);
 }
 
 if (bs && bs->drv && bs->drv->bdrv_debug_remove_breakpoint) {
@@ -5123,7 +5123,7 @@ int bdrv_debug_remove_breakpoint(BlockDriverState *bs, 
const char *tag)
 int bdrv_debug_resume(BlockDriverState *bs, const char *tag)
 {
 while (bs && (!bs->drv || !bs->drv->bdrv_debug_resume)) {
-bs = bs->file ? bs->file->bs : NULL;
+bs = bdrv_primary_bs(bs);
 }
 
 if (bs && bs->drv && bs->drv->bdrv_debug_resume) {
@@ -5136,7 +5136,7 @@ int bdrv_debug_resume(BlockDriverState *bs, const char 
*tag)
 bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag)
 {
 while (bs && bs->drv && !bs->drv->bdrv_debug_is_suspended) {
-bs = bs->file ? bs->file->bs : NULL;
+bs = bdrv_primary_bs(bs);
 }
 
 if (bs && bs->drv && bs->drv->bdrv_debug_is_suspended) {
-- 
2.21.0




[Qemu-block] [PATCH v5 27/42] commit: Deal with filters

2019-06-12 Thread Max Reitz
This includes some permission limiting (for example, we only need to
take the RESIZE permission if the base is smaller than the top).

Signed-off-by: Max Reitz 
---
 block/block-backend.c | 16 ---
 block/commit.c| 97 ---
 blockdev.c|  6 ++-
 3 files changed, 87 insertions(+), 32 deletions(-)

diff --git a/block/block-backend.c b/block/block-backend.c
index f5d9407d20..227a6951a0 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -2156,11 +2156,17 @@ int blk_commit_all(void)
 AioContext *aio_context = blk_get_aio_context(blk);
 
 aio_context_acquire(aio_context);
-if (blk_is_inserted(blk) && blk->root->bs->backing) {
-int ret = bdrv_commit(blk->root->bs);
-if (ret < 0) {
-aio_context_release(aio_context);
-return ret;
+if (blk_is_inserted(blk)) {
+BlockDriverState *non_filter;
+
+/* Legacy function, so skip implicit filters */
+non_filter = bdrv_skip_implicit_filters(blk->root->bs);
+if (bdrv_filtered_cow_child(non_filter)) {
+int ret = bdrv_commit(non_filter);
+if (ret < 0) {
+aio_context_release(aio_context);
+return ret;
+}
 }
 }
 aio_context_release(aio_context);
diff --git a/block/commit.c b/block/commit.c
index f20a26fecd..ec5a8c8edf 100644
--- a/block/commit.c
+++ b/block/commit.c
@@ -112,7 +112,7 @@ static void commit_abort(Job *job)
  * something to base, the intermediate images aren't valid any more. */
 bdrv_child_try_set_perm(s->commit_top_bs->backing, 0, BLK_PERM_ALL,
 &error_abort);
-bdrv_replace_node(s->commit_top_bs, backing_bs(s->commit_top_bs),
+bdrv_replace_node(s->commit_top_bs, s->commit_top_bs->backing->bs,
   &error_abort);
 
 bdrv_unref(s->commit_top_bs);
@@ -137,6 +137,7 @@ static void commit_clean(Job *job)
 static int coroutine_fn commit_run(Job *job, Error **errp)
 {
 CommitBlockJob *s = container_of(job, CommitBlockJob, common.job);
+BlockDriverState *filtered_base;
 int64_t offset;
 uint64_t delay_ns = 0;
 int ret = 0;
@@ -163,6 +164,9 @@ static int coroutine_fn commit_run(Job *job, Error **errp)
 }
 }
 
+filtered_base = bdrv_filtered_cow_bs(bdrv_find_overlay(blk_bs(s->top),
+   blk_bs(s->base)));
+
 buf = blk_blockalign(s->top, COMMIT_BUFFER_SIZE);
 
 for (offset = 0; offset < len; offset += n) {
@@ -176,7 +180,7 @@ static int coroutine_fn commit_run(Job *job, Error **errp)
 break;
 }
 /* Copy if allocated above the base */
-ret = bdrv_is_allocated_above(blk_bs(s->top), blk_bs(s->base),
+ret = bdrv_is_allocated_above(blk_bs(s->top), filtered_base,
   offset, COMMIT_BUFFER_SIZE, &n);
 copy = (ret == 1);
 trace_commit_one_iteration(s, offset, n, ret);
@@ -269,15 +273,35 @@ void commit_start(const char *job_id, BlockDriverState 
*bs,
 CommitBlockJob *s;
 BlockDriverState *iter;
 BlockDriverState *commit_top_bs = NULL;
+BlockDriverState *filtered_base;
 Error *local_err = NULL;
+int64_t base_size, top_size;
+uint64_t perms, iter_shared_perms;
 int ret;
 
 assert(top != bs);
-if (top == base) {
+if (bdrv_skip_rw_filters(top) == bdrv_skip_rw_filters(base)) {
 error_setg(errp, "Invalid files for merge: top and base are the same");
 return;
 }
 
+base_size = bdrv_getlength(base);
+if (base_size < 0) {
+error_setg_errno(errp, -base_size, "Could not inquire base image 
size");
+return;
+}
+
+top_size = bdrv_getlength(top);
+if (top_size < 0) {
+error_setg_errno(errp, -top_size, "Could not inquire top image size");
+return;
+}
+
+perms = BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE;
+if (base_size < top_size) {
+perms |= BLK_PERM_RESIZE;
+}
+
 s = block_job_create(job_id, &commit_job_driver, NULL, bs, 0, BLK_PERM_ALL,
  speed, creation_flags, NULL, NULL, errp);
 if (!s) {
@@ -313,17 +337,43 @@ void commit_start(const char *job_id, BlockDriverState 
*bs,
 
 s->commit_top_bs = commit_top_bs;
 
-/* Block all nodes between top and base, because they will
- * disappear from the chain after this operation. */
+/*
+ * Block all nodes between top and base, because they will
+ * disappear from the chain after this operation.
+ * Note that this assumes that the user is fine with removing all
+ * nodes (including R/W filters) between top and base.  Assuring
+ * this is the responsibility of the interface (i.e. whoever calls
+ * commit_start()).
+ */
 assert(bdrv_chain_contains(top, base));
-for (it

[Qemu-block] [PATCH v5 31/42] block: Drop backing_bs()

2019-06-12 Thread Max Reitz
We want to make it explicit where bs->backing is used, and we have done
so.  The old role of backing_bs() is now effectively taken by
bdrv_filtered_cow_bs().

Signed-off-by: Max Reitz 
---
 include/block/block_int.h | 5 -
 1 file changed, 5 deletions(-)

diff --git a/include/block/block_int.h b/include/block/block_int.h
index 875a33f255..c0a05beec3 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -925,11 +925,6 @@ typedef enum BlockMirrorBackingMode {
 MIRROR_LEAVE_BACKING_CHAIN,
 } BlockMirrorBackingMode;
 
-static inline BlockDriverState *backing_bs(BlockDriverState *bs)
-{
-return bs->backing ? bs->backing->bs : NULL;
-}
-
 
 /* Essential block drivers which must always be statically linked into qemu, 
and
  * which therefore can be accessed without using bdrv_find_format() */
-- 
2.21.0




[Qemu-block] [PATCH v5 16/42] block: Use child access functions when flushing

2019-06-12 Thread Max Reitz
If the driver does not implement .bdrv_co_flush(), bdrv_co_flush()
itself has to flush the children of the given node.  In that case, it
should not flush just bs->file->bs, but both the child that stores data
and the one that stores metadata (if they are separate).

In any case, the BLKDBG_EVENT() should be emitted on the primary child,
because that is where a blkdebug node would be if there is any.

Signed-off-by: Max Reitz 
---
 block/io.c | 21 ++---
 1 file changed, 18 insertions(+), 3 deletions(-)

diff --git a/block/io.c b/block/io.c
index 53aabf86b5..64408cf19a 100644
--- a/block/io.c
+++ b/block/io.c
@@ -2533,6 +2533,8 @@ static void coroutine_fn bdrv_flush_co_entry(void *opaque)
 
 int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
 {
+BdrvChild *primary_child = bdrv_primary_child(bs);
+BlockDriverState *storage_bs, *metadata_bs;
 int current_gen;
 int ret = 0;
 
@@ -2562,7 +2564,7 @@ int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
 }
 
 /* Write back cached data to the OS even with cache=unsafe */
-BLKDBG_EVENT(bs->file, BLKDBG_FLUSH_TO_OS);
+BLKDBG_EVENT(primary_child, BLKDBG_FLUSH_TO_OS);
 if (bs->drv->bdrv_co_flush_to_os) {
 ret = bs->drv->bdrv_co_flush_to_os(bs);
 if (ret < 0) {
@@ -2580,7 +2582,7 @@ int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
 goto flush_parent;
 }
 
-BLKDBG_EVENT(bs->file, BLKDBG_FLUSH_TO_DISK);
+BLKDBG_EVENT(primary_child, BLKDBG_FLUSH_TO_DISK);
 if (!bs->drv) {
 /* bs->drv->bdrv_co_flush() might have ejected the BDS
  * (even in case of apparent success) */
@@ -2625,7 +2627,20 @@ int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
  * in the case of cache=unsafe, so there are no useless flushes.
  */
 flush_parent:
-ret = bs->file ? bdrv_co_flush(bs->file->bs) : 0;
+storage_bs = bdrv_storage_bs(bs);
+metadata_bs = bdrv_metadata_bs(bs);
+
+ret = 0;
+if (storage_bs) {
+ret = bdrv_co_flush(storage_bs);
+}
+if (metadata_bs && metadata_bs != storage_bs) {
+int ret_metadata = bdrv_co_flush(metadata_bs);
+if (!ret) {
+ret = ret_metadata;
+}
+}
+
 out:
 /* Notify any pending flushes that we have completed */
 if (ret == 0) {
-- 
2.21.0




[Qemu-block] [PATCH v5 19/42] block: Use CAF in bdrv_co_rw_vmstate()

2019-06-12 Thread Max Reitz
If a node whose driver does not provide VM state functions has a
metadata child, the VM state should probably go there; if it is a
filter, the VM state should probably go to its filtered child.  It
follows that we should generally go down to the primary child.

Signed-off-by: Max Reitz 
---
 block/io.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/block/io.c b/block/io.c
index 659ea0c52a..14f99e1c00 100644
--- a/block/io.c
+++ b/block/io.c
@@ -2395,6 +2395,7 @@ bdrv_co_rw_vmstate(BlockDriverState *bs, QEMUIOVector 
*qiov, int64_t pos,
bool is_read)
 {
 BlockDriver *drv = bs->drv;
+BlockDriverState *child_bs = bdrv_primary_bs(bs);
 int ret = -ENOTSUP;
 
 bdrv_inc_in_flight(bs);
@@ -2407,8 +2408,8 @@ bdrv_co_rw_vmstate(BlockDriverState *bs, QEMUIOVector 
*qiov, int64_t pos,
 } else {
 ret = drv->bdrv_save_vmstate(bs, qiov, pos);
 }
-} else if (bs->file) {
-ret = bdrv_co_rw_vmstate(bs->file->bs, qiov, pos, is_read);
+} else if (child_bs) {
+ret = bdrv_co_rw_vmstate(child_bs, qiov, pos, is_read);
 }
 
 bdrv_dec_in_flight(bs);
-- 
2.21.0




[Qemu-block] [PATCH v5 14/42] block: Use CAFs when working with backing chains

2019-06-12 Thread Max Reitz
Use child access functions when iterating through backing chains so
filters do not break the chain.

Signed-off-by: Max Reitz 
---
 block.c | 40 
 1 file changed, 28 insertions(+), 12 deletions(-)

diff --git a/block.c b/block.c
index 11f37983d9..505b3e9a01 100644
--- a/block.c
+++ b/block.c
@@ -4261,7 +4261,8 @@ int bdrv_change_backing_file(BlockDriverState *bs,
 }
 
 /*
- * Finds the image layer in the chain that has 'bs' as its backing file.
+ * Finds the image layer in the chain that has 'bs' (or a filter on
+ * top of it) as its backing file.
  *
  * active is the current topmost image.
  *
@@ -4273,11 +4274,18 @@ int bdrv_change_backing_file(BlockDriverState *bs,
 BlockDriverState *bdrv_find_overlay(BlockDriverState *active,
 BlockDriverState *bs)
 {
-while (active && bs != backing_bs(active)) {
-active = backing_bs(active);
+bs = bdrv_skip_rw_filters(bs);
+active = bdrv_skip_rw_filters(active);
+
+while (active) {
+BlockDriverState *next = bdrv_backing_chain_next(active);
+if (bs == next) {
+return active;
+}
+active = next;
 }
 
-return active;
+return NULL;
 }
 
 /* Given a BDS, searches for the base layer. */
@@ -4421,9 +4429,7 @@ int bdrv_drop_intermediate(BlockDriverState *top, 
BlockDriverState *base,
  * other intermediate nodes have been dropped.
  * If 'top' is an implicit node (e.g. "commit_top") we should skip
  * it because no one inherits from it. We use explicit_top for that. */
-while (explicit_top && explicit_top->implicit) {
-explicit_top = backing_bs(explicit_top);
-}
+explicit_top = bdrv_skip_implicit_filters(explicit_top);
 update_inherits_from = bdrv_inherits_from_recursive(base, explicit_top);
 
 /* success - we can delete the intermediate states, and link top->base */
@@ -4902,7 +4908,7 @@ BlockDriverState *bdrv_lookup_bs(const char *device,
 bool bdrv_chain_contains(BlockDriverState *top, BlockDriverState *base)
 {
 while (top && top != base) {
-top = backing_bs(top);
+top = bdrv_filtered_bs(top);
 }
 
 return top != NULL;
@@ -5141,7 +5147,17 @@ BlockDriverState 
*bdrv_find_backing_image(BlockDriverState *bs,
 
 is_protocol = path_has_protocol(backing_file);
 
-for (curr_bs = bs; curr_bs->backing; curr_bs = curr_bs->backing->bs) {
+/*
+ * Being largely a legacy function, skip any filters here
+ * (because filters do not have normal filenames, so they cannot
+ * match anyway; and allowing json:{} filenames is a bit out of
+ * scope).
+ */
+for (curr_bs = bdrv_skip_rw_filters(bs);
+ bdrv_filtered_cow_child(curr_bs) != NULL;
+ curr_bs = bdrv_backing_chain_next(curr_bs))
+{
+BlockDriverState *bs_below = bdrv_backing_chain_next(curr_bs);
 
 /* If either of the filename paths is actually a protocol, then
  * compare unmodified paths; otherwise make paths relative */
@@ -5149,7 +5165,7 @@ BlockDriverState 
*bdrv_find_backing_image(BlockDriverState *bs,
 char *backing_file_full_ret;
 
 if (strcmp(backing_file, curr_bs->backing_file) == 0) {
-retval = curr_bs->backing->bs;
+retval = bs_below;
 break;
 }
 /* Also check against the full backing filename for the image */
@@ -5159,7 +5175,7 @@ BlockDriverState 
*bdrv_find_backing_image(BlockDriverState *bs,
 bool equal = strcmp(backing_file, backing_file_full_ret) == 0;
 g_free(backing_file_full_ret);
 if (equal) {
-retval = curr_bs->backing->bs;
+retval = bs_below;
 break;
 }
 }
@@ -5185,7 +5201,7 @@ BlockDriverState 
*bdrv_find_backing_image(BlockDriverState *bs,
 g_free(filename_tmp);
 
 if (strcmp(backing_file_full, filename_full) == 0) {
-retval = curr_bs->backing->bs;
+retval = bs_below;
 break;
 }
 }
-- 
2.21.0




[Qemu-block] [PATCH v5 23/42] blockdev: Use CAF in external_snapshot_prepare()

2019-06-12 Thread Max Reitz
This allows us to differentiate between filters and nodes with COW
backing files: Filters cannot be used as overlays at all (for this
function).

Signed-off-by: Max Reitz 
---
 blockdev.c | 7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/blockdev.c b/blockdev.c
index b5c0fd3c49..0f0cf0d9ae 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -1665,7 +1665,12 @@ static void external_snapshot_prepare(BlkActionState 
*common,
 goto out;
 }
 
-if (state->new_bs->backing != NULL) {
+if (state->new_bs->drv->is_filter) {
+error_setg(errp, "Filters cannot be used as overlays");
+goto out;
+}
+
+if (bdrv_filtered_cow_child(state->new_bs)) {
 error_setg(errp, "The overlay already has a backing image");
 goto out;
 }
-- 
2.21.0




[Qemu-block] [PATCH v5 20/42] block/snapshot: Fall back to storage child

2019-06-12 Thread Max Reitz
If the top node's driver does not provide snapshot functionality and we
want to go down the chain, we should go towards the child which stores
the data, i.e. the storage child.

bdrv_snapshot_goto() becomes a bit weird because we may have to redirect
the actual child pointer, so it only works if the storage child is
bs->file or bs->backing (and then we have to find out which it is).

Signed-off-by: Max Reitz 
---
 block/snapshot.c | 74 ++--
 1 file changed, 53 insertions(+), 21 deletions(-)

diff --git a/block/snapshot.c b/block/snapshot.c
index f2f48f926a..58cd667f3a 100644
--- a/block/snapshot.c
+++ b/block/snapshot.c
@@ -154,8 +154,9 @@ int bdrv_can_snapshot(BlockDriverState *bs)
 }
 
 if (!drv->bdrv_snapshot_create) {
-if (bs->file != NULL) {
-return bdrv_can_snapshot(bs->file->bs);
+BlockDriverState *storage_bs = bdrv_storage_bs(bs);
+if (storage_bs) {
+return bdrv_can_snapshot(storage_bs);
 }
 return 0;
 }
@@ -167,14 +168,15 @@ int bdrv_snapshot_create(BlockDriverState *bs,
  QEMUSnapshotInfo *sn_info)
 {
 BlockDriver *drv = bs->drv;
+BlockDriverState *storage_bs = bdrv_storage_bs(bs);
 if (!drv) {
 return -ENOMEDIUM;
 }
 if (drv->bdrv_snapshot_create) {
 return drv->bdrv_snapshot_create(bs, sn_info);
 }
-if (bs->file) {
-return bdrv_snapshot_create(bs->file->bs, sn_info);
+if (storage_bs) {
+return bdrv_snapshot_create(storage_bs, sn_info);
 }
 return -ENOTSUP;
 }
@@ -184,6 +186,7 @@ int bdrv_snapshot_goto(BlockDriverState *bs,
Error **errp)
 {
 BlockDriver *drv = bs->drv;
+BlockDriverState *storage_bs;
 int ret, open_ret;
 
 if (!drv) {
@@ -204,39 +207,66 @@ int bdrv_snapshot_goto(BlockDriverState *bs,
 return ret;
 }
 
-if (bs->file) {
-BlockDriverState *file;
-QDict *options = qdict_clone_shallow(bs->options);
+storage_bs = bdrv_storage_bs(bs);
+if (storage_bs) {
+QDict *options;
 QDict *file_options;
 Error *local_err = NULL;
+bool is_backing_child;
+BdrvChild **child_pointer;
+
+/*
+ * Filters may reference the storage child through
+ * bs->backing.  We need to know whether we are dealing with
+ * bs->backing or bs->file, so we check it here.
+ */
+if (storage_bs == bs->file->bs) {
+is_backing_child = false;
+child_pointer = &bs->file;
+} else if (storage_bs == bs->backing->bs) {
+is_backing_child = true;
+child_pointer = &bs->backing;
+} else {
+/*
+ * The storage child is not referenced by a field in the
+ * BDS object.  We cannot go on then.
+ */
+error_setg(errp, "Block driver does not support snapshots");
+return -ENOTSUP;
+}
+
+options = qdict_clone_shallow(bs->options);
 
-file = bs->file->bs;
 /* Prevent it from getting deleted when detached from bs */
-bdrv_ref(file);
+bdrv_ref(storage_bs);
 
-qdict_extract_subqdict(options, &file_options, "file.");
+qdict_extract_subqdict(options, &file_options,
+   is_backing_child ? "backing." : "file.");
 qobject_unref(file_options);
-qdict_put_str(options, "file", bdrv_get_node_name(file));
+qdict_put_str(options, is_backing_child ? "backing" : "file",
+  bdrv_get_node_name(storage_bs));
 
 if (drv->bdrv_close) {
 drv->bdrv_close(bs);
 }
-bdrv_unref_child(bs, bs->file);
-bs->file = NULL;
 
-ret = bdrv_snapshot_goto(file, snapshot_id, errp);
+assert(storage_bs == (*child_pointer)->bs);
+bdrv_unref_child(bs, *child_pointer);
+*child_pointer = NULL;
+
+ret = bdrv_snapshot_goto(storage_bs, snapshot_id, errp);
 open_ret = drv->bdrv_open(bs, options, bs->open_flags, &local_err);
 qobject_unref(options);
 if (open_ret < 0) {
-bdrv_unref(file);
+bdrv_unref(storage_bs);
 bs->drv = NULL;
 /* A bdrv_snapshot_goto() error takes precedence */
 error_propagate(errp, local_err);
 return ret < 0 ? ret : open_ret;
 }
 
-assert(bs->file->bs == file);
-bdrv_unref(file);
+assert(storage_bs == (*child_pointer)->bs);
+bdrv_unref(storage_bs);
 return ret;
 }
 
@@ -272,6 +302,7 @@ int bdrv_snapshot_delete(BlockDriverState *bs,
  Error **errp)
 {
 BlockDriver *drv = bs->drv;
+BlockDriverState *storage_bs = bdrv_storage_bs(bs);
 int ret;
 
 if (!drv) {
@@ -288,8 +319,8 @@ int bdrv_snapshot_delete(BlockDriverState *bs,
 
 if (drv->bdrv_snapshot_delete) {
 

[Qemu-block] [PATCH v5 22/42] block: Use CAFs in bdrv_get_allocated_file_size()

2019-06-12 Thread Max Reitz
Signed-off-by: Max Reitz 
---
 block.c | 26 --
 1 file changed, 24 insertions(+), 2 deletions(-)

diff --git a/block.c b/block.c
index 11b7ba8cf6..856d9b58be 100644
--- a/block.c
+++ b/block.c
@@ -4511,15 +4511,37 @@ exit:
 int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
 {
 BlockDriver *drv = bs->drv;
+BlockDriverState *storage_bs, *metadata_bs;
+
 if (!drv) {
 return -ENOMEDIUM;
 }
+
 if (drv->bdrv_get_allocated_file_size) {
 return drv->bdrv_get_allocated_file_size(bs);
 }
-if (bs->file) {
-return bdrv_get_allocated_file_size(bs->file->bs);
+
+storage_bs = bdrv_storage_bs(bs);
+metadata_bs = bdrv_metadata_bs(bs);
+
+if (storage_bs) {
+int64_t data_size, metadata_size = 0;
+
+data_size = bdrv_get_allocated_file_size(storage_bs);
+if (data_size < 0) {
+return data_size;
+}
+
+if (storage_bs != metadata_bs) {
+metadata_size = bdrv_get_allocated_file_size(metadata_bs);
+if (metadata_size < 0) {
+return metadata_size;
+}
+}
+
+return data_size + metadata_size;
 }
+
 return -ENOTSUP;
 }
 
-- 
2.21.0




[Qemu-block] [PATCH v5 17/42] block: Use CAFs in bdrv_refresh_limits()

2019-06-12 Thread Max Reitz
Signed-off-by: Max Reitz 
---
 block/io.c | 14 --
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/block/io.c b/block/io.c
index 64408cf19a..659ea0c52a 100644
--- a/block/io.c
+++ b/block/io.c
@@ -151,6 +151,8 @@ static void bdrv_merge_limits(BlockLimits *dst, const 
BlockLimits *src)
 void bdrv_refresh_limits(BlockDriverState *bs, Error **errp)
 {
 BlockDriver *drv = bs->drv;
+BlockDriverState *storage_bs = bdrv_storage_bs(bs);
+BlockDriverState *cow_bs = bdrv_filtered_cow_bs(bs);
 Error *local_err = NULL;
 
 memset(&bs->bl, 0, sizeof(bs->bl));
@@ -164,13 +166,13 @@ void bdrv_refresh_limits(BlockDriverState *bs, Error 
**errp)
 drv->bdrv_aio_preadv) ? 1 : 512;
 
 /* Take some limits from the children as a default */
-if (bs->file) {
-bdrv_refresh_limits(bs->file->bs, &local_err);
+if (storage_bs) {
+bdrv_refresh_limits(storage_bs, &local_err);
 if (local_err) {
 error_propagate(errp, local_err);
 return;
 }
-bdrv_merge_limits(&bs->bl, &bs->file->bs->bl);
+bdrv_merge_limits(&bs->bl, &storage_bs->bl);
 } else {
 bs->bl.min_mem_alignment = 512;
 bs->bl.opt_mem_alignment = getpagesize();
@@ -179,13 +181,13 @@ void bdrv_refresh_limits(BlockDriverState *bs, Error 
**errp)
 bs->bl.max_iov = IOV_MAX;
 }
 
-if (bs->backing) {
-bdrv_refresh_limits(bs->backing->bs, &local_err);
+if (cow_bs) {
+bdrv_refresh_limits(cow_bs, &local_err);
 if (local_err) {
 error_propagate(errp, local_err);
 return;
 }
-bdrv_merge_limits(&bs->bl, &bs->backing->bs->bl);
+bdrv_merge_limits(&bs->bl, &cow_bs->bl);
 }
 
 /* Then let the driver override it */
-- 
2.21.0




[Qemu-block] [PATCH v5 10/42] block: Use CAF in bdrv_is_encrypted()

2019-06-12 Thread Max Reitz
bdrv_is_encrypted() should not only check the BDS's backing child, but
any filtered child: If a filter's child is encrypted, the filter node
itself naturally is encrypted, too.  Furthermore, we need to recurse
down the chain.

(CAF means child access function.)

Signed-off-by: Max Reitz 
---
 block.c | 8 ++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/block.c b/block.c
index 45882a3470..567a0f82c8 100644
--- a/block.c
+++ b/block.c
@@ -4574,10 +4574,14 @@ bool bdrv_is_sg(BlockDriverState *bs)
 
 bool bdrv_is_encrypted(BlockDriverState *bs)
 {
-if (bs->backing && bs->backing->bs->encrypted) {
+BlockDriverState *filtered = bdrv_filtered_bs(bs);
+if (bs->encrypted) {
 return true;
 }
-return bs->encrypted;
+if (filtered && bdrv_is_encrypted(filtered)) {
+return true;
+}
+return false;
 }
 
 const char *bdrv_get_format_name(BlockDriverState *bs)
-- 
2.21.0




[Qemu-block] [PATCH v5 18/42] block: Use CAFs in bdrv_refresh_filename()

2019-06-12 Thread Max Reitz
bdrv_refresh_filename() and the somewhat related bdrv_dirname() should
look to the primary child when they wish to copy the underlying file's
filename.

Signed-off-by: Max Reitz 
---
 block.c | 29 +
 1 file changed, 21 insertions(+), 8 deletions(-)

diff --git a/block.c b/block.c
index db2759c10d..797bec0326 100644
--- a/block.c
+++ b/block.c
@@ -6280,6 +6280,7 @@ void bdrv_refresh_filename(BlockDriverState *bs)
 {
 BlockDriver *drv = bs->drv;
 BdrvChild *child;
+BlockDriverState *primary_child_bs;
 QDict *opts;
 bool backing_overridden;
 bool generate_json_filename; /* Whether our default implementation should
@@ -6348,20 +6349,30 @@ void bdrv_refresh_filename(BlockDriverState *bs)
 qobject_unref(bs->full_open_options);
 bs->full_open_options = opts;
 
+primary_child_bs = bdrv_primary_bs(bs);
+
 if (drv->bdrv_refresh_filename) {
 /* Obsolete information is of no use here, so drop the old file name
  * information before refreshing it */
 bs->exact_filename[0] = '\0';
 
 drv->bdrv_refresh_filename(bs);
-} else if (bs->file) {
-/* Try to reconstruct valid information from the underlying file */
+} else if (primary_child_bs) {
+/*
+ * Try to reconstruct valid information from the underlying
+ * file -- this only works for format nodes (filter nodes
+ * cannot be probed and as such must be selected by the user
+ * either through an options dict, or through a special
+ * filename which the filter driver must construct in its
+ * .bdrv_refresh_filename() implementation).
+ */
 
 bs->exact_filename[0] = '\0';
 
 /*
  * We can use the underlying file's filename if:
  * - it has a filename,
+ * - the current BDS is not a filter,
  * - the file is a protocol BDS, and
  * - opening that file (as this BDS's format) will automatically create
  *   the BDS tree we have right now, that is:
@@ -6370,11 +6381,11 @@ void bdrv_refresh_filename(BlockDriverState *bs)
  *   - no non-file child of this BDS has been overridden by the user
  *   Both of these conditions are represented by 
generate_json_filename.
  */
-if (bs->file->bs->exact_filename[0] &&
-bs->file->bs->drv->bdrv_file_open &&
-!generate_json_filename)
+if (primary_child_bs->exact_filename[0] &&
+primary_child_bs->drv->bdrv_file_open &&
+!drv->is_filter && !generate_json_filename)
 {
-strcpy(bs->exact_filename, bs->file->bs->exact_filename);
+strcpy(bs->exact_filename, primary_child_bs->exact_filename);
 }
 }
 
@@ -6391,6 +6402,7 @@ void bdrv_refresh_filename(BlockDriverState *bs)
 char *bdrv_dirname(BlockDriverState *bs, Error **errp)
 {
 BlockDriver *drv = bs->drv;
+BlockDriverState *child_bs;
 
 if (!drv) {
 error_setg(errp, "Node '%s' is ejected", bs->node_name);
@@ -6401,8 +6413,9 @@ char *bdrv_dirname(BlockDriverState *bs, Error **errp)
 return drv->bdrv_dirname(bs, errp);
 }
 
-if (bs->file) {
-return bdrv_dirname(bs->file->bs, errp);
+child_bs = bdrv_primary_bs(bs);
+if (child_bs) {
+return bdrv_dirname(child_bs, errp);
 }
 
 bdrv_refresh_filename(bs);
-- 
2.21.0




[Qemu-block] [PATCH v5 15/42] block: Re-evaluate backing file handling in reopen

2019-06-12 Thread Max Reitz
Reopening a node's backing child needs a bit of special handling because
the "backing" child has different defaults than all other children
(among other things).  Adding filter support here is a bit more
difficult than just using the child access functions.  In fact, we often
have to directly use bs->backing because these functions are about the
"backing" child (which may or may not be the COW backing file).

Signed-off-by: Max Reitz 
---
 block.c | 36 +---
 1 file changed, 29 insertions(+), 7 deletions(-)

diff --git a/block.c b/block.c
index 505b3e9a01..db2759c10d 100644
--- a/block.c
+++ b/block.c
@@ -3542,17 +3542,39 @@ static int bdrv_reopen_parse_backing(BDRVReopenState 
*reopen_state,
 }
 }
 
+/*
+ * Ensure that @bs can really handle backing files, because we are
+ * about to give it one (or swap the existing one)
+ */
+if (bs->drv->is_filter) {
+/* Filters always have a file or a backing child */
+if (!bs->backing) {
+error_setg(errp, "'%s' is a %s filter node that does not support a 
"
+   "backing child", bs->node_name, bs->drv->format_name);
+return -EINVAL;
+}
+} else if (!bs->drv->supports_backing) {
+error_setg(errp, "Driver '%s' of node '%s' does not support backing "
+   "files", bs->drv->format_name, bs->node_name);
+return -EINVAL;
+}
+
 /*
  * Find the "actual" backing file by skipping all links that point
  * to an implicit node, if any (e.g. a commit filter node).
+ * We cannot use any of the bdrv_skip_*() functions here because
+ * those return the first explicit node, while we are looking for
+ * its overlay here.
  */
 overlay_bs = bs;
-while (backing_bs(overlay_bs) && backing_bs(overlay_bs)->implicit) {
-overlay_bs = backing_bs(overlay_bs);
+while (bdrv_filtered_bs(overlay_bs) &&
+   bdrv_filtered_bs(overlay_bs)->implicit)
+{
+overlay_bs = bdrv_filtered_bs(overlay_bs);
 }
 
 /* If we want to replace the backing file we need some extra checks */
-if (new_backing_bs != backing_bs(overlay_bs)) {
+if (new_backing_bs != bdrv_filtered_bs(overlay_bs)) {
 /* Check for implicit nodes between bs and its backing file */
 if (bs != overlay_bs) {
 error_setg(errp, "Cannot change backing link if '%s' has "
@@ -3560,8 +3582,8 @@ static int bdrv_reopen_parse_backing(BDRVReopenState 
*reopen_state,
 return -EPERM;
 }
 /* Check if the backing link that we want to replace is frozen */
-if (bdrv_is_backing_chain_frozen(overlay_bs, backing_bs(overlay_bs),
- errp)) {
+if (bdrv_is_backing_chain_frozen(overlay_bs,
+ child_bs(overlay_bs->backing), errp)) 
{
 return -EPERM;
 }
 reopen_state->replace_backing_bs = true;
@@ -3712,7 +3734,7 @@ int bdrv_reopen_prepare(BDRVReopenState *reopen_state, 
BlockReopenQueue *queue,
  * its metadata. Otherwise the 'backing' option can be omitted.
  */
 if (drv->supports_backing && reopen_state->backing_missing &&
-(backing_bs(reopen_state->bs) || reopen_state->bs->backing_file[0])) {
+(reopen_state->bs->backing || reopen_state->bs->backing_file[0])) {
 error_setg(errp, "backing is missing for '%s'",
reopen_state->bs->node_name);
 ret = -EINVAL;
@@ -3857,7 +3879,7 @@ void bdrv_reopen_commit(BDRVReopenState *reopen_state)
  * from bdrv_set_backing_hd()) has the new values.
  */
 if (reopen_state->replace_backing_bs) {
-BlockDriverState *old_backing_bs = backing_bs(bs);
+BlockDriverState *old_backing_bs = child_bs(bs->backing);
 assert(!old_backing_bs || !old_backing_bs->implicit);
 /* Abort the permission update on the backing bs we're detaching */
 if (old_backing_bs) {
-- 
2.21.0




[Qemu-block] [PATCH v5 07/42] block: *filtered_cow_child() for *has_zero_init()

2019-06-12 Thread Max Reitz
bdrv_has_zero_init() and the related bdrv_unallocated_blocks_are_zero()
should use bdrv_filtered_cow_child() if they want to check whether the
given BDS has a COW backing file.

Signed-off-by: Max Reitz 
---
 block.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/block.c b/block.c
index be18130944..64d6190984 100644
--- a/block.c
+++ b/block.c
@@ -4933,7 +4933,7 @@ int bdrv_has_zero_init(BlockDriverState *bs)
 
 /* If BS is a copy on write image, it is initialized to
the contents of the base image, which may not be zeroes.  */
-if (bs->backing) {
+if (bdrv_filtered_cow_child(bs)) {
 return 0;
 }
 if (bs->drv->bdrv_has_zero_init) {
@@ -4951,7 +4951,7 @@ bool bdrv_unallocated_blocks_are_zero(BlockDriverState 
*bs)
 {
 BlockDriverInfo bdi;
 
-if (bs->backing) {
+if (bdrv_filtered_cow_child(bs)) {
 return false;
 }
 
-- 
2.21.0




[Qemu-block] [PATCH v5 11/42] block: Add bdrv_supports_compressed_writes()

2019-06-12 Thread Max Reitz
Filters cannot compress data themselves, but they still have to implement
.bdrv_co_pwritev_compressed() (otherwise they cannot forward compressed
writes).  Therefore, checking whether
bs->drv->bdrv_co_pwritev_compressed is non-NULL is not sufficient to
know whether the node can actually handle compressed writes.  This
function looks down the filter chain to see whether there is a
non-filter that can actually convert the compressed writes into
compressed data (and thus normal writes).

Signed-off-by: Max Reitz 
---
 include/block/block.h |  1 +
 block.c   | 22 ++
 2 files changed, 23 insertions(+)

diff --git a/include/block/block.h b/include/block/block.h
index 687c03b275..7835c5b370 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -487,6 +487,7 @@ void bdrv_next_cleanup(BdrvNextIterator *it);
 
 BlockDriverState *bdrv_next_monitor_owned(BlockDriverState *bs);
 bool bdrv_is_encrypted(BlockDriverState *bs);
+bool bdrv_supports_compressed_writes(BlockDriverState *bs);
 void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
  void *opaque, bool read_only);
 const char *bdrv_get_node_name(const BlockDriverState *bs);
diff --git a/block.c b/block.c
index 567a0f82c8..97774b7b06 100644
--- a/block.c
+++ b/block.c
@@ -4584,6 +4584,28 @@ bool bdrv_is_encrypted(BlockDriverState *bs)
 return false;
 }
 
+/**
+ * Return whether the given node supports compressed writes.
+ */
+bool bdrv_supports_compressed_writes(BlockDriverState *bs)
+{
+BlockDriverState *filtered = bdrv_filtered_rw_bs(bs);
+
+if (!bs->drv || !bs->drv->bdrv_co_pwritev_compressed) {
+return false;
+}
+
+if (filtered) {
+/*
+ * Filters can only forward compressed writes, so we have to
+ * check the child.
+ */
+return bdrv_supports_compressed_writes(filtered);
+}
+
+return true;
+}
+
 const char *bdrv_get_format_name(BlockDriverState *bs)
 {
 return bs->drv ? bs->drv->format_name : NULL;
-- 
2.21.0




[Qemu-block] [PATCH v5 06/42] qcow2: Implement .bdrv_storage_child()

2019-06-12 Thread Max Reitz
Signed-off-by: Max Reitz 
---
 block/qcow2.c | 9 +
 1 file changed, 9 insertions(+)

diff --git a/block/qcow2.c b/block/qcow2.c
index 9396d490d5..57675c9416 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -5085,6 +5085,13 @@ void qcow2_signal_corruption(BlockDriverState *bs, bool 
fatal, int64_t offset,
 s->signaled_corruption = true;
 }
 
+static BdrvChild *qcow2_storage_child(BlockDriverState *bs)
+{
+BDRVQcow2State *s = bs->opaque;
+
+return s->data_file;
+}
+
 static QemuOptsList qcow2_create_opts = {
 .name = "qcow2-create-opts",
 .head = QTAILQ_HEAD_INITIALIZER(qcow2_create_opts.head),
@@ -5231,6 +5238,8 @@ BlockDriver bdrv_qcow2 = {
 .bdrv_reopen_bitmaps_rw = qcow2_reopen_bitmaps_rw,
 .bdrv_can_store_new_dirty_bitmap = qcow2_can_store_new_dirty_bitmap,
 .bdrv_remove_persistent_dirty_bitmap = 
qcow2_remove_persistent_dirty_bitmap,
+
+.bdrv_storage_child = qcow2_storage_child,
 };
 
 static void bdrv_qcow2_init(void)
-- 
2.21.0




[Qemu-block] [PATCH v5 13/42] block: Use CAFs in block status functions

2019-06-12 Thread Max Reitz
Use the child access functions in the block status inquiry functions as
appropriate.

Signed-off-by: Max Reitz 
---
 block/io.c | 19 ++-
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/block/io.c b/block/io.c
index 73ade04834..53aabf86b5 100644
--- a/block/io.c
+++ b/block/io.c
@@ -2150,11 +2150,12 @@ static int coroutine_fn 
bdrv_co_block_status(BlockDriverState *bs,
 if (ret & (BDRV_BLOCK_DATA | BDRV_BLOCK_ZERO)) {
 ret |= BDRV_BLOCK_ALLOCATED;
 } else if (want_zero) {
+BlockDriverState *cow_bs = bdrv_filtered_cow_bs(bs);
+
 if (bdrv_unallocated_blocks_are_zero(bs)) {
 ret |= BDRV_BLOCK_ZERO;
-} else if (bs->backing) {
-BlockDriverState *bs2 = bs->backing->bs;
-int64_t size2 = bdrv_getlength(bs2);
+} else if (cow_bs) {
+int64_t size2 = bdrv_getlength(cow_bs);
 
 if (size2 >= 0 && offset >= size2) {
 ret |= BDRV_BLOCK_ZERO;
@@ -2220,7 +2221,7 @@ static int coroutine_fn 
bdrv_co_block_status_above(BlockDriverState *bs,
 bool first = true;
 
 assert(bs != base);
-for (p = bs; p != base; p = backing_bs(p)) {
+for (p = bs; p != base; p = bdrv_filtered_bs(p)) {
 ret = bdrv_co_block_status(p, want_zero, offset, bytes, pnum, map,
file);
 if (ret < 0) {
@@ -2306,7 +2307,7 @@ int bdrv_block_status_above(BlockDriverState *bs, 
BlockDriverState *base,
 int bdrv_block_status(BlockDriverState *bs, int64_t offset, int64_t bytes,
   int64_t *pnum, int64_t *map, BlockDriverState **file)
 {
-return bdrv_block_status_above(bs, backing_bs(bs),
+return bdrv_block_status_above(bs, bdrv_filtered_bs(bs),
offset, bytes, pnum, map, file);
 }
 
@@ -2316,9 +2317,9 @@ int coroutine_fn bdrv_is_allocated(BlockDriverState *bs, 
int64_t offset,
 int ret;
 int64_t dummy;
 
-ret = bdrv_common_block_status_above(bs, backing_bs(bs), false, offset,
- bytes, pnum ? pnum : &dummy, NULL,
- NULL);
+ret = bdrv_common_block_status_above(bs, bdrv_filtered_bs(bs), false,
+ offset, bytes, pnum ? pnum : &dummy,
+ NULL, NULL);
 if (ret < 0) {
 return ret;
 }
@@ -2372,7 +2373,7 @@ int bdrv_is_allocated_above(BlockDriverState *top,
 n = pnum_inter;
 }
 
-intermediate = backing_bs(intermediate);
+intermediate = bdrv_filtered_bs(intermediate);
 }
 
 *pnum = n;
-- 
2.21.0




[Qemu-block] [PATCH v5 02/42] copy-on-read: Support compressed writes

2019-06-12 Thread Max Reitz
Signed-off-by: Max Reitz 
---
 block/copy-on-read.c | 11 +++
 1 file changed, 11 insertions(+)

diff --git a/block/copy-on-read.c b/block/copy-on-read.c
index 53972b1da3..88e1c1f538 100644
--- a/block/copy-on-read.c
+++ b/block/copy-on-read.c
@@ -114,6 +114,16 @@ static int coroutine_fn cor_co_pdiscard(BlockDriverState 
*bs,
 }
 
 
+static int coroutine_fn cor_co_pwritev_compressed(BlockDriverState *bs,
+  uint64_t offset,
+  uint64_t bytes,
+  QEMUIOVector *qiov)
+{
+return bdrv_co_pwritev(bs->file, offset, bytes, qiov,
+   BDRV_REQ_WRITE_COMPRESSED);
+}
+
+
 static void cor_eject(BlockDriverState *bs, bool eject_flag)
 {
 bdrv_eject(bs->file->bs, eject_flag);
@@ -146,6 +156,7 @@ static BlockDriver bdrv_copy_on_read = {
 .bdrv_co_pwritev= cor_co_pwritev,
 .bdrv_co_pwrite_zeroes  = cor_co_pwrite_zeroes,
 .bdrv_co_pdiscard   = cor_co_pdiscard,
+.bdrv_co_pwritev_compressed = cor_co_pwritev_compressed,
 
 .bdrv_eject = cor_eject,
 .bdrv_lock_medium   = cor_lock_medium,
-- 
2.21.0




[Qemu-block] [PATCH v5 09/42] block: Include filters when freezing backing chain

2019-06-12 Thread Max Reitz
In order to make filters work in backing chains, the associated
functions must be able to deal with them and freeze all filter links, be
they COW or R/W filter links.

While at it, add some comments that note which functions require their
caller to ensure that a given child link is not frozen, and how the
callers do so.

Signed-off-by: Max Reitz 
---
 block.c | 45 -
 1 file changed, 32 insertions(+), 13 deletions(-)

diff --git a/block.c b/block.c
index 8438b0699e..45882a3470 100644
--- a/block.c
+++ b/block.c
@@ -2214,12 +2214,15 @@ static void bdrv_replace_child_noperm(BdrvChild *child,
  * If @new_bs is not NULL, bdrv_check_perm() must be called beforehand, as this
  * function uses bdrv_set_perm() to update the permissions according to the new
  * reference that @new_bs gets.
+ *
+ * Callers must ensure that child->frozen is false.
  */
 static void bdrv_replace_child(BdrvChild *child, BlockDriverState *new_bs)
 {
 BlockDriverState *old_bs = child->bs;
 uint64_t perm, shared_perm;
 
+/* Asserts that child->frozen == false */
 bdrv_replace_child_noperm(child, new_bs);
 
 if (old_bs) {
@@ -2360,6 +2363,7 @@ static void bdrv_detach_child(BdrvChild *child)
 g_free(child);
 }
 
+/* Callers must ensure that child->frozen is false. */
 void bdrv_root_unref_child(BdrvChild *child)
 {
 BlockDriverState *child_bs;
@@ -2369,6 +2373,7 @@ void bdrv_root_unref_child(BdrvChild *child)
 bdrv_unref(child_bs);
 }
 
+/* Callers must ensure that child->frozen is false. */
 void bdrv_unref_child(BlockDriverState *parent, BdrvChild *child)
 {
 if (child == NULL) {
@@ -2435,6 +2440,7 @@ void bdrv_set_backing_hd(BlockDriverState *bs, 
BlockDriverState *backing_hd,
 }
 
 if (bs->backing) {
+/* Cannot be frozen, we checked that above */
 bdrv_unref_child(bs, bs->backing);
 }
 
@@ -3908,6 +3914,7 @@ static void bdrv_close(BlockDriverState *bs)
 
 if (bs->drv) {
 if (bs->drv->bdrv_close) {
+/* Must unfreeze all children, so bdrv_unref_child() works */
 bs->drv->bdrv_close(bs);
 }
 bs->drv = NULL;
@@ -4281,17 +4288,20 @@ BlockDriverState *bdrv_find_base(BlockDriverState *bs)
  * Return true if at least one of the backing links between @bs and
  * @base is frozen. @errp is set if that's the case.
  * @base must be reachable from @bs, or NULL.
+ * (Filters are treated as normal elements of the backing chain.)
  */
 bool bdrv_is_backing_chain_frozen(BlockDriverState *bs, BlockDriverState *base,
   Error **errp)
 {
 BlockDriverState *i;
+BdrvChild *child;
 
-for (i = bs; i != base; i = backing_bs(i)) {
-if (i->backing && i->backing->frozen) {
+for (i = bs; i != base; i = child_bs(child)) {
+child = bdrv_filtered_child(i);
+
+if (child && child->frozen) {
 error_setg(errp, "Cannot change '%s' link from '%s' to '%s'",
-   i->backing->name, i->node_name,
-   backing_bs(i)->node_name);
+   child->name, i->node_name, child->bs->node_name);
 return true;
 }
 }
@@ -4305,19 +4315,22 @@ bool bdrv_is_backing_chain_frozen(BlockDriverState *bs, 
BlockDriverState *base,
  * none of the links are modified.
  * @base must be reachable from @bs, or NULL.
  * Returns 0 on success. On failure returns < 0 and sets @errp.
+ * (Filters are treated as normal elements of the backing chain.)
  */
 int bdrv_freeze_backing_chain(BlockDriverState *bs, BlockDriverState *base,
   Error **errp)
 {
 BlockDriverState *i;
+BdrvChild *child;
 
 if (bdrv_is_backing_chain_frozen(bs, base, errp)) {
 return -EPERM;
 }
 
-for (i = bs; i != base; i = backing_bs(i)) {
-if (i->backing) {
-i->backing->frozen = true;
+for (i = bs; i != base; i = child_bs(child)) {
+child = bdrv_filtered_child(i);
+if (child) {
+child->frozen = true;
 }
 }
 
@@ -4328,15 +4341,18 @@ int bdrv_freeze_backing_chain(BlockDriverState *bs, 
BlockDriverState *base,
  * Unfreeze all backing links between @bs and @base. The caller must
  * ensure that all links are frozen before using this function.
  * @base must be reachable from @bs, or NULL.
+ * (Filters are treated as normal elements of the backing chain.)
  */
 void bdrv_unfreeze_backing_chain(BlockDriverState *bs, BlockDriverState *base)
 {
 BlockDriverState *i;
+BdrvChild *child;
 
-for (i = bs; i != base; i = backing_bs(i)) {
-if (i->backing) {
-assert(i->backing->frozen);
-i->backing->frozen = false;
+for (i = bs; i != base; i = child_bs(child)) {
+child = bdrv_filtered_child(i);
+if (child) {
+assert(child->frozen);
+child->frozen = false;
 }
 }
 }
@@ -4438,8 +4454,11 @@ int bdrv_drop_intermediate(BlockDriv

[Qemu-block] [PATCH v5 12/42] block: Use bdrv_filtered_rw* where obvious

2019-06-12 Thread Max Reitz
Places that use patterns like

if (bs->drv->is_filter && bs->file) {
... something about bs->file->bs ...
}

should be

BlockDriverState *filtered = bdrv_filtered_rw_bs(bs);
if (filtered) {
... something about @filtered ...
}

instead.

Signed-off-by: Max Reitz 
---
 block.c| 23 +++
 block/io.c |  5 +++--
 2 files changed, 18 insertions(+), 10 deletions(-)

diff --git a/block.c b/block.c
index 97774b7b06..11f37983d9 100644
--- a/block.c
+++ b/block.c
@@ -556,11 +556,12 @@ int bdrv_create_file(const char *filename, QemuOpts 
*opts, Error **errp)
 int bdrv_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz)
 {
 BlockDriver *drv = bs->drv;
+BlockDriverState *filtered = bdrv_filtered_rw_bs(bs);
 
 if (drv && drv->bdrv_probe_blocksizes) {
 return drv->bdrv_probe_blocksizes(bs, bsz);
-} else if (drv && drv->is_filter && bs->file) {
-return bdrv_probe_blocksizes(bs->file->bs, bsz);
+} else if (filtered) {
+return bdrv_probe_blocksizes(filtered, bsz);
 }
 
 return -ENOTSUP;
@@ -575,11 +576,12 @@ int bdrv_probe_blocksizes(BlockDriverState *bs, 
BlockSizes *bsz)
 int bdrv_probe_geometry(BlockDriverState *bs, HDGeometry *geo)
 {
 BlockDriver *drv = bs->drv;
+BlockDriverState *filtered = bdrv_filtered_rw_bs(bs);
 
 if (drv && drv->bdrv_probe_geometry) {
 return drv->bdrv_probe_geometry(bs, geo);
-} else if (drv && drv->is_filter && bs->file) {
-return bdrv_probe_geometry(bs->file->bs, geo);
+} else if (filtered) {
+return bdrv_probe_geometry(filtered, geo);
 }
 
 return -ENOTSUP;
@@ -4972,6 +4974,8 @@ int bdrv_has_zero_init_1(BlockDriverState *bs)
 
 int bdrv_has_zero_init(BlockDriverState *bs)
 {
+BlockDriverState *filtered;
+
 if (!bs->drv) {
 return 0;
 }
@@ -4984,8 +4988,10 @@ int bdrv_has_zero_init(BlockDriverState *bs)
 if (bs->drv->bdrv_has_zero_init) {
 return bs->drv->bdrv_has_zero_init(bs);
 }
-if (bs->file && bs->drv->is_filter) {
-return bdrv_has_zero_init(bs->file->bs);
+
+filtered = bdrv_filtered_rw_bs(bs);
+if (filtered) {
+return bdrv_has_zero_init(filtered);
 }
 
 /* safe default */
@@ -5030,8 +5036,9 @@ int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo 
*bdi)
 return -ENOMEDIUM;
 }
 if (!drv->bdrv_get_info) {
-if (bs->file && drv->is_filter) {
-return bdrv_get_info(bs->file->bs, bdi);
+BlockDriverState *filtered = bdrv_filtered_rw_bs(bs);
+if (filtered) {
+return bdrv_get_info(filtered, bdi);
 }
 return -ENOTSUP;
 }
diff --git a/block/io.c b/block/io.c
index 2408abffd9..73ade04834 100644
--- a/block/io.c
+++ b/block/io.c
@@ -3147,8 +3147,9 @@ int coroutine_fn bdrv_co_truncate(BdrvChild *child, 
int64_t offset,
 }
 
 if (!drv->bdrv_co_truncate) {
-if (bs->file && drv->is_filter) {
-ret = bdrv_co_truncate(bs->file, offset, prealloc, errp);
+BdrvChild *filtered = bdrv_filtered_rw_child(bs);
+if (filtered) {
+ret = bdrv_co_truncate(filtered, offset, prealloc, errp);
 goto out;
 }
 error_setg(errp, "Image format driver does not support resize");
-- 
2.21.0




[Qemu-block] [PATCH v5 08/42] block: bdrv_set_backing_hd() is about bs->backing

2019-06-12 Thread Max Reitz
bdrv_set_backing_hd() is a function that explicitly cares about the
bs->backing child.  Highlight that in its description and use
child_bs(bs->backing) instead of backing_bs(bs) to make it more obvious.

Signed-off-by: Max Reitz 
---
 block.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/block.c b/block.c
index 64d6190984..8438b0699e 100644
--- a/block.c
+++ b/block.c
@@ -2417,7 +2417,7 @@ static bool bdrv_inherits_from_recursive(BlockDriverState 
*child,
 }
 
 /*
- * Sets the backing file link of a BDS. A new reference is created; callers
+ * Sets the bs->backing link of a BDS. A new reference is created; callers
  * which don't need their own reference any more must call bdrv_unref().
  */
 void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd,
@@ -2426,7 +2426,7 @@ void bdrv_set_backing_hd(BlockDriverState *bs, 
BlockDriverState *backing_hd,
 bool update_inherits_from = bdrv_chain_contains(bs, backing_hd) &&
 bdrv_inherits_from_recursive(backing_hd, bs);
 
-if (bdrv_is_backing_chain_frozen(bs, backing_bs(bs), errp)) {
+if (bdrv_is_backing_chain_frozen(bs, child_bs(bs->backing), errp)) {
 return;
 }
 
-- 
2.21.0




[Qemu-block] [PATCH v5 05/42] block: Add chain helper functions

2019-06-12 Thread Max Reitz
Add some helper functions for skipping filters in a chain of block
nodes.

Signed-off-by: Max Reitz 
---
 include/block/block_int.h |  3 +++
 block.c   | 55 +++
 2 files changed, 58 insertions(+)

diff --git a/include/block/block_int.h b/include/block/block_int.h
index 7ce71623f8..875a33f255 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -1264,6 +1264,9 @@ BdrvChild *bdrv_filtered_child(BlockDriverState *bs);
 BdrvChild *bdrv_metadata_child(BlockDriverState *bs);
 BdrvChild *bdrv_storage_child(BlockDriverState *bs);
 BdrvChild *bdrv_primary_child(BlockDriverState *bs);
+BlockDriverState *bdrv_skip_implicit_filters(BlockDriverState *bs);
+BlockDriverState *bdrv_skip_rw_filters(BlockDriverState *bs);
+BlockDriverState *bdrv_backing_chain_next(BlockDriverState *bs);
 
 static inline BlockDriverState *child_bs(BdrvChild *child)
 {
diff --git a/block.c b/block.c
index 724d8889a6..be18130944 100644
--- a/block.c
+++ b/block.c
@@ -6494,3 +6494,58 @@ BdrvChild *bdrv_primary_child(BlockDriverState *bs)
 {
 return bdrv_filtered_rw_child(bs) ?: bs->file;
 }
+
+static BlockDriverState *bdrv_skip_filters(BlockDriverState *bs,
+   bool stop_on_explicit_filter)
+{
+BdrvChild *filtered;
+
+if (!bs) {
+return NULL;
+}
+
+while (!(stop_on_explicit_filter && !bs->implicit)) {
+filtered = bdrv_filtered_rw_child(bs);
+if (!filtered) {
+break;
+}
+bs = filtered->bs;
+}
+/*
+ * Note that this treats nodes with bs->drv == NULL as not being
+ * R/W filters (bs->drv == NULL should be replaced by something
+ * else anyway).
+ * The advantage of this behavior is that this function will thus
+ * always return a non-NULL value (given a non-NULL @bs).
+ */
+
+return bs;
+}
+
+/*
+ * Return the first BDS that has not been added implicitly or that
+ * does not have an RW-filtered child down the chain starting from @bs
+ * (including @bs itself).
+ */
+BlockDriverState *bdrv_skip_implicit_filters(BlockDriverState *bs)
+{
+return bdrv_skip_filters(bs, true);
+}
+
+/*
+ * Return the first BDS that does not have an RW-filtered child down
+ * the chain starting from @bs (including @bs itself).
+ */
+BlockDriverState *bdrv_skip_rw_filters(BlockDriverState *bs)
+{
+return bdrv_skip_filters(bs, false);
+}
+
+/*
+ * For a backing chain, return the first non-filter backing image of
+ * the first non-filter image.
+ */
+BlockDriverState *bdrv_backing_chain_next(BlockDriverState *bs)
+{
+return 
bdrv_skip_rw_filters(bdrv_filtered_cow_bs(bdrv_skip_rw_filters(bs)));
+}
-- 
2.21.0




[Qemu-block] [PATCH v5 04/42] block: Add child access functions

2019-06-12 Thread Max Reitz
There are BDS children that the general block layer code can access,
namely bs->file and bs->backing.  Since the introduction of filters and
external data files, their meaning is not quite clear.  bs->backing can
be a COW source, or it can be an R/W-filtered child; bs->file can be an
R/W-filtered child, it can be data and metadata storage, or it can be
just metadata storage.

This overloading really is not helpful.  This patch adds functions that
retrieve the correct child for each exact purpose.  Later patches in
this series will make use of them.  Doing so will allow us to handle
filter nodes and external data files in a meaningful way.

Signed-off-by: Max Reitz 
---
 include/block/block_int.h | 57 --
 block.c   | 99 +++
 2 files changed, 153 insertions(+), 3 deletions(-)

diff --git a/include/block/block_int.h b/include/block/block_int.h
index 58fca37ba3..7ce71623f8 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -90,9 +90,11 @@ struct BlockDriver {
 int instance_size;
 
 /* set to true if the BlockDriver is a block filter. Block filters pass
- * certain callbacks that refer to data (see block.c) to their bs->file if
- * the driver doesn't implement them. Drivers that do not wish to forward
- * must implement them and return -ENOTSUP.
+ * certain callbacks that refer to data (see block.c) to their bs->file
+ * or bs->backing (whichever one exists) if the driver doesn't implement
+ * them. Drivers that do not wish to forward must implement them and return
+ * -ENOTSUP.
+ * Note that filters are not allowed to modify data.
  */
 bool is_filter;
 /* for snapshots block filter like Quorum can implement the
@@ -562,6 +564,13 @@ struct BlockDriver {
  * If this pointer is NULL, the array is considered empty.
  * "filename" and "driver" are always considered strong. */
 const char *const *strong_runtime_opts;
+
+/**
+ * Return the data storage child, if there is exactly one.  If
+ * this function is not implemented, the block layer will assume
+ * bs->file to be this child.
+ */
+BdrvChild *(*bdrv_storage_child)(BlockDriverState *bs);
 };
 
 typedef struct BlockLimits {
@@ -1249,4 +1258,46 @@ int coroutine_fn bdrv_co_copy_range_to(BdrvChild *src, 
uint64_t src_offset,
 
 int refresh_total_sectors(BlockDriverState *bs, int64_t hint);
 
+BdrvChild *bdrv_filtered_cow_child(BlockDriverState *bs);
+BdrvChild *bdrv_filtered_rw_child(BlockDriverState *bs);
+BdrvChild *bdrv_filtered_child(BlockDriverState *bs);
+BdrvChild *bdrv_metadata_child(BlockDriverState *bs);
+BdrvChild *bdrv_storage_child(BlockDriverState *bs);
+BdrvChild *bdrv_primary_child(BlockDriverState *bs);
+
+static inline BlockDriverState *child_bs(BdrvChild *child)
+{
+return child ? child->bs : NULL;
+}
+
+static inline BlockDriverState *bdrv_filtered_cow_bs(BlockDriverState *bs)
+{
+return child_bs(bdrv_filtered_cow_child(bs));
+}
+
+static inline BlockDriverState *bdrv_filtered_rw_bs(BlockDriverState *bs)
+{
+return child_bs(bdrv_filtered_rw_child(bs));
+}
+
+static inline BlockDriverState *bdrv_filtered_bs(BlockDriverState *bs)
+{
+return child_bs(bdrv_filtered_child(bs));
+}
+
+static inline BlockDriverState *bdrv_metadata_bs(BlockDriverState *bs)
+{
+return child_bs(bdrv_metadata_child(bs));
+}
+
+static inline BlockDriverState *bdrv_storage_bs(BlockDriverState *bs)
+{
+return child_bs(bdrv_storage_child(bs));
+}
+
+static inline BlockDriverState *bdrv_primary_bs(BlockDriverState *bs)
+{
+return child_bs(bdrv_primary_child(bs));
+}
+
 #endif /* BLOCK_INT_H */
diff --git a/block.c b/block.c
index 6bc51e371f..724d8889a6 100644
--- a/block.c
+++ b/block.c
@@ -6395,3 +6395,102 @@ bool bdrv_can_store_new_dirty_bitmap(BlockDriverState 
*bs, const char *name,
 
 return drv->bdrv_can_store_new_dirty_bitmap(bs, name, granularity, errp);
 }
+
+/*
+ * Return the child that @bs acts as an overlay for, and from which data may be
+ * copied in COW or COR operations.  Usually this is the backing file.
+ */
+BdrvChild *bdrv_filtered_cow_child(BlockDriverState *bs)
+{
+if (!bs || !bs->drv) {
+return NULL;
+}
+
+if (bs->drv->is_filter) {
+return NULL;
+}
+
+return bs->backing;
+}
+
+/*
+ * If @bs acts as a pass-through filter for one of its children,
+ * return that child.  "Pass-through" means that write operations to
+ * @bs are forwarded to that child instead of triggering COW.
+ */
+BdrvChild *bdrv_filtered_rw_child(BlockDriverState *bs)
+{
+if (!bs || !bs->drv) {
+return NULL;
+}
+
+if (!bs->drv->is_filter) {
+return NULL;
+}
+
+/* Only one of @backing or @file may be used */
+assert(!(bs->backing && bs->file));
+
+return bs->backing ?: bs->file;
+}
+
+/*
+ * Return any filtered child, independently of how it reacts to write
+ * accesses and whether data is copied on

[Qemu-block] [PATCH v5 01/42] block: Mark commit and mirror as filter drivers

2019-06-12 Thread Max Reitz
The commit and mirror block nodes are filters, so they should be marked
as such.  (Strictly speaking, BDS.is_filter's documentation states that
a filter's child must be bs->file.  The following patch will relax this
restriction, however.)

Signed-off-by: Max Reitz 
Reviewed-by: Alberto Garcia 
Reviewed-by: Eric Blake 
---
 block/commit.c | 2 ++
 block/mirror.c | 2 ++
 2 files changed, 4 insertions(+)

diff --git a/block/commit.c b/block/commit.c
index c815def89a..f20a26fecd 100644
--- a/block/commit.c
+++ b/block/commit.c
@@ -256,6 +256,8 @@ static BlockDriver bdrv_commit_top = {
 .bdrv_co_block_status   = bdrv_co_block_status_from_backing,
 .bdrv_refresh_filename  = bdrv_commit_top_refresh_filename,
 .bdrv_child_perm= bdrv_commit_top_child_perm,
+
+.is_filter  = true,
 };
 
 void commit_start(const char *job_id, BlockDriverState *bs,
diff --git a/block/mirror.c b/block/mirror.c
index f8bdb5b21b..4fa8f57c80 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -1480,6 +1480,8 @@ static BlockDriver bdrv_mirror_top = {
 .bdrv_co_block_status   = bdrv_co_block_status_from_backing,
 .bdrv_refresh_filename  = bdrv_mirror_top_refresh_filename,
 .bdrv_child_perm= bdrv_mirror_top_child_perm,
+
+.is_filter  = true,
 };
 
 static void mirror_start_job(const char *job_id, BlockDriverState *bs,
-- 
2.21.0




[Qemu-block] [PATCH v5 00/42] block: Deal with filters

2019-06-12 Thread Max Reitz
Hi,

When we introduced filters, we did it a bit casually.  Sure, we talked a
lot about them before, but that was mostly discussion about where
implicit filters should be added to the graph (note that we currently
only have two implicit filters, those being mirror and commit).  But in
the end, we really just designated some drivers filters (Quorum,
blkdebug, etc.) and added some specifically (throttle, COR), without
really looking through the block layer to see where issues might occur.

It turns out vast areas of the block layer just don’t know about filters
and cannot really handle them.  Many cases will work in practice, in
others, well, too bad, you cannot use some feature because some part
deep inside the block layer looks at your filters and thinks they are
format nodes.

This is one reason why this series is needed.  Over time (since v1), a
second reason has made its way in:

bs->file is not necessarily the place where a node’s data is stored.
qcow2 now has external data files, and currently there is no way for the
general block layer to know that the data is not stored in bs->file.
Right now, I do not think that has any real consequences (all functions
that need access to the actual data storage file should only do so as a
fallback if the driver does not provide some functionality, but qcow2
should provide it all), but it still shows that we need some way to let
the general block layer know about such data files.  (Also, I will need
this for v1 of my “Inquire images’ rotational info” series.)

I won’t go on and on about this series now, I think the patches pretty
much speak for themselves now.  If the cover letter gets too long,
nobody reads it anyway (see previous versions).


*** This series depends on some others. ***

Dependencies:
- [PATCH 0/4] block: Keep track of parent quiescing
- [PATCH 0/2] vl: Drain before (block) job cancel when quitting
- [PATCH v2 0/2] blockdev: Overlays are not snapshots

Based-on: <20190605161118.14544-1-mre...@redhat.com>
Based-on: <20190612220839.1374-1-mre...@redhat.com>
Based-on: <20190603202236.1342-1-mre...@redhat.com>


v5:
- Split the huge patches 2 and 3 from the previous version into many
  smaller patches to maintain the potential reviewers’ sanity [Vladimir]

- Added support for compressed writes to the COR and throttle filter
  drivers to demonstrate how that looks, because the backup job needs to
  deal with filters that have such support

- Added differentiation between bdrv_storage_child(),
  bdrv_primary_child(), and bdrv_metadata_child()

- A whole lot of things Vladimir has noted

- Made the block jobs really work with filters.  In case of commit and
  stream, this now means that filters go away if they are between top
  and base.  I think that’s OK because it’s the user’s choice to include
  filters or not.  (They can move the filters around if they prefer a
  different result.)
  - This changes the “Add filter commit test cases” from checking that
most things do not work to checking that they do

- Added the “blockdev: Fix active commit choice” patch because it turned
  out this became necessary after I allowed committing through and with
  filters.


Max Reitz (42):
  block: Mark commit and mirror as filter drivers
  copy-on-read: Support compressed writes
  throttle: Support compressed writes
  block: Add child access functions
  block: Add chain helper functions
  qcow2: Implement .bdrv_storage_child()
  block: *filtered_cow_child() for *has_zero_init()
  block: bdrv_set_backing_hd() is about bs->backing
  block: Include filters when freezing backing chain
  block: Use CAF in bdrv_is_encrypted()
  block: Add bdrv_supports_compressed_writes()
  block: Use bdrv_filtered_rw* where obvious
  block: Use CAFs in block status functions
  block: Use CAFs when working with backing chains
  block: Re-evaluate backing file handling in reopen
  block: Use child access functions when flushing
  block: Use CAFs in bdrv_refresh_limits()
  block: Use CAFs in bdrv_refresh_filename()
  block: Use CAF in bdrv_co_rw_vmstate()
  block/snapshot: Fall back to storage child
  block: Use CAFs for debug breakpoints
  block: Use CAFs in bdrv_get_allocated_file_size()
  blockdev: Use CAF in external_snapshot_prepare()
  block: Use child access functions for QAPI queries
  mirror: Deal with filters
  backup: Deal with filters
  commit: Deal with filters
  stream: Deal with filters
  nbd: Use CAF when looking for dirty bitmap
  qemu-img: Use child access functions
  block: Drop backing_bs()
  block: Make bdrv_get_cumulative_perm() public
  blockdev: Fix active commit choice
  block: Inline bdrv_co_block_status_from_*()
  block: Fix check_to_replace_node()
  iotests: Add tests for mirror @replaces loops
  block: Leave BDS.backing_file constant
  iotests: Let complete_and_wait() work with commit
  iotests: Add filter commit test cases
  iotests: Add filter mirror test cases
  iotests: Add test for commit in sub directory
  iotests: Test committing to overridden backing

[Qemu-block] [PATCH 2/2] iotests: Test quitting with job on throttled node

2019-06-12 Thread Max Reitz
When qemu quits, all throttling should be ignored.  That means, if there
is a mirror job running from a throttled node, it should be cancelled
immediately and qemu should close without blocking.

Signed-off-by: Max Reitz 
---
 tests/qemu-iotests/218 | 55 --
 tests/qemu-iotests/218.out |  4 +++
 2 files changed, 57 insertions(+), 2 deletions(-)

diff --git a/tests/qemu-iotests/218 b/tests/qemu-iotests/218
index 92c331b6fb..2554d84581 100755
--- a/tests/qemu-iotests/218
+++ b/tests/qemu-iotests/218
@@ -27,9 +27,9 @@
 # Creator/Owner: Max Reitz 
 
 import iotests
-from iotests import log
+from iotests import log, qemu_img, qemu_io_silent
 
-iotests.verify_platform(['linux'])
+iotests.verify_image_format(supported_fmts=['qcow2', 'raw'])
 
 
 # Launches the VM, adds two null-co nodes (source and target), and
@@ -136,3 +136,54 @@ with iotests.VM() as vm:
 
 log(vm.event_wait('BLOCK_JOB_CANCELLED'),
 filters=[iotests.filter_qmp_event])
+
+log('')
+log('=== Cancel mirror job from throttled node by quitting ===')
+log('')
+
+with iotests.VM() as vm, \
+ iotests.FilePath('src.img') as src_img_path:
+
+assert qemu_img('create', '-f', iotests.imgfmt, src_img_path, '64M') == 0
+assert qemu_io_silent('-f', iotests.imgfmt, src_img_path,
+  '-c', 'write -P 42 0M 64M') == 0
+
+vm.launch()
+
+ret = vm.qmp('object-add', qom_type='throttle-group', id='tg',
+ props={'x-bps-read': 4096})
+assert ret['return'] == {}
+
+ret = vm.qmp('blockdev-add',
+ node_name='source',
+ driver=iotests.imgfmt,
+ file={
+ 'driver': 'file',
+ 'filename': src_img_path
+ })
+assert ret['return'] == {}
+
+ret = vm.qmp('blockdev-add',
+ node_name='throttled-source',
+ driver='throttle',
+ throttle_group='tg',
+ file='source')
+assert ret['return'] == {}
+
+ret = vm.qmp('blockdev-add',
+ node_name='target',
+ driver='null-co',
+ size=(64 * 1048576))
+assert ret['return'] == {}
+
+ret = vm.qmp('blockdev-mirror',
+ job_id='mirror',
+ device='throttled-source',
+ target='target',
+ sync='full')
+assert ret['return'] == {}
+
+log(vm.qmp('quit'))
+
+with iotests.Timeout(5, 'Timeout waiting for VM to quit'):
+vm.shutdown(has_quit=True)
diff --git a/tests/qemu-iotests/218.out b/tests/qemu-iotests/218.out
index 825a657081..5a86a97550 100644
--- a/tests/qemu-iotests/218.out
+++ b/tests/qemu-iotests/218.out
@@ -28,3 +28,7 @@ Cancelling job
 Cancelling job
 {"return": {}}
 {"data": {"device": "mirror", "len": 1048576, "offset": 1048576, "speed": 0, "type": "mirror"}, "event": "BLOCK_JOB_CANCELLED", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
+
+=== Cancel mirror job from throttled node by quitting ===
+
+{"return": {}}
-- 
2.21.0




[Qemu-block] [PATCH v5 03/42] throttle: Support compressed writes

2019-06-12 Thread Max Reitz
Signed-off-by: Max Reitz 
---
 block/throttle.c | 10 ++
 1 file changed, 10 insertions(+)

diff --git a/block/throttle.c b/block/throttle.c
index f64dcc27b9..de1b6bd7e8 100644
--- a/block/throttle.c
+++ b/block/throttle.c
@@ -152,6 +152,15 @@ static int coroutine_fn throttle_co_pdiscard(BlockDriverState *bs,
 return bdrv_co_pdiscard(bs->file, offset, bytes);
 }
 
+static int coroutine_fn throttle_co_pwritev_compressed(BlockDriverState *bs,
+   uint64_t offset,
+   uint64_t bytes,
+   QEMUIOVector *qiov)
+{
+return throttle_co_pwritev(bs, offset, bytes, qiov,
+   BDRV_REQ_WRITE_COMPRESSED);
+}
+
 static int throttle_co_flush(BlockDriverState *bs)
 {
 return bdrv_co_flush(bs->file->bs);
@@ -250,6 +259,7 @@ static BlockDriver bdrv_throttle = {
 
 .bdrv_co_pwrite_zeroes  =   throttle_co_pwrite_zeroes,
 .bdrv_co_pdiscard   =   throttle_co_pdiscard,
+.bdrv_co_pwritev_compressed =   throttle_co_pwritev_compressed,
 
 .bdrv_recurse_is_first_non_filter   =   throttle_recurse_is_first_non_filter,
 
-- 
2.21.0




[Qemu-block] [PATCH 1/2] vl: Drain before (block) job cancel when quitting

2019-06-12 Thread Max Reitz
If the main loop cancels all block jobs while the block layer is not
drained, this cancelling may not happen instantaneously.  We can start a
drained section before vm_shutdown(), which entails another
bdrv_drain_all(); this nested bdrv_drain_all() will thus be a no-op,
basically.

We do not have to end the drained section, because we actually do not
want any requests to happen from this point on.

Signed-off-by: Max Reitz 
---
I don't know whether it actually makes sense to never end this drained
section.  It makes sense to me.  Please correct me if I'm wrong.
---
 vl.c | 11 +++
 1 file changed, 11 insertions(+)

diff --git a/vl.c b/vl.c
index cd1fbc4cdc..3f8b3f74f5 100644
--- a/vl.c
+++ b/vl.c
@@ -4538,6 +4538,17 @@ int main(int argc, char **argv, char **envp)
  */
 migration_shutdown();
 
+/*
+ * We must cancel all block jobs while the block layer is drained,
+ * or cancelling will be affected by throttling and thus may block
+ * for an extended period of time.
+ * vm_shutdown() will bdrv_drain_all(), so we may as well include
+ * it in the drained section.
+ * We do not need to end this section, because we do not want any
+ * requests happening from here on anyway.
+ */
+bdrv_drain_all_begin();
+
 /* No more vcpu or device emulation activity beyond this point */
 vm_shutdown();
 
-- 
2.21.0




[Qemu-block] [PATCH 0/2] vl: Drain before (block) job cancel when quitting

2019-06-12 Thread Max Reitz
Quitting qemu should lead to qemu exiting pretty much immediately.  That
means if you have a block job running on a throttled block node, the
node should ignore its throttling and the job should be cancelled
immediately.

Unfortunately, that is not what happens.  Currently, the node will be
drained (with a bdrv_drain_all()), and then again unquiesced (because
bdrv_drain_all() ends).  Then, the block job is cancelled; but at this
point, the node is no longer drained, so it will block, as it befits a
throttling node.

To fix this issue, we have to keep all nodes drained while we cancel all
block jobs when quitting qemu.  This will make the throttle node ignore
its throttling and thus let the block job cancel immediately.


Max Reitz (2):
  vl: Drain before (block) job cancel when quitting
  iotests: Test quitting with job on throttled node

 vl.c   | 11 
 tests/qemu-iotests/218 | 55 --
 tests/qemu-iotests/218.out |  4 +++
 3 files changed, 68 insertions(+), 2 deletions(-)

-- 
2.21.0




Re: [Qemu-block] [PATCH v7] ssh: switch from libssh2 to libssh

2019-06-12 Thread Philippe Mathieu-Daudé
Hi Pino,

On 6/12/19 4:48 PM, Pino Toscano wrote:
> Rewrite the implementation of the ssh block driver to use libssh instead
> of libssh2.  The libssh library has various advantages over libssh2:
> - easier API for authentication (for example for using ssh-agent)
> - easier API for known_hosts handling
> - supports newer types of keys in known_hosts
> 
> Use APIs/features available in libssh 0.8 conditionally, to support
> older versions (which are not recommended though).
> 
> Adjust the various Docker/Travis scripts to use libssh when available
> instead of libssh2.
> 
> Signed-off-by: Pino Toscano 
> ---
> 
> Changes from v6:
> - fixed few checkpatch style issues
> - detect libssh 0.8 via symbol detection
> - adjust travis/docker test material
> - remove dead "default" case in a switch
> - use variables for storing MIN() results
> - adapt a documentation bit
> 
> Changes from v5:
> - adapt to newer tracing APIs
> - disable ssh compression (mimic what libssh2 does by default)
> - use build time checks for libssh 0.8, and use newer APIs directly
> 
> Changes from v4:
> - fix wrong usages of error_setg/session_error_setg/sftp_error_setg
> - fix few return code checks
> - remove now-unused parameters in few internal functions
> - allow authentication with "none" method
> - switch to unsigned int for the port number
> - enable TCP_NODELAY on the socket
> - fix one reference error message in iotest 207
> 
> Changes from v3:
> - fix socket cleanup in connect_to_ssh()
> - add comments about the socket cleanup
> - improve the error reporting (closer to what was with libssh2)
> - improve EOF detection on sftp_read()
> 
> Changes from v2:
> - used again an own fd
> - fixed co_yield() implementation
> 
> Changes from v1:
> - fixed jumbo packets writing
> - fixed missing 'err' assignment
> - fixed commit message
> 
>  .travis.yml   |   4 +-
>  block/Makefile.objs   |   6 +-
>  block/ssh.c   | 622 +-
>  block/trace-events|  14 +-
>  configure |  65 +-
>  docs/qemu-block-drivers.texi  |   2 +-
>  .../dockerfiles/debian-win32-cross.docker |   1 -
>  .../dockerfiles/debian-win64-cross.docker |   1 -
>  tests/docker/dockerfiles/fedora.docker|   4 +-
>  tests/docker/dockerfiles/ubuntu.docker|   2 +-
>  tests/docker/dockerfiles/ubuntu1804.docker|   2 +-
>  tests/qemu-iotests/207.out|   2 +-
>  12 files changed, 374 insertions(+), 351 deletions(-)
> 
> diff --git a/.travis.yml b/.travis.yml
> index b053a836a3..a2dac8b7c9 100644
> --- a/.travis.yml
> +++ b/.travis.yml
> @@ -31,7 +31,7 @@ addons:
>- libseccomp-dev
>- libspice-protocol-dev
>- libspice-server-dev
> -  - libssh2-1-dev
> +  - libssh-dev
>- liburcu-dev
>- libusb-1.0-0-dev
>- libvte-2.91-dev
> @@ -261,7 +261,7 @@ matrix:
>  - libseccomp-dev
>  - libspice-protocol-dev
>  - libspice-server-dev
> -- libssh2-1-dev
> +- libssh-dev
>  - liburcu-dev
>  - libusb-1.0-0-dev
>  - libvte-2.91-dev
> diff --git a/block/Makefile.objs b/block/Makefile.objs
> index ae11605c9f..bf01429dd5 100644
> --- a/block/Makefile.objs
> +++ b/block/Makefile.objs
> @@ -31,7 +31,7 @@ block-obj-$(CONFIG_CURL) += curl.o
>  block-obj-$(CONFIG_RBD) += rbd.o
>  block-obj-$(CONFIG_GLUSTERFS) += gluster.o
>  block-obj-$(CONFIG_VXHS) += vxhs.o
> -block-obj-$(CONFIG_LIBSSH2) += ssh.o
> +block-obj-$(CONFIG_LIBSSH) += ssh.o
>  block-obj-y += accounting.o dirty-bitmap.o
>  block-obj-y += write-threshold.o
>  block-obj-y += backup.o
> @@ -52,8 +52,8 @@ rbd.o-libs := $(RBD_LIBS)
>  gluster.o-cflags   := $(GLUSTERFS_CFLAGS)
>  gluster.o-libs := $(GLUSTERFS_LIBS)
>  vxhs.o-libs:= $(VXHS_LIBS)
> -ssh.o-cflags   := $(LIBSSH2_CFLAGS)
> -ssh.o-libs := $(LIBSSH2_LIBS)
> +ssh.o-cflags   := $(LIBSSH_CFLAGS)
> +ssh.o-libs := $(LIBSSH_LIBS)
>  block-obj-dmg-bz2-$(CONFIG_BZIP2) += dmg-bz2.o
>  block-obj-$(if $(CONFIG_DMG),m,n) += $(block-obj-dmg-bz2-y)
>  dmg-bz2.o-libs := $(BZIP2_LIBS)
> diff --git a/block/ssh.c b/block/ssh.c
> index 12fd4f39e8..13768fad98 100644
> --- a/block/ssh.c
> +++ b/block/ssh.c
> @@ -24,8 +24,8 @@
>  
>  #include "qemu/osdep.h"
>  
> -#include 
> -#include 
> +#include 
> +#include 
>  
>  #include "block/block_int.h"
>  #include "block/qdict.h"
> @@ -44,13 +44,11 @@
>  #include "trace.h"
>  
>  /*
> - * TRACE_LIBSSH2= enables tracing in libssh2 itself.  Note
> - * that this requires that libssh2 was specially compiled with the
> - * `./configure --enable-debug' option, so most likely you will have
> - * to compile it yourself.  The meaning of  is described
> - * here: http://www.libssh2.org/libssh2_trace.html
> + * TRACE_LIBSSH= enables tracing in libssh itself.
> + * The

Re: [Qemu-block] [QEMU] [PATCH v2 0/8] Add Qemu to SeaBIOS LCHS interface

2019-06-12 Thread Gerd Hoffmann
On Wed, Jun 12, 2019 at 04:30:03PM +0300, Sam Eiderman wrote:
> 
> 
> > On 12 Jun 2019, at 16:06, Gerd Hoffmann  wrote:
> > 
> > On Wed, Jun 12, 2019 at 02:59:31PM +0300, Sam Eiderman wrote:
> >> v1:
> >> 
> >> Non-standard logical geometries break under QEMU.
> >> 
> >> A virtual disk which contains an operating system which depends on
> >> logical geometries (consistent values being reported from BIOS INT13
> >> AH=08) will most likely break under QEMU/SeaBIOS if it has non-standard
> >> logical geometries - for example 56 SPT (sectors per track).
> >> No matter what QEMU will guess - SeaBIOS, for large enough disks - will
> >> use LBA translation, which will report 63 SPT instead.
> > 
> > --verbose please.
> > 
> > As far as I know seabios switches to LBA mode when the disk is simply too
> > big for LCHS addressing.  So I fail to see which problem is solved by
> > this.  If your guest needs LCHS, why do you assign a disk which can't
> > be fully accessed using LCHS addressing?
> 
> The scenario is as follows:
> 
> A user has a disk with 56 spts.
> This disk has been already created under a bios that reported 56 spts.
> When migrating this disk to QEMU/SeaBIOS, SeaBIOS will report 63 spts
> (under LBA translation) - this will break the boot for this guest.

You said so already.  I was looking for a real world example.  Guests
which can't deal with LBA should be pretty rare these days.  What kind
of guest?  What other bios?  Or is this a purely theoretical issue?

> >> In addition we can not enforce SeaBIOS to rely on physical geometries at
> >> all. A virtio-blk-pci virtual disk with 255 physical heads can not
> >> report more than 16 physical heads when moved to an IDE controller, the
> >> ATA spec allows a maximum of 16 heads - this is an artifact of
> >> virtualization.
> > 
> > Well, not really.  Moving disks from one controller to another when the
> > OS depends on LCHS addressing never is a good idea.  That already caused
> > problems in the 90-ies, when moving scsi disks from one scsi host
> > adapter to another type, *way* before virtualization became a thing.
> 
> I agree, but this is easily solvable in virtualized environments where the
> hypervisor can guess the correct LCHS values by inspecting the MBR,

Yes.  This is exactly what the more clever scsi host adapter int13 rom
implementations ended up doing too.  Look at MBR to figure which LCHS
they should use.

> or letting the user set these values manually.

Why?  Asking the user to deal with the mess is pretty lame if there are
better options.  And IMO doing this fully automatic in seabios is
better.

> > BTW:  One possible way to figure which LCHS layout a disk uses is to
> > check the MBR partition table.  With that we (a) don't need a new
> > interface between qemu and seabios and (b) it is not needed to manually
> > specify the geometry.
> 
> In my opinion SeaBIOS is not the correct place for this change since
> “enhancing” the detection of LCHS values in SeaBIOS may cause it to
> suddenly report different values for already existing guests which rely on
> LCHS - thus, breaking compatibility.

I can't see how this can break guests.  It should either have no effect
(guests using LBA) or unbreak guests due to LCHS changing from "wrong"
to "correct".

cheers,
  Gerd




Re: [Qemu-block] [Qemu-devel] [PATCH 0/7] file-posix: Add dynamic-auto-read-only QAPI feature

2019-06-12 Thread Markus Armbruster
Markus Armbruster  writes:

> This series adds optional feature lists to struct definitions in the
> QAPI schema and makes use of them to advertise the new behaviour of
> auto-read-only=on in file-posix.

Queued, thanks!



Re: [Qemu-block] [Qemu-devel] [PATCH 6/7] file-posix: Add dynamic-auto-read-only QAPI feature

2019-06-12 Thread Markus Armbruster
Eric Blake  writes:

> On 6/6/19 10:38 AM, Markus Armbruster wrote:
>> From: Kevin Wolf 
>> 
>> In commit 23dece19da4 ('file-posix: Make auto-read-only dynamic'),
>> auto-read-only=on changed its behaviour in file-posix for the 4.0
>> release. This change cannot be detected through the usual mechanisms
>> like schema introspection. Add a new feature flag to the schema to
>> allow libvirt to detect the presence of the new behaviour.
>> 
>> Signed-off-by: Kevin Wolf 
>> ---
>>  qapi/block-core.json | 13 -
>>  1 file changed, 12 insertions(+), 1 deletion(-)
>> 
>> diff --git a/qapi/block-core.json b/qapi/block-core.json
>> index 1defcde048..f5e1ee91f9 100644
>> --- a/qapi/block-core.json
>> +++ b/qapi/block-core.json
>> @@ -2859,6 +2859,15 @@
>>  # file is large, do not use in production.
>>  # (default: off) (since: 3.0)
>>  #
>> +# Features:
>> +# @dynamic-auto-read-only: If present, enabled auto-read-only means that the
>> +#  driver will open the image read-only at first,
>> +#  dynamically reopen the image file read-write when
>> +#  the first writer is attached to the node and reopen
>> +#  read-only when the last writer is detached. This
>> +#  allows to give QEMU write permissions only on demand
>
> s/allows to give/allows giving/

Fixing, thanks!

>> +#  when an operation actually needs write access.
>> +#
>>  # Since: 2.9
>>  ##
>>  { 'struct': 'BlockdevOptionsFile',
>> @@ -2868,7 +2877,9 @@
>>  '*aio': 'BlockdevAioOptions',
>>  '*drop-cache': {'type': 'bool',
>>  'if': 'defined(CONFIG_LINUX)'},
>> -'*x-check-cache-dropped': 'bool' } }
>> +'*x-check-cache-dropped': 'bool' },
>> +  'features': [ { 'name': 'dynamic-auto-read-only',
>> +  'if': 'defined(CONFIG_POSIX)' } ] }
>>  
>>  ##
>>  # @BlockdevOptionsNull:
>> 



Re: [Qemu-block] [Qemu-devel] [PATCH v2 11/11] monitor: Split out monitor/monitor.c

2019-06-12 Thread Kevin Wolf
Am 12.06.2019 um 15:49 hat Markus Armbruster geschrieben:
> Kevin Wolf  writes:
> 
> > Move the monitor core infrastructure from monitor/misc.c to
> > monitor/monitor.c. This is code that can be shared for all targets, so
> > compile it only once.
> >
> > What remains in monitor/misc.c after this patch is mostly monitor
> > command implementations and code that requires a system emulator or is
> > even target-dependent.
> 
> I think target-independent command handlers should move to qmp-cmds.c
> and hmp-cmds.c.  Okay to leave for later.  Mentioning it in the commit
> message wouldn't hurt, though.
> 
> Also left in misc.c, and bulky enough for mention: completion callbacks.
> They should perhaps move next to their command handlers.  Okay to leave
> for later.

Ok.

> > The amount of function and particularly extern variables in
> > monitor_int.h is probably a bit larger than it needs to be, but this way
> > no non-trivial code modifications are needed. The interfaces between all
> > monitor parts can be cleaned up later.
> >
> > Signed-off-by: Kevin Wolf 
> > Reviewed-by: Dr. David Alan Gilbert 
> > ---
> >  include/monitor/monitor.h |   1 +
> >  monitor/monitor_int.h |   1 +
> >  monitor/misc.c| 568 +--
> >  monitor/monitor.c | 605 ++
> >  MAINTAINERS   |   2 +
> >  monitor/Makefile.objs |   2 +-
> >  monitor/trace-events  |   2 +-
> >  7 files changed, 612 insertions(+), 569 deletions(-)
> >  create mode 100644 monitor/monitor.c
> >
> > diff --git a/include/monitor/monitor.h b/include/monitor/monitor.h
> > index 8547529e49..b9f8d175ed 100644
> > --- a/include/monitor/monitor.h
> > +++ b/include/monitor/monitor.h
> > @@ -20,6 +20,7 @@ typedef struct MonitorHMP MonitorHMP;
> >  bool monitor_cur_is_qmp(void);
> >  
> >  void monitor_init_globals(void);
> > +void monitor_init_globals_core(void);
> >  void monitor_init(Chardev *chr, int flags);
> >  void monitor_init_qmp(Chardev *chr, int flags);
> >  void monitor_init_hmp(Chardev *chr, int flags);
> > diff --git a/monitor/monitor_int.h b/monitor/monitor_int.h
> > index 88eaed9c5c..d5fb5162f3 100644
> > --- a/monitor/monitor_int.h
> > +++ b/monitor/monitor_int.h
> > @@ -191,6 +191,7 @@ extern mon_cmd_t mon_cmds[];
> >  int monitor_puts(Monitor *mon, const char *str);
> >  void monitor_data_init(Monitor *mon, int flags, bool skip_flush,
> > bool use_io_thread);
> > +void monitor_data_destroy(Monitor *mon);
> >  int monitor_can_read(void *opaque);
> >  void monitor_list_append(Monitor *mon);
> >  void monitor_fdsets_cleanup(void);
> > diff --git a/monitor/misc.c b/monitor/misc.c
> > index 4f1168b7c3..a4ec850493 100644
> > --- a/monitor/misc.c
> > +++ b/monitor/misc.c
> > @@ -62,7 +62,6 @@
> >  #include "qapi/qmp/json-parser.h"
> >  #include "qapi/qmp/qlist.h"
> >  #include "qom/object_interfaces.h"
> > -#include "trace.h"
> >  #include "trace/control.h"
> >  #include "monitor/hmp-target.h"
> >  #ifdef CONFIG_TRACE_SIMPLE
> > @@ -117,43 +116,13 @@ struct MonFdset {
> >  QLIST_ENTRY(MonFdset) next;
> >  };
> >  
> > -/*
> > - * To prevent flooding clients, events can be throttled. The
> > - * throttling is calculated globally, rather than per-Monitor
> > - * instance.
> > - */
> > -typedef struct MonitorQAPIEventState {
> > -QAPIEvent event;/* Throttling state for this event type and... */
> > -QDict *data;/* ... data, see qapi_event_throttle_equal() */
> > -QEMUTimer *timer;   /* Timer for handling delayed events */
> > -QDict *qdict;   /* Delayed event (if any) */
> > -} MonitorQAPIEventState;
> > -
> > -typedef struct {
> > -int64_t rate;   /* Minimum time (in ns) between two events */
> > -} MonitorQAPIEventConf;
> > -
> > -/* Shared monitor I/O thread */
> > -IOThread *mon_iothread;
> > -
> > -/* Bottom half to dispatch the requests received from I/O thread */
> > -QEMUBH *qmp_dispatcher_bh;
> > -
> >  /* QMP checker flags */
> >  #define QMP_ACCEPT_UNKNOWNS 1
> >  
> > -/* Protects mon_list, monitor_qapi_event_state, monitor_destroyed.  */
> > -QemuMutex monitor_lock;
> > -static GHashTable *monitor_qapi_event_state;
> > -MonitorList mon_list;
> > -static bool monitor_destroyed;
> > -
> >  /* Protects mon_fdsets */
> >  static QemuMutex mon_fdsets_lock;
> >  static QLIST_HEAD(, MonFdset) mon_fdsets;
> >  
> > -int mon_refcount;
> > -
> >  static mon_cmd_t info_cmds[];
> >  
> >  __thread Monitor *cur_mon;
> > @@ -161,32 +130,6 @@ __thread Monitor *cur_mon;
> >  static void monitor_command_cb(void *opaque, const char *cmdline,
> > void *readline_opaque);
> >  
> > -/**
> > - * Is @mon is using readline?
> > - * Note: not all HMP monitors use readline, e.g., gdbserver has a
> > - * non-interactive HMP monitor, so readline is not used there.
> > - */
> > -static inline bool monitor_uses_readline(const Monitor *mon)
> > -{
> > -return mon->flags & MONI

Re: [Qemu-block] [Qemu-devel] [PATCH v2 10/11] monitor: Split out monitor/hmp.c

2019-06-12 Thread Kevin Wolf
Am 12.06.2019 um 15:17 hat Markus Armbruster geschrieben:
> Kevin Wolf  writes:
> 
> > Move HMP infrastructure from monitor/misc.c to monitor/hmp.c. This is
> > code that can be shared for all targets, so compile it only once.
> >
> > The amount of function and particularly extern variables in
> > monitor_int.h is probably a bit larger than it needs to be, but this way
> > no non-trivial code modifications are needed. The interfaces between HMP
> > and the monitor core can be cleaned up later.
> >
> > Signed-off-by: Kevin Wolf 
> > Reviewed-by: Dr. David Alan Gilbert 
> > ---
> >  include/monitor/monitor.h |1 +
> >  monitor/monitor_int.h |   31 +
> >  monitor/hmp.c | 1387 +
> >  monitor/misc.c| 1338 +--
> >  monitor/Makefile.objs |2 +-
> >  monitor/trace-events  |4 +-
> >  6 files changed, 1429 insertions(+), 1334 deletions(-)
> >  create mode 100644 monitor/hmp.c
> >
> > diff --git a/include/monitor/monitor.h b/include/monitor/monitor.h
> > index 7bbab05320..8547529e49 100644
> > --- a/include/monitor/monitor.h
> > +++ b/include/monitor/monitor.h
> > @@ -22,6 +22,7 @@ bool monitor_cur_is_qmp(void);
> >  void monitor_init_globals(void);
> >  void monitor_init(Chardev *chr, int flags);
> >  void monitor_init_qmp(Chardev *chr, int flags);
> > +void monitor_init_hmp(Chardev *chr, int flags);
> >  void monitor_cleanup(void);
> >  
> >  int monitor_suspend(Monitor *mon);
> > diff --git a/monitor/monitor_int.h b/monitor/monitor_int.h
> > index 4aabee54e1..88eaed9c5c 100644
> > --- a/monitor/monitor_int.h
> > +++ b/monitor/monitor_int.h
> > @@ -27,6 +27,7 @@
> >  
> >  #include "qemu-common.h"
> >  #include "monitor/monitor.h"
> > +#include "qemu/cutils.h"
> >  
> >  #include "qapi/qmp/qdict.h"
> >  #include "qapi/qmp/json-parser.h"
> > @@ -154,6 +155,29 @@ static inline bool monitor_is_qmp(const Monitor *mon)
> >  return (mon->flags & MONITOR_USE_CONTROL);
> >  }
> >  
> > +/**
> > + * Is @name in the '|' separated list of names @list?
> > + */
> > +static inline int compare_cmd(const char *name, const char *list)
> > +{
> > +const char *p, *pstart;
> > +int len;
> > +len = strlen(name);
> > +p = list;
> > +for (;;) {
> > +pstart = p;
> > +p = qemu_strchrnul(p, '|');
> > +if ((p - pstart) == len && !memcmp(pstart, name, len)) {
> > +return 1;
> > +}
> > +if (*p == '\0') {
> > +break;
> > +}
> > +p++;
> > +}
> > +return 0;
> > +}
> > +
> 
> What's the justification for inline?

It seemed small enough, but maybe it isn't (it has also grown by two
lines after fixing the coding style). I can leave it in misc.c and just
make it public.

I'd just need a more specific name than compare_cmd() to make it public.

> >  typedef QTAILQ_HEAD(MonitorList, Monitor) MonitorList;
> >  extern IOThread *mon_iothread;
> >  extern QEMUBH *qmp_dispatcher_bh;
> > @@ -162,6 +186,8 @@ extern QemuMutex monitor_lock;
> >  extern MonitorList mon_list;
> >  extern int mon_refcount;
> >  
> > +extern mon_cmd_t mon_cmds[];
> > +
> 
> Any particular reason for not moving this one to hmp.c, along with
> info_cmds?  Question, not demand :)

Yes, it's not part of the core infrastructure, but contains commands
specific to the system emulator. If a tool were to use HMP, it would
have to provide its own command tables.

If we ever create a monitor/hmp-sysemu.c or something like it, this
would be a good place for the tables.

Kevin



Re: [Qemu-block] [Qemu-devel] [PATCH v2 09/11] monitor: Split out monitor/qmp.c

2019-06-12 Thread Kevin Wolf
Am 12.06.2019 um 15:11 hat Markus Armbruster geschrieben:
> Kevin Wolf  writes:
> 
> > Move QMP infrastructure from monitor/misc.c to monitor/qmp.c. This is
> > code that can be shared for all targets, so compile it only once.
> 
> Less code compiled per target, yay!
> 
> > The amount of function and particularly extern variables in
> > monitor_int.h is probably a bit larger than it needs to be, but this way
> > no non-trivial code modifications are needed. The interfaces between QMP
> > and the monitor core can be cleaned up later.
> 
> That's okay.
> 
> I have to admit I naively expected the previous patch moved everything
> to the new header we need in a header for splitting up monitor/misc.c.
> How did you decide what to move to the header in which patch?

The previous patch moved only the Monitor{HMP,QMP} data structures and
their dependencies as I was sure these would be shared. Everything else
was added to address linker complaints as I was going. I'll clarify this
in the commit message of the previous patch.

> > Signed-off-by: Kevin Wolf 
> > Reviewed-by: Dr. David Alan Gilbert 
> > ---
> >  include/monitor/monitor.h |   1 +
> >  monitor/monitor_int.h |  30 ++-
> >  monitor/misc.c| 394 +
> >  monitor/qmp.c | 404 ++
> >  Makefile.objs |   1 +
> >  monitor/Makefile.objs |   1 +
> >  monitor/trace-events  |   4 +-
> >  7 files changed, 448 insertions(+), 387 deletions(-)
> >  create mode 100644 monitor/qmp.c
> >
> > diff --git a/include/monitor/monitor.h b/include/monitor/monitor.h
> > index 1ba354f811..7bbab05320 100644
> > --- a/include/monitor/monitor.h
> > +++ b/include/monitor/monitor.h
> > @@ -21,6 +21,7 @@ bool monitor_cur_is_qmp(void);
> >  
> >  void monitor_init_globals(void);
> >  void monitor_init(Chardev *chr, int flags);
> > +void monitor_init_qmp(Chardev *chr, int flags);
> 
> Why does this one go to the non-internal header?

Most callers already know whether they want QMP or HMP, so they can just
directly create the right thing instead of going through the
monitor_init() wrapper.

If you prefer, I can move it to the internal header, though. It's not
called externally yet.

> >  void monitor_cleanup(void);
> >  
> >  int monitor_suspend(Monitor *mon);
> > diff --git a/monitor/monitor_int.h b/monitor/monitor_int.h
> > index 7122418955..4aabee54e1 100644
> > --- a/monitor/monitor_int.h
> > +++ b/monitor/monitor_int.h
> > @@ -30,10 +30,11 @@
> >  
> >  #include "qapi/qmp/qdict.h"
> >  #include "qapi/qmp/json-parser.h"
> > -#include "qapi/qapi-commands.h"
> > +#include "qapi/qmp/dispatch.h"
> 
> This part should be squashed into the previous patch.  You'll
> additionally need qapi/qapi-types-misc.h for QMP_CAPABILITY__MAX there,
> or keep monitor/monitor.h, even though you need it only here for
> MONITOR_USE_CONTROL.

Yes, already happened while addressing the comments you had for the
header.

> >  
> >  #include "qemu/readline.h"
> >  #include "chardev/char-fe.h"
> > +#include "sysemu/iothread.h"
> 
> Perhaps IOThread should be typedef'ed in qemu/typedefs.h.  I'm not
> asking you to do that.
> 
> >  
> >  /*
> >   * Supported types:
> > @@ -145,4 +146,31 @@ typedef struct {
> >  GQueue *qmp_requests;
> >  } MonitorQMP;
> >  
> > +/**
> > + * Is @mon a QMP monitor?
> > + */
> > +static inline bool monitor_is_qmp(const Monitor *mon)
> > +{
> > +return (mon->flags & MONITOR_USE_CONTROL);
> > +}
> > +
> > +typedef QTAILQ_HEAD(MonitorList, Monitor) MonitorList;
> > +extern IOThread *mon_iothread;
> > +extern QEMUBH *qmp_dispatcher_bh;
> > +extern QmpCommandList qmp_commands, qmp_cap_negotiation_commands;
> > +extern QemuMutex monitor_lock;
> > +extern MonitorList mon_list;
> > +extern int mon_refcount;
> > +
> > +int monitor_puts(Monitor *mon, const char *str);
> > +void monitor_data_init(Monitor *mon, int flags, bool skip_flush,
> > +   bool use_io_thread);
> > +int monitor_can_read(void *opaque);
> > +void monitor_list_append(Monitor *mon);
> > +void monitor_fdsets_cleanup(void);
> > +
> > +void qmp_send_response(MonitorQMP *mon, const QDict *rsp);
> > +void monitor_data_destroy_qmp(MonitorQMP *mon);
> > +void monitor_qmp_bh_dispatcher(void *data);
> > +
> >  #endif
> 
> I trust you these are indeed all needed.

The linker said so. :-)

> > diff --git a/monitor/qmp.c b/monitor/qmp.c
> > new file mode 100644
> > index 00..d425b0f2ba
> > --- /dev/null
> > +++ b/monitor/qmp.c
> > @@ -0,0 +1,404 @@
> > +/*
> > + * QEMU monitor
> > + *
> > + * Copyright (c) 2003-2004 Fabrice Bellard
> 
> I'm pretty confident nothing in this file is actually due to Fabrice.
> 
> > + *
> > + * Permission is hereby granted, free of charge, to any person obtaining a 
> > copy
> > + * of this software and associated documentation files (the "Software"), 
> > to deal
> > + * in the Software without restriction, including without limitation the 
> > rights
> > + * to

Re: [Qemu-block] [Qemu-devel] [PATCH v2 04/11] monitor: Create MonitorHMP with readline state

2019-06-12 Thread Kevin Wolf
Am 12.06.2019 um 16:08 hat Markus Armbruster geschrieben:
> Kevin Wolf  writes:
> 
> > Am 12.06.2019 um 11:07 hat Markus Armbruster geschrieben:
> >> Cc: Peter for a monitor I/O thread question.
> >> 
> >> Kevin Wolf  writes:
> >> 
> >> > The ReadLineState in Monitor is only used for HMP monitors. Create
> >> > MonitorHMP and move it there.
> >> >
> >> > Signed-off-by: Kevin Wolf 
> >> > Reviewed-by: Dr. David Alan Gilbert 
> >
> >> > @@ -218,6 +210,17 @@ struct Monitor {
> >> >  int mux_out;
> >> >  };
> >> >  
> >> > +struct MonitorHMP {
> >> > +Monitor common;
> >> > +/*
> >> > + * State used only in the thread "owning" the monitor.
> >> > + * If @use_io_thread, this is @mon_iothread.
> >> > + * Else, it's the main thread.
> >> > + * These members can be safely accessed without locks.
> >> > + */
> >> > +ReadLineState *rs;
> >> > +};
> >> > +
> >> 
> >> Hmm.
> >> 
> >> The monitor I/O thread code makes an effort not to restrict I/O thread
> >> use to QMP, even though we only use it there.  Whether the code would
> >> actually work for HMP as well we don't know.
> >> 
> >> Readline was similar until your PATCH 02: the code made an effort not to
> >> restrict it to HMP, even though we only use it there.  Whether the code
> >> would actually work for QMP as well we don't know.
> >> 
> >> Should we stop pretending and hard-code "I/O thread only for QMP"?
> >> 
> >> If yes, the comment above gets simplified by the patch that hard-codes
> >> "I/O thread only for QMP".
> >> 
> >> If no, we should perhaps point out that we currently don't use an I/O
> >> thread with HMP.  The comment above seems like a good place for that.
> >> 
> >> Perhaps restricting readline to HMP should be a separate patch before
> >> PATCH 02.
> >
> > Yes, possibly iothreads could be restricted to QMP. It doesn't help me
> > in splitting the monitor in any way, though, so I don't see it within
> > the scope of this series.
> 
> That's okay.
> 
> Would you mind pointing out we don't actually use an I/O thread with HMP
> in the comment?

I do mind in a way (git doesn't really cope well with changing things in
the first patches of this series while the later patches move them to
different files - renaming mon_cmds resulted in a big mess and now I'm
kind of fed up with this kind of merge conflicts), but I'll do it
anyway. As long as it's only one line, it shouldn't be that hard to make
sure that it still exists at the end of the series...

> >> > @@ -748,12 +754,13 @@ char *qmp_human_monitor_command(const char 
> >> > *command_line, bool has_cpu_index,
> >> >  int64_t cpu_index, Error **errp)
> >> >  {
> >> >  char *output = NULL;
> >> > -Monitor *old_mon, hmp;
> >> > +Monitor *old_mon;
> >> > +MonitorHMP hmp = {};
> >> 
> >> Any particular reason for adding the initializer?
> >
> > Yes:
> >
> >> >  
> >> > -monitor_data_init(&hmp, 0, true, false);
> >> > +monitor_data_init(&hmp.common, 0, true, false);
> >
> > monitor_data_init() does a memset(), but only on hmp.common, so the
> > fields outside of hmp.common would remain uninitialised. Specifically,
> > hmp.rs wouldn't be initialised to NULL and attempting to free it in the
> > end would crash.
> 
> I see.
> 
> Drop the superfluous memset() in monitor_data_init() then.

Hm, yes, all callers already initialise the memory now, so that can be
done.

Kevin



[Qemu-block] [PATCH v7] ssh: switch from libssh2 to libssh

2019-06-12 Thread Pino Toscano
Rewrite the implementation of the ssh block driver to use libssh instead
of libssh2.  The libssh library has various advantages over libssh2:
- easier API for authentication (for example for using ssh-agent)
- easier API for known_hosts handling
- supports newer types of keys in known_hosts

Use APIs/features available in libssh 0.8 conditionally, to support
older versions (which are not recommended though).

Adjust the various Docker/Travis scripts to use libssh when available
instead of libssh2.

Signed-off-by: Pino Toscano 
---

Changes from v6:
- fixed few checkpatch style issues
- detect libssh 0.8 via symbol detection
- adjust travis/docker test material
- remove dead "default" case in a switch
- use variables for storing MIN() results
- adapt a documentation bit

Changes from v5:
- adapt to newer tracing APIs
- disable ssh compression (mimic what libssh2 does by default)
- use build time checks for libssh 0.8, and use newer APIs directly

Changes from v4:
- fix wrong usages of error_setg/session_error_setg/sftp_error_setg
- fix few return code checks
- remove now-unused parameters in few internal functions
- allow authentication with "none" method
- switch to unsigned int for the port number
- enable TCP_NODELAY on the socket
- fix one reference error message in iotest 207

Changes from v3:
- fix socket cleanup in connect_to_ssh()
- add comments about the socket cleanup
- improve the error reporting (closer to what was with libssh2)
- improve EOF detection on sftp_read()

Changes from v2:
- used again an own fd
- fixed co_yield() implementation

Changes from v1:
- fixed jumbo packets writing
- fixed missing 'err' assignment
- fixed commit message

 .travis.yml   |   4 +-
 block/Makefile.objs   |   6 +-
 block/ssh.c   | 622 +-
 block/trace-events|  14 +-
 configure |  65 +-
 docs/qemu-block-drivers.texi  |   2 +-
 .../dockerfiles/debian-win32-cross.docker |   1 -
 .../dockerfiles/debian-win64-cross.docker |   1 -
 tests/docker/dockerfiles/fedora.docker|   4 +-
 tests/docker/dockerfiles/ubuntu.docker|   2 +-
 tests/docker/dockerfiles/ubuntu1804.docker|   2 +-
 tests/qemu-iotests/207.out|   2 +-
 12 files changed, 374 insertions(+), 351 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index b053a836a3..a2dac8b7c9 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -31,7 +31,7 @@ addons:
   - libseccomp-dev
   - libspice-protocol-dev
   - libspice-server-dev
-  - libssh2-1-dev
+  - libssh-dev
   - liburcu-dev
   - libusb-1.0-0-dev
   - libvte-2.91-dev
@@ -261,7 +261,7 @@ matrix:
 - libseccomp-dev
 - libspice-protocol-dev
 - libspice-server-dev
-- libssh2-1-dev
+- libssh-dev
 - liburcu-dev
 - libusb-1.0-0-dev
 - libvte-2.91-dev
diff --git a/block/Makefile.objs b/block/Makefile.objs
index ae11605c9f..bf01429dd5 100644
--- a/block/Makefile.objs
+++ b/block/Makefile.objs
@@ -31,7 +31,7 @@ block-obj-$(CONFIG_CURL) += curl.o
 block-obj-$(CONFIG_RBD) += rbd.o
 block-obj-$(CONFIG_GLUSTERFS) += gluster.o
 block-obj-$(CONFIG_VXHS) += vxhs.o
-block-obj-$(CONFIG_LIBSSH2) += ssh.o
+block-obj-$(CONFIG_LIBSSH) += ssh.o
 block-obj-y += accounting.o dirty-bitmap.o
 block-obj-y += write-threshold.o
 block-obj-y += backup.o
@@ -52,8 +52,8 @@ rbd.o-libs := $(RBD_LIBS)
 gluster.o-cflags   := $(GLUSTERFS_CFLAGS)
 gluster.o-libs := $(GLUSTERFS_LIBS)
 vxhs.o-libs:= $(VXHS_LIBS)
-ssh.o-cflags   := $(LIBSSH2_CFLAGS)
-ssh.o-libs := $(LIBSSH2_LIBS)
+ssh.o-cflags   := $(LIBSSH_CFLAGS)
+ssh.o-libs := $(LIBSSH_LIBS)
 block-obj-dmg-bz2-$(CONFIG_BZIP2) += dmg-bz2.o
 block-obj-$(if $(CONFIG_DMG),m,n) += $(block-obj-dmg-bz2-y)
 dmg-bz2.o-libs := $(BZIP2_LIBS)
diff --git a/block/ssh.c b/block/ssh.c
index 12fd4f39e8..13768fad98 100644
--- a/block/ssh.c
+++ b/block/ssh.c
@@ -24,8 +24,8 @@
 
 #include "qemu/osdep.h"
 
-#include 
-#include 
+#include 
+#include 
 
 #include "block/block_int.h"
 #include "block/qdict.h"
@@ -44,13 +44,11 @@
 #include "trace.h"
 
 /*
- * TRACE_LIBSSH2= enables tracing in libssh2 itself.  Note
- * that this requires that libssh2 was specially compiled with the
- * `./configure --enable-debug' option, so most likely you will have
- * to compile it yourself.  The meaning of  is described
- * here: http://www.libssh2.org/libssh2_trace.html
+ * TRACE_LIBSSH= enables tracing in libssh itself.
+ * The meaning of  is described here:
+ * http://api.libssh.org/master/group__libssh__log.html
  */
-#define TRACE_LIBSSH2 0 /* or try: LIBSSH2_TRACE_SFTP */
+#define TRACE_LIBSSH  0 /* see: SSH_LOG_* */
 
 typedef struct BDRVSSHState {
 /* Coroutine. */
@@ -58,18 +56,15 @@ typedef struct BDRVSSHState {
 
 /* SSH connectio

Re: [Qemu-block] [PATCH v5 04/12] block/io_uring: implements interfaces for io_uring

2019-06-12 Thread Stefan Hajnoczi
On Tue, Jun 11, 2019 at 07:17:14PM +0800, Fam Zheng wrote:
> On Mon, 06/10 19:18, Aarushi Mehta wrote:
> > +/* Prevent infinite loop if submission is refused */
> > +if (ret <= 0) {
> > +if (ret == -EAGAIN) {
> > +continue;
> > +}
> > +break;
> > +}
> > +s->io_q.in_flight += ret;
> > +s->io_q.in_queue  -= ret;
> > +}
> > +s->io_q.blocked = (s->io_q.in_queue > 0);
> 
> I'm confused about s->io_q.blocked. ioq_submit is where it gets updated, but
> if it becomes true, calling ioq_submit will be fenced. So how does it get
> cleared?

When blocked, additional I/O requests are not submitted until the next
completion.  See qemu_luring_process_completions_and_submit() for the
code path where ioq_submit() gets called again.

Stefan


signature.asc
Description: PGP signature


Re: [Qemu-block] [Qemu-devel] [PATCH v2 03/11] monitor: Make MonitorQMP a child class of Monitor

2019-06-12 Thread Markus Armbruster
Kevin Wolf  writes:

> Am 12.06.2019 um 09:59 hat Markus Armbruster geschrieben:
>> Kevin Wolf  writes:
>> 
>> > Currently, struct Monitor mixes state that is only relevant for HMP,
>> > state that is only relevant for QMP, and some actually shared state.
>> > In particular, a MonitorQMP field is present in the state of any
>> > monitor, even if it's not a QMP monitor and therefore doesn't use the
>> > state.
>> >
>> > As a first step towards a clean separation between QMP and HMP, let
>> > MonitorQMP extend Monitor and create a MonitorQMP object only when the
>> > monitor is actually a QMP monitor.
>> >
>> > Signed-off-by: Kevin Wolf 
>> > Reviewed-by: Dr. David Alan Gilbert 
>> 
>> This is a bit harder to review than necessary, because it mixes the
>> largely mechanical "replace QMP member by child class" with the
>> necessary prerequisite "clean up to access QMP stuff only when the
>> monitor is actually a QMP monitor".  I'm going to post a split.
>> 
>> Effectively preexisting: we go from Monitor * to MonitorQMP * without
>> checking in several places.  I'll throw in assertions.
>
> Since I don't think doing both in one patch makes review a lot harder
> (and in fact think your patch 2.5 is harder to review for completeness
> than the combined patch)

I disagree with the parenthesis.  The completeness argument is really
simple: each occurrence of member qmp is either guarded by a "is a QMP
monitor" conditional, or an "is a QMP monitor" assertion, or in a
callback that takes a QMP monitor converted to void * (I didn't bother
asserting anything there).

>  and since both Dave and you already reviewed
> the patch in its current form

Actually, I didn't review the patch "in its current form", because I
found that more bothersome than splitting it up and reviewing the parts.

By effectively squashing together the parts, you have of course every
right to claim the resulting code passed my review.  That's not quite
the same as claiming my R-by for the *patch*.

>   I don't want to invalidate that
> review, I'm going to keep it as a single patch and just squash in the
> additional assertions where container_of() is used. The resulting code
> is the same anyway.

Having the commit message explain that the patch mixes mechanical change
for the "replace QMP member by child class" data reorganization with its
prerequisite cleanup "access QMP stuff only when the monitor is actually
a QMP monitor" might suffice to make me acquiesce to the squashed patch.



Re: [Qemu-block] [Qemu-devel] [PATCH v2 00/11] monitor: Split monitor.c in core/HMP/QMP/misc

2019-06-12 Thread Markus Armbruster
Kevin Wolf  writes:

> monitor.c mixes a lot of different things in a single file: The core
> monitor infrastructure, HMP infrastructure, QMP infrastructure, and the
> implementation of several HMP and QMP commands. Almost worse, struct
> Monitor mixes state for HMP, for QMP, and state actually shared between
> all monitors. monitor.c must be linked with a system emulator and even
> requires per-target compilation because some of the commands it
> implements access system emulator state.

Also: it's so fat it hasn't seen its feet in years.

> The reason why I care about this is that I'm working on a prototype for a
> storage daemon, which wants to use QMP (but probably not HMP) and
> obviously doesn't have any system emulator state. So I'm interested in
> some core monitor parts that can be linked to non-system-emulator tools.
>
> This series first creates separate structs MonitorQMP and MonitorHMP
> which inherit from Monitor, and then moves the associated infrastructure
> code into separate source files.
>
> While the split is probably not perfect,

It's not :)

>  I think it's an improvement of
> the current state even for QEMU proper,

It very much is!

There are a few issues to address, but nothing structural.  Looking
forward to v3.

> and it's good enough so I can
> link my storage daemon against just monitor/core.o and monitor/qmp.o and
> get a useless QMP monitor that parses the JSON input and rejects
> everything as an unknown command.
>
> Next I'll try to teach it a subset of QMP commands that can actually be
> supported in a tool, but while there will be a few follow-up patches to
> achieve this, I don't expect that this work will bring up much that
> needs to be changed in the splitting process done in this series.



Re: [Qemu-block] [Qemu-devel] [PATCH v2 04/11] monitor: Create MonitorHMP with readline state

2019-06-12 Thread Dr. David Alan Gilbert
* Markus Armbruster (arm...@redhat.com) wrote:
> Kevin Wolf  writes:
> 
> > Am 12.06.2019 um 11:07 hat Markus Armbruster geschrieben:
> >> Cc: Peter for a monitor I/O thread question.
> >> 
> >> Kevin Wolf  writes:
> >> 
> >> > The ReadLineState in Monitor is only used for HMP monitors. Create
> >> > MonitorHMP and move it there.
> >> >
> >> > Signed-off-by: Kevin Wolf 
> >> > Reviewed-by: Dr. David Alan Gilbert 
> >
> >> > @@ -218,6 +210,17 @@ struct Monitor {
> >> >  int mux_out;
> >> >  };
> >> >  
> >> > +struct MonitorHMP {
> >> > +Monitor common;
> >> > +/*
> >> > + * State used only in the thread "owning" the monitor.
> >> > + * If @use_io_thread, this is @mon_iothread.
> >> > + * Else, it's the main thread.
> >> > + * These members can be safely accessed without locks.
> >> > + */
> >> > +ReadLineState *rs;
> >> > +};
> >> > +
> >> 
> >> Hmm.
> >> 
> >> The monitor I/O thread code makes an effort not to restrict I/O thread
> >> use to QMP, even though we only use it there.  Whether the code would
> >> actually work for HMP as well we don't know.
> >> 
> >> Readline was similar until your PATCH 02: the code made an effort not to
> >> restrict it to HMP, even though we only use it there.  Whether the code
> >> would actually work for QMP as well we don't know.
> >> 
> >> Should we stop pretending and hard-code "I/O thread only for QMP"?
> >> 
> >> If yes, the comment above gets simplified by the patch that hard-codes
> >> "I/O thread only for QMP".
> >> 
> >> If no, we should perhaps point out that we currently don't use an I/O
> >> thread with HMP.  The comment above seems like a good place for that.
> >> 
> >> Perhaps restricting readline to HMP should be a separate patch before
> >> PATCH 02.
> >
> > Yes, possibly iothreads could be restricted to QMP. It doesn't help me
> > in splitting the monitor in any way, though, so I don't see it within
> > the scope of this series.
> 
> That's okay.
> 
> Would you mind pointing out we don't actually use an I/O thread with HMP
> in the comment?
> 
> > Keeping readline around for QMP, on the other hand, would probably have
> > been harder than making the restriction.
> >
> > As for splitting patch 2, I don't think that reorganising a patch that
> > already does its job and already received review is the most productive
> > thing we could do, but if you insist on a separate patch, I can do that.
> 
> No, I don't insist.
> 
> >> > @@ -748,12 +754,13 @@ char *qmp_human_monitor_command(const char 
> >> > *command_line, bool has_cpu_index,
> >> >  int64_t cpu_index, Error **errp)
> >> >  {
> >> >  char *output = NULL;
> >> > -Monitor *old_mon, hmp;
> >> > +Monitor *old_mon;
> >> > +MonitorHMP hmp = {};
> >> 
> >> Any particular reason for adding the initializer?
> >
> > Yes:
> >
> >> >  
> >> > -monitor_data_init(&hmp, 0, true, false);
> >> > +monitor_data_init(&hmp.common, 0, true, false);
> >
> > monitor_data_init() does a memset(), but only on hmp.common, so the
> > > fields outside of hmp.common would remain uninitialised. Specifically,
> > hmp.rs wouldn't be initialised to NULL and attempting to free it in the
> > end would crash.
> 
> I see.
> 
> Drop the superfluous memset() in monitor_data_init() then.
> 
> >> >  old_mon = cur_mon;
> >> > -cur_mon = &hmp;
> >> > +cur_mon = &hmp.common;
> >> >  
> >> >  if (has_cpu_index) {
> >> >  int ret = monitor_set_cpu(cpu_index);
> >
> >> > @@ -1341,16 +1348,19 @@ static void hmp_info_sync_profile(Monitor *mon, 
> >> > const QDict *qdict)
> >> >  
> >> >  static void hmp_info_history(Monitor *mon, const QDict *qdict)
> >> >  {
> >> > +MonitorHMP *hmp_mon = container_of(mon, MonitorHMP, common);
> >> 
> >> Unchecked conversion.  Tolerable, I think, since HMP command handlers
> >> generally don't get invoked manually, unlike QMP command handlers.
> >
> > I would like to see all HMP command handlers take MonitorHMP* instead of
> > Monitor*, but that would be a big ugly patch touching everything that
> > isn't really needed for the goal of this series, so I didn't include it.
> 
> I consider the MonitorHMP job incomplete without it.  But it's Dave's
> turf.

I'd rather see stuff move forward and then fix that later when someone
has the time.

Dave

> > If you consider it valuable to get rid of this container_of(), that's
> > probably the follow-up you could do.
> 
> My recent qemu-common.h pull request temporarily cooled my enthusiasm
> for big, ugly patches touching everything...
> 
> >> > @@ -4460,6 +4474,7 @@ static void monitor_qmp_event(void *opaque, int 
> >> > event)
> >> >  static void monitor_event(void *opaque, int event)
> >> >  {
> >> >  Monitor *mon = opaque;
> >> > +MonitorHMP *hmp_mon = container_of(cur_mon, MonitorHMP, common);
> >> 
> >> Any particular reason for changing from @opaque to @cur_mon?
> >
> > Probably a copy & paste error, thanks for catching it! I'll fix it.
> >
> >>

Re: [Qemu-block] [Qemu-devel] [PATCH v2 04/11] monitor: Create MonitorHMP with readline state

2019-06-12 Thread Markus Armbruster
Kevin Wolf  writes:

> Am 12.06.2019 um 11:07 hat Markus Armbruster geschrieben:
>> Cc: Peter for a monitor I/O thread question.
>> 
>> Kevin Wolf  writes:
>> 
>> > The ReadLineState in Monitor is only used for HMP monitors. Create
>> > MonitorHMP and move it there.
>> >
>> > Signed-off-by: Kevin Wolf 
>> > Reviewed-by: Dr. David Alan Gilbert 
>
>> > @@ -218,6 +210,17 @@ struct Monitor {
>> >  int mux_out;
>> >  };
>> >  
>> > +struct MonitorHMP {
>> > +Monitor common;
>> > +/*
>> > + * State used only in the thread "owning" the monitor.
>> > + * If @use_io_thread, this is @mon_iothread.
>> > + * Else, it's the main thread.
>> > + * These members can be safely accessed without locks.
>> > + */
>> > +ReadLineState *rs;
>> > +};
>> > +
>> 
>> Hmm.
>> 
>> The monitor I/O thread code makes an effort not to restrict I/O thread
>> use to QMP, even though we only use it there.  Whether the code would
>> actually work for HMP as well we don't know.
>> 
>> Readline was similar until your PATCH 02: the code made an effort not to
>> restrict it to HMP, even though we only use it there.  Whether the code
>> would actually work for QMP as well we don't know.
>> 
>> Should we stop pretending and hard-code "I/O thread only for QMP"?
>> 
>> If yes, the comment above gets simplified by the patch that hard-codes
>> "I/O thread only for QMP".
>> 
>> If no, we should perhaps point out that we currently don't use an I/O
>> thread with HMP.  The comment above seems like a good place for that.
>> 
>> Perhaps restricting readline to HMP should be a separate patch before
>> PATCH 02.
>
> Yes, possibly iothreads could be restricted to QMP. It doesn't help me
> in splitting the monitor in any way, though, so I don't see it within
> the scope of this series.

That's okay.

Would you mind pointing out we don't actually use an I/O thread with HMP
in the comment?

> Keeping readline around for QMP, on the other hand, would probably have
> been harder than making the restriction.
>
> As for splitting patch 2, I don't think that reorganising a patch that
> already does its job and already received review is the most productive
> thing we could do, but if you insist on a separate patch, I can do that.

No, I don't insist.

>> > @@ -748,12 +754,13 @@ char *qmp_human_monitor_command(const char 
>> > *command_line, bool has_cpu_index,
>> >  int64_t cpu_index, Error **errp)
>> >  {
>> >  char *output = NULL;
>> > -Monitor *old_mon, hmp;
>> > +Monitor *old_mon;
>> > +MonitorHMP hmp = {};
>> 
>> Any particular reason for adding the initializer?
>
> Yes:
>
>> >  
>> > -monitor_data_init(&hmp, 0, true, false);
>> > +monitor_data_init(&hmp.common, 0, true, false);
>
> monitor_data_init() does a memset(), but only on hmp.common, so the
> fields outside of hmp.common would remain uninitialised. Specifically,
> hmp.rs wouldn't be initialised to NULL and attempting to free it in the
> end would crash.

I see.

Drop the superfluous memset() in monitor_data_init() then.

>> >  old_mon = cur_mon;
>> > -cur_mon = &hmp;
>> > +cur_mon = &hmp.common;
>> >  
>> >  if (has_cpu_index) {
>> >  int ret = monitor_set_cpu(cpu_index);
>
>> > @@ -1341,16 +1348,19 @@ static void hmp_info_sync_profile(Monitor *mon, 
>> > const QDict *qdict)
>> >  
>> >  static void hmp_info_history(Monitor *mon, const QDict *qdict)
>> >  {
>> > +MonitorHMP *hmp_mon = container_of(mon, MonitorHMP, common);
>> 
>> Unchecked conversion.  Tolerable, I think, since HMP command handlers
>> generally don't get invoked manually, unlike QMP command handlers.
>
> I would like to see all HMP command handlers take MonitorHMP* instead of
> Monitor*, but that would be a big ugly patch touching everything that
> isn't really needed for the goal of this series, so I didn't include it.

I consider the MonitorHMP job incomplete without it.  But it's Dave's
turf.

> If you consider it valuable to get rid of this container_of(), that's
> probably the follow-up you could do.

My recent qemu-common.h pull request temporarily cooled my enthusiasm
for big, ugly patches touching everything...

>> > @@ -4460,6 +4474,7 @@ static void monitor_qmp_event(void *opaque, int 
>> > event)
>> >  static void monitor_event(void *opaque, int event)
>> >  {
>> >  Monitor *mon = opaque;
>> > +MonitorHMP *hmp_mon = container_of(cur_mon, MonitorHMP, common);
>> 
>> Any particular reason for changing from @opaque to @cur_mon?
>
> Probably a copy & paste error, thanks for catching it! I'll fix it.
>
>> > @@ -4662,11 +4679,11 @@ static void monitor_init_qmp(Chardev *chr, int 
>> > flags)
>> >  
>> >  static void monitor_init_hmp(Chardev *chr, int flags)
>> >  {
>> > -Monitor *mon = g_malloc(sizeof(*mon));
>> > +MonitorHMP *mon = g_malloc0(sizeof(*mon));
>> 
>> Any particular reason for changing to g_malloc0()?
>> 
>> You hid the same change for monitor_init_qmp() in PATCH 03, where I
>> missed it

Re: [Qemu-block] [Qemu-devel] [PATCH v2 11/11] monitor: Split out monitor/monitor.c

2019-06-12 Thread Markus Armbruster
Kevin Wolf  writes:

> Move the monitor core infrastructure from monitor/misc.c to
> monitor/monitor.c. This is code that can be shared for all targets, so
> compile it only once.
>
> What remains in monitor/misc.c after this patch is mostly monitor
> command implementations and code that requires a system emulator or is
> even target-dependent.

I think target-independent command handlers should move to qmp-cmds.c
and hmp-cmds.c.  Okay to leave for later.  Mentioning it in the commit
message wouldn't hurt, though.

Also left in misc.c, and bulky enough for mention: completion callbacks.
They should perhaps move next to their command handlers.  Okay to leave
for later.

> The amount of function and particularly extern variables in
> monitor_int.h is probably a bit larger than it needs to be, but this way
> no non-trivial code modifications are needed. The interfaces between all
> monitor parts can be cleaned up later.
>
> Signed-off-by: Kevin Wolf 
> Reviewed-by: Dr. David Alan Gilbert 
> ---
>  include/monitor/monitor.h |   1 +
>  monitor/monitor_int.h |   1 +
>  monitor/misc.c| 568 +--
>  monitor/monitor.c | 605 ++
>  MAINTAINERS   |   2 +
>  monitor/Makefile.objs |   2 +-
>  monitor/trace-events  |   2 +-
>  7 files changed, 612 insertions(+), 569 deletions(-)
>  create mode 100644 monitor/monitor.c
>
> diff --git a/include/monitor/monitor.h b/include/monitor/monitor.h
> index 8547529e49..b9f8d175ed 100644
> --- a/include/monitor/monitor.h
> +++ b/include/monitor/monitor.h
> @@ -20,6 +20,7 @@ typedef struct MonitorHMP MonitorHMP;
>  bool monitor_cur_is_qmp(void);
>  
>  void monitor_init_globals(void);
> +void monitor_init_globals_core(void);
>  void monitor_init(Chardev *chr, int flags);
>  void monitor_init_qmp(Chardev *chr, int flags);
>  void monitor_init_hmp(Chardev *chr, int flags);
> diff --git a/monitor/monitor_int.h b/monitor/monitor_int.h
> index 88eaed9c5c..d5fb5162f3 100644
> --- a/monitor/monitor_int.h
> +++ b/monitor/monitor_int.h
> @@ -191,6 +191,7 @@ extern mon_cmd_t mon_cmds[];
>  int monitor_puts(Monitor *mon, const char *str);
>  void monitor_data_init(Monitor *mon, int flags, bool skip_flush,
> bool use_io_thread);
> +void monitor_data_destroy(Monitor *mon);
>  int monitor_can_read(void *opaque);
>  void monitor_list_append(Monitor *mon);
>  void monitor_fdsets_cleanup(void);
> diff --git a/monitor/misc.c b/monitor/misc.c
> index 4f1168b7c3..a4ec850493 100644
> --- a/monitor/misc.c
> +++ b/monitor/misc.c
> @@ -62,7 +62,6 @@
>  #include "qapi/qmp/json-parser.h"
>  #include "qapi/qmp/qlist.h"
>  #include "qom/object_interfaces.h"
> -#include "trace.h"
>  #include "trace/control.h"
>  #include "monitor/hmp-target.h"
>  #ifdef CONFIG_TRACE_SIMPLE
> @@ -117,43 +116,13 @@ struct MonFdset {
>  QLIST_ENTRY(MonFdset) next;
>  };
>  
> -/*
> - * To prevent flooding clients, events can be throttled. The
> - * throttling is calculated globally, rather than per-Monitor
> - * instance.
> - */
> -typedef struct MonitorQAPIEventState {
> -QAPIEvent event;/* Throttling state for this event type and... */
> -QDict *data;/* ... data, see qapi_event_throttle_equal() */
> -QEMUTimer *timer;   /* Timer for handling delayed events */
> -QDict *qdict;   /* Delayed event (if any) */
> -} MonitorQAPIEventState;
> -
> -typedef struct {
> -int64_t rate;   /* Minimum time (in ns) between two events */
> -} MonitorQAPIEventConf;
> -
> -/* Shared monitor I/O thread */
> -IOThread *mon_iothread;
> -
> -/* Bottom half to dispatch the requests received from I/O thread */
> -QEMUBH *qmp_dispatcher_bh;
> -
>  /* QMP checker flags */
>  #define QMP_ACCEPT_UNKNOWNS 1
>  
> -/* Protects mon_list, monitor_qapi_event_state, monitor_destroyed.  */
> -QemuMutex monitor_lock;
> -static GHashTable *monitor_qapi_event_state;
> -MonitorList mon_list;
> -static bool monitor_destroyed;
> -
>  /* Protects mon_fdsets */
>  static QemuMutex mon_fdsets_lock;
>  static QLIST_HEAD(, MonFdset) mon_fdsets;
>  
> -int mon_refcount;
> -
>  static mon_cmd_t info_cmds[];
>  
>  __thread Monitor *cur_mon;
> @@ -161,32 +130,6 @@ __thread Monitor *cur_mon;
>  static void monitor_command_cb(void *opaque, const char *cmdline,
> void *readline_opaque);
>  
> -/**
> - * Is @mon is using readline?
> - * Note: not all HMP monitors use readline, e.g., gdbserver has a
> - * non-interactive HMP monitor, so readline is not used there.
> - */
> -static inline bool monitor_uses_readline(const Monitor *mon)
> -{
> -return mon->flags & MONITOR_USE_READLINE;
> -}
> -
> -static inline bool monitor_is_hmp_non_interactive(const Monitor *mon)
> -{
> -return !monitor_is_qmp(mon) && !monitor_uses_readline(mon);
> -}
> -
> -/*
> - * Return the clock to use for recording an event's time.
> - * It's QEMU_CLOCK_REALTIME, except for qtests it's
> - * QEMU_CLO

Re: [Qemu-block] [Qemu-devel] [PATCH v2 05/11] monitor: Move cmd_table to MonitorHMP

2019-06-12 Thread Kevin Wolf
Am 12.06.2019 um 13:45 hat Markus Armbruster geschrieben:
> Kevin Wolf  writes:
> 
> > Monitor.cmd_table contains the handlers for HMP commands, so there is no
> > reason to keep it in the state shared with QMP. Move it to MonitorHMP.
> >
> > Signed-off-by: Kevin Wolf 
> > Reviewed-by: Dr. David Alan Gilbert 
> > ---
> >  monitor.c | 23 +++
> >  1 file changed, 15 insertions(+), 8 deletions(-)
> >
> > diff --git a/monitor.c b/monitor.c
> > index f8730e4462..56af8ed448 100644
> > --- a/monitor.c
> > +++ b/monitor.c
> > @@ -191,7 +191,6 @@ struct Monitor {
> >  bool use_io_thread;
> >  
> >  gchar *mon_cpu_path;
> > -mon_cmd_t *cmd_table;
> >  QTAILQ_ENTRY(Monitor) entry;
> >  
> >  /*
> > @@ -219,6 +218,7 @@ struct MonitorHMP {
> >   * These members can be safely accessed without locks.
> >   */
> >  ReadLineState *rs;
> > +mon_cmd_t *cmd_table;
> >  };
> >  
> >  typedef struct {
> > @@ -720,13 +720,19 @@ static void monitor_data_init(Monitor *mon, int 
> > flags, bool skip_flush,
> >  memset(mon, 0, sizeof(Monitor));
> >  qemu_mutex_init(&mon->mon_lock);
> >  mon->outbuf = qstring_new();
> > -/* Use *mon_cmds by default. */
> > -mon->cmd_table = mon_cmds;
> 
> As far as I can tell, this is the only assignment to Monitor member
> cmd_table.  Why not delete it outright, and use mon_cmds directly?
> Preferably renamed to something like hmp_cmds.

Good idea, I'll do that.

Kevin



Re: [Qemu-block] [SeaBIOS] [QEMU] [PATCH v2 0/8] Add Qemu to SeaBIOS LCHS interface

2019-06-12 Thread no-reply
Patchew URL: 
https://patchew.org/QEMU/20190612115939.23825-1-shmuel.eider...@oracle.com/



Hi,

This series failed the asan build test. Please find the testing commands and
their output below. If you have Docker installed, you can probably reproduce it
locally.

=== TEST SCRIPT BEGIN ===
#!/bin/bash
time make docker-test-debug@fedora TARGET_LIST=x86_64-softmmu J=14 NETWORK=1
=== TEST SCRIPT END ===

PASS 5 check-qjson /literals/number/simple
PASS 6 check-qjson /literals/number/large
PASS 7 check-qjson /literals/number/float
==9311==WARNING: ASan doesn't fully support makecontext/swapcontext functions 
and may produce false positives in some cases!
PASS 8 check-qjson /literals/interpolation/valid
PASS 9 check-qjson /literals/interpolation/unkown
PASS 10 check-qjson /literals/interpolation/string
---
PASS 32 test-opts-visitor /visitor/opts/range/beyond
PASS 33 test-opts-visitor /visitor/opts/dict/unvisited
MALLOC_PERTURB_=${MALLOC_PERTURB_:-$(( ${RANDOM:-0} % 255 + 1))}  
tests/test-coroutine -m=quick -k --tap < /dev/null | ./scripts/tap-driver.pl 
--test-name="test-coroutine" 
==9369==WARNING: ASan doesn't fully support makecontext/swapcontext functions 
and may produce false positives in some cases!
==9369==WARNING: ASan is ignoring requested __asan_handle_no_return: stack top: 
0x7ffc405a1000; bottom 0x7efed01f8000; size: 0x00fd703a9000 (1088509612032)
False positive error reports may follow
For details see https://github.com/google/sanitizers/issues/189
PASS 1 test-coroutine /basic/no-dangling-access
---
PASS 11 test-aio /aio/event/wait
PASS 12 test-aio /aio/event/flush
PASS 13 test-aio /aio/event/wait/no-flush-cb
==9384==WARNING: ASan doesn't fully support makecontext/swapcontext functions 
and may produce false positives in some cases!
PASS 14 test-aio /aio/timer/schedule
PASS 15 test-aio /aio/coroutine/queue-chaining
PASS 16 test-aio /aio-gsource/flush
---
PASS 13 fdc-test /x86_64/fdc/fuzz-registers
MALLOC_PERTURB_=${MALLOC_PERTURB_:-$(( ${RANDOM:-0} % 255 + 1))}  
QTEST_QEMU_BINARY=x86_64-softmmu/qemu-system-x86_64 QTEST_QEMU_IMG=qemu-img 
tests/ide-test -m=quick -k --tap < /dev/null | ./scripts/tap-driver.pl 
--test-name="ide-test" 
PASS 28 test-aio /aio-gsource/timer/schedule
==9393==WARNING: ASan doesn't fully support makecontext/swapcontext functions 
and may produce false positives in some cases!
MALLOC_PERTURB_=${MALLOC_PERTURB_:-$(( ${RANDOM:-0} % 255 + 1))}  
tests/test-aio-multithread -m=quick -k --tap < /dev/null | 
./scripts/tap-driver.pl --test-name="test-aio-multithread" 
==9400==WARNING: ASan doesn't fully support makecontext/swapcontext functions 
and may produce false positives in some cases!
PASS 1 test-aio-multithread /aio/multi/lifecycle
PASS 1 ide-test /x86_64/ide/identify
==9414==WARNING: ASan doesn't fully support makecontext/swapcontext functions 
and may produce false positives in some cases!
PASS 2 test-aio-multithread /aio/multi/schedule
PASS 2 ide-test /x86_64/ide/flush
==9425==WARNING: ASan doesn't fully support makecontext/swapcontext functions 
and may produce false positives in some cases!
PASS 3 test-aio-multithread /aio/multi/mutex/contended
PASS 3 ide-test /x86_64/ide/bmdma/simple_rw
==9436==WARNING: ASan doesn't fully support makecontext/swapcontext functions 
and may produce false positives in some cases!
PASS 4 ide-test /x86_64/ide/bmdma/trim
==9442==WARNING: ASan doesn't fully support makecontext/swapcontext functions 
and may produce false positives in some cases!
PASS 5 ide-test /x86_64/ide/bmdma/short_prdt
==9448==WARNING: ASan doesn't fully support makecontext/swapcontext functions 
and may produce false positives in some cases!
PASS 4 test-aio-multithread /aio/multi/mutex/handoff
PASS 6 ide-test /x86_64/ide/bmdma/one_sector_short_prdt
==9459==WARNING: ASan doesn't fully support makecontext/swapcontext functions 
and may produce false positives in some cases!
PASS 5 test-aio-multithread /aio/multi/mutex/mcs
PASS 7 ide-test /x86_64/ide/bmdma/long_prdt
==9470==WARNING: ASan doesn't fully support makecontext/swapcontext functions 
and may produce false positives in some cases!
==9470==WARNING: ASan is ignoring requested __asan_handle_no_return: stack top: 
0x7ffef4f62000; bottom 0x7fda7dbfe000; size: 0x002477364000 (156618866688)
False positive error reports may follow
For details see https://github.com/google/sanitizers/issues/189
PASS 6 test-aio-multithread /aio/multi/mutex/pthread
MALLOC_PERTURB_=${MALLOC_PERTURB_:-$(( ${RANDOM:-0} % 255 + 1))}  
tests/test-throttle -m=quick -k --tap < /dev/null | ./scripts/tap-driver.pl 
--test-name="test-throttle" 
PASS 8 ide-test /x86_64/ide/bmdma/no_busmaster
==9478==WARNING: ASan doesn't fully support makecontext/swapcontext functions 
and may produce false positives in some cases!
PASS 1 test-throttle /throttle/leak_bucket
PASS 2 test-throttle /throttle/compute_wait
PASS 3 test-throttle /throttle/init
---
PASS 14 test-throttle /throttle/config/max
PASS 15 test-throttle /throttle/config/iops_size
MALLOC_PERTURB_=${MALL

Re: [Qemu-block] [QEMU] [PATCH v2 0/8] Add Qemu to SeaBIOS LCHS interface

2019-06-12 Thread Sam Eiderman



> On 12 Jun 2019, at 16:06, Gerd Hoffmann  wrote:
> 
> On Wed, Jun 12, 2019 at 02:59:31PM +0300, Sam Eiderman wrote:
>> v1:
>> 
>> Non-standard logical geometries break under QEMU.
>> 
>> A virtual disk which contains an operating system which depends on
>> logical geometries (consistent values being reported from BIOS INT13
>> AH=08) will most likely break under QEMU/SeaBIOS if it has non-standard
>> logical geometries - for example 56 SPT (sectors per track).
>> No matter what QEMU will guess - SeaBIOS, for large enough disks - will
>> use LBA translation, which will report 63 SPT instead.
> 
> --verbose please.
> 
> As far I know seabios switches to LBA mode when the disk is simply too
> big for LCHS addressing.  So I fail to see which problem is solved by
> this.  If your guest needs LCHS, why do you assign a disk which can't
> be fully accessed using LCHS addressing?

The scenario is as follows:

A user has a disk with 56 spts.
This disk has already been created under a BIOS that reported 56 spts.
When migrating this disk to QEMU/SeaBIOS, SeaBIOS will report 63 spts
(under LBA translation) - this will break the boot for this guest.

> 
>> In addition we can not enforce SeaBIOS to rely on phyiscal geometries at
>> all. A virtio-blk-pci virtual disk with 255 phyiscal heads can not
>> report more than 16 physical heads when moved to an IDE controller, the
>> ATA spec allows a maximum of 16 heads - this is an artifact of
>> virtualization.
> 
> Well, not really.  Moving disks from one controller to another when the
> OS depends on LHCS addressing never is a good idea.  That already caused
> problems in the 90-ies, when moving scsi disks from one scsi host
> adapter to another type, *way* before virtualization became a thing.

I agree, but this is easily solvable in virtualized environments where the
hypervisor can guess the correct LCHS values by inspecting the MBR,
or letting the user set these values manually.

> 
> BTW:  One possible way to figure which LCHS layout a disk uses is to
> check the MBR partition table.  With that we (a) don't need a new
> interface between qemu and seabios and (b) it is not needed to manually
> specify the geometry.

In my opinion SeaBIOS is not the correct place for this change since
“enhancing” the detection of LCHS values in SeaBIOS may cause it to
suddenly report different values for already existing guests which rely on
LCHS - thus, breaking compatibility.
Much like smbios, acpi and mptables - I believe that the correct place to
use MBR guessing is QEMU (which already has one, with some issues)
and pass the guess using fw_cfg - this will allow using the compat system
in qemu itself.

Sam

> 
> cheers,
>  Gerd
> 




Re: [Qemu-block] [PATCH v6] ssh: switch from libssh2 to libssh

2019-06-12 Thread Philippe Mathieu-Daudé
Cc'ing Alex (Docker, Travis) and Stefan (MinGW)

On 6/5/19 11:36 PM, Pino Toscano wrote:
> Rewrite the implementation of the ssh block driver to use libssh instead
> of libssh2.  The libssh library has various advantages over libssh2:
> - easier API for authentication (for example for using ssh-agent)
> - easier API for known_hosts handling
> - supports newer types of keys in known_hosts
> 
> Use APIs/features available in libssh 0.8 conditionally, to support
> older versions (which are not recommended though).
> 
> Signed-off-by: Pino Toscano 
> ---
> 
> Changes from v5:
> - adapt to newer tracing APIs
> - disable ssh compression (mimic what libssh2 does by default)
> - use build time checks for libssh 0.8, and use newer APIs directly
> 
> Changes from v4:
> - fix wrong usages of error_setg/session_error_setg/sftp_error_setg
> - fix few return code checks
> - remove now-unused parameters in few internal functions
> - allow authentication with "none" method
> - switch to unsigned int for the port number
> - enable TCP_NODELAY on the socket
> - fix one reference error message in iotest 207
> 
> Changes from v3:
> - fix socket cleanup in connect_to_ssh()
> - add comments about the socket cleanup
> - improve the error reporting (closer to what was with libssh2)
> - improve EOF detection on sftp_read()
> 
> Changes from v2:
> - used again an own fd
> - fixed co_yield() implementation
> 
> Changes from v1:
> - fixed jumbo packets writing
> - fixed missing 'err' assignment
> - fixed commit message
> 
>  block/Makefile.objs|   6 +-
>  block/ssh.c| 610 +++--
>  block/trace-events |  14 +-
>  configure  |  62 ++--
>  tests/qemu-iotests/207.out |   2 +-
>  5 files changed, 351 insertions(+), 343 deletions(-)
> 
> diff --git a/block/Makefile.objs b/block/Makefile.objs
> index ae11605c9f..bf01429dd5 100644
> --- a/block/Makefile.objs
> +++ b/block/Makefile.objs
> @@ -31,7 +31,7 @@ block-obj-$(CONFIG_CURL) += curl.o
>  block-obj-$(CONFIG_RBD) += rbd.o
>  block-obj-$(CONFIG_GLUSTERFS) += gluster.o
>  block-obj-$(CONFIG_VXHS) += vxhs.o
> -block-obj-$(CONFIG_LIBSSH2) += ssh.o
> +block-obj-$(CONFIG_LIBSSH) += ssh.o
>  block-obj-y += accounting.o dirty-bitmap.o
>  block-obj-y += write-threshold.o
>  block-obj-y += backup.o
> @@ -52,8 +52,8 @@ rbd.o-libs := $(RBD_LIBS)
>  gluster.o-cflags   := $(GLUSTERFS_CFLAGS)
>  gluster.o-libs := $(GLUSTERFS_LIBS)
>  vxhs.o-libs:= $(VXHS_LIBS)
> -ssh.o-cflags   := $(LIBSSH2_CFLAGS)
> -ssh.o-libs := $(LIBSSH2_LIBS)
> +ssh.o-cflags   := $(LIBSSH_CFLAGS)
> +ssh.o-libs := $(LIBSSH_LIBS)
>  block-obj-dmg-bz2-$(CONFIG_BZIP2) += dmg-bz2.o
>  block-obj-$(if $(CONFIG_DMG),m,n) += $(block-obj-dmg-bz2-y)
>  dmg-bz2.o-libs := $(BZIP2_LIBS)
> diff --git a/block/ssh.c b/block/ssh.c
> index 12fd4f39e8..ce2363a471 100644
> --- a/block/ssh.c
> +++ b/block/ssh.c
> @@ -24,8 +24,8 @@
>  
>  #include "qemu/osdep.h"
>  
> -#include 
> -#include 
> +#include 
> +#include 
>  
>  #include "block/block_int.h"
>  #include "block/qdict.h"
> @@ -43,14 +43,13 @@
>  #include "qapi/qobject-output-visitor.h"
>  #include "trace.h"
>  
> -/*
> - * TRACE_LIBSSH2= enables tracing in libssh2 itself.  Note
> - * that this requires that libssh2 was specially compiled with the
> - * `./configure --enable-debug' option, so most likely you will have
> - * to compile it yourself.  The meaning of  is described
> - * here: http://www.libssh2.org/libssh2_trace.html
> +/* TRACE_LIBSSH= enables tracing in libssh itself.
> + * The meaning of  is described here:
> + * http://api.libssh.org/master/group__libssh__log.html
>   */
> -#define TRACE_LIBSSH2 0 /* or try: LIBSSH2_TRACE_SFTP */
> +#define TRACE_LIBSSH  0 /* see: SSH_LOG_* */
> +
> +#define HAVE_LIBSSH_0_8 (LIBSSH_VERSION_INT >= SSH_VERSION_INT(0, 8, 0))

As I noticed with ssh_get_publickey() and reading
https://www.redhat.com/archives/libvir-list/2018-May/msg00597.html, I'm
not convinced this definition is accurate. Used in [1].

>  
>  typedef struct BDRVSSHState {
>  /* Coroutine. */
> @@ -58,18 +57,14 @@ typedef struct BDRVSSHState {
>  
>  /* SSH connection. */
>  int sock; /* socket */
> -LIBSSH2_SESSION *session; /* ssh session */
> -LIBSSH2_SFTP *sftp;   /* sftp session */
> -LIBSSH2_SFTP_HANDLE *sftp_handle; /* sftp remote file handle */
> +ssh_session session;  /* ssh session */
> +sftp_session sftp;/* sftp session */
> +sftp_file sftp_handle;/* sftp remote file handle */
>  
> -/* See ssh_seek() function below. */
> -int64_t offset;
> -bool offset_op_read;
> -
> -/* File attributes at open.  We try to keep the .filesize field
> +/* File attributes at open.  We try to keep the .size field
>   * updated if it changes (eg by writing at the end of the file).
>   */
> -LIBSSH2_SFTP_ATTRIBUTES attrs;

Re: [Qemu-block] [Qemu-devel] [PATCH v2 10/11] monitor: Split out monitor/hmp.c

2019-06-12 Thread Markus Armbruster
Kevin Wolf  writes:

> Move HMP infrastructure from monitor/misc.c to monitor/hmp.c. This is
> code that can be shared for all targets, so compile it only once.
>
> The amount of function and particularly extern variables in
> monitor_int.h is probably a bit larger than it needs to be, but this way
> no non-trivial code modifications are needed. The interfaces between HMP
> and the monitor core can be cleaned up later.
>
> Signed-off-by: Kevin Wolf 
> Reviewed-by: Dr. David Alan Gilbert 
> ---
>  include/monitor/monitor.h |1 +
>  monitor/monitor_int.h |   31 +
>  monitor/hmp.c | 1387 +
>  monitor/misc.c| 1338 +--
>  monitor/Makefile.objs |2 +-
>  monitor/trace-events  |4 +-
>  6 files changed, 1429 insertions(+), 1334 deletions(-)
>  create mode 100644 monitor/hmp.c
>
> diff --git a/include/monitor/monitor.h b/include/monitor/monitor.h
> index 7bbab05320..8547529e49 100644
> --- a/include/monitor/monitor.h
> +++ b/include/monitor/monitor.h
> @@ -22,6 +22,7 @@ bool monitor_cur_is_qmp(void);
>  void monitor_init_globals(void);
>  void monitor_init(Chardev *chr, int flags);
>  void monitor_init_qmp(Chardev *chr, int flags);
> +void monitor_init_hmp(Chardev *chr, int flags);
>  void monitor_cleanup(void);
>  
>  int monitor_suspend(Monitor *mon);
> diff --git a/monitor/monitor_int.h b/monitor/monitor_int.h
> index 4aabee54e1..88eaed9c5c 100644
> --- a/monitor/monitor_int.h
> +++ b/monitor/monitor_int.h
> @@ -27,6 +27,7 @@
>  
>  #include "qemu-common.h"
>  #include "monitor/monitor.h"
> +#include "qemu/cutils.h"
>  
>  #include "qapi/qmp/qdict.h"
>  #include "qapi/qmp/json-parser.h"
> @@ -154,6 +155,29 @@ static inline bool monitor_is_qmp(const Monitor *mon)
>  return (mon->flags & MONITOR_USE_CONTROL);
>  }
>  
> +/**
> + * Is @name in the '|' separated list of names @list?
> + */
> +static inline int compare_cmd(const char *name, const char *list)
> +{
> +const char *p, *pstart;
> +int len;
> +len = strlen(name);
> +p = list;
> +for (;;) {
> +pstart = p;
> +p = qemu_strchrnul(p, '|');
> +if ((p - pstart) == len && !memcmp(pstart, name, len)) {
> +return 1;
> +}
> +if (*p == '\0') {
> +break;
> +}
> +p++;
> +}
> +return 0;
> +}
> +

What's the justification for inline?

>  typedef QTAILQ_HEAD(MonitorList, Monitor) MonitorList;
>  extern IOThread *mon_iothread;
>  extern QEMUBH *qmp_dispatcher_bh;
> @@ -162,6 +186,8 @@ extern QemuMutex monitor_lock;
>  extern MonitorList mon_list;
>  extern int mon_refcount;
>  
> +extern mon_cmd_t mon_cmds[];
> +

Any particular reason for not moving this one to hmp.c, along with
info_cmds?  Question, not demand :)

>  int monitor_puts(Monitor *mon, const char *str);
>  void monitor_data_init(Monitor *mon, int flags, bool skip_flush,
> bool use_io_thread);
> @@ -173,4 +199,9 @@ void qmp_send_response(MonitorQMP *mon, const QDict *rsp);
>  void monitor_data_destroy_qmp(MonitorQMP *mon);
>  void monitor_qmp_bh_dispatcher(void *data);
>  
> +void monitor_data_init_hmp(MonitorHMP *mon, int flags, bool skip_flush);
> +int get_monitor_def(int64_t *pval, const char *name);
> +void help_cmd(Monitor *mon, const char *name);
> +void handle_hmp_command(MonitorHMP *mon, const char *cmdline);
> +
>  #endif
[...]



Re: [Qemu-block] [QEMU] [PATCH v2 0/8] Add Qemu to SeaBIOS LCHS interface

2019-06-12 Thread Gerd Hoffmann
On Wed, Jun 12, 2019 at 02:59:31PM +0300, Sam Eiderman wrote:
> v1:
> 
> Non-standard logical geometries break under QEMU.
> 
> A virtual disk which contains an operating system which depends on
> logical geometries (consistent values being reported from BIOS INT13
> AH=08) will most likely break under QEMU/SeaBIOS if it has non-standard
> logical geometries - for example 56 SPT (sectors per track).
> No matter what QEMU will guess - SeaBIOS, for large enough disks - will
> use LBA translation, which will report 63 SPT instead.

--verbose please.

As far as I know seabios switches to LBA mode when the disk is simply too
big for LCHS addressing.  So I fail to see which problem is solved by
this.  If your guest needs LCHS, why do you assign a disk which can't
be fully accessed using LCHS addressing?

> In addition we can not enforce SeaBIOS to rely on phyiscal geometries at
> all. A virtio-blk-pci virtual disk with 255 phyiscal heads can not
> report more than 16 physical heads when moved to an IDE controller, the
> ATA spec allows a maximum of 16 heads - this is an artifact of
> virtualization.

Well, not really.  Moving disks from one controller to another when the
OS depends on LCHS addressing is never a good idea.  That already caused
problems in the '90s, when moving scsi disks from one scsi host
adapter to another type, *way* before virtualization became a thing.

BTW:  One possible way to figure which LCHS layout a disk uses is to
check the MBR partition table.  With that we (a) don't need a new
interface between qemu and seabios and (b) it is not needed to manually
specify the geometry.

cheers,
  Gerd




Re: [Qemu-block] [Qemu-devel] [QEMU] [PATCH 7/8] bootdevice: FW_CFG interface for LCHS values

2019-06-12 Thread Sam Eiderman


> On 12 Jun 2019, at 15:27, Laszlo Ersek  wrote:
> 
> On 06/12/19 11:42, Sam Eiderman wrote:
>> Using fw_cfg, supply logical CHS values directly from QEMU to the BIOS.
>> 
>> Non-standard logical geometries break under QEMU.
>> 
>> A virtual disk which contains an operating system which depends on
>> logical geometries (consistent values being reported from BIOS INT13
>> AH=08) will most likely break under QEMU/SeaBIOS if it has non-standard
>> logical geometries - for example 56 SPT (sectors per track).
>> No matter what QEMU will report - SeaBIOS, for large enough disks - will
>> use LBA translation, which will report 63 SPT instead.
>> 
>> In addition we cannot force SeaBIOS to rely on physical geometries at
>> all. A virtio-blk-pci virtual disk with 255 phyiscal heads cannot
>> report more than 16 physical heads when moved to an IDE controller,
>> since the ATA spec allows a maximum of 16 heads - this is an artifact of
>> virtualization.
>> 
>> By supplying the logical geometries directly we are able to support such
>> "exotic" disks.
>> 
>> We serialize this information in a similar way to the "bootorder"
>> interface.
>> The fw_cfg entry is "bootdevices" and it serializes a struct.
>> At the moment the struct holds the values of logical CHS values but it
>> can be expanded easily due to the extendable ABI implemented.
>> 
>> (In the future, we can pass the bootindex through "bootdevices" instead
>> "bootorder" - unifying all bootdevice information in one fw_cfg value)
> 
> I would disagree with that. UEFI guest firmware doesn't seem to have any
> use for this new type of information ("logical CHS values"), so the
> current interface (the "bootorder" fw_cfg file) should continue to work.
> The ArmVirtQemu and OVMF platform firmwares (built from the edk2
> project, and bundled with QEMU 4.1+) implement some serious parsing and
> processing for "bootorder”.

I agree, I didn’t mean to say that “bootdevices” will replace “bootorder”,
they will have to reside side by side.
I just meant to emphasize that “bootorder” is not extensible - adding more
disk-specific fields other than bootorder (that for some platforms will be unused)
is not possible. “bootdevices” will work for LCHS, and if another entry has to be
passed - it can be added to “bootdevices”.
Migrating “bootorder” into a different fw_cfg value is a tedious effort that is
probably not worth it.
> 
> Independently, another comment:
> 
>> The PV interface through fw_cfg could have also been implemented using
>> device specific keys, e.g.: "/etc/bootdevice/%s/logical_geometry" where
>> %s is the device name QEMU produces - but this implementation would
>> require much more code refactoring, both in QEMU and SeaBIOS, so the
>> current implementation was chosen.
>> 
>> Reviewed-by: Karl Heubaum 
>> Reviewed-by: Arbel Moshe 
>> Signed-off-by: Sam Eiderman 
>> ---
>> bootdevice.c| 42 ++
>> hw/nvram/fw_cfg.c   | 14 +++---
>> include/sysemu/sysemu.h |  1 +
>> 3 files changed, 54 insertions(+), 3 deletions(-)
>> 
>> diff --git a/bootdevice.c b/bootdevice.c
>> index 2b12fb85a4..84c2a83f25 100644
>> --- a/bootdevice.c
>> +++ b/bootdevice.c
>> @@ -405,3 +405,45 @@ void del_boot_device_lchs(DeviceState *dev, const char 
>> *suffix)
>> }
>> }
>> }
>> +
>> +typedef struct QEMU_PACKED BootDeviceEntrySerialized {
>> +/* Do not change field order - add new fields below */
>> +uint32_t lcyls;
>> +uint32_t lheads;
>> +uint32_t lsecs;
>> +} BootDeviceEntrySerialized;
>> +
>> +/* Serialized as: struct size (4) + (device name\0 + device struct) x 
>> devices */
>> +char *get_boot_devices_info(size_t *size)
>> +{
>> +FWLCHSEntry *i;
>> +BootDeviceEntrySerialized s;
>> +size_t total = 0;
>> +char *list = NULL;
>> +
>> +list = g_malloc0(sizeof(uint32_t));
>> +*((uint32_t *)list) = (uint32_t)sizeof(s);
>> +total = sizeof(uint32_t);
>> +
>> +QTAILQ_FOREACH(i, &fw_lchs, link) {
>> +char *bootpath;
>> +size_t len;
>> +
>> +bootpath = get_boot_device_path(i->dev, false, i->suffix);
>> +s.lcyls = i->lcyls;
>> +s.lheads = i->lheads;
>> +s.lsecs = i->lsecs;
> 
> You should document the endianness of the fields in
> BootDeviceEntrySerialized, and then call byte order conversion functions
> here accordingly (most probably cpu_to_le32()).
> 
> As written, this code would break if you ran qemu-system-x86_64 /
> qemu-system-i386 (with TCG acceleration) on a big endian host.

Nice catch, thanks!

> 
> Thanks
> Laszlo
> 
>> +
>> +len = strlen(bootpath) + 1;
>> +list = g_realloc(list, total + len + sizeof(s));
>> +memcpy(&list[total], bootpath, len);
>> +memcpy(&list[total + len], &s, sizeof(s));
>> +total += len + sizeof(s);
>> +
>> +g_free(bootpath);
>> +}
>> +
>> +*size = total;
>> +
>> +return list;
>> +}
>> diff --git a/hw/nvram/fw_cfg.c b/hw/nvram/fw_cfg.c
>> index 9f7

Re: [Qemu-block] [Qemu-devel] [PATCH v2 09/11] monitor: Split out monitor/qmp.c

2019-06-12 Thread Markus Armbruster
Kevin Wolf  writes:

> Move QMP infrastructure from monitor/misc.c to monitor/qmp.c. This is
> code that can be shared for all targets, so compile it only once.

Less code compiled per target, yay!

> The amount of function and particularly extern variables in
> monitor_int.h is probably a bit larger than it needs to be, but this way
> no non-trivial code modifications are needed. The interfaces between QMP
> and the monitor core can be cleaned up later.

That's okay.

I have to admit I naively expected the previous patch to move everything
we need for splitting up monitor/misc.c into the new header.
How did you decide what to move to the header in which patch?

> Signed-off-by: Kevin Wolf 
> Reviewed-by: Dr. David Alan Gilbert 
> ---
>  include/monitor/monitor.h |   1 +
>  monitor/monitor_int.h |  30 ++-
>  monitor/misc.c| 394 +
>  monitor/qmp.c | 404 ++
>  Makefile.objs |   1 +
>  monitor/Makefile.objs |   1 +
>  monitor/trace-events  |   4 +-
>  7 files changed, 448 insertions(+), 387 deletions(-)
>  create mode 100644 monitor/qmp.c
>
> diff --git a/include/monitor/monitor.h b/include/monitor/monitor.h
> index 1ba354f811..7bbab05320 100644
> --- a/include/monitor/monitor.h
> +++ b/include/monitor/monitor.h
> @@ -21,6 +21,7 @@ bool monitor_cur_is_qmp(void);
>  
>  void monitor_init_globals(void);
>  void monitor_init(Chardev *chr, int flags);
> +void monitor_init_qmp(Chardev *chr, int flags);

Why does this one go to the non-internal header?

>  void monitor_cleanup(void);
>  
>  int monitor_suspend(Monitor *mon);
> diff --git a/monitor/monitor_int.h b/monitor/monitor_int.h
> index 7122418955..4aabee54e1 100644
> --- a/monitor/monitor_int.h
> +++ b/monitor/monitor_int.h
> @@ -30,10 +30,11 @@
>  
>  #include "qapi/qmp/qdict.h"
>  #include "qapi/qmp/json-parser.h"
> -#include "qapi/qapi-commands.h"
> +#include "qapi/qmp/dispatch.h"

This part should be squashed into the previous patch.  You'll
additionally need qapi/qapi-types-misc.h for QMP_CAPABILITY__MAX there,
or keep monitor/monitor.h, even though you need it only here for
MONITOR_USE_CONTROL.

>  
>  #include "qemu/readline.h"
>  #include "chardev/char-fe.h"
> +#include "sysemu/iothread.h"

Perhaps IOThread should be typedef'ed in qemu/typedefs.h.  I'm not
asking you to do that.

>  
>  /*
>   * Supported types:
> @@ -145,4 +146,31 @@ typedef struct {
>  GQueue *qmp_requests;
>  } MonitorQMP;
>  
> +/**
> + * Is @mon a QMP monitor?
> + */
> +static inline bool monitor_is_qmp(const Monitor *mon)
> +{
> +return (mon->flags & MONITOR_USE_CONTROL);
> +}
> +
> +typedef QTAILQ_HEAD(MonitorList, Monitor) MonitorList;
> +extern IOThread *mon_iothread;
> +extern QEMUBH *qmp_dispatcher_bh;
> +extern QmpCommandList qmp_commands, qmp_cap_negotiation_commands;
> +extern QemuMutex monitor_lock;
> +extern MonitorList mon_list;
> +extern int mon_refcount;
> +
> +int monitor_puts(Monitor *mon, const char *str);
> +void monitor_data_init(Monitor *mon, int flags, bool skip_flush,
> +   bool use_io_thread);
> +int monitor_can_read(void *opaque);
> +void monitor_list_append(Monitor *mon);
> +void monitor_fdsets_cleanup(void);
> +
> +void qmp_send_response(MonitorQMP *mon, const QDict *rsp);
> +void monitor_data_destroy_qmp(MonitorQMP *mon);
> +void monitor_qmp_bh_dispatcher(void *data);
> +
>  #endif

I trust you that these are indeed all needed.

> diff --git a/monitor/misc.c b/monitor/misc.c
> index aa3342c1e5..7e6f09106c 100644
> --- a/monitor/misc.c
> +++ b/monitor/misc.c
> @@ -140,51 +140,29 @@ IOThread *mon_iothread;
>  /* Bottom half to dispatch the requests received from I/O thread */
>  QEMUBH *qmp_dispatcher_bh;
>  
> -struct QMPRequest {
> -/* Owner of the request */
> -MonitorQMP *mon;
> -/*
> - * Request object to be handled or Error to be reported
> - * (exactly one of them is non-null)
> - */
> -QObject *req;
> -Error *err;
> -};
> -typedef struct QMPRequest QMPRequest;
> -
>  /* QMP checker flags */
>  #define QMP_ACCEPT_UNKNOWNS 1
>  
>  /* Protects mon_list, monitor_qapi_event_state, monitor_destroyed.  */
> -static QemuMutex monitor_lock;
> +QemuMutex monitor_lock;
>  static GHashTable *monitor_qapi_event_state;
> -static QTAILQ_HEAD(, Monitor) mon_list;
> +MonitorList mon_list;
>  static bool monitor_destroyed;
>  
>  /* Protects mon_fdsets */
>  static QemuMutex mon_fdsets_lock;
>  static QLIST_HEAD(, MonFdset) mon_fdsets;
>  
> -static int mon_refcount;
> +int mon_refcount;
>  
>  static mon_cmd_t mon_cmds[];
>  static mon_cmd_t info_cmds[];
>  
> -QmpCommandList qmp_commands, qmp_cap_negotiation_commands;
> -
>  __thread Monitor *cur_mon;
>  
>  static void monitor_command_cb(void *opaque, const char *cmdline,
> void *readline_opaque);
>  
> -/**
> - * Is @mon a QMP monitor?
> - */
> -static inline bool monitor_is_qmp(const Monitor *mon)

Re: [Qemu-block] [Qemu-devel] [PATCH v2 08/11] monitor: Create monitor_int.h with common definitions

2019-06-12 Thread Markus Armbruster
Kevin Wolf  writes:

> Before we can split monitor.c, we need to create a header file that
> contains the common definitions that will be used by multiple source
> files.
>
> Signed-off-by: Kevin Wolf 
> Reviewed-by: Dr. David Alan Gilbert 
> ---
>  monitor/monitor_int.h | 148 ++
>  monitor/misc.c| 110 +--
>  MAINTAINERS   |   2 +
>  3 files changed, 151 insertions(+), 109 deletions(-)
>  create mode 100644 monitor/monitor_int.h
>
> diff --git a/monitor/monitor_int.h b/monitor/monitor_int.h
> new file mode 100644
> index 00..7122418955
> --- /dev/null
> +++ b/monitor/monitor_int.h

Please spell it with a '-', like the other files in this directory.

Suggest not to abbreviate "internal".  We use both spellings, but
-internal.h is clearer and more common.

> @@ -0,0 +1,148 @@
> +/*
> + * QEMU monitor
> + *
> + * Copyright (c) 2003-2004 Fabrice Bellard
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a 
> copy
> + * of this software and associated documentation files (the "Software"), to 
> deal
> + * in the Software without restriction, including without limitation the 
> rights
> + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
> + * copies of the Software, and to permit persons to whom the Software is
> + * furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
> FROM,
> + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
> + * THE SOFTWARE.
> + */
> +
> +#ifndef MONITOR_INT_H
> +#define MONITOR_INT_H
> +
> +#include "qemu-common.h"

Use of qemu-common.h in headers is forbidden.  See its file comment, and
my "[PATCH 0/4] Cleanups around qemu-common.h".  Fortunately, its
inclusion is superfluous here.

> +#include "monitor/monitor.h"

Also superfluous.

> +
> +#include "qapi/qmp/qdict.h"

Likewise.

> +#include "qapi/qmp/json-parser.h"
> +#include "qapi/qapi-commands.h"
> +
> +#include "qemu/readline.h"
> +#include "chardev/char-fe.h"
> +
> +/*
> + * Supported types:
> + *
> + * 'F'  filename
> + * 'B'  block device name
> + * 's'  string (accept optional quote)
> + * 'S'  it just appends the rest of the string (accept optional 
> quote)
> + * 'O'  option string of the form NAME=VALUE,...
> + *  parsed according to QemuOptsList given by its name
> + *  Example: 'device:O' uses qemu_device_opts.
> + *  Restriction: only lists with empty desc are supported
> + *  TODO lift the restriction
> + * 'i'  32 bit integer
> + * 'l'  target long (32 or 64 bit)
> + * 'M'  Non-negative target long (32 or 64 bit), in user mode the
> + *  value is multiplied by 2^20 (think Mebibyte)
> + * 'o'  octets (aka bytes)
> + *  user mode accepts an optional E, e, P, p, T, t, G, g, M, m,
> + *  K, k suffix, which multiplies the value by 2^60 for suffixes 
> E
> + *  and e, 2^50 for suffixes P and p, 2^40 for suffixes T and t,
> + *  2^30 for suffixes G and g, 2^20 for M and m, 2^10 for K and k
> + * 'T'  double
> + *  user mode accepts an optional ms, us, ns suffix,
> + *  which divides the value by 1e3, 1e6, 1e9, respectively
> + * '/'  optional gdb-like print format (like "/10x")
> + *
> + * '?'  optional type (for all types, except '/')
> + * '.'  other form of optional type (for 'i' and 'l')
> + * 'b'  boolean
> + *  user mode accepts "on" or "off"
> + * '-'  optional parameter (eg. '-f')
> + *
> + */
> +
> +typedef struct mon_cmd_t {
> +const char *name;
> +const char *args_type;
> +const char *params;
> +const char *help;
> +const char *flags; /* p=preconfig */
> +void (*cmd)(Monitor *mon, const QDict *qdict);
> +/*
> + * @sub_table is a list of 2nd level of commands. If it does not exist,
> + * cmd should be used. If it exists, sub_table[?].cmd should be
> + * used, and cmd of 1st level plays the role of help function.
> + */
> +struct mon_cmd_t *sub_table;
> +void (*command_completion)(ReadLineState *rs, int nb_args, const char 
> *str);
> +} mon_cmd_t;
> +
> +struct Monitor {
> +CharBackend chr;
> +int reset_seen;
> +int flags;
> +int suspend_cnt;/* Needs to be accesse

Re: [Qemu-block] [Qemu-devel] [PATCH v2 08/11] monitor: Create monitor_int.h with common definitions

2019-06-12 Thread Markus Armbruster
One more nit...

Kevin Wolf  writes:

> Before we can split monitor.c, we need to create a header file that

monitor/misc.c

> contains the common definitions that will be used by multiple source
> files.
>
> Signed-off-by: Kevin Wolf 
> Reviewed-by: Dr. David Alan Gilbert 



Re: [Qemu-block] [SeaBIOS] [QEMU] [PATCH v2 0/8] Add Qemu to SeaBIOS LCHS interface

2019-06-12 Thread no-reply
Patchew URL: 
https://patchew.org/QEMU/20190612115939.23825-1-shmuel.eider...@oracle.com/



Hi,

This series seems to have some coding style problems. See output below for
more information:

Subject: [SeaBIOS] [QEMU] [PATCH v2 0/8] Add Qemu to SeaBIOS LCHS interface
Type: series
Message-id: 20190612115939.23825-1-shmuel.eider...@oracle.com

=== TEST SCRIPT BEGIN ===
#!/bin/bash
git rev-parse base > /dev/null || exit 0
git config --local diff.renamelimit 0
git config --local diff.renames True
git config --local diff.algorithm histogram
./scripts/checkpatch.pl --mailback base..
=== TEST SCRIPT END ===

From https://github.com/patchew-project/qemu
 * [new tag]   
patchew/20190612115939.23825-1-shmuel.eider...@oracle.com -> 
patchew/20190612115939.23825-1-shmuel.eider...@oracle.com
Switched to a new branch 'test'
bdf6a1bd24 hd-geo-test: Add tests for lchs override
d7b67f4193 bootdevice: FW_CFG interface for LCHS values
11a64fd11f bootdevice: Refactor get_boot_devices_list
dce31cf2c7 bootdevice: Gather LCHS from all relevant devices
aaa025aea3 scsi: Propagate unrealize() callback to scsi-hd
ba777cd8b1 bootdevice: Add interface to gather LCHS
ed9b61ee8d block: Support providing LCHS from user
eb61f6f1d3 block: Refactor macros - fix tabbing

=== OUTPUT BEGIN ===
1/8 Checking commit eb61f6f1d35c (block: Refactor macros - fix tabbing)
ERROR: Macros with complex values should be enclosed in parenthesis
#55: FILE: include/hw/block/block.h:65:
+#define DEFINE_BLOCK_CHS_PROPERTIES(_state, _conf)  \
+DEFINE_PROP_UINT32("cyls", _state, _conf.cyls, 0),  \
+DEFINE_PROP_UINT32("heads", _state, _conf.heads, 0),\
 DEFINE_PROP_UINT32("secs", _state, _conf.secs, 0)

total: 1 errors, 0 warnings, 37 lines checked

Patch 1/8 has style problems, please review.  If any of these errors
are false positives report them to the maintainer, see
CHECKPATCH in MAINTAINERS.

2/8 Checking commit ed9b61ee8dbf (block: Support providing LCHS from user)
3/8 Checking commit ba777cd8b1e3 (bootdevice: Add interface to gather LCHS)
4/8 Checking commit aaa025aea333 (scsi: Propagate unrealize() callback to 
scsi-hd)
5/8 Checking commit dce31cf2c7ac (bootdevice: Gather LCHS from all relevant 
devices)
6/8 Checking commit 11a64fd11f0f (bootdevice: Refactor get_boot_devices_list)
7/8 Checking commit d7b67f4193ef (bootdevice: FW_CFG interface for LCHS values)
8/8 Checking commit bdf6a1bd24bd (hd-geo-test: Add tests for lchs override)
=== OUTPUT END ===

Test command exited with code: 1


The full log is available at
http://patchew.org/logs/20190612115939.23825-1-shmuel.eider...@oracle.com/testing.checkpatch/?type=message.
---
Email generated automatically by Patchew [https://patchew.org/].
Please send your feedback to patchew-de...@redhat.com

Re: [Qemu-block] [Qemu-devel] [QEMU] [PATCH 7/8] bootdevice: FW_CFG interface for LCHS values

2019-06-12 Thread Laszlo Ersek
On 06/12/19 11:42, Sam Eiderman wrote:
> Using fw_cfg, supply logical CHS values directly from QEMU to the BIOS.
> 
> Non-standard logical geometries break under QEMU.
> 
> A virtual disk which contains an operating system which depends on
> logical geometries (consistent values being reported from BIOS INT13
> AH=08) will most likely break under QEMU/SeaBIOS if it has non-standard
> logical geometries - for example 56 SPT (sectors per track).
> No matter what QEMU will report - SeaBIOS, for large enough disks - will
> use LBA translation, which will report 63 SPT instead.
> 
> In addition we cannot force SeaBIOS to rely on physical geometries at
> all. A virtio-blk-pci virtual disk with 255 physical heads cannot
> report more than 16 physical heads when moved to an IDE controller,
> since the ATA spec allows a maximum of 16 heads - this is an artifact of
> virtualization.
> 
> By supplying the logical geometries directly we are able to support such
> "exotic" disks.
> 
> We serialize this information in a similar way to the "bootorder"
> interface.
> The fw_cfg entry is "bootdevices" and it serializes a struct.
> At the moment the struct holds the values of logical CHS values but it
> can be expanded easily due to the extendable ABI implemented.
> 
> (In the future, we can pass the bootindex through "bootdevices" instead
> of "bootorder" - unifying all bootdevice information in one fw_cfg value)

I would disagree with that. UEFI guest firmware doesn't seem to have any
use for this new type of information ("logical CHS values"), so the
current interface (the "bootorder" fw_cfg file) should continue to work.
The ArmVirtQemu and OVMF platform firmwares (built from the edk2
project, and bundled with QEMU 4.1+) implement some serious parsing and
processing for "bootorder".

Independently, another comment:

> The PV interface through fw_cfg could have also been implemented using
> device specific keys, e.g.: "/etc/bootdevice/%s/logical_geometry" where
> %s is the device name QEMU produces - but this implementation would
> require much more code refactoring, both in QEMU and SeaBIOS, so the
> current implementation was chosen.
> 
> Reviewed-by: Karl Heubaum 
> Reviewed-by: Arbel Moshe 
> Signed-off-by: Sam Eiderman 
> ---
>  bootdevice.c| 42 ++
>  hw/nvram/fw_cfg.c   | 14 +++---
>  include/sysemu/sysemu.h |  1 +
>  3 files changed, 54 insertions(+), 3 deletions(-)
> 
> diff --git a/bootdevice.c b/bootdevice.c
> index 2b12fb85a4..84c2a83f25 100644
> --- a/bootdevice.c
> +++ b/bootdevice.c
> @@ -405,3 +405,45 @@ void del_boot_device_lchs(DeviceState *dev, const char 
> *suffix)
>  }
>  }
>  }
> +
> +typedef struct QEMU_PACKED BootDeviceEntrySerialized {
> +/* Do not change field order - add new fields below */
> +uint32_t lcyls;
> +uint32_t lheads;
> +uint32_t lsecs;
> +} BootDeviceEntrySerialized;
> +
> +/* Serialized as: struct size (4) + (device name\0 + device struct) x 
> devices */
> +char *get_boot_devices_info(size_t *size)
> +{
> +FWLCHSEntry *i;
> +BootDeviceEntrySerialized s;
> +size_t total = 0;
> +char *list = NULL;
> +
> +list = g_malloc0(sizeof(uint32_t));
> +*((uint32_t *)list) = (uint32_t)sizeof(s);
> +total = sizeof(uint32_t);
> +
> +QTAILQ_FOREACH(i, &fw_lchs, link) {
> +char *bootpath;
> +size_t len;
> +
> +bootpath = get_boot_device_path(i->dev, false, i->suffix);
> +s.lcyls = i->lcyls;
> +s.lheads = i->lheads;
> +s.lsecs = i->lsecs;

You should document the endianness of the fields in
BootDeviceEntrySerialized, and then call byte order conversion functions
here accordingly (most probably cpu_to_le32()).

As written, this code would break if you ran qemu-system-x86_64 /
qemu-system-i386 (with TCG acceleration) on a big endian host.

Thanks
Laszlo

> +
> +len = strlen(bootpath) + 1;
> +list = g_realloc(list, total + len + sizeof(s));
> +memcpy(&list[total], bootpath, len);
> +memcpy(&list[total + len], &s, sizeof(s));
> +total += len + sizeof(s);
> +
> +g_free(bootpath);
> +}
> +
> +*size = total;
> +
> +return list;
> +}
> diff --git a/hw/nvram/fw_cfg.c b/hw/nvram/fw_cfg.c
> index 9f7b7789bc..008b21542f 100644
> --- a/hw/nvram/fw_cfg.c
> +++ b/hw/nvram/fw_cfg.c
> @@ -916,13 +916,21 @@ void *fw_cfg_modify_file(FWCfgState *s, const char 
> *filename,
>  
>  static void fw_cfg_machine_reset(void *opaque)
>  {
> +MachineClass *mc = MACHINE_GET_CLASS(qdev_get_machine());
> +FWCfgState *s = opaque;
>  void *ptr;
>  size_t len;
> -FWCfgState *s = opaque;
> -char *bootindex = get_boot_devices_list(&len);
> +char *buf;
>  
> -ptr = fw_cfg_modify_file(s, "bootorder", (uint8_t *)bootindex, len);
> +buf = get_boot_devices_list(&len);
> +ptr = fw_cfg_modify_file(s, "bootorder", (uint8_t *)buf, len);
>  g_free(ptr);
> +
> +if (!mc->leg

[Qemu-block] [QEMU] [PATCH v2 1/8] block: Refactor macros - fix tabbing

2019-06-12 Thread Sam Eiderman
Fixing tabbing in block related macros.

Reviewed-by: Karl Heubaum 
Reviewed-by: Arbel Moshe 
Signed-off-by: Sam Eiderman 
---
 hw/ide/qdev.c|  2 +-
 include/hw/block/block.h | 16 
 2 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/hw/ide/qdev.c b/hw/ide/qdev.c
index 360cd20bd8..9cae3205df 100644
--- a/hw/ide/qdev.c
+++ b/hw/ide/qdev.c
@@ -285,7 +285,7 @@ static void ide_drive_realize(IDEDevice *dev, Error **errp)
 DEFINE_BLOCK_PROPERTIES(IDEDrive, dev.conf),\
 DEFINE_BLOCK_ERROR_PROPERTIES(IDEDrive, dev.conf),  \
 DEFINE_PROP_STRING("ver",  IDEDrive, dev.version),  \
-DEFINE_PROP_UINT64("wwn",  IDEDrive, dev.wwn, 0),\
+DEFINE_PROP_UINT64("wwn",  IDEDrive, dev.wwn, 0),   \
 DEFINE_PROP_STRING("serial",  IDEDrive, dev.serial),\
 DEFINE_PROP_STRING("model", IDEDrive, dev.model)
 
diff --git a/include/hw/block/block.h b/include/hw/block/block.h
index 607539057a..fd55a30bca 100644
--- a/include/hw/block/block.h
+++ b/include/hw/block/block.h
@@ -50,21 +50,21 @@ static inline unsigned int get_physical_block_exp(BlockConf 
*conf)
   _conf.logical_block_size),\
 DEFINE_PROP_BLOCKSIZE("physical_block_size", _state,\
   _conf.physical_block_size),   \
-DEFINE_PROP_UINT16("min_io_size", _state, _conf.min_io_size, 0),  \
+DEFINE_PROP_UINT16("min_io_size", _state, _conf.min_io_size, 0),\
 DEFINE_PROP_UINT32("opt_io_size", _state, _conf.opt_io_size, 0),\
-DEFINE_PROP_UINT32("discard_granularity", _state, \
-   _conf.discard_granularity, -1), \
-DEFINE_PROP_ON_OFF_AUTO("write-cache", _state, _conf.wce, \
-ON_OFF_AUTO_AUTO), \
+DEFINE_PROP_UINT32("discard_granularity", _state,   \
+   _conf.discard_granularity, -1),  \
+DEFINE_PROP_ON_OFF_AUTO("write-cache", _state, _conf.wce,   \
+ON_OFF_AUTO_AUTO),  \
 DEFINE_PROP_BOOL("share-rw", _state, _conf.share_rw, false)
 
 #define DEFINE_BLOCK_PROPERTIES(_state, _conf)  \
 DEFINE_PROP_DRIVE("drive", _state, _conf.blk),  \
 DEFINE_BLOCK_PROPERTIES_BASE(_state, _conf)
 
-#define DEFINE_BLOCK_CHS_PROPERTIES(_state, _conf)  \
-DEFINE_PROP_UINT32("cyls", _state, _conf.cyls, 0),  \
-DEFINE_PROP_UINT32("heads", _state, _conf.heads, 0), \
+#define DEFINE_BLOCK_CHS_PROPERTIES(_state, _conf)  \
+DEFINE_PROP_UINT32("cyls", _state, _conf.cyls, 0),  \
+DEFINE_PROP_UINT32("heads", _state, _conf.heads, 0),\
 DEFINE_PROP_UINT32("secs", _state, _conf.secs, 0)
 
 #define DEFINE_BLOCK_ERROR_PROPERTIES(_state, _conf)\
-- 
2.13.3




[Qemu-block] [QEMU] [PATCH v2 2/8] block: Support providing LCHS from user

2019-06-12 Thread Sam Eiderman
Add logical geometry variables to BlockConf.

A user can now supply "lcyls", "lheads" & "lsecs" for any HD device
that supports CHS ("cyls", "heads", "secs").

These devices include:
* ide-hd
* scsi-hd
* virtio-blk-pci

In future commits we will use the provided LCHS and pass it to the BIOS
through fw_cfg to be supplied using INT13 routines.

Reviewed-by: Karl Heubaum 
Reviewed-by: Arbel Moshe 
Signed-off-by: Sam Eiderman 
---
 include/hw/block/block.h | 6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/include/hw/block/block.h b/include/hw/block/block.h
index fd55a30bca..d7246f3862 100644
--- a/include/hw/block/block.h
+++ b/include/hw/block/block.h
@@ -26,6 +26,7 @@ typedef struct BlockConf {
 uint32_t discard_granularity;
 /* geometry, not all devices use this */
 uint32_t cyls, heads, secs;
+uint32_t lcyls, lheads, lsecs;
 OnOffAuto wce;
 bool share_rw;
 BlockdevOnError rerror;
@@ -65,7 +66,10 @@ static inline unsigned int get_physical_block_exp(BlockConf 
*conf)
 #define DEFINE_BLOCK_CHS_PROPERTIES(_state, _conf)  \
 DEFINE_PROP_UINT32("cyls", _state, _conf.cyls, 0),  \
 DEFINE_PROP_UINT32("heads", _state, _conf.heads, 0),\
-DEFINE_PROP_UINT32("secs", _state, _conf.secs, 0)
+DEFINE_PROP_UINT32("secs", _state, _conf.secs, 0),  \
+DEFINE_PROP_UINT32("lcyls", _state, _conf.lcyls, 0),\
+DEFINE_PROP_UINT32("lheads", _state, _conf.lheads, 0),  \
+DEFINE_PROP_UINT32("lsecs", _state, _conf.lsecs, 0)
 
 #define DEFINE_BLOCK_ERROR_PROPERTIES(_state, _conf)\
 DEFINE_PROP_BLOCKDEV_ON_ERROR("rerror", _state, _conf.rerror,   \
-- 
2.13.3




[Qemu-block] [QEMU] [PATCH v2 0/8] Add Qemu to SeaBIOS LCHS interface

2019-06-12 Thread Sam Eiderman
v1:

Non-standard logical geometries break under QEMU.

A virtual disk which contains an operating system which depends on
logical geometries (consistent values being reported from BIOS INT13
AH=08) will most likely break under QEMU/SeaBIOS if it has non-standard
logical geometries - for example 56 SPT (sectors per track).
No matter what QEMU will guess - SeaBIOS, for large enough disks - will
use LBA translation, which will report 63 SPT instead.

In addition we cannot force SeaBIOS to rely on physical geometries at
all. A virtio-blk-pci virtual disk with 255 physical heads cannot
report more than 16 physical heads when moved to an IDE controller,
since the ATA spec allows a maximum of 16 heads - this is an artifact of
virtualization.

By supplying the logical geometries directly we are able to support such
"exotic" disks.

We will use fw_cfg to do just that.

v2:

Fix missing parenthesis check in
"hd-geo-test: Add tests for lchs override"

Sam Eiderman (8):
  block: Refactor macros - fix tabbing
  block: Support providing LCHS from user
  bootdevice: Add interface to gather LCHS
  scsi: Propagate unrealize() callback to scsi-hd
  bootdevice: Gather LCHS from all relevant devices
  bootdevice: Refactor get_boot_devices_list
  bootdevice: FW_CFG interface for LCHS values
  hd-geo-test: Add tests for lchs override

 bootdevice.c | 158 ++---
 hw/block/virtio-blk.c|   6 +
 hw/ide/qdev.c|   7 +-
 hw/nvram/fw_cfg.c|  14 +-
 hw/scsi/scsi-bus.c   |  15 ++
 hw/scsi/scsi-disk.c  |  14 ++
 include/hw/block/block.h |  22 +-
 include/hw/scsi/scsi.h   |   1 +
 include/sysemu/sysemu.h  |   4 +
 tests/Makefile.include   |   2 +-
 tests/hd-geo-test.c  | 565 +++
 11 files changed, 767 insertions(+), 41 deletions(-)

-- 
2.13.3




Re: [Qemu-block] [Qemu-devel] [PATCH v2 06/11] Move monitor.c to monitor/misc.c

2019-06-12 Thread Markus Armbruster
Kevin Wolf  writes:

> Create a new monitor/ subdirectory and move monitor.c there. As the plan
> is to move the monitor core into separate files, use the chance to
> rename it to misc.c.

I figure we'll want to move most of (all of?) monitor/misc.c out.  Not a
job this series must finish, of course.

> Signed-off-by: Kevin Wolf 
> ---
>  docs/devel/writing-qmp-commands.txt |  2 +-
>  monitor.c => monitor/misc.c |  2 +-
>  MAINTAINERS |  4 ++--
>  Makefile.objs   |  1 +
>  Makefile.target |  3 ++-
>  monitor/Makefile.objs   |  1 +
>  monitor/trace-events| 11 +++
>  trace-events| 10 --
>  8 files changed, 19 insertions(+), 15 deletions(-)
>  rename monitor.c => monitor/misc.c (99%)
>  create mode 100644 monitor/Makefile.objs
>  create mode 100644 monitor/trace-events
>
> diff --git a/docs/devel/writing-qmp-commands.txt 
> b/docs/devel/writing-qmp-commands.txt
> index 9dfc62bf5a..cc6ecd6d5d 100644
> --- a/docs/devel/writing-qmp-commands.txt
> +++ b/docs/devel/writing-qmp-commands.txt
> @@ -470,7 +470,7 @@ it's good practice to always check for errors.
>  
>  Another important detail is that HMP's "info" commands don't go into the
>  hmp-commands.hx. Instead, they go into the info_cmds[] table, which is 
> defined
> -in the monitor.c file. The entry for the "info alarmclock" follows:
> +in the monitor/misc.c file. The entry for the "info alarmclock" follows:

Not this patch's fault, but this is wrong since commit da76ee76f78 (Sep
2015).

Funny, the one place that provides advice on writing HMP commands is
called writing-qmp-commands.txt %-}

>  
>  {
>  .name   = "alarmclock",

[...]

Reviewed-by: Markus Armbruster 



[Qemu-block] [QEMU] [PATCH v2 3/8] bootdevice: Add interface to gather LCHS

2019-06-12 Thread Sam Eiderman
Add an interface to provide direct logical CHS values for boot devices.
We will use this interface in the next commits.

Reviewed-by: Karl Heubaum 
Reviewed-by: Arbel Moshe 
Signed-off-by: Sam Eiderman 
---
 bootdevice.c| 55 +
 include/sysemu/sysemu.h |  3 +++
 2 files changed, 58 insertions(+)

diff --git a/bootdevice.c b/bootdevice.c
index 1d225202f9..bc5e1c2de4 100644
--- a/bootdevice.c
+++ b/bootdevice.c
@@ -343,3 +343,58 @@ void device_add_bootindex_property(Object *obj, int32_t 
*bootindex,
 /* initialize devices' bootindex property to -1 */
 object_property_set_int(obj, -1, name, NULL);
 }
+
+typedef struct FWLCHSEntry FWLCHSEntry;
+
+struct FWLCHSEntry {
+QTAILQ_ENTRY(FWLCHSEntry) link;
+DeviceState *dev;
+char *suffix;
+uint32_t lcyls;
+uint32_t lheads;
+uint32_t lsecs;
+};
+
+static QTAILQ_HEAD(, FWLCHSEntry) fw_lchs =
+QTAILQ_HEAD_INITIALIZER(fw_lchs);
+
+void add_boot_device_lchs(DeviceState *dev, const char *suffix,
+  uint32_t lcyls, uint32_t lheads, uint32_t lsecs)
+{
+FWLCHSEntry *node;
+
+if (!lcyls && !lheads && !lsecs) {
+return;
+}
+
+assert(dev != NULL || suffix != NULL);
+
+node = g_malloc0(sizeof(FWLCHSEntry));
+node->suffix = g_strdup(suffix);
+node->dev = dev;
+node->lcyls = lcyls;
+node->lheads = lheads;
+node->lsecs = lsecs;
+
+QTAILQ_INSERT_TAIL(&fw_lchs, node, link);
+}
+
+void del_boot_device_lchs(DeviceState *dev, const char *suffix)
+{
+FWLCHSEntry *i;
+
+if (dev == NULL) {
+return;
+}
+
+QTAILQ_FOREACH(i, &fw_lchs, link) {
+if ((!suffix || !g_strcmp0(i->suffix, suffix)) &&
+ i->dev == dev) {
+QTAILQ_REMOVE(&fw_lchs, i, link);
+g_free(i->suffix);
+g_free(i);
+
+break;
+}
+}
+}
diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h
index 61579ae71e..173dfbb539 100644
--- a/include/sysemu/sysemu.h
+++ b/include/sysemu/sysemu.h
@@ -171,6 +171,9 @@ void device_add_bootindex_property(Object *obj, int32_t 
*bootindex,
DeviceState *dev, Error **errp);
 void restore_boot_order(void *opaque);
 void validate_bootdevices(const char *devices, Error **errp);
+void add_boot_device_lchs(DeviceState *dev, const char *suffix,
+  uint32_t lcyls, uint32_t lheads, uint32_t lsecs);
+void del_boot_device_lchs(DeviceState *dev, const char *suffix);
 
 /* handler to set the boot_device order for a specific type of MachineClass */
 typedef void QEMUBootSetHandler(void *opaque, const char *boot_order,
-- 
2.13.3




[Qemu-block] [QEMU] [PATCH v2 5/8] bootdevice: Gather LCHS from all relevant devices

2019-06-12 Thread Sam Eiderman
Relevant devices are:
* ide-hd (and ide-cd, ide-drive)
* scsi-hd (and scsi-cd, scsi-disk, scsi-block)
* virtio-blk-pci

We do not call del_boot_device_lchs() for ide-* since we don't need to -
IDE block devices do not support unplugging.

Reviewed-by: Karl Heubaum 
Reviewed-by: Arbel Moshe 
Signed-off-by: Sam Eiderman 
---
 hw/block/virtio-blk.c |  6 ++
 hw/ide/qdev.c |  5 +
 hw/scsi/scsi-disk.c   | 14 ++
 3 files changed, 25 insertions(+)

diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
index 06e57a4d39..787bbd768a 100644
--- a/hw/block/virtio-blk.c
+++ b/hw/block/virtio-blk.c
@@ -1182,6 +1182,11 @@ static void virtio_blk_device_realize(DeviceState *dev, 
Error **errp)
 blk_set_guest_block_size(s->blk, s->conf.conf.logical_block_size);
 
 blk_iostatus_enable(s->blk);
+
+add_boot_device_lchs(dev, "/disk@0,0",
+ (&conf->conf)->lcyls,
+ (&conf->conf)->lheads,
+ (&conf->conf)->lsecs);
 }
 
 static void virtio_blk_device_unrealize(DeviceState *dev, Error **errp)
@@ -1189,6 +1194,7 @@ static void virtio_blk_device_unrealize(DeviceState *dev, 
Error **errp)
 VirtIODevice *vdev = VIRTIO_DEVICE(dev);
 VirtIOBlock *s = VIRTIO_BLK(dev);
 
+del_boot_device_lchs(dev, "/disk@0,0");
 virtio_blk_data_plane_destroy(s->dataplane);
 s->dataplane = NULL;
 qemu_del_vm_change_state_handler(s->change);
diff --git a/hw/ide/qdev.c b/hw/ide/qdev.c
index 9cae3205df..07f429d5e3 100644
--- a/hw/ide/qdev.c
+++ b/hw/ide/qdev.c
@@ -215,6 +215,11 @@ static void ide_dev_initfn(IDEDevice *dev, IDEDriveKind 
kind, Error **errp)
 
 add_boot_device_path(dev->conf.bootindex, &dev->qdev,
  dev->unit ? "/disk@1" : "/disk@0");
+
+add_boot_device_lchs(&dev->qdev, dev->unit ? "/disk@1" : "/disk@0",
+ (&dev->conf)->lcyls,
+ (&dev->conf)->lheads,
+ (&dev->conf)->lsecs);
 }
 
 static void ide_dev_get_bootindex(Object *obj, Visitor *v, const char *name,
diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c
index 7b89ac798b..3451aefdea 100644
--- a/hw/scsi/scsi-disk.c
+++ b/hw/scsi/scsi-disk.c
@@ -2390,6 +2390,16 @@ static void scsi_realize(SCSIDevice *dev, Error **errp)
 blk_set_guest_block_size(s->qdev.conf.blk, s->qdev.blocksize);
 
 blk_iostatus_enable(s->qdev.conf.blk);
+
+add_boot_device_lchs(&dev->qdev, NULL,
+ (&dev->conf)->lcyls,
+ (&dev->conf)->lheads,
+ (&dev->conf)->lsecs);
+}
+
+static void scsi_unrealize(SCSIDevice *dev, Error **errp)
+{
+del_boot_device_lchs(&dev->qdev, NULL);
 }
 
 static void scsi_hd_realize(SCSIDevice *dev, Error **errp)
@@ -2988,6 +2998,7 @@ static void scsi_hd_class_initfn(ObjectClass *klass, void 
*data)
 SCSIDeviceClass *sc = SCSI_DEVICE_CLASS(klass);
 
 sc->realize  = scsi_hd_realize;
+sc->unrealize= scsi_unrealize;
 sc->alloc_req= scsi_new_request;
 sc->unit_attention_reported = scsi_disk_unit_attention_reported;
 dc->desc = "virtual SCSI disk";
@@ -3019,6 +3030,7 @@ static void scsi_cd_class_initfn(ObjectClass *klass, void 
*data)
 SCSIDeviceClass *sc = SCSI_DEVICE_CLASS(klass);
 
 sc->realize  = scsi_cd_realize;
+sc->unrealize= scsi_unrealize;
 sc->alloc_req= scsi_new_request;
 sc->unit_attention_reported = scsi_disk_unit_attention_reported;
 dc->desc = "virtual SCSI CD-ROM";
@@ -3054,6 +3066,7 @@ static void scsi_block_class_initfn(ObjectClass *klass, 
void *data)
 SCSIDiskClass *sdc = SCSI_DISK_BASE_CLASS(klass);
 
 sc->realize  = scsi_block_realize;
+sc->unrealize= scsi_unrealize;
 sc->alloc_req= scsi_block_new_request;
 sc->parse_cdb= scsi_block_parse_cdb;
 sdc->dma_readv   = scsi_block_dma_readv;
@@ -3095,6 +3108,7 @@ static void scsi_disk_class_initfn(ObjectClass *klass, 
void *data)
 SCSIDeviceClass *sc = SCSI_DEVICE_CLASS(klass);
 
 sc->realize  = scsi_disk_realize;
+sc->unrealize= scsi_unrealize;
 sc->alloc_req= scsi_new_request;
 sc->unit_attention_reported = scsi_disk_unit_attention_reported;
 dc->fw_name = "disk";
-- 
2.13.3




Re: [Qemu-block] [Qemu-devel] [PATCH v2 07/11] monitor: Move {hmp, qmp}.c to monitor/{hmp, qmp}-cmds.c

2019-06-12 Thread Markus Armbruster
Kevin Wolf  writes:

> Now that we have a monitor/ subdirectory, let's move hmp.c and qmp.c
> from the root directory there. As they contain implementations of
> monitor commands, rename them to {hmp,qmp}-cmds.c, so that {hmp,qmp}.c
> are free for the HMP and QMP infrastructure.
>
> Signed-off-by: Kevin Wolf 
> ---
>  docs/devel/writing-qmp-commands.txt | 9 +
>  hmp.c => monitor/hmp-cmds.c | 2 +-
>  qmp.c => monitor/qmp-cmds.c | 2 +-
>  MAINTAINERS | 5 +++--
>  Makefile.objs   | 2 +-
>  monitor/Makefile.objs   | 1 +
>  6 files changed, 12 insertions(+), 9 deletions(-)
>  rename hmp.c => monitor/hmp-cmds.c (99%)
>  rename qmp.c => monitor/qmp-cmds.c (99%)
>
> diff --git a/docs/devel/writing-qmp-commands.txt 
> b/docs/devel/writing-qmp-commands.txt
> index cc6ecd6d5d..46a6c48683 100644
> --- a/docs/devel/writing-qmp-commands.txt
> +++ b/docs/devel/writing-qmp-commands.txt
> @@ -20,7 +20,7 @@ new QMP command.
>  
>  2. Write the QMP command itself, which is a regular C function. Preferably,
> the command should be exported by some QEMU subsystem. But it can also be
> -   added to the qmp.c file
> +   added to the monitor/qmp-cmds.c file
>  
>  3. At this point the command can be tested under the QMP protocol
>  
> @@ -101,7 +101,8 @@ protocol data.
>  
>  The next step is to write the "hello-world" implementation. As explained
>  earlier, it's preferable for commands to live in QEMU subsystems. But
> -"hello-world" doesn't pertain to any, so we put its implementation in qmp.c:
> +"hello-world" doesn't pertain to any, so we put its implementation in
> +monitor/qmp-cmds.c:
>  
>  void qmp_hello_world(Error **errp)
>  {
> @@ -146,7 +147,7 @@ for mandatory arguments). Finally, 'str' is the 
> argument's type, which
>  stands for "string". The QAPI also supports integers, booleans, enumerations
>  and user defined types.
>  
> -Now, let's update our C implementation in qmp.c:
> +Now, let's update our C implementation in monitor/qmp-cmds.c:
>  
>  void qmp_hello_world(bool has_message, const char *message, Error **errp)
>  {
> @@ -267,7 +268,7 @@ monitor (HMP).
>  
>  With the introduction of the QAPI, HMP commands make QMP calls. Most of the
>  time HMP commands are simple wrappers. All HMP commands implementation exist 
> in

Not this patch's fault: the "All" is wrong, and the entire sentence is
bad English.

> -the hmp.c file.
> +the monitor/hmp-cmds.c file.
>  
>  Here's the implementation of the "hello-world" HMP command:
>  
> diff --git a/hmp.c b/monitor/hmp-cmds.c
> similarity index 99%
> rename from hmp.c
> rename to monitor/hmp-cmds.c
> index 99414cd39c..712737cd18 100644
> --- a/hmp.c
> +++ b/monitor/hmp-cmds.c
> @@ -1,5 +1,5 @@
>  /*
> - * Human Monitor Interface
> + * Human Monitor Interface commands
>   *
>   * Copyright IBM, Corp. 2011
>   *
> diff --git a/qmp.c b/monitor/qmp-cmds.c
> similarity index 99%
> rename from qmp.c
> rename to monitor/qmp-cmds.c
> index fa1b3c1577..65520222ca 100644
> --- a/qmp.c
> +++ b/monitor/qmp-cmds.c
> @@ -1,5 +1,5 @@
>  /*
> - * QEMU Management Protocol
> + * QEMU Management Protocol commands
>   *
>   * Copyright IBM, Corp. 2011
>   *
> diff --git a/MAINTAINERS b/MAINTAINERS
> index 10c082314c..8789c82e5c 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -1925,7 +1925,8 @@ Human Monitor (HMP)
>  M: Dr. David Alan Gilbert 
>  S: Maintained
>  F: monitor/misc.c
> -F: hmp.[ch]
> +F: monitor/hmp*
> +F: hmp.h

Move hmp.h to include/monitor/ ?

>  F: hmp-commands*.hx
>  F: include/monitor/hmp-target.h
>  F: tests/test-hmp.c
> @@ -2045,7 +2046,7 @@ F: tests/check-qom-proplist.c
>  QMP
>  M: Markus Armbruster 
>  S: Supported
> -F: qmp.c
> +F: monitor/qmp*
>  F: monitor/misc.c
>  F: docs/devel/*qmp-*
>  F: docs/interop/*qmp-*
> diff --git a/Makefile.objs b/Makefile.objs
> index dd39a70b48..9495fcbc7e 100644
> --- a/Makefile.objs
> +++ b/Makefile.objs
> @@ -83,8 +83,8 @@ common-obj-$(CONFIG_FDT) += device_tree.o
>  ##
>  # qapi
>  
> -common-obj-y += qmp.o hmp.o
>  common-obj-y += qapi/
> +common-obj-y += monitor/
>  endif
>  
>  ###
> diff --git a/monitor/Makefile.objs b/monitor/Makefile.objs
> index e783b0616b..a7170af6e1 100644
> --- a/monitor/Makefile.objs
> +++ b/monitor/Makefile.objs
> @@ -1 +1,2 @@
>  obj-y += misc.o
> +common-obj-y += qmp-cmds.o hmp-cmds.o

Reviewed-by: Markus Armbruster 



[Qemu-block] [QEMU] [PATCH v2 8/8] hd-geo-test: Add tests for lchs override

2019-06-12 Thread Sam Eiderman
Add QTest tests to check the logical geometry override option.

The tests in hd-geo-test are out of date - they only test IDE and do not
test interesting MBRs.

I added a few helper functions which will make adding more tests easier.

QTest's fw_cfg helper functions support only legacy fw_cfg, so I had to
read the new fw_cfg layout on my own.

Creating qcow2 disks with specific size and MBR layout is currently
unused - we only use a default empty MBR.

Reviewed-by: Karl Heubaum 
Reviewed-by: Arbel Moshe 
Signed-off-by: Sam Eiderman 
---
 tests/Makefile.include |   2 +-
 tests/hd-geo-test.c| 565 +
 2 files changed, 566 insertions(+), 1 deletion(-)

diff --git a/tests/Makefile.include b/tests/Makefile.include
index 46a36c2c95..55ea165ed4 100644
--- a/tests/Makefile.include
+++ b/tests/Makefile.include
@@ -765,7 +765,7 @@ tests/ide-test$(EXESUF): tests/ide-test.o $(libqos-pc-obj-y)
 tests/ahci-test$(EXESUF): tests/ahci-test.o $(libqos-pc-obj-y) 
qemu-img$(EXESUF)
 tests/ipmi-kcs-test$(EXESUF): tests/ipmi-kcs-test.o
 tests/ipmi-bt-test$(EXESUF): tests/ipmi-bt-test.o
-tests/hd-geo-test$(EXESUF): tests/hd-geo-test.o
+tests/hd-geo-test$(EXESUF): tests/hd-geo-test.o $(libqos-obj-y)
 tests/boot-order-test$(EXESUF): tests/boot-order-test.o $(libqos-obj-y)
 tests/boot-serial-test$(EXESUF): tests/boot-serial-test.o $(libqos-obj-y)
 tests/bios-tables-test$(EXESUF): tests/bios-tables-test.o \
diff --git a/tests/hd-geo-test.c b/tests/hd-geo-test.c
index 62eb624726..08eafeb81a 100644
--- a/tests/hd-geo-test.c
+++ b/tests/hd-geo-test.c
@@ -17,7 +17,11 @@
 
 #include "qemu/osdep.h"
 #include "qemu-common.h"
+#include "qemu/bswap.h"
+#include "qapi/qmp/qlist.h"
 #include "libqtest.h"
+#include "libqos/fw_cfg.h"
+#include "standard-headers/linux/qemu_fw_cfg.h"
 
 #define ARGV_SIZE 256
 
@@ -388,6 +392,557 @@ static void test_ide_drive_cd_0(void)
 qtest_quit(qts);
 }
 
+typedef struct {
+bool active;
+uint32_t head;
+uint32_t sector;
+uint32_t cyl;
+uint32_t end_head;
+uint32_t end_sector;
+uint32_t end_cyl;
+uint32_t start_sect;
+uint32_t nr_sects;
+} MBRpartitions[4];
+
+static MBRpartitions empty_mbr = { {false, 0, 0, 0, 0, 0, 0, 0, 0},
+   {false, 0, 0, 0, 0, 0, 0, 0, 0},
+   {false, 0, 0, 0, 0, 0, 0, 0, 0},
+   {false, 0, 0, 0, 0, 0, 0, 0, 0} };
+
+static char *create_qcow2_with_mbr(MBRpartitions mbr, uint64_t sectors)
+{
+const char *template = "/tmp/qtest.XX";
+char *raw_path = strdup(template);
+char *qcow2_path = strdup(template);
+char cmd[100 + 2 * PATH_MAX];
+uint8_t buf[512];
+int i, ret, fd, offset;
+uint64_t qcow2_size = sectors * 512;
+uint8_t status, parttype, head, sector, cyl;
+
+offset = 0xbe;
+
+for (i = 0; i < 4; i++) {
+status = mbr[i].active ? 0x80 : 0x00;
+g_assert(mbr[i].head < 256);
+g_assert(mbr[i].sector < 64);
+g_assert(mbr[i].cyl < 1024);
+head = mbr[i].head;
+sector = mbr[i].sector + ((mbr[i].cyl & 0x300) >> 2);
+cyl = mbr[i].cyl & 0xff;
+
+buf[offset + 0x0] = status;
+buf[offset + 0x1] = head;
+buf[offset + 0x2] = sector;
+buf[offset + 0x3] = cyl;
+
+parttype = 0;
+g_assert(mbr[i].end_head < 256);
+g_assert(mbr[i].end_sector < 64);
+g_assert(mbr[i].end_cyl < 1024);
+head = mbr[i].end_head;
+sector = mbr[i].end_sector + ((mbr[i].end_cyl & 0x300) >> 2);
+cyl = mbr[i].end_cyl & 0xff;
+
+buf[offset + 0x4] = parttype;
+buf[offset + 0x5] = head;
+buf[offset + 0x6] = sector;
+buf[offset + 0x7] = cyl;
+
+(*(uint32_t *)&buf[offset + 0x8]) = cpu_to_le32(mbr[i].start_sect);
+(*(uint32_t *)&buf[offset + 0xc]) = cpu_to_le32(mbr[i].nr_sects);
+
+offset += 0x10;
+}
+
+fd = mkstemp(raw_path);
+g_assert(fd);
+close(fd);
+
+fd = open(raw_path, O_WRONLY);
+g_assert(fd >= 0);
+ret = write(fd, buf, sizeof(buf));
+g_assert(ret == sizeof(buf));
+close(fd);
+
+fd = mkstemp(qcow2_path);
+g_assert(fd);
+close(fd);
+
+ret = snprintf(cmd, sizeof(cmd),
+   "$QTEST_QEMU_IMG convert -f raw -O qcow2 %s %s > /dev/null",
+   raw_path, qcow2_path);
+g_assert((0 < ret) && (ret <= sizeof(cmd)));
+ret = system(cmd);
+g_assert(ret == 0);
+
+ret = snprintf(cmd, sizeof(cmd),
+   "$QTEST_QEMU_IMG resize %s %" PRIu64 " > /dev/null",
+   qcow2_path, qcow2_size);
+g_assert((0 < ret) && (ret <= sizeof(cmd)));
+ret = system(cmd);
+g_assert(ret == 0);
+
+unlink(raw_path);
+free(raw_path);
+
+return qcow2_path;
+}
+
+struct QemuCfgFile {
+uint32_t  size;/* file size */
+uint16_t  select;  /* write this to 0x510 to read it */
+uint16_t  reserv

[Qemu-block] [QEMU] [PATCH v2 4/8] scsi: Propagate unrealize() callback to scsi-hd

2019-06-12 Thread Sam Eiderman
We will need to add LCHS removal logic to scsi-hd's unrealize() in the
next commit.

Reviewed-by: Karl Heubaum 
Reviewed-by: Arbel Moshe 
Signed-off-by: Sam Eiderman 
---
 hw/scsi/scsi-bus.c | 15 +++
 include/hw/scsi/scsi.h |  1 +
 2 files changed, 16 insertions(+)

diff --git a/hw/scsi/scsi-bus.c b/hw/scsi/scsi-bus.c
index c480553083..f6fe497a1a 100644
--- a/hw/scsi/scsi-bus.c
+++ b/hw/scsi/scsi-bus.c
@@ -55,6 +55,14 @@ static void scsi_device_realize(SCSIDevice *s, Error **errp)
 }
 }
 
+static void scsi_device_unrealize(SCSIDevice *s, Error **errp)
+{
+SCSIDeviceClass *sc = SCSI_DEVICE_GET_CLASS(s);
+if (sc->unrealize) {
+sc->unrealize(s, errp);
+}
+}
+
 int scsi_bus_parse_cdb(SCSIDevice *dev, SCSICommand *cmd, uint8_t *buf,
void *hba_private)
 {
@@ -213,11 +221,18 @@ static void scsi_qdev_realize(DeviceState *qdev, Error 
**errp)
 static void scsi_qdev_unrealize(DeviceState *qdev, Error **errp)
 {
 SCSIDevice *dev = SCSI_DEVICE(qdev);
+Error *local_err = NULL;
 
 if (dev->vmsentry) {
 qemu_del_vm_change_state_handler(dev->vmsentry);
 }
 
+scsi_device_unrealize(dev, &local_err);
+if (local_err) {
+error_propagate(errp, local_err);
+return;
+}
+
 scsi_device_purge_requests(dev, SENSE_CODE(NO_SENSE));
 blockdev_mark_auto_del(dev->conf.blk);
 }
diff --git a/include/hw/scsi/scsi.h b/include/hw/scsi/scsi.h
index 426566a5c6..8cf71f910d 100644
--- a/include/hw/scsi/scsi.h
+++ b/include/hw/scsi/scsi.h
@@ -59,6 +59,7 @@ struct SCSIRequest {
 typedef struct SCSIDeviceClass {
 DeviceClass parent_class;
 void (*realize)(SCSIDevice *dev, Error **errp);
+void (*unrealize)(SCSIDevice *dev, Error **errp);
 int (*parse_cdb)(SCSIDevice *dev, SCSICommand *cmd, uint8_t *buf,
  void *hba_private);
 SCSIRequest *(*alloc_req)(SCSIDevice *s, uint32_t tag, uint32_t lun,
-- 
2.13.3




[Qemu-block] [QEMU] [PATCH v2 7/8] bootdevice: FW_CFG interface for LCHS values

2019-06-12 Thread Sam Eiderman
Using fw_cfg, supply logical CHS values directly from QEMU to the BIOS.

Non-standard logical geometries break under QEMU.

A virtual disk which contains an operating system which depends on
logical geometries (consistent values being reported from BIOS INT13
AH=08) will most likely break under QEMU/SeaBIOS if it has non-standard
logical geometries - for example 56 SPT (sectors per track).
No matter what QEMU will report - SeaBIOS, for large enough disks - will
use LBA translation, which will report 63 SPT instead.

In addition we cannot force SeaBIOS to rely on physical geometries at
all. A virtio-blk-pci virtual disk with 255 physical heads cannot
report more than 16 physical heads when moved to an IDE controller,
since the ATA spec allows a maximum of 16 heads - this is an artifact of
virtualization.

By supplying the logical geometries directly we are able to support such
"exotic" disks.

We serialize this information in a similar way to the "bootorder"
interface.
The fw_cfg entry is "bootdevices" and it serializes a struct.
At the moment the struct holds the logical CHS values, but it
can be expanded easily due to the extendable ABI implemented.

(In the future, we can pass the bootindex through "bootdevices" instead
of "bootorder" - unifying all bootdevice information in one fw_cfg value)

The PV interface through fw_cfg could have also been implemented using
device specific keys, e.g.: "/etc/bootdevice/%s/logical_geometry" where
%s is the device name QEMU produces - but this implementation would
require much more code refactoring, both in QEMU and SeaBIOS, so the
current implementation was chosen.

Reviewed-by: Karl Heubaum 
Reviewed-by: Arbel Moshe 
Signed-off-by: Sam Eiderman 
---
 bootdevice.c| 42 ++
 hw/nvram/fw_cfg.c   | 14 +++---
 include/sysemu/sysemu.h |  1 +
 3 files changed, 54 insertions(+), 3 deletions(-)

diff --git a/bootdevice.c b/bootdevice.c
index 2b12fb85a4..84c2a83f25 100644
--- a/bootdevice.c
+++ b/bootdevice.c
@@ -405,3 +405,45 @@ void del_boot_device_lchs(DeviceState *dev, const char 
*suffix)
 }
 }
 }
+
+typedef struct QEMU_PACKED BootDeviceEntrySerialized {
+/* Do not change field order - add new fields below */
+uint32_t lcyls;
+uint32_t lheads;
+uint32_t lsecs;
+} BootDeviceEntrySerialized;
+
+/* Serialized as: struct size (4) + (device name\0 + device struct) x devices 
*/
+char *get_boot_devices_info(size_t *size)
+{
+FWLCHSEntry *i;
+BootDeviceEntrySerialized s;
+size_t total = 0;
+char *list = NULL;
+
+list = g_malloc0(sizeof(uint32_t));
+*((uint32_t *)list) = (uint32_t)sizeof(s);
+total = sizeof(uint32_t);
+
+QTAILQ_FOREACH(i, &fw_lchs, link) {
+char *bootpath;
+size_t len;
+
+bootpath = get_boot_device_path(i->dev, false, i->suffix);
+s.lcyls = i->lcyls;
+s.lheads = i->lheads;
+s.lsecs = i->lsecs;
+
+len = strlen(bootpath) + 1;
+list = g_realloc(list, total + len + sizeof(s));
+memcpy(&list[total], bootpath, len);
+memcpy(&list[total + len], &s, sizeof(s));
+total += len + sizeof(s);
+
+g_free(bootpath);
+}
+
+*size = total;
+
+return list;
+}
diff --git a/hw/nvram/fw_cfg.c b/hw/nvram/fw_cfg.c
index 9f7b7789bc..008b21542f 100644
--- a/hw/nvram/fw_cfg.c
+++ b/hw/nvram/fw_cfg.c
@@ -916,13 +916,21 @@ void *fw_cfg_modify_file(FWCfgState *s, const char 
*filename,
 
 static void fw_cfg_machine_reset(void *opaque)
 {
+MachineClass *mc = MACHINE_GET_CLASS(qdev_get_machine());
+FWCfgState *s = opaque;
 void *ptr;
 size_t len;
-FWCfgState *s = opaque;
-char *bootindex = get_boot_devices_list(&len);
+char *buf;
 
-ptr = fw_cfg_modify_file(s, "bootorder", (uint8_t *)bootindex, len);
+buf = get_boot_devices_list(&len);
+ptr = fw_cfg_modify_file(s, "bootorder", (uint8_t *)buf, len);
 g_free(ptr);
+
+if (!mc->legacy_fw_cfg_order) {
+buf = get_boot_devices_info(&len);
+ptr = fw_cfg_modify_file(s, "bootdevices", (uint8_t *)buf, len);
+g_free(ptr);
+}
 }
 
 static void fw_cfg_machine_ready(struct Notifier *n, void *data)
diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h
index 173dfbb539..f0552006f4 100644
--- a/include/sysemu/sysemu.h
+++ b/include/sysemu/sysemu.h
@@ -174,6 +174,7 @@ void validate_bootdevices(const char *devices, Error 
**errp);
 void add_boot_device_lchs(DeviceState *dev, const char *suffix,
   uint32_t lcyls, uint32_t lheads, uint32_t lsecs);
 void del_boot_device_lchs(DeviceState *dev, const char *suffix);
+char *get_boot_devices_info(size_t *size);
 
 /* handler to set the boot_device order for a specific type of MachineClass */
 typedef void QEMUBootSetHandler(void *opaque, const char *boot_order,
-- 
2.13.3




[Qemu-block] [QEMU] [PATCH v2 6/8] bootdevice: Refactor get_boot_devices_list

2019-06-12 Thread Sam Eiderman
Move device name construction to a separate function.

We will reuse this function in the following commit to pass logical CHS
parameters through fw_cfg much like we currently pass bootindex.

Reviewed-by: Karl Heubaum 
Reviewed-by: Arbel Moshe 
Signed-off-by: Sam Eiderman 
---
 bootdevice.c | 61 +---
 1 file changed, 34 insertions(+), 27 deletions(-)

diff --git a/bootdevice.c b/bootdevice.c
index bc5e1c2de4..2b12fb85a4 100644
--- a/bootdevice.c
+++ b/bootdevice.c
@@ -202,6 +202,39 @@ DeviceState *get_boot_device(uint32_t position)
 return res;
 }
 
+static char *get_boot_device_path(DeviceState *dev, bool ignore_suffixes,
+  char *suffix)
+{
+char *devpath = NULL, *s = NULL, *d, *bootpath;
+
+if (dev) {
+devpath = qdev_get_fw_dev_path(dev);
+assert(devpath);
+}
+
+if (!ignore_suffixes) {
+if (dev) {
+d = qdev_get_own_fw_dev_path_from_handler(dev->parent_bus, dev);
+if (d) {
+assert(!suffix);
+s = d;
+} else {
+s = g_strdup(suffix);
+}
+} else {
+s = g_strdup(suffix);
+}
+}
+
+bootpath = g_strdup_printf("%s%s",
+   devpath ? devpath : "",
+   s ? s : "");
+g_free(devpath);
+g_free(s);
+
+return bootpath;
+}
+
 /*
  * This function returns null terminated string that consist of new line
  * separated device paths.
@@ -218,36 +251,10 @@ char *get_boot_devices_list(size_t *size)
 bool ignore_suffixes = mc->ignore_boot_device_suffixes;
 
 QTAILQ_FOREACH(i, &fw_boot_order, link) {
-char *devpath = NULL,  *suffix = NULL;
 char *bootpath;
-char *d;
 size_t len;
 
-if (i->dev) {
-devpath = qdev_get_fw_dev_path(i->dev);
-assert(devpath);
-}
-
-if (!ignore_suffixes) {
-if (i->dev) {
-d = qdev_get_own_fw_dev_path_from_handler(i->dev->parent_bus,
-  i->dev);
-if (d) {
-assert(!i->suffix);
-suffix = d;
-} else {
-suffix = g_strdup(i->suffix);
-}
-} else {
-suffix = g_strdup(i->suffix);
-}
-}
-
-bootpath = g_strdup_printf("%s%s",
-   devpath ? devpath : "",
-   suffix ? suffix : "");
-g_free(devpath);
-g_free(suffix);
+bootpath = get_boot_device_path(i->dev, ignore_suffixes, i->suffix);
 
 if (total) {
 list[total-1] = '\n';
-- 
2.13.3




Re: [Qemu-block] [Qemu-devel] [PATCH v2 05/11] monitor: Move cmd_table to MonitorHMP

2019-06-12 Thread Markus Armbruster
Kevin Wolf  writes:

> Monitor.cmd_table contains the handlers for HMP commands, so there is no
> reason to keep it in the state shared with QMP. Move it to MonitorHMP.
>
> Signed-off-by: Kevin Wolf 
> Reviewed-by: Dr. David Alan Gilbert 
> ---
>  monitor.c | 23 +++
>  1 file changed, 15 insertions(+), 8 deletions(-)
>
> diff --git a/monitor.c b/monitor.c
> index f8730e4462..56af8ed448 100644
> --- a/monitor.c
> +++ b/monitor.c
> @@ -191,7 +191,6 @@ struct Monitor {
>  bool use_io_thread;
>  
>  gchar *mon_cpu_path;
> -mon_cmd_t *cmd_table;
>  QTAILQ_ENTRY(Monitor) entry;
>  
>  /*
> @@ -219,6 +218,7 @@ struct MonitorHMP {
>   * These members can be safely accessed without locks.
>   */
>  ReadLineState *rs;
> +mon_cmd_t *cmd_table;
>  };
>  
>  typedef struct {
> @@ -720,13 +720,19 @@ static void monitor_data_init(Monitor *mon, int flags, 
> bool skip_flush,
>  memset(mon, 0, sizeof(Monitor));
>  qemu_mutex_init(&mon->mon_lock);
>  mon->outbuf = qstring_new();
> -/* Use *mon_cmds by default. */
> -mon->cmd_table = mon_cmds;

As far as I can tell, this is the only assignment to Monitor member
cmd_table.  Why not delete it outright, and use mon_cmds directly?
Preferably renamed to something like hmp_cmds.

[...]



Re: [Qemu-block] [Qemu-devel] [PATCH v2 03/11] monitor: Make MonitorQMP a child class of Monitor

2019-06-12 Thread Kevin Wolf
Am 12.06.2019 um 09:59 hat Markus Armbruster geschrieben:
> Kevin Wolf  writes:
> 
> > Currently, struct Monitor mixes state that is only relevant for HMP,
> > state that is only relevant for QMP, and some actually shared state.
> > In particular, a MonitorQMP field is present in the state of any
> > monitor, even if it's not a QMP monitor and therefore doesn't use the
> > state.
> >
> > As a first step towards a clean separation between QMP and HMP, let
> > MonitorQMP extend Monitor and create a MonitorQMP object only when the
> > monitor is actually a QMP monitor.
> >
> > Signed-off-by: Kevin Wolf 
> > Reviewed-by: Dr. David Alan Gilbert 
> 
> This is a bit harder to review than necessary, because it mixes the
> largely mechanical "replace QMP member by child class" with the
> necessary prerequisite "clean up to access QMP stuff only when the
> monitor is actually a QMP monitor".  I'm going to post a split.
> 
> Effectively preexisting: we go from Monitor * to MonitorQMP * without
> checking in several places.  I'll throw in assertions.

Since I don't think doing both in one patch makes review a lot harder
(and in fact think your patch 2.5 is harder to review for completeness
than the combined patch) and since both Dave and you already reviewed
the patch in its current form and I don't want to invalidate that
review, I'm going to keep it as a single patch and just squash in the
additional assertions where container_of() is used. The resulting code
is the same anyway.

Kevin



Re: [Qemu-block] [SeaBIOS] [QEMU] [PATCH 0/8] Add Qemu to SeaBIOS LCHS interface

2019-06-12 Thread no-reply
Patchew URL: 
https://patchew.org/QEMU/20190612094237.47462-1-shmuel.eider...@oracle.com/



Hi,

This series failed the asan build test. Please find the testing commands and
their output below. If you have Docker installed, you can probably reproduce it
locally.

=== TEST SCRIPT BEGIN ===
#!/bin/bash
time make docker-test-debug@fedora TARGET_LIST=x86_64-softmmu J=14 NETWORK=1
=== TEST SCRIPT END ===

clang -iquote /tmp/qemu-test/build/tests -iquote tests -iquote 
/tmp/qemu-test/src/tcg -iquote /tmp/qemu-test/src/tcg/i386 
-I/tmp/qemu-test/src/linux-headers -I/tmp/qemu-test/build/linux-headers -iquote 
. -iquote /tmp/qemu-test/src -iquote /tmp/qemu-test/src/accel/tcg -iquote 
/tmp/qemu-test/src/include -I/usr/include/pixman-1  
-I/tmp/qemu-test/src/dtc/libfdt -Werror -DHAS_LIBSSH2_SFTP_FSYNC  -pthread 
-I/usr/include/glib-2.0 -I/usr/lib64/glib-2.0/include  -fPIE -DPIE -m64 -mcx16 
-D_GNU_SOURCE -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE -Wstrict-prototypes 
-Wredundant-decls -Wall -Wundef -Wwrite-strings -Wmissing-prototypes 
-fno-strict-aliasing -fno-common -fwrapv -std=gnu99  -Wno-string-plus-int 
-Wno-typedef-redefinition -Wno-initializer-overrides -Wexpansion-to-defined 
-Wendif-labels -Wno-shift-negative-value -Wno-missing-include-dirs -Wempty-body 
-Wnested-externs -Wformat-security -Wformat-y2k -Winit-self 
-Wignored-qualifiers -Wold-style-definition -Wtype-limits 
-fstack-protector-strong  -I/usr/include/p11-kit-1 -I/usr/include/libpng16  
-I/usr/include/spice-1 -I/usr/include/spice-server -I/usr/include/cacard 
-I/usr/include/glib-2.0 -I/usr/lib64/glib-2.0/include -I/usr/include/nss3 
-I/usr/include/nspr4 -pthread -I/usr/include/libmount -I/usr/include/blkid 
-I/usr/include/uuid -I/usr/include/pixman-1   -I/tmp/qemu-test/src/tests -MMD 
-MP -MT tests/tpm-tis-swtpm-test.o -MF tests/tpm-tis-swtpm-test.d 
-fsanitize=undefined -fsanitize=address -g   -c -o tests/tpm-tis-swtpm-test.o 
/tmp/qemu-test/src/tests/tpm-tis-swtpm-test.c
clang -iquote /tmp/qemu-test/build/tests -iquote tests -iquote 
/tmp/qemu-test/src/tcg -iquote /tmp/qemu-test/src/tcg/i386 
-I/tmp/qemu-test/src/linux-headers -I/tmp/qemu-test/build/linux-headers -iquote 
. -iquote /tmp/qemu-test/src -iquote /tmp/qemu-test/src/accel/tcg -iquote 
/tmp/qemu-test/src/include -I/usr/include/pixman-1  
-I/tmp/qemu-test/src/dtc/libfdt -Werror -DHAS_LIBSSH2_SFTP_FSYNC  -pthread 
-I/usr/include/glib-2.0 -I/usr/lib64/glib-2.0/include  -fPIE -DPIE -m64 -mcx16 
-D_GNU_SOURCE -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE -Wstrict-prototypes 
-Wredundant-decls -Wall -Wundef -Wwrite-strings -Wmissing-prototypes 
-fno-strict-aliasing -fno-common -fwrapv -std=gnu99  -Wno-string-plus-int 
-Wno-typedef-redefinition -Wno-initializer-overrides -Wexpansion-to-defined 
-Wendif-labels -Wno-shift-negative-value -Wno-missing-include-dirs -Wempty-body 
-Wnested-externs -Wformat-security -Wformat-y2k -Winit-self 
-Wignored-qualifiers -Wold-style-definition -Wtype-limits 
-fstack-protector-strong  -I/usr/include/p11-kit-1 -I/usr/include/libpng16  
-I/usr/include/spice-1 -I/usr/include/spice-server -I/usr/include/cacard 
-I/usr/include/glib-2.0 -I/usr/lib64/glib-2.0/include -I/usr/include/nss3 
-I/usr/include/nspr4 -pthread -I/usr/include/libmount -I/usr/include/blkid 
-I/usr/include/uuid -I/usr/include/pixman-1   -I/tmp/qemu-test/src/tests -MMD 
-MP -MT tests/tpm-tis-test.o -MF tests/tpm-tis-test.d -fsanitize=undefined 
-fsanitize=address -g   -c -o tests/tpm-tis-test.o 
/tmp/qemu-test/src/tests/tpm-tis-test.c
clang -iquote /tmp/qemu-test/build/. -iquote . -iquote /tmp/qemu-test/src/tcg 
-iquote /tmp/qemu-test/src/tcg/i386 -I/tmp/qemu-test/src/linux-headers 
-I/tmp/qemu-test/build/linux-headers -iquote . -iquote /tmp/qemu-test/src 
-iquote /tmp/qemu-test/src/accel/tcg -iquote /tmp/qemu-test/src/include 
-I/tmp/qemu-test/src/tests/fp 
-I/tmp/qemu-test/src/tests/fp/berkeley-softfloat-3/source/include 
-I/tmp/qemu-test/src/tests/fp/berkeley-softfloat-3/source/8086-SSE 
-I/tmp/qemu-test/src/tests/fp/berkeley-testfloat-3/source 
-I/usr/include/pixman-1 -I/tmp/qemu-test/src/dtc/libfdt -Werror 
-DHAS_LIBSSH2_SFTP_FSYNC -pthread -I/usr/include/glib-2.0 
-I/usr/lib64/glib-2.0/include -fPIE -DPIE -m64 -mcx16 -D_GNU_SOURCE 
-D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE -Wstrict-prototypes 
-Wredundant-decls -Wall -Wundef -Wwrite-strings -Wmissing-prototypes 
-fno-strict-aliasing -fno-common -fwrapv -std=gnu99 -Wno-string-plus-int 
-Wno-typedef-redefinition -Wno-initializer-overrides -Wexpansion-to-defined 
-Wendif-labels -Wno-shift-negative-value -Wno-missing-include-dirs -Wempty-body 
-Wnested-externs -Wformat-security -Wformat-y2k -Winit-self 
-Wignored-qualifiers -Wold-style-definition -Wtype-limits 
-fstack-protector-strong -I/usr/include/p11-kit-1 -I/usr/include/libpng16 
-I/usr/include/spice-1 -I/usr/include/spice-server -I/usr/include/cacard 
-I/usr/include/glib-2.0 -I/usr/lib64/glib-2.0/include -I/usr/include/nss3 
-I/usr/include/nspr4 -pthread -I/usr/include/libmount -I/

Re: [Qemu-block] [SeaBIOS] [QEMU] [PATCH 0/8] Add Qemu to SeaBIOS LCHS interface

2019-06-12 Thread no-reply
Patchew URL: 
https://patchew.org/QEMU/20190612094237.47462-1-shmuel.eider...@oracle.com/



Hi,

This series seems to have some coding style problems. See output below for
more information:

Subject: [SeaBIOS] [QEMU] [PATCH 0/8] Add Qemu to SeaBIOS LCHS interface
Type: series
Message-id: 20190612094237.47462-1-shmuel.eider...@oracle.com

=== TEST SCRIPT BEGIN ===
#!/bin/bash
git rev-parse base > /dev/null || exit 0
git config --local diff.renamelimit 0
git config --local diff.renames True
git config --local diff.algorithm histogram
./scripts/checkpatch.pl --mailback base..
=== TEST SCRIPT END ===

From https://github.com/patchew-project/qemu
 * [new tag]   
patchew/20190612094237.47462-1-shmuel.eider...@oracle.com -> 
patchew/20190612094237.47462-1-shmuel.eider...@oracle.com
Switched to a new branch 'test'
a9a0cfc956 hd-geo-test: Add tests for lchs override
d1b86f9f47 bootdevice: FW_CFG interface for LCHS values
48dd083d65 bootdevice: Refactor get_boot_devices_list
f37e8f7c3c bootdevice: Gather LCHS from all relevant devices
8a7193c16b scsi: Propagate unrealize() callback to scsi-hd
c0a3a43fe2 bootdevice: Add interface to gather LCHS
9ebc9c1fd9 block: Support providing LCHS from user
6593400075 block: Refactor macros - fix tabbing

=== OUTPUT BEGIN ===
1/8 Checking commit 659340007537 (block: Refactor macros - fix tabbing)
ERROR: Macros with complex values should be enclosed in parenthesis
#55: FILE: include/hw/block/block.h:65:
+#define DEFINE_BLOCK_CHS_PROPERTIES(_state, _conf)  \
+DEFINE_PROP_UINT32("cyls", _state, _conf.cyls, 0),  \
+DEFINE_PROP_UINT32("heads", _state, _conf.heads, 0),\
 DEFINE_PROP_UINT32("secs", _state, _conf.secs, 0)

total: 1 errors, 0 warnings, 37 lines checked

Patch 1/8 has style problems, please review.  If any of these errors
are false positives report them to the maintainer, see
CHECKPATCH in MAINTAINERS.

2/8 Checking commit 9ebc9c1fd9a2 (block: Support providing LCHS from user)
3/8 Checking commit c0a3a43fe22e (bootdevice: Add interface to gather LCHS)
4/8 Checking commit 8a7193c16bb9 (scsi: Propagate unrealize() callback to 
scsi-hd)
5/8 Checking commit f37e8f7c3cc6 (bootdevice: Gather LCHS from all relevant 
devices)
6/8 Checking commit 48dd083d65ed (bootdevice: Refactor get_boot_devices_list)
7/8 Checking commit d1b86f9f478c (bootdevice: FW_CFG interface for LCHS values)
8/8 Checking commit a9a0cfc956a7 (hd-geo-test: Add tests for lchs override)
=== OUTPUT END ===

Test command exited with code: 1


The full log is available at
http://patchew.org/logs/20190612094237.47462-1-shmuel.eider...@oracle.com/testing.checkpatch/?type=message.
---
Email generated automatically by Patchew [https://patchew.org/].
Please send your feedback to patchew-de...@redhat.com

Re: [Qemu-block] [Qemu-devel] [PATCH v2 04/11] monitor: Create MonitorHMP with readline state

2019-06-12 Thread Kevin Wolf
Am 12.06.2019 um 11:07 hat Markus Armbruster geschrieben:
> Cc: Peter for a monitor I/O thread question.
> 
> Kevin Wolf  writes:
> 
> > The ReadLineState in Monitor is only used for HMP monitors. Create
> > MonitorHMP and move it there.
> >
> > Signed-off-by: Kevin Wolf 
> > Reviewed-by: Dr. David Alan Gilbert 

> > @@ -218,6 +210,17 @@ struct Monitor {
> >  int mux_out;
> >  };
> >  
> > +struct MonitorHMP {
> > +Monitor common;
> > +/*
> > + * State used only in the thread "owning" the monitor.
> > + * If @use_io_thread, this is @mon_iothread.
> > + * Else, it's the main thread.
> > + * These members can be safely accessed without locks.
> > + */
> > +ReadLineState *rs;
> > +};
> > +
> 
> Hmm.
> 
> The monitor I/O thread code makes an effort not to restrict I/O thread
> use to QMP, even though we only use it there.  Whether the code would
> actually work for HMP as well we don't know.
> 
> Readline was similar until your PATCH 02: the code made an effort not to
> restrict it to HMP, even though we only use it there.  Whether the code
> would actually work for QMP as well we don't know.
> 
> Should we stop pretending and hard-code "I/O thread only for QMP"?
> 
> If yes, the comment above gets simplified by the patch that hard-codes
> "I/O thread only for QMP".
> 
> If no, we should perhaps point out that we currently don't use an I/O
> thread with HMP.  The comment above seems like a good place for that.
> 
> Perhaps restricting readline to HMP should be a separate patch before
> PATCH 02.

Yes, possibly iothreads could be restricted to QMP. It doesn't help me
in splitting the monitor in any way, though, so I don't see it within
the scope of this series.

Keeping readline around for QMP, on the other hand, would probably have
been harder than making the restriction.

As for splitting patch 2, I don't think that reorganising a patch that
already does its job and already received review is the most productive
thing we could do, but if you insist on a separate patch, I can do that.

> > @@ -748,12 +754,13 @@ char *qmp_human_monitor_command(const char 
> > *command_line, bool has_cpu_index,
> >  int64_t cpu_index, Error **errp)
> >  {
> >  char *output = NULL;
> > -Monitor *old_mon, hmp;
> > +Monitor *old_mon;
> > +MonitorHMP hmp = {};
> 
> Any particular reason for adding the initializer?

Yes:

> >  
> > -monitor_data_init(&hmp, 0, true, false);
> > +monitor_data_init(&hmp.common, 0, true, false);

monitor_data_init() does a memset(), but only on hmp.common, so the
fields outside of hmp.common would remain uninitialised. Specifically,
hmp.rs wouldn't be initialised to NULL and attempting to free it in the
end would crash.

> >  old_mon = cur_mon;
> > -cur_mon = &hmp;
> > +cur_mon = &hmp.common;
> >  
> >  if (has_cpu_index) {
> >  int ret = monitor_set_cpu(cpu_index);

> > @@ -1341,16 +1348,19 @@ static void hmp_info_sync_profile(Monitor *mon, 
> > const QDict *qdict)
> >  
> >  static void hmp_info_history(Monitor *mon, const QDict *qdict)
> >  {
> > +MonitorHMP *hmp_mon = container_of(mon, MonitorHMP, common);
> 
> Unchecked conversion.  Tolerable, I think, since HMP command handlers
> generally don't get invoked manually, unlike QMP command handlers.

I would like to see all HMP command handlers take MonitorHMP* instead of
Monitor*, but that would be a big ugly patch touching everything that
isn't really needed for the goal of this series, so I didn't include it.

If you consider it valuable to get rid of this container_of(), that's
probably the follow-up you could do.

> > @@ -4460,6 +4474,7 @@ static void monitor_qmp_event(void *opaque, int event)
> >  static void monitor_event(void *opaque, int event)
> >  {
> >  Monitor *mon = opaque;
> > +MonitorHMP *hmp_mon = container_of(cur_mon, MonitorHMP, common);
> 
> Any particular reason for changing from @opaque to @cur_mon?

Probably a copy & paste error, thanks for catching it! I'll fix it.

> > @@ -4662,11 +4679,11 @@ static void monitor_init_qmp(Chardev *chr, int 
> > flags)
> >  
> >  static void monitor_init_hmp(Chardev *chr, int flags)
> >  {
> > -Monitor *mon = g_malloc(sizeof(*mon));
> > +MonitorHMP *mon = g_malloc0(sizeof(*mon));
> 
> Any particular reason for changing to g_malloc0()?
> 
> You hid the same change for monitor_init_qmp() in PATCH 03, where I
> missed it until now.

As above, initialising the fields outside mon->common.

Kevin



Re: [Qemu-block] [Qemu-devel] [PATCH v2 04/11] monitor: Create MonitorHMP with readline state

2019-06-12 Thread Peter Xu
On Wed, Jun 12, 2019 at 11:07:01AM +0200, Markus Armbruster wrote:

[...]

> > +struct MonitorHMP {
> > +Monitor common;
> > +/*
> > + * State used only in the thread "owning" the monitor.
> > + * If @use_io_thread, this is @mon_iothread.
> > + * Else, it's the main thread.
> > + * These members can be safely accessed without locks.
> > + */
> > +ReadLineState *rs;
> > +};
> > +
> 
> Hmm.
> 
> The monitor I/O thread code makes an effort not to restrict I/O thread
> use to QMP, even though we only use it there.  Whether the code would
> actually work for HMP as well we don't know.
> 
> Readline was similar until your PATCH 02: the code made an effort not to
> restrict it to HMP, even though we only use it there.  Whether the code
> would actually work for QMP as well we don't know.
> 
> Should we stop pretending and hard-code "I/O thread only for QMP"?
> 
> If yes, the comment above gets simplified by the patch that hard-codes
> "I/O thread only for QMP".
> 
> If no, we should perhaps point out that we currently don't use an I/O
> thread with HMP.  The comment above seems like a good place for that.

Yes I agree on that if we're refactoring the comment then we can make
it more explicit here.  For my own preference, I would prefer the
latter one; we could even have a bigger comment above MonitorHMP
mentioning that it's only used in main thread so no lock is needed for
all the HMP only structs (until someone wants to hammer on HMP again).

Thanks,

-- 
Peter Xu



[Qemu-block] [QEMU] [PATCH 8/8] hd-geo-test: Add tests for lchs override

2019-06-12 Thread Sam Eiderman
Add QTest tests to check the logical geometry override option.

The tests in hd-geo-test are out of date - they only test IDE and do not
test interesting MBRs.

I added a few helper functions which will make adding more tests easier.

QTest's fw_cfg helper functions support only legacy fw_cfg, so I had to
read the new fw_cfg layout on my own.

Creating qcow2 disks with specific size and MBR layout is currently
unused - we only use a default empty MBR.

Reviewed-by: Karl Heubaum 
Reviewed-by: Arbel Moshe 
Signed-off-by: Sam Eiderman 
---
 tests/Makefile.include |   2 +-
 tests/hd-geo-test.c| 565 +
 2 files changed, 566 insertions(+), 1 deletion(-)

diff --git a/tests/Makefile.include b/tests/Makefile.include
index 46a36c2c95..55ea165ed4 100644
--- a/tests/Makefile.include
+++ b/tests/Makefile.include
@@ -765,7 +765,7 @@ tests/ide-test$(EXESUF): tests/ide-test.o $(libqos-pc-obj-y)
 tests/ahci-test$(EXESUF): tests/ahci-test.o $(libqos-pc-obj-y) 
qemu-img$(EXESUF)
 tests/ipmi-kcs-test$(EXESUF): tests/ipmi-kcs-test.o
 tests/ipmi-bt-test$(EXESUF): tests/ipmi-bt-test.o
-tests/hd-geo-test$(EXESUF): tests/hd-geo-test.o
+tests/hd-geo-test$(EXESUF): tests/hd-geo-test.o $(libqos-obj-y)
 tests/boot-order-test$(EXESUF): tests/boot-order-test.o $(libqos-obj-y)
 tests/boot-serial-test$(EXESUF): tests/boot-serial-test.o $(libqos-obj-y)
 tests/bios-tables-test$(EXESUF): tests/bios-tables-test.o \
diff --git a/tests/hd-geo-test.c b/tests/hd-geo-test.c
index 62eb624726..16e7447f75 100644
--- a/tests/hd-geo-test.c
+++ b/tests/hd-geo-test.c
@@ -17,7 +17,11 @@
 
 #include "qemu/osdep.h"
 #include "qemu-common.h"
+#include "qemu/bswap.h"
+#include "qapi/qmp/qlist.h"
 #include "libqtest.h"
+#include "libqos/fw_cfg.h"
+#include "standard-headers/linux/qemu_fw_cfg.h"
 
 #define ARGV_SIZE 256
 
@@ -388,6 +392,557 @@ static void test_ide_drive_cd_0(void)
 qtest_quit(qts);
 }
 
+typedef struct {
+bool active;
+uint32_t head;
+uint32_t sector;
+uint32_t cyl;
+uint32_t end_head;
+uint32_t end_sector;
+uint32_t end_cyl;
+uint32_t start_sect;
+uint32_t nr_sects;
+} MBRpartitions[4];
+
+static MBRpartitions empty_mbr = { {false, 0, 0, 0, 0, 0, 0, 0, 0},
+   {false, 0, 0, 0, 0, 0, 0, 0, 0},
+   {false, 0, 0, 0, 0, 0, 0, 0, 0},
+   {false, 0, 0, 0, 0, 0, 0, 0, 0} };
+
+static char *create_qcow2_with_mbr(MBRpartitions mbr, uint64_t sectors)
+{
+const char *template = "/tmp/qtest.XX";
+char *raw_path = strdup(template);
+char *qcow2_path = strdup(template);
+char cmd[100 + 2 * PATH_MAX];
+uint8_t buf[512];
+int i, ret, fd, offset;
+uint64_t qcow2_size = sectors * 512;
+uint8_t status, parttype, head, sector, cyl;
+
+offset = 0xbe;
+
+for (i = 0; i < 4; i++) {
+status = mbr[i].active ? 0x80 : 0x00;
+g_assert(mbr[i].head < 256);
+g_assert(mbr[i].sector < 64);
+g_assert(mbr[i].cyl < 1024);
+head = mbr[i].head;
+sector = mbr[i].sector + ((mbr[i].cyl & 0x300) >> 2);
+cyl = mbr[i].cyl & 0xff;
+
+buf[offset + 0x0] = status;
+buf[offset + 0x1] = head;
+buf[offset + 0x2] = sector;
+buf[offset + 0x3] = cyl;
+
+parttype = 0;
+g_assert(mbr[i].end_head < 256);
+g_assert(mbr[i].end_sector < 64);
+g_assert(mbr[i].end_cyl < 1024);
+head = mbr[i].end_head;
+sector = mbr[i].end_sector + ((mbr[i].end_cyl & 0x300) >> 2);
+cyl = mbr[i].end_cyl & 0xff;
+
+buf[offset + 0x4] = parttype;
+buf[offset + 0x5] = head;
+buf[offset + 0x6] = sector;
+buf[offset + 0x7] = cyl;
+
+(*(uint32_t *)&buf[offset + 0x8]) = cpu_to_le32(mbr[i].start_sect);
+(*(uint32_t *)&buf[offset + 0xc]) = cpu_to_le32(mbr[i].nr_sects);
+
+offset += 0x10;
+}
+
+fd = mkstemp(raw_path);
+g_assert(fd);
+close(fd);
+
+fd = open(raw_path, O_WRONLY);
+g_assert(fd >= 0);
+ret = write(fd, buf, sizeof(buf));
+g_assert(ret == sizeof(buf));
+close(fd);
+
+fd = mkstemp(qcow2_path);
+g_assert(fd);
+close(fd);
+
+ret = snprintf(cmd, sizeof(cmd),
+   "$QTEST_QEMU_IMG convert -f raw -O qcow2 %s %s > /dev/null",
+   raw_path, qcow2_path);
+g_assert((0 < ret) && (ret <= sizeof(cmd)));
+ret = system(cmd);
+g_assert(ret == 0);
+
+ret = snprintf(cmd, sizeof(cmd),
+   "$QTEST_QEMU_IMG resize %s %" PRIu64 " > /dev/null",
+   qcow2_path, qcow2_size);
+g_assert((0 < ret) && (ret <= sizeof(cmd)));
+ret = system(cmd);
+g_assert(ret == 0);
+
+unlink(raw_path);
+free(raw_path);
+
+return qcow2_path;
+}
+
+struct QemuCfgFile {
+uint32_t  size;/* file size */
+uint16_t  select;  /* write this to 0x510 to read it */
+uint16_t  reserv

[Qemu-block] [QEMU] [PATCH 7/8] bootdevice: FW_CFG interface for LCHS values

2019-06-12 Thread Sam Eiderman
Using fw_cfg, supply logical CHS values directly from QEMU to the BIOS.

Non-standard logical geometries break under QEMU.

A virtual disk which contains an operating system which depends on
logical geometries (consistent values being reported from BIOS INT13
AH=08) will most likely break under QEMU/SeaBIOS if it has non-standard
logical geometries - for example 56 SPT (sectors per track).
No matter what QEMU reports, SeaBIOS will - for large enough disks -
use LBA translation, which reports 63 SPT instead.

In addition we cannot force SeaBIOS to rely on physical geometries at
all. A virtio-blk-pci virtual disk with 255 physical heads cannot
report more than 16 physical heads when moved to an IDE controller,
since the ATA spec allows a maximum of 16 heads - this is an artifact of
virtualization.

By supplying the logical geometries directly we are able to support such
"exotic" disks.

We serialize this information in a similar way to the "bootorder"
interface.
The fw_cfg entry is "bootdevices" and it serializes a struct.
At the moment the struct holds the logical CHS values, but it
can be expanded easily due to the extendable ABI implemented.

(In the future, we can pass the bootindex through "bootdevices" instead
of "bootorder" - unifying all bootdevice information in one fw_cfg value)

The PV interface through fw_cfg could have also been implemented using
device specific keys, e.g.: "/etc/bootdevice/%s/logical_geometry" where
%s is the device name QEMU produces - but this implementation would
require much more code refactoring, both in QEMU and SeaBIOS, so the
current implementation was chosen.

Reviewed-by: Karl Heubaum 
Reviewed-by: Arbel Moshe 
Signed-off-by: Sam Eiderman 
---
 bootdevice.c| 42 ++
 hw/nvram/fw_cfg.c   | 14 +++---
 include/sysemu/sysemu.h |  1 +
 3 files changed, 54 insertions(+), 3 deletions(-)

diff --git a/bootdevice.c b/bootdevice.c
index 2b12fb85a4..84c2a83f25 100644
--- a/bootdevice.c
+++ b/bootdevice.c
@@ -405,3 +405,45 @@ void del_boot_device_lchs(DeviceState *dev, const char 
*suffix)
 }
 }
 }
+
+typedef struct QEMU_PACKED BootDeviceEntrySerialized { /* per-device record */
+/* Do not change field order - add new fields below */
+uint32_t lcyls;  /* logical cylinders */
+uint32_t lheads; /* logical heads */
+uint32_t lsecs;  /* logical sectors (presumably per track - confirm) */
+} BootDeviceEntrySerialized;
+
+/* Serialized as: struct size (4) + (device name\0 + device struct) x devices.
+ * Returns the allocated blob; its length in bytes is stored in *size. */
+char *get_boot_devices_info(size_t *size)
+{
+FWLCHSEntry *i;
+BootDeviceEntrySerialized s;
+size_t total = 0;
+char *list = NULL;
+
+list = g_malloc0(sizeof(uint32_t)); /* room for the 4-byte size header */
+*((uint32_t *)list) = (uint32_t)sizeof(s); /* stored in host byte order */
+total = sizeof(uint32_t);
+
+QTAILQ_FOREACH(i, &fw_lchs, link) { /* one record per fw_lchs entry */
+char *bootpath;
+size_t len;
+
+bootpath = get_boot_device_path(i->dev, false, i->suffix);
+s.lcyls = i->lcyls;
+s.lheads = i->lheads;
+s.lsecs = i->lsecs;
+
+len = strlen(bootpath) + 1; /* name length including the terminator */
+list = g_realloc(list, total + len + sizeof(s));
+memcpy(&list[total], bootpath, len);
+memcpy(&list[total + len], &s, sizeof(s)); /* NOTE(review): host-endian */
+total += len + sizeof(s);
+
+g_free(bootpath);
+}
+
+*size = total;
+
+return list;
+}
diff --git a/hw/nvram/fw_cfg.c b/hw/nvram/fw_cfg.c
index 9f7b7789bc..008b21542f 100644
--- a/hw/nvram/fw_cfg.c
+++ b/hw/nvram/fw_cfg.c
@@ -916,13 +916,21 @@ void *fw_cfg_modify_file(FWCfgState *s, const char 
*filename,
 
 static void fw_cfg_machine_reset(void *opaque)
 {
+MachineClass *mc = MACHINE_GET_CLASS(qdev_get_machine());
+FWCfgState *s = opaque;
 void *ptr;
 size_t len;
-FWCfgState *s = opaque;
-char *bootindex = get_boot_devices_list(&len);
+char *buf; /* blob handed to fw_cfg_modify_file() */
 
-ptr = fw_cfg_modify_file(s, "bootorder", (uint8_t *)bootindex, len);
+buf = get_boot_devices_list(&len); /* rebuild the bootorder contents */
+ptr = fw_cfg_modify_file(s, "bootorder", (uint8_t *)buf, len);
 g_free(ptr);
+
+if (!mc->legacy_fw_cfg_order) { /* bootdevices file only when not legacy */
+buf = get_boot_devices_info(&len);
+ptr = fw_cfg_modify_file(s, "bootdevices", (uint8_t *)buf, len);
+g_free(ptr); /* free the pointer returned by fw_cfg_modify_file() */
+}
 }
 
 static void fw_cfg_machine_ready(struct Notifier *n, void *data)
diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h
index 173dfbb539..f0552006f4 100644
--- a/include/sysemu/sysemu.h
+++ b/include/sysemu/sysemu.h
@@ -174,6 +174,7 @@ void validate_bootdevices(const char *devices, Error 
**errp);
 void add_boot_device_lchs(DeviceState *dev, const char *suffix,
   uint32_t lcyls, uint32_t lheads, uint32_t lsecs);
 void del_boot_device_lchs(DeviceState *dev, const char *suffix);
+char *get_boot_devices_info(size_t *size);
 
 /* handler to set the boot_device order for a specific type of MachineClass */
 typedef void QEMUBootSetHandler(void *opaque, const char *boot_order,
-- 
2.13.3




[Qemu-block] [QEMU] [PATCH 4/8] scsi: Propagate unrealize() callback to scsi-hd

2019-06-12 Thread Sam Eiderman
We will need to add LCHS removal logic to scsi-hd's unrealize() in the
next commit.

Reviewed-by: Karl Heubaum 
Reviewed-by: Arbel Moshe 
Signed-off-by: Sam Eiderman 
---
 hw/scsi/scsi-bus.c | 15 +++
 include/hw/scsi/scsi.h |  1 +
 2 files changed, 16 insertions(+)

diff --git a/hw/scsi/scsi-bus.c b/hw/scsi/scsi-bus.c
index c480553083..f6fe497a1a 100644
--- a/hw/scsi/scsi-bus.c
+++ b/hw/scsi/scsi-bus.c
@@ -55,6 +55,14 @@ static void scsi_device_realize(SCSIDevice *s, Error **errp)
 }
 }
 
+static void scsi_device_unrealize(SCSIDevice *s, Error **errp)
+{ /* Call the device class unrealize hook when one is set. */
+SCSIDeviceClass *sc = SCSI_DEVICE_GET_CLASS(s);
+if (sc->unrealize) { /* the hook is optional */
+sc->unrealize(s, errp);
+}
+}
+
 int scsi_bus_parse_cdb(SCSIDevice *dev, SCSICommand *cmd, uint8_t *buf,
void *hba_private)
 {
@@ -213,11 +221,18 @@ static void scsi_qdev_realize(DeviceState *qdev, Error 
**errp)
 static void scsi_qdev_unrealize(DeviceState *qdev, Error **errp)
 {
 SCSIDevice *dev = SCSI_DEVICE(qdev);
+Error *local_err = NULL; /* receives any error from the class hook */
 
 if (dev->vmsentry) {
 qemu_del_vm_change_state_handler(dev->vmsentry);
 }
 
+scsi_device_unrealize(dev, &local_err); /* run the class unrealize hook */
+if (local_err) {
+error_propagate(errp, local_err);
+return; /* NOTE(review): skips the request purge and auto-del below */
+}
+
 scsi_device_purge_requests(dev, SENSE_CODE(NO_SENSE));
 blockdev_mark_auto_del(dev->conf.blk);
 }
diff --git a/include/hw/scsi/scsi.h b/include/hw/scsi/scsi.h
index 426566a5c6..8cf71f910d 100644
--- a/include/hw/scsi/scsi.h
+++ b/include/hw/scsi/scsi.h
@@ -59,6 +59,7 @@ struct SCSIRequest {
 typedef struct SCSIDeviceClass {
 DeviceClass parent_class;
 void (*realize)(SCSIDevice *dev, Error **errp);
+void (*unrealize)(SCSIDevice *dev, Error **errp);
 int (*parse_cdb)(SCSIDevice *dev, SCSICommand *cmd, uint8_t *buf,
  void *hba_private);
 SCSIRequest *(*alloc_req)(SCSIDevice *s, uint32_t tag, uint32_t lun,
-- 
2.13.3




[Qemu-block] [QEMU] [PATCH 6/8] bootdevice: Refactor get_boot_devices_list

2019-06-12 Thread Sam Eiderman
Move device name construction to a separate function.

We will reuse this function in the following commit to pass logical CHS
parameters through fw_cfg much like we currently pass bootindex.

Reviewed-by: Karl Heubaum 
Reviewed-by: Arbel Moshe 
Signed-off-by: Sam Eiderman 
---
 bootdevice.c | 61 +---
 1 file changed, 34 insertions(+), 27 deletions(-)

diff --git a/bootdevice.c b/bootdevice.c
index bc5e1c2de4..2b12fb85a4 100644
--- a/bootdevice.c
+++ b/bootdevice.c
@@ -202,6 +202,39 @@ DeviceState *get_boot_device(uint32_t position)
 return res;
 }
 
+static char *get_boot_device_path(DeviceState *dev, bool ignore_suffixes,
+  char *suffix)
+{ /* Build the device path plus suffix as a newly allocated string. */
+char *devpath = NULL, *s = NULL, *d, *bootpath;
+
+if (dev) {
+devpath = qdev_get_fw_dev_path(dev);
+assert(devpath);
+}
+
+if (!ignore_suffixes) { /* otherwise s stays NULL and nothing is appended */
+if (dev) {
+d = qdev_get_own_fw_dev_path_from_handler(dev->parent_bus, dev);
+if (d) {
+assert(!suffix); /* handler path and explicit suffix are exclusive */
+s = d; /* ownership moves to s, released by g_free(s) below */
+} else {
+s = g_strdup(suffix);
+}
+} else {
+s = g_strdup(suffix);
+}
+}
+
+bootpath = g_strdup_printf("%s%s",
+   devpath ? devpath : "",
+   s ? s : "");
+g_free(devpath);
+g_free(s);
+
+return bootpath; /* allocated by g_strdup_printf(); callers g_free() it */
+}
+
 /*
  * This function returns null terminated string that consist of new line
  * separated device paths.
@@ -218,36 +251,10 @@ char *get_boot_devices_list(size_t *size)
 bool ignore_suffixes = mc->ignore_boot_device_suffixes;
 
 QTAILQ_FOREACH(i, &fw_boot_order, link) {
-char *devpath = NULL,  *suffix = NULL;
 char *bootpath;
-char *d;
 size_t len;
 
-if (i->dev) {
-devpath = qdev_get_fw_dev_path(i->dev);
-assert(devpath);
-}
-
-if (!ignore_suffixes) {
-if (i->dev) {
-d = qdev_get_own_fw_dev_path_from_handler(i->dev->parent_bus,
-  i->dev);
-if (d) {
-assert(!i->suffix);
-suffix = d;
-} else {
-suffix = g_strdup(i->suffix);
-}
-} else {
-suffix = g_strdup(i->suffix);
-}
-}
-
-bootpath = g_strdup_printf("%s%s",
-   devpath ? devpath : "",
-   suffix ? suffix : "");
-g_free(devpath);
-g_free(suffix);
+bootpath = get_boot_device_path(i->dev, ignore_suffixes, i->suffix);
 
 if (total) {
 list[total-1] = '\n';
-- 
2.13.3




  1   2   >