date:20181206

Re: [Qemu-block] [Qemu-devel] [PATCH v3 2/4] vmdk: Implement .bdrv_co_create callback

2018-12-06 Thread Markus Armbruster

This is a reasonably careful review of the QAPI-related parts, but more
of an eye-over for the remainder.

Kevin Wolf  writes:

> From: Fam Zheng 
>
> This makes VMDK support blockdev-create. The implementation reuses the
> image creation code in vmdk_co_create_opts which now acceptes a callback
> pointer to "retrieve" BlockBackend pointers from the caller. This way we
> separate the logic between file/extent acquisition and initialization.
>
> The QAPI command parameters are mostly the same as the old create_opts
> except the dropped legacy @compat6 switch, which is redundant with
> @hwversion.
>
> Signed-off-by: Fam Zheng 
> Signed-off-by: Kevin Wolf 
> ---
>  qapi/block-core.json  |  70 +++
>  qapi/qapi-schema.json |   1 +
>  block/vmdk.c  | 452 ++
>  3 files changed, 396 insertions(+), 127 deletions(-)
>
> diff --git a/qapi/block-core.json b/qapi/block-core.json
> index d4fe710836..4778f88dd8 100644
> --- a/qapi/block-core.json
> +++ b/qapi/block-core.json
> @@ -4021,6 +4021,75 @@
>  'size': 'size',
>  '*cluster-size' :   'size' } }
>  
> +##
> +# @BlockdevVmdkSubformat:
> +#
> +# Subformat options for VMDK images
> +#
> +# @monolithicSparse: Single file image with sparse cluster allocation
> +#
> +# @monolithicFlat:   Single flat data image and a descriptor file
> +#
> +# @twoGbMaxExtentSparse: Data is split into 2GB (per virtual LBA) sparse 
> extent
> +#files, in addition to a descriptor file
> +#
> +# @twoGbMaxExtentFlat:   Data is split into 2GB (per virtual LBA) flat extent
> +#files, in addition to a descriptor file
> +#
> +# @streamOptimized:  Single file image sparse cluster allocation, 
> optimized
> +#for streaming over network.
> +#
> +# Since: 4.0
> +##
> +{ 'enum': 'BlockdevVmdkSubformat',
> +  'data': [ 'monolithicSparse', 'monolithicFlat', 'twoGbMaxExtentSparse',
> +'twoGbMaxExtentFlat', 'streamOptimized'] }

Don't conform to the QAPI rules on names "to match VMDK spec spellings"
(see qapi-schema.json below).  We discussed this in review of v1.

PRO: matches the VMDK spec (whatever that's worth), keeps the code
simple.

CON: the non-conforming names become part of the stable QMP interface,
in the argument of command blockdev-create.

I don't like the CON, but I'm willing to tolerate it in the name of
simplicity.

> +
> +##
> +# @BlockdevVmdkAdapterType:
> +#
> +# Adapter type info for VMDK images
> +#
> +# Since: 4.0
> +##
> +{ 'enum': 'BlockdevVmdkAdapterType',
> +  'data': [ 'ide', 'buslogic', 'lsilogic', 'legacyesx'] }

May I have hyphens in the composite nouns?  Hmm, might be the way they
are to match VMDK spec spellings or for backward compatibility.  I guess
we'll see below.

> +
> +##
> +# @BlockdevCreateOptionsVmdk:
> +#
> +# Driver specific image creation options for VMDK.
> +#
> +# @file Where to store the new image file. This refers to the image
> +#   file for monolithcSparse and streamOptimized format, or the
> +#   descriptor file for other formats.
> +# @size Size of the virtual disk in bytes
> +# @extents  Where to store the data extents. Required for monolithcflat,
> +#   twoGbMaxExtentSparse and twoGbMaxExtentFlat formats. For
> +#   monolithicflat, only one entry is required; for

monolithicFlat

> +#   twoGbMaxExtent* formats, the number of entries required is
> +#   calculated as extent_number = virtual_size / 2GB.

Is it okay to supply more entries than required, or do I have to supply
exactly the right number?

> +# @subformatThe subformat of the VMDK image. Default: "monolithicsparse".

monolithicSparse

> +# @backing-file The path of backing file. Default: no backing file is used.
> +# @adapter-type The adapter type used to fill in the descriptor. Default: 
> ide.
> +# @hwversionHardware version. The meaningful options are "4" or "6".
> +#   Defaulted to "4".

Default: "4"

> +# @zeroed-grain Whether to enable zeroed-grain feature for sparse subformats.
> +#   Default: false.
> +#
> +# Since: 4.0
> +##
> +{ 'struct': 'BlockdevCreateOptionsVmdk',
> +  'data': { 'file': 'BlockdevRef',
> +'size': 'size',
> +'*extents':  ['BlockdevRef'],
> +'*subformat':   'BlockdevVmdkSubformat',
> +'*backing-file':'str',
> +'*adapter-type':'BlockdevVmdkAdapterType',
> +'*hwversion':   'str',
> +'*zeroed-grain':'bool' } }

@zeroed-grain is undocumented.

> +
> +
>  ##
>  # @SheepdogRedundancyType:
>  #
> @@ -4215,6 +4284,7 @@
>'ssh':'BlockdevCreateOptionsSsh',
>'vdi':'BlockdevCreateOptionsVdi',
>'vhdx':   'BlockdevCreateOptionsVhdx',
> +  'vmdk':   'BlockdevCreateOptionsVmdk',
>

Re: [Qemu-block] [Qemu-devel] [PATCH v3 1/4] vmdk: Refactor vmdk_create_extent

2018-12-06 Thread Markus Armbruster

Kevin Wolf  writes:

> From: Fam Zheng 
>
> The extracted vmdk_init_extent takes a BlockBackend object and
> initializes the format metadata. It is the common part between "qemu-img
> create" and "blockdev-create".
>
> Add a "BlockBackend *pbb" parameter to vmdk_create_extent, to return the
> opened BB to the caller in the next patch.

I'd do this part in the next patch, when it has a user.  Matter of
taste.

>
> Signed-off-by: Fam Zheng 
> Signed-off-by: Kevin Wolf 
> ---
>  block/vmdk.c | 69 
>  1 file changed, 43 insertions(+), 26 deletions(-)
>
> diff --git a/block/vmdk.c b/block/vmdk.c
> index 2c9e86d98f..32fc2c84b3 100644
> --- a/block/vmdk.c
> +++ b/block/vmdk.c
> @@ -1741,35 +1741,17 @@ static int coroutine_fn 
> vmdk_co_pwrite_zeroes(BlockDriverState *bs,
>  return ret;
>  }
>  
> -static int vmdk_create_extent(const char *filename, int64_t filesize,
> -  bool flat, bool compress, bool zeroed_grain,
> -  QemuOpts *opts, Error **errp)
> +static int vmdk_init_extent(BlockBackend *blk,
> +int64_t filesize, bool flat,
> +bool compress, bool zeroed_grain,
> +Error **errp)
>  {
>  int ret, i;
> -BlockBackend *blk = NULL;
>  VMDK4Header header;
> -Error *local_err = NULL;
>  uint32_t tmp, magic, grains, gd_sectors, gt_size, gt_count;
>  uint32_t *gd_buf = NULL;
>  int gd_buf_size;
>  
> -ret = bdrv_create_file(filename, opts, &local_err);
> -if (ret < 0) {
> -error_propagate(errp, local_err);
> -goto exit;
> -}
> -
> -blk = blk_new_open(filename, NULL, NULL,
> -   BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL,
> -   &local_err);
> -if (blk == NULL) {
> -error_propagate(errp, local_err);
> -ret = -EIO;
> -goto exit;
> -}
> -
> -blk_set_allow_write_beyond_eof(blk, true);
> -
>  if (flat) {
>  ret = blk_truncate(blk, filesize, PREALLOC_MODE_OFF, errp);
>  goto exit;
> @@ -1863,15 +1845,50 @@ static int vmdk_create_extent(const char *filename, 
> int64_t filesize,
>   gd_buf, gd_buf_size, 0);
>  if (ret < 0) {
>  error_setg(errp, QERR_IO_ERROR);
> -goto exit;
>  }
>  
>  ret = 0;
> +exit:
> +g_free(gd_buf);
> +return ret;
> +}
> +
> +static int vmdk_create_extent(const char *filename, int64_t filesize,
> +  bool flat, bool compress, bool zeroed_grain,
> +  BlockBackend **pbb,
> +  QemuOpts *opts, Error **errp)
> +{
> +int ret;
> +BlockBackend *blk = NULL;
> +Error *local_err = NULL;
> +
> +ret = bdrv_create_file(filename, opts, &local_err);
> +if (ret < 0) {
> +error_propagate(errp, local_err);
> +goto exit;
> +}
> +
> +blk = blk_new_open(filename, NULL, NULL,
> +   BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL,
> +   &local_err);
> +if (blk == NULL) {
> +error_propagate(errp, local_err);
> +ret = -EIO;
> +goto exit;
> +}
> +
> +blk_set_allow_write_beyond_eof(blk, true);
> +
> +ret = vmdk_init_extent(blk, filesize, flat, compress, zeroed_grain, 
> errp);
>  exit:
>  if (blk) {
> -blk_unref(blk);
> +if (pbb) {
> +*pbb = blk;
> +} else {
> +blk_unref(blk);
> +blk = NULL;

Dead assignment.  Matter of taste.

> +}
>  }
> -g_free(gd_buf);
>  return ret;
>  }
>  
> @@ -2094,7 +2111,7 @@ static int coroutine_fn vmdk_co_create_opts(const char 
> *filename, QemuOpts *opts
>  snprintf(ext_filename, PATH_MAX, "%s%s", path, desc_filename);
>  
>  if (vmdk_create_extent(ext_filename, size,
> -   flat, compress, zeroed_grain, opts, errp)) {
> +   flat, compress, zeroed_grain, NULL, opts, 
> errp)) {
>  ret = -EINVAL;
>  goto exit;
>  }

Reviewed-by: Markus Armbruster

Re: [Qemu-block] [PATCH] qemu: avoid memory leak while remove disk

2018-12-06 Thread Michael S. Tsirkin

On Fri, Dec 07, 2018 at 09:53:06AM +0800, wangjian wrote:
> Memset vhost_dev to zero in the vhost_dev_cleanup function.
> This causes dev.vqs to be NULL, so that
> vqs does not free up space when calling the g_free function.
> This will result in a memory leak. But you can't release vqs
> directly in the vhost_dev_cleanup function, because vhost_net
> will also call this function, and vhost_net's vqs is assigned by array.
> In order to solve this problem, we first save the pointer of vqs,
> and release the space of vqs after vhost_dev_cleanup is called.
> 
> Signed-off-by: Jian Wang 

Thanks!
This will be needed upstream and in the stable branch I think.

> ---
>  hw/block/vhost-user-blk.c | 7 +--
>  hw/scsi/vhost-scsi.c  | 3 ++-
>  hw/scsi/vhost-user-scsi.c | 3 ++-
>  3 files changed, 9 insertions(+), 4 deletions(-)
> 
> diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c
> index 1451940..c3af28f 100644
> --- a/hw/block/vhost-user-blk.c
> +++ b/hw/block/vhost-user-blk.c
> @@ -250,6 +250,7 @@ static void vhost_user_blk_device_realize(DeviceState 
> *dev, Error **errp)
>  VirtIODevice *vdev = VIRTIO_DEVICE(dev);
>  VHostUserBlk *s = VHOST_USER_BLK(vdev);
>  VhostUserState *user;
> +struct vhost_virtqueue *vqs = NULL;
>  int i, ret;
> 
>  if (!s->chardev.chr) {
> @@ -288,6 +289,7 @@ static void vhost_user_blk_device_realize(DeviceState 
> *dev, Error **errp)
>  s->dev.vqs = g_new(struct vhost_virtqueue, s->dev.nvqs);
>  s->dev.vq_index = 0;
>  s->dev.backend_features = 0;
> +vqs = s->dev.vqs;
> 
>  vhost_dev_set_config_notifier(&s->dev, &blk_ops);
> 
> @@ -314,7 +316,7 @@ static void vhost_user_blk_device_realize(DeviceState 
> *dev, Error **errp)
>  vhost_err:
>  vhost_dev_cleanup(&s->dev);
>  virtio_err:
> -g_free(s->dev.vqs);
> +g_free(vqs);
>  virtio_cleanup(vdev);
> 
>  vhost_user_cleanup(user);
> @@ -326,10 +328,11 @@ static void vhost_user_blk_device_unrealize(DeviceState 
> *dev, Error **errp)
>  {
>  VirtIODevice *vdev = VIRTIO_DEVICE(dev);
>  VHostUserBlk *s = VHOST_USER_BLK(dev);
> +struct vhost_virtqueue *vqs = s->dev.vqs;
> 
>  vhost_user_blk_set_status(vdev, 0);
>  vhost_dev_cleanup(&s->dev);
> -g_free(s->dev.vqs);
> +g_free(vqs);
>  virtio_cleanup(vdev);
> 
>  if (s->vhost_user) {
> diff --git a/hw/scsi/vhost-scsi.c b/hw/scsi/vhost-scsi.c
> index 7f21b4f..61e2e57 100644
> --- a/hw/scsi/vhost-scsi.c
> +++ b/hw/scsi/vhost-scsi.c
> @@ -215,6 +215,7 @@ static void vhost_scsi_unrealize(DeviceState *dev, Error 
> **errp)
>  {
>  VirtIODevice *vdev = VIRTIO_DEVICE(dev);
>  VHostSCSICommon *vsc = VHOST_SCSI_COMMON(dev);
> +struct vhost_virtqueue *vqs = vsc->dev.vqs;
> 
>  migrate_del_blocker(vsc->migration_blocker);
>  error_free(vsc->migration_blocker);
> @@ -223,7 +224,7 @@ static void vhost_scsi_unrealize(DeviceState *dev, Error 
> **errp)
>  vhost_scsi_set_status(vdev, 0);
> 
>  vhost_dev_cleanup(&vsc->dev);
> -g_free(vsc->dev.vqs);
> +g_free(vqs);
> 
>  virtio_scsi_common_unrealize(dev, errp);
>  }
> diff --git a/hw/scsi/vhost-user-scsi.c b/hw/scsi/vhost-user-scsi.c
> index 2e1ba4a..6728878 100644
> --- a/hw/scsi/vhost-user-scsi.c
> +++ b/hw/scsi/vhost-user-scsi.c
> @@ -121,12 +121,13 @@ static void vhost_user_scsi_unrealize(DeviceState *dev, 
> Error **errp)
>  VirtIODevice *vdev = VIRTIO_DEVICE(dev);
>  VHostUserSCSI *s = VHOST_USER_SCSI(dev);
>  VHostSCSICommon *vsc = VHOST_SCSI_COMMON(s);
> +struct vhost_virtqueue *vqs = vsc->dev.vqs;
> 
>  /* This will stop the vhost backend. */
>  vhost_user_scsi_set_status(vdev, 0);
> 
>  vhost_dev_cleanup(&vsc->dev);
> -g_free(vsc->dev.vqs);
> +g_free(vqs);
> 
>  virtio_scsi_common_unrealize(dev, errp);
> 
> --
> 1.8.3.1
>

[Qemu-block] [PATCH] qemu: avoid memory leak while remove disk

2018-12-06 Thread wangjian


Memset vhost_dev to zero in the vhost_dev_cleanup function.
This causes dev.vqs to be NULL, so that
vqs does not free up space when calling the g_free function.
This will result in a memory leak. But you can't release vqs
directly in the vhost_dev_cleanup function, because vhost_net
will also call this function, and vhost_net's vqs is assigned by array.
In order to solve this problem, we first save the pointer of vqs,
and release the space of vqs after vhost_dev_cleanup is called.

Signed-off-by: Jian Wang 
---
 hw/block/vhost-user-blk.c | 7 +--
 hw/scsi/vhost-scsi.c  | 3 ++-
 hw/scsi/vhost-user-scsi.c | 3 ++-
 3 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c
index 1451940..c3af28f 100644
--- a/hw/block/vhost-user-blk.c
+++ b/hw/block/vhost-user-blk.c
@@ -250,6 +250,7 @@ static void vhost_user_blk_device_realize(DeviceState *dev, 
Error **errp)
 VirtIODevice *vdev = VIRTIO_DEVICE(dev);
 VHostUserBlk *s = VHOST_USER_BLK(vdev);
 VhostUserState *user;
+struct vhost_virtqueue *vqs = NULL;
 int i, ret;
 
 if (!s->chardev.chr) {

@@ -288,6 +289,7 @@ static void vhost_user_blk_device_realize(DeviceState *dev, 
Error **errp)
 s->dev.vqs = g_new(struct vhost_virtqueue, s->dev.nvqs);
 s->dev.vq_index = 0;
 s->dev.backend_features = 0;
+vqs = s->dev.vqs;
 
 vhost_dev_set_config_notifier(&s->dev, &blk_ops);
 
@@ -314,7 +316,7 @@ static void vhost_user_blk_device_realize(DeviceState *dev, Error **errp)

 vhost_err:
 vhost_dev_cleanup(&s->dev);
 virtio_err:
-g_free(s->dev.vqs);
+g_free(vqs);
 virtio_cleanup(vdev);
 
 vhost_user_cleanup(user);

@@ -326,10 +328,11 @@ static void vhost_user_blk_device_unrealize(DeviceState 
*dev, Error **errp)
 {
 VirtIODevice *vdev = VIRTIO_DEVICE(dev);
 VHostUserBlk *s = VHOST_USER_BLK(dev);
+struct vhost_virtqueue *vqs = s->dev.vqs;
 
 vhost_user_blk_set_status(vdev, 0);

 vhost_dev_cleanup(&s->dev);
-g_free(s->dev.vqs);
+g_free(vqs);
 virtio_cleanup(vdev);
 
 if (s->vhost_user) {

diff --git a/hw/scsi/vhost-scsi.c b/hw/scsi/vhost-scsi.c
index 7f21b4f..61e2e57 100644
--- a/hw/scsi/vhost-scsi.c
+++ b/hw/scsi/vhost-scsi.c
@@ -215,6 +215,7 @@ static void vhost_scsi_unrealize(DeviceState *dev, Error 
**errp)
 {
 VirtIODevice *vdev = VIRTIO_DEVICE(dev);
 VHostSCSICommon *vsc = VHOST_SCSI_COMMON(dev);
+struct vhost_virtqueue *vqs = vsc->dev.vqs;
 
 migrate_del_blocker(vsc->migration_blocker);

 error_free(vsc->migration_blocker);
@@ -223,7 +224,7 @@ static void vhost_scsi_unrealize(DeviceState *dev, Error 
**errp)
 vhost_scsi_set_status(vdev, 0);
 
 vhost_dev_cleanup(&vsc->dev);

-g_free(vsc->dev.vqs);
+g_free(vqs);
 
 virtio_scsi_common_unrealize(dev, errp);

 }
diff --git a/hw/scsi/vhost-user-scsi.c b/hw/scsi/vhost-user-scsi.c
index 2e1ba4a..6728878 100644
--- a/hw/scsi/vhost-user-scsi.c
+++ b/hw/scsi/vhost-user-scsi.c
@@ -121,12 +121,13 @@ static void vhost_user_scsi_unrealize(DeviceState *dev, 
Error **errp)
 VirtIODevice *vdev = VIRTIO_DEVICE(dev);
 VHostUserSCSI *s = VHOST_USER_SCSI(dev);
 VHostSCSICommon *vsc = VHOST_SCSI_COMMON(s);
+struct vhost_virtqueue *vqs = vsc->dev.vqs;
 
 /* This will stop the vhost backend. */

 vhost_user_scsi_set_status(vdev, 0);
 
 vhost_dev_cleanup(&vsc->dev);

-g_free(vsc->dev.vqs);
+g_free(vqs);
 
 virtio_scsi_common_unrealize(dev, errp);
 
--

1.8.3.1

Re: [Qemu-block] [Qemu-devel] [PATCH 0/3] bitmaps: remove x- prefix from QMP api

2018-12-06 Thread no-reply

Patchew URL: https://patchew.org/QEMU/20181206192544.3987-1-js...@redhat.com/



Hi,

This series seems to have some coding style problems. See output below for
more information:

Type: series
Subject: [Qemu-devel] [PATCH 0/3] bitmaps: remove x- prefix from QMP api
Message-id: 20181206192544.3987-1-js...@redhat.com

=== TEST SCRIPT BEGIN ===
#!/bin/bash

BASE=base
n=1
total=$(git log --oneline $BASE.. | wc -l)
failed=0

git config --local diff.renamelimit 0
git config --local diff.renames True
git config --local diff.algorithm histogram

commits="$(git log --format=%H --reverse $BASE..)"
for c in $commits; do
echo "Checking PATCH $n/$total: $(git log -n 1 --format=%s $c)..."
if ! git show $c --format=email | ./scripts/checkpatch.pl --mailback -; then
failed=1
echo
fi
n=$((n+1))
done

exit $failed
=== TEST SCRIPT END ===

Updating 3c8cf5a9c21ff8782164d1def7f44bd888713384
Switched to a new branch 'test'
c972673 block: remove 'x' prefix from experimental bitmap APIs
ebd1e6c blockdev: n-ary bitmap merge
68d3c0c blockdev: abort transactions in reverse order

=== OUTPUT BEGIN ===
Checking PATCH 1/3: blockdev: abort transactions in reverse order...
Checking PATCH 2/3: blockdev: n-ary bitmap merge...
ERROR: externs should be avoided in .c files
#23: FILE: blockdev.c:2125:
+void do_block_dirty_bitmap_merge(const char *node, const char *target,

total: 1 errors, 0 warnings, 147 lines checked

Your patch has style problems, please review.  If any of these errors
are false positives report them to the maintainer, see
CHECKPATCH in MAINTAINERS.

Checking PATCH 3/3: block: remove 'x' prefix from experimental bitmap APIs...
=== OUTPUT END ===

Test command exited with code: 1


The full log is available at
http://patchew.org/logs/20181206192544.3987-1-js...@redhat.com/testing.checkpatch/?type=message.
---
Email generated automatically by Patchew [http://patchew.org/].
Please send your feedback to patchew-de...@redhat.com

Re: [Qemu-block] [PATCH 1/3] blockdev: abort transactions in reverse order

2018-12-06 Thread John Snow

On 12/6/18 3:37 PM, Eric Blake wrote:
> On 12/6/18 1:25 PM, John Snow wrote:
>> Presently, we abort transactions in the same order they were processed
>> in.
>> Bitmap commands, though, attempt to restore backup data structures on
>> abort.
>> To that end, though, they need to be aborted in reverse chronological
>> order.
>>
>> Replace the QSIMPLEQ data structure with a QTAILQ one, so we can iterate
>> in reverse for the abort phase of the transaction.
>>
>> Signed-off-by: John Snow 
>> ---
>>   blockdev.c | 14 +++---
>>   1 file changed, 7 insertions(+), 7 deletions(-)
> 
> Does this need to cc qemu-stable? I'm trying to figure out if it affects
> any of the transactions issued by my libvirt code demo'd at KVM Forum.
> 
> Reviewed-by: Eric Blake 
> 

Hmm, possibly. merge is of course an x-prefix, but "clear" uses the same
restoration technique. I suppose if you cleared the same bitmap multiple
times and tried to roll it back you could see trouble where we restore
an already cleared bitmap.

I'll say that patch 01/03 here should be considered a bugfix, yes, but I
wanted to see if anyone thought this had consequences I hadn't
considered yet.

--js

Re: [Qemu-block] [Qemu-devel] [PATCH 0/3] bitmaps: remove x- prefix from QMP api

2018-12-06 Thread John Snow




On 12/6/18 2:25 PM, John Snow wrote:
> Touch up a few last things and remove the x- prefix.
> 
> Test on the way, thoughts?
> 
> John Snow (3):
>   blockdev: abort transactions in reverse order
>   blockdev: n-ary bitmap merge
>   block: remove 'x' prefix from experimental bitmap APIs
> 
>  blockdev.c | 96 +-
>  qapi/block-core.json   | 56 
>  qapi/transaction.json  | 12 +++---
>  tests/qemu-iotests/223 |  4 +-
>  4 files changed, 94 insertions(+), 74 deletions(-)
> 

Sorry, at some point I bookmarked a typo for Vladimir's email :(

Correcting his address here in the cover letter.

Re: [Qemu-block] [PATCH 1/3] blockdev: abort transactions in reverse order

2018-12-06 Thread Eric Blake


On 12/6/18 1:25 PM, John Snow wrote:

Presently, we abort transactions in the same order they were processed in.
Bitmap commands, though, attempt to restore backup data structures on abort.
To that end, though, they need to be aborted in reverse chronological order.

Replace the QSIMPLEQ data structure with a QTAILQ one, so we can iterate
in reverse for the abort phase of the transaction.

Signed-off-by: John Snow 
---
  blockdev.c | 14 +++---
  1 file changed, 7 insertions(+), 7 deletions(-)


Does this need to cc qemu-stable? I'm trying to figure out if it affects 
any of the transactions issued by my libvirt code demo'd at KVM Forum.


Reviewed-by: Eric Blake 

--
Eric Blake, Principal Software Engineer
Red Hat, Inc.   +1-919-301-3266
Virtualization:  qemu.org | libvirt.org

Re: [Qemu-block] [Qemu-devel] [PATCH v3 0/4] vmdk: Implement blockdev-create

2018-12-06 Thread no-reply

Patchew URL: https://patchew.org/QEMU/20181206151304.8388-1-kw...@redhat.com/



Hi,

This series seems to have some coding style problems. See output below for
more information:

Type: series
Subject: [Qemu-devel] [PATCH v3 0/4] vmdk: Implement blockdev-create
Message-id: 20181206151304.8388-1-kw...@redhat.com

=== TEST SCRIPT BEGIN ===
#!/bin/bash

BASE=base
n=1
total=$(git log --oneline $BASE.. | wc -l)
failed=0

git config --local diff.renamelimit 0
git config --local diff.renames True
git config --local diff.algorithm histogram

commits="$(git log --format=%H --reverse $BASE..)"
for c in $commits; do
echo "Checking PATCH $n/$total: $(git log -n 1 --format=%s $c)..."
if ! git show $c --format=email | ./scripts/checkpatch.pl --mailback -; then
failed=1
echo
fi
n=$((n+1))
done

exit $failed
=== TEST SCRIPT END ===

Updating 3c8cf5a9c21ff8782164d1def7f44bd888713384
Switched to a new branch 'test'
72ea84d iotests: Add VMDK tests for blockdev-create
e2fa3cf iotests: Filter cid numbers in VMDK extent info
fb76434 vmdk: Implement .bdrv_co_create callback
d16851a vmdk: Refactor vmdk_create_extent

=== OUTPUT BEGIN ===
Checking PATCH 1/4: vmdk: Refactor vmdk_create_extent...
WARNING: line over 80 characters
#124: FILE: block/vmdk.c:2114:
+   flat, compress, zeroed_grain, NULL, opts, 
errp)) {

total: 0 errors, 1 warnings, 104 lines checked

Your patch has style problems, please review.  If any of these errors
are false positives report them to the maintainer, see
CHECKPATCH in MAINTAINERS.
Checking PATCH 2/4: vmdk: Implement .bdrv_co_create callback...
WARNING: line over 80 characters
#216: FILE: block/vmdk.c:2075:
+bdrv_get_full_backing_filename_from_filename(blk_bs(blk)->filename, 
backing_file,

WARNING: line over 80 characters
#371: FILE: block/vmdk.c:2174:
+bool flat, bool split, bool 
compress,

WARNING: line over 80 characters
#389: FILE: block/vmdk.c:2192:
+rel_filename = g_strdup_printf("%s-flat%s", data->prefix, 
data->postfix);

WARNING: line over 80 characters
#406: FILE: block/vmdk.c:2209:
+static int coroutine_fn vmdk_co_create_opts(const char *filename, QemuOpts 
*opts,

total: 0 errors, 4 warnings, 647 lines checked

Your patch has style problems, please review.  If any of these errors
are false positives report them to the maintainer, see
CHECKPATCH in MAINTAINERS.
Checking PATCH 3/4: iotests: Filter cid numbers in VMDK extent info...
Checking PATCH 4/4: iotests: Add VMDK tests for blockdev-create...
WARNING: added, moved or deleted file(s), does MAINTAINERS need updating?
#11: 
new file mode 100755

ERROR: trailing whitespace
#250: FILE: tests/qemu-iotests/237.out:28:
+format: $

ERROR: trailing whitespace
#277: FILE: tests/qemu-iotests/237.out:55:
+format: $

ERROR: trailing whitespace
#304: FILE: tests/qemu-iotests/237.out:82:
+format: $

total: 3 errors, 1 warnings, 514 lines checked

Your patch has style problems, please review.  If any of these errors
are false positives report them to the maintainer, see
CHECKPATCH in MAINTAINERS.

=== OUTPUT END ===

Test command exited with code: 1


The full log is available at
http://patchew.org/logs/20181206151304.8388-1-kw...@redhat.com/testing.checkpatch/?type=message.
---
Email generated automatically by Patchew [http://patchew.org/].
Please send your feedback to patchew-de...@redhat.com

[Qemu-block] [PATCH 0/3] bitmaps: remove x- prefix from QMP api

2018-12-06 Thread John Snow

Touch up a few last things and remove the x- prefix.

Test on the way, thoughts?

John Snow (3):
  blockdev: abort transactions in reverse order
  blockdev: n-ary bitmap merge
  block: remove 'x' prefix from experimental bitmap APIs

 blockdev.c | 96 +-
 qapi/block-core.json   | 56 
 qapi/transaction.json  | 12 +++---
 tests/qemu-iotests/223 |  4 +-
 4 files changed, 94 insertions(+), 74 deletions(-)

-- 
2.17.2

[Qemu-block] [PATCH 3/3] block: remove 'x' prefix from experimental bitmap APIs

2018-12-06 Thread John Snow

The 'x' prefix was added because we were uncertain of the direction we'd
take for the libvirt API. With the general approach solidified, I feel
comfortable committing to this API for 4.0.

Signed-off-by: John Snow 
---
 blockdev.c | 22 +++---
 qapi/block-core.json   | 34 +-
 qapi/transaction.json  | 12 ++--
 tests/qemu-iotests/223 |  4 ++--
 4 files changed, 36 insertions(+), 36 deletions(-)

diff --git a/blockdev.c b/blockdev.c
index 0f740fd964..da87aae5cf 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -1966,7 +1966,7 @@ static void block_dirty_bitmap_add_prepare(BlkActionState 
*common,
action->has_granularity, action->granularity,
action->has_persistent, action->persistent,
action->has_autoload, action->autoload,
-   action->has_x_disabled, action->x_disabled,
+   action->has_disabled, action->disabled,
&local_err);
 
 if (!local_err) {
@@ -2051,7 +2051,7 @@ static void 
block_dirty_bitmap_enable_prepare(BlkActionState *common,
 return;
 }
 
-action = common->action->u.x_block_dirty_bitmap_enable.data;
+action = common->action->u.block_dirty_bitmap_enable.data;
 state->bitmap = block_dirty_bitmap_lookup(action->node,
   action->name,
   NULL,
@@ -2092,7 +2092,7 @@ static void 
block_dirty_bitmap_disable_prepare(BlkActionState *common,
 return;
 }
 
-action = common->action->u.x_block_dirty_bitmap_disable.data;
+action = common->action->u.block_dirty_bitmap_disable.data;
 state->bitmap = block_dirty_bitmap_lookup(action->node,
   action->name,
   NULL,
@@ -2137,7 +2137,7 @@ static void 
block_dirty_bitmap_merge_prepare(BlkActionState *common,
 return;
 }
 
-action = common->action->u.x_block_dirty_bitmap_merge.data;
+action = common->action->u.block_dirty_bitmap_merge.data;
 
 do_block_dirty_bitmap_merge(action->node, action->target,
 action->bitmaps, &state->backup,
@@ -2205,17 +2205,17 @@ static const BlkActionOps actions[] = {
 .commit = block_dirty_bitmap_free_backup,
 .abort = block_dirty_bitmap_restore,
 },
-[TRANSACTION_ACTION_KIND_X_BLOCK_DIRTY_BITMAP_ENABLE] = {
+[TRANSACTION_ACTION_KIND_BLOCK_DIRTY_BITMAP_ENABLE] = {
 .instance_size = sizeof(BlockDirtyBitmapState),
 .prepare = block_dirty_bitmap_enable_prepare,
 .abort = block_dirty_bitmap_enable_abort,
 },
-[TRANSACTION_ACTION_KIND_X_BLOCK_DIRTY_BITMAP_DISABLE] = {
+[TRANSACTION_ACTION_KIND_BLOCK_DIRTY_BITMAP_DISABLE] = {
 .instance_size = sizeof(BlockDirtyBitmapState),
 .prepare = block_dirty_bitmap_disable_prepare,
 .abort = block_dirty_bitmap_disable_abort,
 },
-[TRANSACTION_ACTION_KIND_X_BLOCK_DIRTY_BITMAP_MERGE] = {
+[TRANSACTION_ACTION_KIND_BLOCK_DIRTY_BITMAP_MERGE] = {
 .instance_size = sizeof(BlockDirtyBitmapState),
 .prepare = block_dirty_bitmap_merge_prepare,
 .commit = block_dirty_bitmap_free_backup,
@@ -2931,7 +2931,7 @@ void qmp_block_dirty_bitmap_clear(const char *node, const 
char *name,
 bdrv_clear_dirty_bitmap(bitmap, NULL);
 }
 
-void qmp_x_block_dirty_bitmap_enable(const char *node, const char *name,
+void qmp_block_dirty_bitmap_enable(const char *node, const char *name,
Error **errp)
 {
 BlockDriverState *bs;
@@ -2952,7 +2952,7 @@ void qmp_x_block_dirty_bitmap_enable(const char *node, 
const char *name,
 bdrv_enable_dirty_bitmap(bitmap);
 }
 
-void qmp_x_block_dirty_bitmap_disable(const char *node, const char *name,
+void qmp_block_dirty_bitmap_disable(const char *node, const char *name,
 Error **errp)
 {
 BlockDriverState *bs;
@@ -3014,8 +3014,8 @@ void do_block_dirty_bitmap_merge(const char *node, const 
char *target,
 bdrv_release_dirty_bitmap(bs, anon);
 }
 
-void qmp_x_block_dirty_bitmap_merge(const char *node, const char *target,
-strList *bitmaps, Error **errp)
+void qmp_block_dirty_bitmap_merge(const char *node, const char *target,
+  strList *bitmaps, Error **errp)
 {
 do_block_dirty_bitmap_merge(node, target, bitmaps, NULL, errp);
 }
diff --git a/qapi/block-core.json b/qapi/block-core.json
index 320d74ef34..fde96fdb50 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -1803,15 +1803,15 @@
 #Currently, all dirty tracking bitmaps are loaded from Qcow2 on
 #open.
 #
-# @x-disabled: the bitmap is created in the disabled state, which means that
-#  it will not track

[Qemu-block] [PATCH 1/3] blockdev: abort transactions in reverse order

2018-12-06 Thread John Snow

Presently, we abort transactions in the same order they were processed in.
Bitmap commands, though, attempt to restore backup data structures on abort.
To that end, though, they need to be aborted in reverse chronological order.

Replace the QSIMPLEQ data structure with a QTAILQ one, so we can iterate
in reverse for the abort phase of the transaction.

Signed-off-by: John Snow 
---
 blockdev.c | 14 +++---
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/blockdev.c b/blockdev.c
index 81f95d920b..1ba706df8b 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -1341,7 +1341,7 @@ struct BlkActionState {
 const BlkActionOps *ops;
 JobTxn *block_job_txn;
 TransactionProperties *txn_props;
-QSIMPLEQ_ENTRY(BlkActionState) entry;
+QTAILQ_ENTRY(BlkActionState) entry;
 };
 
 /* internal snapshot private data */
@@ -2269,8 +2269,8 @@ void qmp_transaction(TransactionActionList *dev_list,
 BlkActionState *state, *next;
 Error *local_err = NULL;
 
-QSIMPLEQ_HEAD(snap_bdrv_states, BlkActionState) snap_bdrv_states;
-QSIMPLEQ_INIT(&snap_bdrv_states);
+QTAILQ_HEAD(snap_bdrv_states, BlkActionState) snap_bdrv_states;
+QTAILQ_INIT(&snap_bdrv_states);
 
 /* Does this transaction get canceled as a group on failure?
  * If not, we don't really need to make a JobTxn.
@@ -2301,7 +2301,7 @@ void qmp_transaction(TransactionActionList *dev_list,
 state->action = dev_info;
 state->block_job_txn = block_job_txn;
 state->txn_props = props;
-QSIMPLEQ_INSERT_TAIL(&snap_bdrv_states, state, entry);
+QTAILQ_INSERT_TAIL(&snap_bdrv_states, state, entry);
 
 state->ops->prepare(state, &local_err);
 if (local_err) {
@@ -2310,7 +2310,7 @@ void qmp_transaction(TransactionActionList *dev_list,
 }
 }
 
-QSIMPLEQ_FOREACH(state, &snap_bdrv_states, entry) {
+QTAILQ_FOREACH(state, &snap_bdrv_states, entry) {
 if (state->ops->commit) {
 state->ops->commit(state);
 }
@@ -2321,13 +2321,13 @@ void qmp_transaction(TransactionActionList *dev_list,
 
 delete_and_fail:
 /* failure, and it is all-or-none; roll back all operations */
-QSIMPLEQ_FOREACH(state, &snap_bdrv_states, entry) {
+QTAILQ_FOREACH_REVERSE(state, &snap_bdrv_states, snap_bdrv_states, entry) {
 if (state->ops->abort) {
 state->ops->abort(state);
 }
 }
 exit:
-QSIMPLEQ_FOREACH_SAFE(state, &snap_bdrv_states, entry, next) {
+QTAILQ_FOREACH_SAFE(state, &snap_bdrv_states, entry, next) {
 if (state->ops->clean) {
 state->ops->clean(state);
 }
-- 
2.17.2

[Qemu-block] [PATCH 2/3] blockdev: n-ary bitmap merge

2018-12-06 Thread John Snow

Especially outside of transactions, it is helpful to provide
all-or-nothing semantics for bitmap merges. This facilitates
the coalescing of multiple bitmaps into a single target for
the "checkpoint" interpretation when assembling bitmaps that
represent arbitrary points in time from component bitmaps.

Signed-off-by: John Snow 
---
 blockdev.c   | 64 +---
 qapi/block-core.json | 22 +++
 2 files changed, 53 insertions(+), 33 deletions(-)

diff --git a/blockdev.c b/blockdev.c
index 1ba706df8b..0f740fd964 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -2122,33 +2122,26 @@ static void 
block_dirty_bitmap_disable_abort(BlkActionState *common)
 }
 }
 
+void do_block_dirty_bitmap_merge(const char *node, const char *target,
+ strList *bitmaps, HBitmap **backup,
+ Error **errp);
+
 static void block_dirty_bitmap_merge_prepare(BlkActionState *common,
  Error **errp)
 {
 BlockDirtyBitmapMerge *action;
 BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState,
  common, common);
-BdrvDirtyBitmap *merge_source;
 
 if (action_check_completion_mode(common, errp) < 0) {
 return;
 }
 
 action = common->action->u.x_block_dirty_bitmap_merge.data;
-state->bitmap = block_dirty_bitmap_lookup(action->node,
-  action->dst_name,
-  &state->bs,
-  errp);
-if (!state->bitmap) {
-return;
-}
 
-merge_source = bdrv_find_dirty_bitmap(state->bs, action->src_name);
-if (!merge_source) {
-return;
-}
-
-bdrv_merge_dirty_bitmap(state->bitmap, merge_source, &state->backup, errp);
+do_block_dirty_bitmap_merge(action->node, action->target,
+action->bitmaps, &state->backup,
+errp);
 }
 
 static void abort_prepare(BlkActionState *common, Error **errp)
@@ -2980,24 +2973,51 @@ void qmp_x_block_dirty_bitmap_disable(const char *node, 
const char *name,
 bdrv_disable_dirty_bitmap(bitmap);
 }
 
-void qmp_x_block_dirty_bitmap_merge(const char *node, const char *dst_name,
-const char *src_name, Error **errp)
+void do_block_dirty_bitmap_merge(const char *node, const char *target,
+ strList *bitmaps, HBitmap **backup,
+ Error **errp)
 {
 BlockDriverState *bs;
-BdrvDirtyBitmap *dst, *src;
+BdrvDirtyBitmap *dst, *src, *anon;
+strList *lst;
+Error *local_err = NULL;
 
-dst = block_dirty_bitmap_lookup(node, dst_name, &bs, errp);
+dst = block_dirty_bitmap_lookup(node, target, &bs, errp);
 if (!dst) {
 return;
 }
 
-src = bdrv_find_dirty_bitmap(bs, src_name);
-if (!src) {
-error_setg(errp, "Dirty bitmap '%s' not found", src_name);
+anon = bdrv_create_dirty_bitmap(bs, bdrv_dirty_bitmap_granularity(dst),
+NULL, errp);
+if (!anon) {
 return;
 }
 
-bdrv_merge_dirty_bitmap(dst, src, NULL, errp);
+for (lst = bitmaps; lst; lst = lst->next) {
+src = bdrv_find_dirty_bitmap(bs, lst->value);
+if (!src) {
+error_setg(errp, "Dirty bitmap '%s' not found", lst->value);
+goto out;
+}
+
+bdrv_merge_dirty_bitmap(anon, src, NULL, &local_err);
+if (local_err) {
+error_propagate(errp, local_err);
+goto out;
+}
+}
+
+/* Merge into dst; dst is unchanged on failure. */
+bdrv_merge_dirty_bitmap(dst, anon, backup, errp);
+
+ out:
+bdrv_release_dirty_bitmap(bs, anon);
+}
+
+void qmp_x_block_dirty_bitmap_merge(const char *node, const char *target,
+strList *bitmaps, Error **errp)
+{
+do_block_dirty_bitmap_merge(node, target, bitmaps, NULL, errp);
 }
 
 BlockDirtyBitmapSha256 *qmp_x_debug_block_dirty_bitmap_sha256(const char *node,
diff --git a/qapi/block-core.json b/qapi/block-core.json
index d4fe710836..320d74ef34 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -1818,14 +1818,14 @@
 #
 # @node: name of device/node which the bitmap is tracking
 #
-# @dst_name: name of the destination dirty bitmap
+# @target: name of the destination dirty bitmap
 #
-# @src_name: name of the source dirty bitmap
+# @bitmaps: name(s) of the source dirty bitmap(s)
 #
 # Since: 3.0
 ##
 { 'struct': 'BlockDirtyBitmapMerge',
-  'data': { 'node': 'str', 'dst_name': 'str', 'src_name': 'str' } }
+  'data': { 'node': 'str', 'target': 'str', 'bitmaps': ['str'] } }
 
 ##
 # @block-dirty-bitmap-add:
@@ -1940,23 +1940,23 @@
 ##
 # @x-block-dirty-bitmap-merge:
 #
-# FIXME: Rename @src_name and @dst_name to src-name and dst-name.
-#
-# Merge @src_name

Re: [Qemu-block] [PATCH v2 5/5] crypto: support multiple threads accessing one QCryptoBlock

2018-12-06 Thread Vladimir Sementsov-Ogievskiy

06.12.2018 13:54, Daniel P. Berrangé wrote:
> On Wed, Dec 05, 2018 at 05:47:00PM +0300, Vladimir Sementsov-Ogievskiy wrote:
>> The two thing that should be handled are cipher and ivgen. For ivgen
>> the solution is just mutex, as iv calculations should not be long in
>> comparison with encryption/decryption. And for cipher let's just keep
>> per-thread ciphers.
>>
>> Signed-off-by: Vladimir Sementsov-Ogievskiy 
>> ---

[..]

>> --- a/crypto/block.c
>> +++ b/crypto/block.c

[..]

>>   static int do_qcrypto_cipher_encrypt(QCryptoCipher *cipher,
>>size_t niv,
>>QCryptoIVGen *ivgen,
>> + QemuMutex *ivgen_mutex,
>>int sectorsize,
>>uint64_t offset,
>>uint8_t *buf,
>> @@ -218,10 +307,15 @@ static int do_qcrypto_cipher_encrypt(QCryptoCipher 
>> *cipher,
>>   while (len > 0) {
>>   size_t nbytes;
>>   if (niv) {
>> -if (qcrypto_ivgen_calculate(ivgen,
>> -startsector,
>> -iv, niv,
>> -errp) < 0) {
>> +if (ivgen_mutex) {
>> +qemu_mutex_lock(ivgen_mutex);
>> +}
>> +ret = qcrypto_ivgen_calculate(ivgen, startsector, iv, niv, 
>> errp);
>> +if (ivgen_mutex) {
>> +qemu_mutex_unlock(ivgen_mutex);
>> +}
> 
> I think we should just make  ivgen_mutex compulsory
> 
>> +
>> +if (ret < 0) {
>>   goto cleanup;
>>   }
>>   
>> @@ -258,8 +352,9 @@ int qcrypto_cipher_decrypt_helper(QCryptoCipher *cipher,
>> size_t len,
>> Error **errp)
>>   {
>> -return do_qcrypto_cipher_encrypt(cipher, niv, ivgen, sectorsize, offset,
>> - buf, len, qcrypto_cipher_decrypt, 
>> errp);
>> +return do_qcrypto_cipher_encrypt(cipher, niv, ivgen, NULL, sectorsize,
>> + offset, buf, len, 
>> qcrypto_cipher_decrypt,
>> + errp);
>>   }
>>   
>>   
>> @@ -272,11 +367,11 @@ int qcrypto_cipher_encrypt_helper(QCryptoCipher 
>> *cipher,
>> size_t len,
>> Error **errp)
>>   {
>> -return do_qcrypto_cipher_encrypt(cipher, niv, ivgen, sectorsize, offset,
>> - buf, len, qcrypto_cipher_encrypt, 
>> errp);
>> +return do_qcrypto_cipher_encrypt(cipher, niv, ivgen, NULL, sectorsize,
>> + offset, buf, len, 
>> qcrypto_cipher_encrypt,
>> + errp);
>>   }
> 
> ...and get the mutex passed into these functions, as its easier to just
> know the ivgen is always protected, and not have to trace back the callpath
> to see if the usage is safe.

but there places, where these helpers called without any QCryptoBlock, when we 
just
have local cipher and ivgen, so there is no mutex and it's not needed.

-- 
Best regards,
Vladimir

Re: [Qemu-block] [PATCH v2 2/5] crypto/block: refactor qcrypto_block_*crypt_helper functions

2018-12-06 Thread Vladimir Sementsov-Ogievskiy

06.12.2018 13:36, Daniel P. Berrangé wrote:
> On Wed, Dec 05, 2018 at 05:46:57PM +0300, Vladimir Sementsov-Ogievskiy wrote:
>> qcrypto_block_encrypt_helper and qcrypto_block_decrypt_helper are
>> almost identical, let's reduce code duplication and simplify further
>> improvements.
>>
>> Signed-off-by: Vladimir Sementsov-Ogievskiy 
>> ---
>>   crypto/block.c | 81 +++---
>>   1 file changed, 31 insertions(+), 50 deletions(-)
>>
>> diff --git a/crypto/block.c b/crypto/block.c
>> index e59d1140fe..f4101f0841 100644
>> --- a/crypto/block.c
>> +++ b/crypto/block.c
>> @@ -190,14 +190,21 @@ void qcrypto_block_free(QCryptoBlock *block)
>>   }
>>   
>>   
>> -int qcrypto_block_decrypt_helper(QCryptoCipher *cipher,
>> - size_t niv,
>> - QCryptoIVGen *ivgen,
>> - int sectorsize,
>> - uint64_t offset,
>> - uint8_t *buf,
>> - size_t len,
>> - Error **errp)
>> +typedef int (*QCryptoCipherEncryptFunc)(QCryptoCipher *cipher,
>> +const void *in,
>> +void *out,
>> +size_t len,
>> +Error **errp);
>> +
>> +static int do_qcrypto_block_encrypt(QCryptoCipher *cipher,
> 
> Can we call this functuon 'encdec', since it is misleading to call
> it just 'encrypt' when its used for decrypt too.

Maybe just _crypt ?)


-- 
Best regards,
Vladimir

Re: [Qemu-block] [PATCH 10/14] nbd/client: Split handshake into two functions

2018-12-06 Thread Vladimir Sementsov-Ogievskiy

06.12.2018 18:16, Vladimir Sementsov-Ogievskiy wrote:
> 01.12.2018 1:03, Eric Blake wrote:
>> + * Returns: negative errno: failure talking to server
>> + *  0: server is oldstyle, client must still parse export size
>> + *  1: server is newstyle, but can only accept EXPORT_NAME
>> + *  2: server is newstyle, but lacks structured replies
>> + *  3: server is newstyle and set up for structured replies
>> + */
> 
> hmm, may be, introduce enum of named constants, instead of raw numbers?
> 
> NBD_STARTED_OLD_STYLE
> NBD_STARTED_NEW_STYLE
> NBD_STARTED_NEW_STYLE_FIXED
> NBD_STARTED_STRUCTURED_REPLIES
> 
> or, at least, add short comment after each return statement, to not scroll up
> every time (like you gracefully do after each case: statement).
> 
> However, I'm okay with either way.
> 

I mean, including "as is".


-- 
Best regards,
Vladimir

Re: [Qemu-block] [PATCH 08/14] nbd/client: Refactor nbd_receive_list()

2018-12-06 Thread Vladimir Sementsov-Ogievskiy

06.12.2018 19:31, Eric Blake wrote:
> On 12/6/18 8:18 AM, Vladimir Sementsov-Ogievskiy wrote:
>> 01.12.2018 1:03, Eric Blake wrote:
>>> Add some parameters to make this function reusable in upcoming
>>> export listing, where we will want to capture the name and
>>> description rather than compare against a user-supplied name.
>>> No change in semantics to the existing caller.
>>>
>>> Signed-off-by: Eric Blake 
>>> ---
>>>    nbd/client.c | 66 +---
>>>    1 file changed, 47 insertions(+), 19 deletions(-)
> 
>>> @@ -290,30 +298,49 @@ static int nbd_receive_list(QIOChannel *ioc, const 
>>> char *want, bool *match,
>>>    nbd_send_opt_abort(ioc);
>>>    return -1;
>>>    }
>>> -    if (namelen != strlen(want)) {
>>> -    if (nbd_drop(ioc, len, errp) < 0) {
>>> -    error_prepend(errp,
>>> -  "failed to skip export name with wrong length: 
>>> ");
>>> -    nbd_send_opt_abort(ioc);
>>> -    return -1;
>>> +    if (want) {
>>> +    if (namelen != strlen(want)) {
>>> +    if (nbd_drop(ioc, len, errp) < 0) {
>>> +    error_prepend(errp,
>>> +  "failed to skip export name with wrong 
>>> length: ");
>>> +    nbd_send_opt_abort(ioc);
>>> +    return -1;
>>> +    }
>>> +    return 1;
>>>    }
>>> -    return 1;
>>> +    assert(namelen < sizeof(array));
>>> +    } else {
>>> +    assert(nameout);
>>
>> this assert looks a bit redundant, if nameout is 0, next line will abort not 
>> less clearly
>>
>>> +    *nameout = name = g_new(char, namelen + 1);
>>
>> We should check namelen <= NBD_MAX_NAME_SIZE before it, I think.
> 
> Why? We already validated

Ah, than OK, the worst case is ~ NBD_MAX_BUFFER_SIZE (32M), not 4G. Missed that 
:(

  that the overall option is not too large, and even if the resulting name from 
the server is larger than NBD_MAX_NAME_SIZE, it's still nice to report that 
name to the client for 'qemu-nbd --list'.  And these days, we only call 
nbd_receive_list if a server lacked NBD_OPT_GO, which is getting rarer and 
rarer, so micro-optimizing a rare case to avoid a large malloc isn't going to 
make a noticeable difference.
> 
>>
>>>    }
>>>
>>> -    assert(namelen < sizeof(name));
>>>    if (nbd_read(ioc, name, namelen, errp) < 0) {
>>>    error_prepend(errp, "failed to read export name: ");
>>>    nbd_send_opt_abort(ioc);
>>> +    if (!want) {
>>> +    free(name);
>>
>> g_free
>>
>>> +    }
>>>    return -1;
>>>    }
>>>    name[namelen] = '\0';
>>>    len -= namelen;
>>> -    if (nbd_drop(ioc, len, errp) < 0) {
>>> +    if (!want) {
>>> +    assert(description);
>>
>> NBD_MAX_NAME_SIZE
> 
> The description is not bound by NBD_MAX_NAME_SIZE.  The NBD protocol DOES 
> document a maximum string size (4k), but we are (so far) not actually 
> honoring that limit (our choice for NBD_MAX_NAME_SIZE is 256, which is 
> smaller than the NBD protocol limit).
> 

Ohm, yes, you are right :(. Too much reviewing, I should stop and make some 
patches :)

>>
>>> +    *description = g_new(char, len + 1);
>>> +    if (nbd_read(ioc, *description, len, errp) < 0) {
>>> +    error_prepend(errp, "failed to read export description: ");
>>> +    nbd_send_opt_abort(ioc);
>>> +    free(name);
>>> +    free(*description);
>>
>> g_free
>>
>>> +    return -1;
>>> +    }
>>> +    (*description)[len] = '\0';
>>> +    } else if (nbd_drop(ioc, len, errp) < 0) {
>>>    error_prepend(errp, "failed to read export description: ");
>>>    nbd_send_opt_abort(ioc);
>>>    return -1;
>>>    }
>>> -    if (!strcmp(name, want)) {
>>> +    if (want && !strcmp(name, want)) {
>>>    *match = true;
>>>    }
>>>    return 1;
>>
>> one more thing: on fail path, you finally fill output name and description
>> with freed pointers. I'd prefer to keep them unchanged in this case, however,
>> it's a matter of taste.
> 
> Okay, I'll try and be more careful in v2 about not altering the callers 
> pointers on failure.
> 


-- 
Best regards,
Vladimir

Re: [Qemu-block] [PATCH v3] qemu-img info lists bitmap directory entries

2018-12-06 Thread Vladimir Sementsov-Ogievskiy

06.12.2018 16:19, Andrey Shinkevich wrote:
> The 'Format specific information' of qemu-img info command will show
> the name, flags and granularity for every QCOW2 bitmap as follows:
> 
> image: /vz/vmprivate/VM1/harddisk.hdd
> file format: qcow2
> virtual size: 64G (68719476736 bytes)
> disk size: 3.0M
> cluster_size: 1048576
> Format specific information:
>  compat: 1.1
>  lazy refcounts: true
>  bitmaps:
>  [0]:
>  flags:
>  [0]: in-use
>  [1]: auto
>  name: back-up1
>  unknown flags: 4
>  granularity: 65536
>  [1]:
>  flags:
>  [0]: in-use
>  [1]: auto
>  name: back-up2
>  unknown flags: 8
>  granularity: 65536
>  refcount bits: 16
>  corrupt: false
> 
> Signed-off-by: Andrey Shinkevich 
> ---
> v3:
> Now, qcow2_get_bitmap_info_list() is invoked under the condition of QCOW
> version #3 to avoid memory leaks in case of QCOW version #2.
> Furthermore, qcow2_get_bitmap_info_list() checks the number of existing 
> bitmaps.
> So, if no bitmap exists, no bitmap error message is printed in the output.
> The data type of the bitmap 'granularity' parameter was left as 'uint32'
> because bitmap_list_load() returns error if granularity_bits is grater than 
> 31.
> 

Ok, uint32 works for me too.


[...]

> +
> +Qcow2BitmapInfoList *qcow2_get_bitmap_info_list(BlockDriverState *bs,
> +Error **errp)
> +{
> +BDRVQcow2State *s = bs->opaque;
> +Qcow2BitmapList *bm_list;
> +Qcow2Bitmap *bm;
> +Qcow2BitmapInfoList *list = NULL;
> +Qcow2BitmapInfoList **plist = &list;
> +
> +if (s->nb_bitmaps == 0) {
> +return NULL;
> +}
> +
> +bm_list = bitmap_list_load(bs, s->bitmap_directory_offset,
> +   s->bitmap_directory_size, errp);
> +if (bm_list == NULL) {
> +return NULL;
> +}
> +
> +QSIMPLEQ_FOREACH(bm, bm_list, entry) {
> +Qcow2BitmapInfo *info = g_new0(Qcow2BitmapInfo, 1);
> +Qcow2BitmapInfoList *obj = g_new0(Qcow2BitmapInfoList, 1);
> +info->granularity = 1U << bm->granularity_bits;
> +info->name = g_strdup(bm->name);
> +info->flags = get_bitmap_info_flags(bm->flags);
> +info->unknown_flags = bm->flags & ~(BME_FLAG_IN_USE | BME_FLAG_AUTO);

we should use bm->flags & BME_RESERVED_FLAGS instead

> +info->has_unknown_flags = !!info->unknown_flags;
> +obj->value = info;
> +*plist = obj;
> +plist = &obj->next;
> +}
> +
> +bitmap_list_free(bm_list);
> +
> +return list;
> +}
> +
>   int qcow2_reopen_bitmaps_rw_hint(BlockDriverState *bs, bool *header_updated,
>Error **errp)
>   {
> diff --git a/block/qcow2.c b/block/qcow2.c
> index 991d6ac..a023856 100644
> --- a/block/qcow2.c
> +++ b/block/qcow2.c
> @@ -4254,6 +4254,8 @@ static ImageInfoSpecific 
> *qcow2_get_specific_info(BlockDriverState *bs)
>   BDRVQcow2State *s = bs->opaque;
>   ImageInfoSpecific *spec_info;
>   QCryptoBlockInfo *encrypt_info = NULL;
> +Error *local_err = NULL;
> +Qcow2BitmapInfoList *bitmaps;
>   
>   if (s->crypto != NULL) {
>   encrypt_info = qcrypto_block_get_info(s->crypto, &error_abort);
> @@ -4270,6 +4272,10 @@ static ImageInfoSpecific 
> *qcow2_get_specific_info(BlockDriverState *bs)
>   .refcount_bits  = s->refcount_bits,
>   };
>   } else if (s->qcow_version == 3) {

both local_err and bitmaps variables are used only here.
I'd like at least bitmaps to be defined here too.

> +bitmaps = qcow2_get_bitmap_info_list(bs, &local_err);
> +if (local_err != NULL) {
> +error_report_err(local_err);
> +}
>   *spec_info->u.qcow2.data = (ImageInfoSpecificQCow2){
>   .compat = g_strdup("1.1"),
>   .lazy_refcounts = s->compatible_features &
> @@ -4279,6 +4285,8 @@ static ImageInfoSpecific 
> *qcow2_get_specific_info(BlockDriverState *bs)
> QCOW2_INCOMPAT_CORRUPT,
>   .has_corrupt= true,
>   .refcount_bits  = s->refcount_bits,
> +.has_bitmaps= !!bitmaps,
> +.bitmaps= bitmaps,
>   };
>   } else {
>   /* if this assertion fails, this probably means a new version was
> diff --git a/block/qcow2.h b/block/qcow2.h
> index 8662b68..0ec2b3d 100644
> --- a/block/qcow2.h
> +++ b/block/qcow2.h
> @@ -685,6 +685,8 @@ int qcow2_check_bitmaps_refcounts(BlockDriverState *bs, 
> BdrvCheckResult *res,
> void **refcount_table,
> int64_t *refcount_table_size);
>   bool qcow2_load_dirty_bitmaps(BlockDriverState *bs, Error **errp);
> +Qcow2BitmapInfoList *qcow2_get_bitmap_info_list(BlockDriver

Re: [Qemu-block] [PATCH 08/14] nbd/client: Refactor nbd_receive_list()

2018-12-06 Thread Eric Blake


On 12/6/18 8:18 AM, Vladimir Sementsov-Ogievskiy wrote:

01.12.2018 1:03, Eric Blake wrote:

Add some parameters to make this function reusable in upcoming
export listing, where we will want to capture the name and
description rather than compare against a user-supplied name.
No change in semantics to the existing caller.

Signed-off-by: Eric Blake 
---
   nbd/client.c | 66 +---
   1 file changed, 47 insertions(+), 19 deletions(-)



@@ -290,30 +298,49 @@ static int nbd_receive_list(QIOChannel *ioc, const char 
*want, bool *match,
   nbd_send_opt_abort(ioc);
   return -1;
   }
-if (namelen != strlen(want)) {
-if (nbd_drop(ioc, len, errp) < 0) {
-error_prepend(errp,
-  "failed to skip export name with wrong length: ");
-nbd_send_opt_abort(ioc);
-return -1;
+if (want) {
+if (namelen != strlen(want)) {
+if (nbd_drop(ioc, len, errp) < 0) {
+error_prepend(errp,
+  "failed to skip export name with wrong length: 
");
+nbd_send_opt_abort(ioc);
+return -1;
+}
+return 1;
   }
-return 1;
+assert(namelen < sizeof(array));
+} else {
+assert(nameout);


this assert looks a bit redundant, if nameout is 0, next line will abort not 
less clearly


+*nameout = name = g_new(char, namelen + 1);


We should check namelen <= NBD_MAX_NAME_SIZE before it, I think.


Why? We already validated that the overall option is not too large, and 
even if the resulting name from the server is larger than 
NBD_MAX_NAME_SIZE, it's still nice to report that name to the client for 
'qemu-nbd --list'.  And these days, we only call nbd_receive_list if a 
server lacked NBD_OPT_GO, which is getting rarer and rarer, so 
micro-optimizing a rare case to avoid a large malloc isn't going to make 
a noticeable difference.





   }

-assert(namelen < sizeof(name));
   if (nbd_read(ioc, name, namelen, errp) < 0) {
   error_prepend(errp, "failed to read export name: ");
   nbd_send_opt_abort(ioc);
+if (!want) {
+free(name);


g_free


+}
   return -1;
   }
   name[namelen] = '\0';
   len -= namelen;
-if (nbd_drop(ioc, len, errp) < 0) {
+if (!want) {
+assert(description);


NBD_MAX_NAME_SIZE


The description is not bound by NBD_MAX_NAME_SIZE.  The NBD protocol 
DOES document a maximum string size (4k), but we are (so far) not 
actually honoring that limit (our choice for NBD_MAX_NAME_SIZE is 256, 
which is smaller than the NBD protocol limit).





+*description = g_new(char, len + 1);
+if (nbd_read(ioc, *description, len, errp) < 0) {
+error_prepend(errp, "failed to read export description: ");
+nbd_send_opt_abort(ioc);
+free(name);
+free(*description);


g_free


+return -1;
+}
+(*description)[len] = '\0';
+} else if (nbd_drop(ioc, len, errp) < 0) {
   error_prepend(errp, "failed to read export description: ");
   nbd_send_opt_abort(ioc);
   return -1;
   }
-if (!strcmp(name, want)) {
+if (want && !strcmp(name, want)) {
   *match = true;
   }
   return 1;


one more thing: on fail path, you finally fill output name and description
with freed pointers. I'd prefer to keep them unchanged in this case, however,
it's a matter of taste.


Okay, I'll try and be more careful in v2 about not altering the callers 
pointers on failure.


--
Eric Blake, Principal Software Engineer
Red Hat, Inc.   +1-919-301-3266
Virtualization:  qemu.org | libvirt.org

Re: [Qemu-block] [PATCH 07/14] nbd/client: Refactor nbd_negotiate_simple_meta_context()

2018-12-06 Thread Eric Blake


On 12/6/18 7:20 AM, Vladimir Sementsov-Ogievskiy wrote:

01.12.2018 1:03, Eric Blake wrote:

Change the signature to make it easier for a future patch to
reuse this function for calling NBD_OPT_LIST_META_CONTEXT with
0 or 1 queries.  Also, always allocate space for the received
name, even if it doesn't match expected lengths (no point
trying to optimize the unlikely error case, and tracing the
received rather than expected name can help debug a server
implementation).

While there are now slightly different traces, and the error
message for a server replying with too many contexts is
different, there are no runtime-observable changes in behavior
for the more common case of the lone caller interacting with a
compliant server.

Signed-off-by: Eric Blake 
---
   nbd/client.c | 105 +++
   nbd/trace-events |   2 +-
   2 files changed, 61 insertions(+), 46 deletions(-)

diff --git a/nbd/client.c b/nbd/client.c
index b5818a99d21..1dc8f83e19a 100644
--- a/nbd/client.c
+++ b/nbd/client.c
@@ -603,49 +603,57 @@ static QIOChannel *nbd_receive_starttls(QIOChannel *ioc,
   }

   /* nbd_negotiate_simple_meta_context:
- * Set one meta context. Simple means that reply must contain zero (not
- * negotiated) or one (negotiated) contexts. More contexts would be considered
- * as a protocol error. It's also implied that meta-data query equals queried
- * context name, so, if server replies with something different than @context,
- * it is considered an error too.
- * return 1 for successful negotiation, context_id is set
- *0 if operation is unsupported,
+ * List or set meta context data for export @info->name, based on @opt.


hm, just list or set meta context? What is "data" about?


Okay, that's two different reviewers complaining. I'll do a bit more 
refactoring and comments for v2 to make it more obvious that I'm writing 
a common routine that can handle the bulk of the commonality between 
list and set; but maybe keep two wrappers so that the existing callers 
don't have to change their calls as drastically.





+ * For list, leave @context NULL for 0 queries, or supplied for a single
+ * query; all replies are ignored, and the call merely traces server behavior.
+ * For set, @context must result in at most one matching server reply, in
+ * which case @info->meta_base_allocation_id is set to the resulting id.


Hmm, looks too cheating. Then it should be renamed to
nbd_negotiate_base_allocation

and parameter @context should be renamed to @x_dirty_bitmap,

and if it is unset, we'll use "base:allocation" here.


No, the next patch uses "qemu:" as the context for LIST when recursing 
to work around qemu-nbd 3.0 not advertising the dirty-bitmap context 
under list with 0 queries.




but in this case, it still weird about opt=list case.. So, it should be named
like nbd_negotiation_helper, as it is doing several different things, which
I can't describe in one word.


Yes, I think a function rename will help.




+ * return 1 for successful negotiation,
+ *0 if operation is unsupported or context unavailable,
*-1 with errp set for any other error


this return value description looks not very related to opt=list case


It's related - but list returns 1 for a lot more cases than set (a 
successful negotiation for list meant that all server replies were 
processed, while a successful negotiation for set requires that the 
server replies with exactly the one context we requested).


--
Eric Blake, Principal Software Engineer
Red Hat, Inc.   +1-919-301-3266
Virtualization:  qemu.org | libvirt.org

Re: [Qemu-block] [PATCH 15/18] xen: add a mechanism to automatically create XenDevice-s...

2018-12-06 Thread Paul Durrant

> -Original Message-
> From: Anthony PERARD [mailto:anthony.per...@citrix.com]
> Sent: 06 December 2018 15:24
> To: Paul Durrant 
> Cc: qemu-block@nongnu.org; qemu-de...@nongnu.org; xen-
> de...@lists.xenproject.org; Stefano Stabellini 
> Subject: Re: [PATCH 15/18] xen: add a mechanism to automatically create
> XenDevice-s...
> 
> On Thu, Dec 06, 2018 at 12:36:52PM +, Paul Durrant wrote:
> > > -Original Message-
> > > From: Anthony PERARD [mailto:anthony.per...@citrix.com]
> > > Sent: 04 December 2018 15:35
> > >
> > > On Wed, Nov 21, 2018 at 03:12:08PM +, Paul Durrant wrote:
> > > > +xenbus->backend_watch =
> > > > +xen_bus_add_watch(xenbus, "", /* domain root node */
> > > > +  "backend", xen_bus_enumerate, xenbus,
> > > &local_err);
> > > > +if (local_err) {
> > > > +error_propagate(errp, local_err);
> > > > +error_prepend(errp, "failed to set up enumeration watch:
> ");
> > >
> > > You should use error_propagate_prepend instead
> > > error_propagate;error_prepend. And it looks like there is the same
> > > mistake in other patches that I haven't notice.
> > >
> >
> > Oh, I didn't know about that one either... I've only seen the separate
> calls used elsewhere.
> 
> That information is all in "include/qapi/error.h", if you which to know
> more on how to use Error.
> 

Thanks.

> > > Also you probably want goto fail here.
> > >
> >
> > Not sure about that. Whilst the bus scan won't happen, it doesn't mean
> devices can't be added via QMP.
> 
> In that case, don't modify errp, and use error_reportf_err instead, or
> warn_reportf_err (then local_err = NULL, in case it is reused in a
> future modification of the function).
> 
> Setting errp (with error_propagate) means that the function failed, and
> QEMU is going to exit(1), because of qdev_init_nofail call in
> xen_bus_init.

Ah, good point. I'll wait for more feedback on v2 and then fix in v3.

  Paul

[Qemu-block] [PATCH v2 11/18] xen: remove 'XenBlkDev' and 'blkdev' names from dataplane/xen-block

2018-12-06 Thread Paul Durrant

This is a purely cosmetic patch that substitutes the old 'struct XenBlkDev'
name with 'XenBlockDataPlane' and 'blkdev' field/variable names with
'dataplane', and then does necessary fix-up to adhere to coding style.

No functional change.

Signed-off-by: Paul Durrant 
Acked-by: Anthony Perard 
---
Cc: Stefano Stabellini 
Cc: Stefan Hajnoczi 
Cc: Kevin Wolf 
Cc: Max Reitz 
---
 hw/block/dataplane/xen-block.c | 352 +
 hw/block/dataplane/xen-block.h |   2 +-
 2 files changed, 183 insertions(+), 171 deletions(-)

diff --git a/hw/block/dataplane/xen-block.c b/hw/block/dataplane/xen-block.c
index 20d16e7..6ecd160 100644
--- a/hw/block/dataplane/xen-block.c
+++ b/hw/block/dataplane/xen-block.c
@@ -44,12 +44,12 @@ struct ioreq {
 int presync;
 int aio_inflight;
 int aio_errors;
-struct XenBlkDev *blkdev;
+XenBlockDataPlane *dataplane;
 QLIST_ENTRY(ioreq) list;
 BlockAcctCookie acct;
 };
 
-struct XenBlkDev {
+struct XenBlockDataPlane {
 XenDevice *xendev;
 XenEventChannel *event_channel;
 unsigned int *ring_ref;
@@ -85,33 +85,33 @@ static void ioreq_reset(struct ioreq *ioreq)
 ioreq->aio_inflight = 0;
 ioreq->aio_errors = 0;
 
-ioreq->blkdev = NULL;
+ioreq->dataplane = NULL;
 memset(&ioreq->list, 0, sizeof(ioreq->list));
 memset(&ioreq->acct, 0, sizeof(ioreq->acct));
 
 qemu_iovec_reset(&ioreq->v);
 }
 
-static struct ioreq *ioreq_start(struct XenBlkDev *blkdev)
+static struct ioreq *ioreq_start(XenBlockDataPlane *dataplane)
 {
 struct ioreq *ioreq = NULL;
 
-if (QLIST_EMPTY(&blkdev->freelist)) {
-if (blkdev->requests_total >= blkdev->max_requests) {
+if (QLIST_EMPTY(&dataplane->freelist)) {
+if (dataplane->requests_total >= dataplane->max_requests) {
 goto out;
 }
 /* allocate new struct */
 ioreq = g_malloc0(sizeof(*ioreq));
-ioreq->blkdev = blkdev;
-blkdev->requests_total++;
+ioreq->dataplane = dataplane;
+dataplane->requests_total++;
 qemu_iovec_init(&ioreq->v, 1);
 } else {
 /* get one from freelist */
-ioreq = QLIST_FIRST(&blkdev->freelist);
+ioreq = QLIST_FIRST(&dataplane->freelist);
 QLIST_REMOVE(ioreq, list);
 }
-QLIST_INSERT_HEAD(&blkdev->inflight, ioreq, list);
-blkdev->requests_inflight++;
+QLIST_INSERT_HEAD(&dataplane->inflight, ioreq, list);
+dataplane->requests_inflight++;
 
 out:
 return ioreq;
@@ -119,26 +119,26 @@ out:
 
 static void ioreq_finish(struct ioreq *ioreq)
 {
-struct XenBlkDev *blkdev = ioreq->blkdev;
+XenBlockDataPlane *dataplane = ioreq->dataplane;
 
 QLIST_REMOVE(ioreq, list);
-QLIST_INSERT_HEAD(&blkdev->finished, ioreq, list);
-blkdev->requests_inflight--;
-blkdev->requests_finished++;
+QLIST_INSERT_HEAD(&dataplane->finished, ioreq, list);
+dataplane->requests_inflight--;
+dataplane->requests_finished++;
 }
 
 static void ioreq_release(struct ioreq *ioreq, bool finish)
 {
-struct XenBlkDev *blkdev = ioreq->blkdev;
+XenBlockDataPlane *dataplane = ioreq->dataplane;
 
 QLIST_REMOVE(ioreq, list);
 ioreq_reset(ioreq);
-ioreq->blkdev = blkdev;
-QLIST_INSERT_HEAD(&blkdev->freelist, ioreq, list);
+ioreq->dataplane = dataplane;
+QLIST_INSERT_HEAD(&dataplane->freelist, ioreq, list);
 if (finish) {
-blkdev->requests_finished--;
+dataplane->requests_finished--;
 } else {
-blkdev->requests_inflight--;
+dataplane->requests_inflight--;
 }
 }
 
@@ -148,7 +148,7 @@ static void ioreq_release(struct ioreq *ioreq, bool finish)
  */
 static int ioreq_parse(struct ioreq *ioreq)
 {
-struct XenBlkDev *blkdev = ioreq->blkdev;
+XenBlockDataPlane *dataplane = ioreq->dataplane;
 size_t len;
 int i;
 
@@ -171,12 +171,12 @@ static int ioreq_parse(struct ioreq *ioreq)
 };
 
 if (ioreq->req.operation != BLKIF_OP_READ &&
-blk_is_read_only(blkdev->blk)) {
+blk_is_read_only(dataplane->blk)) {
 error_report("error: write req for ro device");
 goto err;
 }
 
-ioreq->start = ioreq->req.sector_number * blkdev->file_blk;
+ioreq->start = ioreq->req.sector_number * dataplane->file_blk;
 for (i = 0; i < ioreq->req.nr_segments; i++) {
 if (i == BLKIF_MAX_SEGMENTS_PER_REQUEST) {
 error_report("error: nr_segments too big");
@@ -186,16 +186,16 @@ static int ioreq_parse(struct ioreq *ioreq)
 error_report("error: first > last sector");
 goto err;
 }
-if (ioreq->req.seg[i].last_sect * blkdev->file_blk >= XC_PAGE_SIZE) {
+if (ioreq->req.seg[i].last_sect * dataplane->file_blk >= XC_PAGE_SIZE) 
{
 error_report("error: page crossing");
 goto err;
 }
 
 len = (ioreq->req.seg[i].last_sect -
-   ioreq->req.seg[i].first_sect + 1) * blkdev->file_blk;
+   ioreq->r

[Qemu-block] [PATCH v2 10/18] xen: add header and build dataplane/xen-block.c

2018-12-06 Thread Paul Durrant

This patch adds the transformations necessary to get dataplane/xen-block.c
to build against the new XenBus/XenDevice framework. MAINTAINERS is also
updated due to the introduction of dataplane/xen-block.h.

NOTE: Existing data structure names are retained for the moment. These will
  be modified by subsequent patches. A typedef for XenBlockDataPlane
  has been added to the header (based on the old struct XenBlkDev name
  for the moment) so that the old names don't need to leak out of the
  dataplane code.

Signed-off-by: Paul Durrant 
---
Cc: Stefan Hajnoczi 
Cc: Kevin Wolf 
Cc: Max Reitz 
Cc: Stefano Stabellini 
Cc: Anthony Perard 

v2:
 - Tidy up header inclusions
 - Get rid of error_fatal
---
 MAINTAINERS  |   1 +
 hw/block/dataplane/Makefile.objs |   1 +
 hw/block/dataplane/xen-block.c   | 356 ---
 hw/block/dataplane/xen-block.h   |  29 
 4 files changed, 287 insertions(+), 100 deletions(-)
 create mode 100644 hw/block/dataplane/xen-block.h

diff --git a/MAINTAINERS b/MAINTAINERS
index ab62ad4..9875581 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -408,6 +408,7 @@ F: hw/block/dataplane/xen*
 F: hw/xen/
 F: hw/xenpv/
 F: hw/i386/xen/
+F: include/hw/block/dataplane/xen*
 F: include/hw/xen/
 F: include/sysemu/xen-mapcache.h
 
diff --git a/hw/block/dataplane/Makefile.objs b/hw/block/dataplane/Makefile.objs
index e786f66..c6c68db 100644
--- a/hw/block/dataplane/Makefile.objs
+++ b/hw/block/dataplane/Makefile.objs
@@ -1 +1,2 @@
 obj-y += virtio-blk.o
+obj-$(CONFIG_XEN) += xen-block.o
diff --git a/hw/block/dataplane/xen-block.c b/hw/block/dataplane/xen-block.c
index 98f987d..20d16e7 100644
--- a/hw/block/dataplane/xen-block.c
+++ b/hw/block/dataplane/xen-block.c
@@ -24,65 +24,53 @@
  * See the COPYING file in the top-level directory.
  */
 
+#include "qemu/osdep.h"
+#include "qemu/error-report.h"
+#include "qapi/error.h"
+#include "hw/hw.h"
+#include "hw/xen/xen_common.h"
+#include "hw/block/xen_blkif.h"
+#include "sysemu/block-backend.h"
+#include "sysemu/iothread.h"
+#include "xen-block.h"
+
 struct ioreq {
-blkif_request_t req;
-int16_t status;
-
-/* parsed request */
-off_t   start;
-QEMUIOVectorv;
-void*buf;
-size_t  size;
-int presync;
-
-/* aio status */
-int aio_inflight;
-int aio_errors;
-
-struct XenBlkDev*blkdev;
-QLIST_ENTRY(ioreq)   list;
-BlockAcctCookie acct;
+blkif_request_t req;
+int16_t status;
+off_t start;
+QEMUIOVector v;
+void *buf;
+size_t size;
+int presync;
+int aio_inflight;
+int aio_errors;
+struct XenBlkDev *blkdev;
+QLIST_ENTRY(ioreq) list;
+BlockAcctCookie acct;
 };
 
-#define MAX_RING_PAGE_ORDER 4
-
 struct XenBlkDev {
-struct XenLegacyDevicexendev;  /* must be first */
-char*params;
-char*mode;
-char*type;
-char*dev;
-char*devtype;
-booldirectiosafe;
-const char  *fileproto;
-const char  *filename;
-unsigned intring_ref[1 << MAX_RING_PAGE_ORDER];
-unsigned intnr_ring_ref;
-void*sring;
-int64_t file_blk;
-int64_t file_size;
-int protocol;
-blkif_back_rings_t  rings;
-int more_work;
-
-/* request lists */
+XenDevice *xendev;
+XenEventChannel *event_channel;
+unsigned int *ring_ref;
+unsigned int nr_ring_ref;
+void *sring;
+int64_t file_blk;
+int64_t file_size;
+int protocol;
+blkif_back_rings_t rings;
+int more_work;
 QLIST_HEAD(inflight_head, ioreq) inflight;
 QLIST_HEAD(finished_head, ioreq) finished;
 QLIST_HEAD(freelist_head, ioreq) freelist;
-int requests_total;
-int requests_inflight;
-int requests_finished;
-unsigned intmax_requests;
-
-gbooleanfeature_discard;
-
-/* qemu block driver */
-DriveInfo   *dinfo;
-BlockBackend*blk;
-QEMUBH  *bh;
-
-IOThread*iothread;
-AioContext  *ctx;
+int requests_total;
+int requests_inflight;
+int requests_finished;
+unsigned int max_requests;
+BlockBackend *blk;
+QEMUBH *bh;
+IOThread *iothread;
+AioContext *ctx;
 };
 
 static void ioreq_reset(struct ioreq *ioreq)
@@ -161,7 +149,6 @@ static void ioreq_release(struct ioreq *ioreq, bool finish)
 static int ioreq_parse(struct ioreq *ioreq)
 {
 struct XenBlkDev *blkdev = ioreq->blkdev;
-struct XenLegacyDevice *xendev = &blkdev->xendev;
 size_t len;
 int i;
 
@@ -183,7 +170,8 @@ static int ioreq_parse(struct ioreq *ioreq)
 goto err;
 };
 
-if (ioreq->req.oper

[Qemu-block] [PATCH v2 12/18] xen: remove 'ioreq' struct/varable/field names from dataplane/xen-block.c

2018-12-06 Thread Paul Durrant

This is a purely cosmetic patch that purges the name 'ioreq' from struct,
variable and field names. (This name has been problematic for a long time
as 'ioreq' is the name used for generic I/O requests coming from Xen).
The patch replaces 'struct ioreq' with a new 'XenBlockRequest' type and
'ioreq' field/variable names with 'request', and then does necessary
fix-up to adhere to coding style.

Function names are not modified by this patch. They will be dealt with in
a subsequent patch.

No functional change.

Signed-off-by: Paul Durrant 
Acked-by: Anthony Perard 
---
Cc: Stefan Hajnoczi 
Cc: Stefano Stabellini 
Cc: Kevin Wolf 
Cc: Max Reitz 
---
 hw/block/dataplane/xen-block.c | 310 +
 1 file changed, 156 insertions(+), 154 deletions(-)

diff --git a/hw/block/dataplane/xen-block.c b/hw/block/dataplane/xen-block.c
index 6ecd160..426e83c 100644
--- a/hw/block/dataplane/xen-block.c
+++ b/hw/block/dataplane/xen-block.c
@@ -34,7 +34,7 @@
 #include "sysemu/iothread.h"
 #include "xen-block.h"
 
-struct ioreq {
+typedef struct XenBlockRequest {
 blkif_request_t req;
 int16_t status;
 off_t start;
@@ -45,9 +45,9 @@ struct ioreq {
 int aio_inflight;
 int aio_errors;
 XenBlockDataPlane *dataplane;
-QLIST_ENTRY(ioreq) list;
+QLIST_ENTRY(XenBlockRequest) list;
 BlockAcctCookie acct;
-};
+} XenBlockRequest;
 
 struct XenBlockDataPlane {
 XenDevice *xendev;
@@ -60,9 +60,9 @@ struct XenBlockDataPlane {
 int protocol;
 blkif_back_rings_t rings;
 int more_work;
-QLIST_HEAD(inflight_head, ioreq) inflight;
-QLIST_HEAD(finished_head, ioreq) finished;
-QLIST_HEAD(freelist_head, ioreq) freelist;
+QLIST_HEAD(inflight_head, XenBlockRequest) inflight;
+QLIST_HEAD(finished_head, XenBlockRequest) finished;
+QLIST_HEAD(freelist_head, XenBlockRequest) freelist;
 int requests_total;
 int requests_inflight;
 int requests_finished;
@@ -73,68 +73,68 @@ struct XenBlockDataPlane {
 AioContext *ctx;
 };
 
-static void ioreq_reset(struct ioreq *ioreq)
+static void ioreq_reset(XenBlockRequest *request)
 {
-memset(&ioreq->req, 0, sizeof(ioreq->req));
-ioreq->status = 0;
-ioreq->start = 0;
-ioreq->buf = NULL;
-ioreq->size = 0;
-ioreq->presync = 0;
+memset(&request->req, 0, sizeof(request->req));
+request->status = 0;
+request->start = 0;
+request->buf = NULL;
+request->size = 0;
+request->presync = 0;
 
-ioreq->aio_inflight = 0;
-ioreq->aio_errors = 0;
+request->aio_inflight = 0;
+request->aio_errors = 0;
 
-ioreq->dataplane = NULL;
-memset(&ioreq->list, 0, sizeof(ioreq->list));
-memset(&ioreq->acct, 0, sizeof(ioreq->acct));
+request->dataplane = NULL;
+memset(&request->list, 0, sizeof(request->list));
+memset(&request->acct, 0, sizeof(request->acct));
 
-qemu_iovec_reset(&ioreq->v);
+qemu_iovec_reset(&request->v);
 }
 
-static struct ioreq *ioreq_start(XenBlockDataPlane *dataplane)
+static XenBlockRequest *ioreq_start(XenBlockDataPlane *dataplane)
 {
-struct ioreq *ioreq = NULL;
+XenBlockRequest *request = NULL;
 
 if (QLIST_EMPTY(&dataplane->freelist)) {
 if (dataplane->requests_total >= dataplane->max_requests) {
 goto out;
 }
 /* allocate new struct */
-ioreq = g_malloc0(sizeof(*ioreq));
-ioreq->dataplane = dataplane;
+request = g_malloc0(sizeof(*request));
+request->dataplane = dataplane;
 dataplane->requests_total++;
-qemu_iovec_init(&ioreq->v, 1);
+qemu_iovec_init(&request->v, 1);
 } else {
 /* get one from freelist */
-ioreq = QLIST_FIRST(&dataplane->freelist);
-QLIST_REMOVE(ioreq, list);
+request = QLIST_FIRST(&dataplane->freelist);
+QLIST_REMOVE(request, list);
 }
-QLIST_INSERT_HEAD(&dataplane->inflight, ioreq, list);
+QLIST_INSERT_HEAD(&dataplane->inflight, request, list);
 dataplane->requests_inflight++;
 
 out:
-return ioreq;
+return request;
 }
 
-static void ioreq_finish(struct ioreq *ioreq)
+static void ioreq_finish(XenBlockRequest *request)
 {
-XenBlockDataPlane *dataplane = ioreq->dataplane;
+XenBlockDataPlane *dataplane = request->dataplane;
 
-QLIST_REMOVE(ioreq, list);
-QLIST_INSERT_HEAD(&dataplane->finished, ioreq, list);
+QLIST_REMOVE(request, list);
+QLIST_INSERT_HEAD(&dataplane->finished, request, list);
 dataplane->requests_inflight--;
 dataplane->requests_finished++;
 }
 
-static void ioreq_release(struct ioreq *ioreq, bool finish)
+static void ioreq_release(XenBlockRequest *request, bool finish)
 {
-XenBlockDataPlane *dataplane = ioreq->dataplane;
+XenBlockDataPlane *dataplane = request->dataplane;
 
-QLIST_REMOVE(ioreq, list);
-ioreq_reset(ioreq);
-ioreq->dataplane = dataplane;
-QLIST_INSERT_HEAD(&dataplane->freelist, ioreq, list);
+QLIST_REMOVE(request, list);
+io

[Qemu-block] [PATCH v2 17/18] MAINTAINERS: add myself as a Xen maintainer

2018-12-06 Thread Paul Durrant

I have made many significant contributions to the Xen code in QEMU,
particularly the recent patches introducing a new PV device framework.
I intend to make further significant contributions, porting other PV back-
ends to the new framework with the intent of eventually removing the
legacy code. It therefore seems reasonable that I become a maintainer of
the Xen code.

Signed-off-by: Paul Durrant 
Acked-by: Anthony Perard 
Acked-by: Stefano Stabellini 
---
Cc: Paolo Bonzini 

v2:
 - Fix typo
---
 MAINTAINERS | 1 +
 1 file changed, 1 insertion(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 9875581..e6bd441 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -396,6 +396,7 @@ Guest CPU Cores (Xen):
 X86
 M: Stefano Stabellini 
 M: Anthony Perard 
+M: Paul Durrant 
 L: xen-de...@lists.xenproject.org
 S: Supported
 F: */xen*
-- 
2.1.4

[Qemu-block] [PATCH v2 13/18] xen: purge 'blk' and 'ioreq' from function names in dataplane/xen-block.c

2018-12-06 Thread Paul Durrant

This is a purely cosmetic patch that purges remaining use of 'blk' and
'ioreq' in local function names, and then makes sure all functions are
prefixed with 'xen_block_'.

No functional change.

Signed-off-by: Paul Durrant 
---
Cc: Stefano Stabellini 
Cc: Anthony Perard 
Cc: Stefan Hajnoczi 
Cc: Kevin Wolf 
Cc: Max Reitz 

v2:
 - Add 'xen_block_' prefix
---
 hw/block/dataplane/xen-block.c | 90 +-
 1 file changed, 46 insertions(+), 44 deletions(-)

diff --git a/hw/block/dataplane/xen-block.c b/hw/block/dataplane/xen-block.c
index 426e83c..8c451ae 100644
--- a/hw/block/dataplane/xen-block.c
+++ b/hw/block/dataplane/xen-block.c
@@ -73,7 +73,7 @@ struct XenBlockDataPlane {
 AioContext *ctx;
 };
 
-static void ioreq_reset(XenBlockRequest *request)
+static void reset_request(XenBlockRequest *request)
 {
 memset(&request->req, 0, sizeof(request->req));
 request->status = 0;
@@ -92,7 +92,7 @@ static void ioreq_reset(XenBlockRequest *request)
 qemu_iovec_reset(&request->v);
 }
 
-static XenBlockRequest *ioreq_start(XenBlockDataPlane *dataplane)
+static XenBlockRequest *xen_block_start_request(XenBlockDataPlane *dataplane)
 {
 XenBlockRequest *request = NULL;
 
@@ -117,7 +117,7 @@ out:
 return request;
 }
 
-static void ioreq_finish(XenBlockRequest *request)
+static void xen_block_finish_request(XenBlockRequest *request)
 {
 XenBlockDataPlane *dataplane = request->dataplane;
 
@@ -127,12 +127,12 @@ static void ioreq_finish(XenBlockRequest *request)
 dataplane->requests_finished++;
 }
 
-static void ioreq_release(XenBlockRequest *request, bool finish)
+static void xen_block_release_request(XenBlockRequest *request, bool finish)
 {
 XenBlockDataPlane *dataplane = request->dataplane;
 
 QLIST_REMOVE(request, list);
-ioreq_reset(request);
+reset_request(request);
 request->dataplane = dataplane;
 QLIST_INSERT_HEAD(&dataplane->freelist, request, list);
 if (finish) {
@@ -146,7 +146,7 @@ static void ioreq_release(XenBlockRequest *request, bool 
finish)
  * translate request into iovec + start offset
  * do sanity checks along the way
  */
-static int ioreq_parse(XenBlockRequest *request)
+static int xen_block_parse_request(XenBlockRequest *request)
 {
 XenBlockDataPlane *dataplane = request->dataplane;
 size_t len;
@@ -207,7 +207,7 @@ err:
 return -1;
 }
 
-static int ioreq_grant_copy(XenBlockRequest *request)
+static int xen_block_copy_request(XenBlockRequest *request)
 {
 XenBlockDataPlane *dataplane = request->dataplane;
 XenDevice *xendev = dataplane->xendev;
@@ -253,9 +253,9 @@ static int ioreq_grant_copy(XenBlockRequest *request)
 return 0;
 }
 
-static int ioreq_runio_qemu_aio(XenBlockRequest *request);
+static int xen_block_do_aio(XenBlockRequest *request);
 
-static void qemu_aio_complete(void *opaque, int ret)
+static void xen_block_complete_aio(void *opaque, int ret)
 {
 XenBlockRequest *request = opaque;
 XenBlockDataPlane *dataplane = request->dataplane;
@@ -272,7 +272,7 @@ static void qemu_aio_complete(void *opaque, int ret)
 request->aio_inflight--;
 if (request->presync) {
 request->presync = 0;
-ioreq_runio_qemu_aio(request);
+xen_block_do_aio(request);
 goto done;
 }
 if (request->aio_inflight > 0) {
@@ -283,7 +283,7 @@ static void qemu_aio_complete(void *opaque, int ret)
 case BLKIF_OP_READ:
 /* in case of failure request->aio_errors is increased */
 if (ret == 0) {
-ioreq_grant_copy(request);
+xen_block_copy_request(request);
 }
 qemu_vfree(request->buf);
 break;
@@ -299,7 +299,7 @@ static void qemu_aio_complete(void *opaque, int ret)
 }
 
 request->status = request->aio_errors ? BLKIF_RSP_ERROR : BLKIF_RSP_OKAY;
-ioreq_finish(request);
+xen_block_finish_request(request);
 
 switch (request->req.operation) {
 case BLKIF_OP_WRITE:
@@ -324,9 +324,9 @@ done:
 aio_context_release(dataplane->ctx);
 }
 
-static bool blk_split_discard(XenBlockRequest *request,
-  blkif_sector_t sector_number,
-  uint64_t nr_sectors)
+static bool xen_block_split_discard(XenBlockRequest *request,
+blkif_sector_t sector_number,
+uint64_t nr_sectors)
 {
 XenBlockDataPlane *dataplane = request->dataplane;
 int64_t byte_offset;
@@ -349,7 +349,7 @@ static bool blk_split_discard(XenBlockRequest *request,
 byte_chunk = byte_remaining > limit ? limit : byte_remaining;
 request->aio_inflight++;
 blk_aio_pdiscard(dataplane->blk, byte_offset, byte_chunk,
- qemu_aio_complete, request);
+ xen_block_complete_aio, request);
 byte_remaining -= byte_chunk;
 byte_offset += byte_chunk;
 } while (byte_remaining > 0);
@@ -357,7 +357,7 @@ static bool blk_s

[Qemu-block] [PATCH v2 15/18] xen: add a mechanism to automatically create XenDevice-s...

2018-12-06 Thread Paul Durrant

...that maintains compatibility with existing Xen toolstacks.

Xen toolstacks instantiate PV backends by simply writing information into
xenstore and expecting a backend implementation to be watching for this.

This patch adds a new 'xen-backend' module to allow individual XenDevice
implementations to register a creator function to be called when a tool-
stack instantiates a new backend in this way.

To support this it is also necessary to add new watchers into the XenBus
implementation to handle enumeration of new backends and also destruction
of XenDevice-s when the toolstack sets the backend 'online' key to 0.

NOTE: This patch only adds the framework. A subsequent patch will add a
  creator function for xen-block devices.

Signed-off-by: Paul Durrant 
---
Cc: Stefano Stabellini 
Cc: Anthony Perard 

v2:
 - Sort out error paths and error reporting
---
 hw/xen/Makefile.objs |   2 +-
 hw/xen/trace-events  |   5 +
 hw/xen/xen-backend.c |  69 +
 hw/xen/xen-bus.c | 226 +++
 include/hw/xen/xen-backend.h |  26 +
 include/hw/xen/xen-bus.h |   5 +-
 include/qemu/module.h|   3 +
 7 files changed, 315 insertions(+), 21 deletions(-)
 create mode 100644 hw/xen/xen-backend.c
 create mode 100644 include/hw/xen/xen-backend.h

diff --git a/hw/xen/Makefile.objs b/hw/xen/Makefile.objs
index 77c0868..84df60a 100644
--- a/hw/xen/Makefile.objs
+++ b/hw/xen/Makefile.objs
@@ -1,5 +1,5 @@
 # xen backend driver support
-common-obj-$(CONFIG_XEN) += xen-legacy-backend.o xen_devconfig.o xen_pvdev.o 
xen-common.o xen-bus.o xen-bus-helper.o
+common-obj-$(CONFIG_XEN) += xen-legacy-backend.o xen_devconfig.o xen_pvdev.o 
xen-common.o xen-bus.o xen-bus-helper.o xen-backend.o
 
 obj-$(CONFIG_XEN_PCI_PASSTHROUGH) += xen-host-pci-device.o
 obj-$(CONFIG_XEN_PCI_PASSTHROUGH) += xen_pt.o xen_pt_config_init.o 
xen_pt_graphics.o xen_pt_msi.o
diff --git a/hw/xen/trace-events b/hw/xen/trace-events
index 22055b5..d567242 100644
--- a/hw/xen/trace-events
+++ b/hw/xen/trace-events
@@ -16,13 +16,18 @@ xen_domid_restrict(int err) "err: %u"
 # include/hw/xen/xen-bus.c
 xen_bus_realize(void) ""
 xen_bus_unrealize(void) ""
+xen_bus_enumerate(void) ""
+xen_bus_type_enumerate(const char *type) "type: %s"
+xen_bus_backend_create(const char *type, const char *path) "type: %s path: %s"
 xen_bus_add_watch(const char *node, const char *key, char *token) "node: %s 
key: %s token: %s"
 xen_bus_remove_watch(const char *node, const char *key, char *token) "node: %s 
key: %s token: %s"
 xen_bus_watch(const char *token) "token: %s"
 xen_device_realize(const char *type, char *name) "type: %s name: %s"
 xen_device_unrealize(const char *type, char *name) "type: %s name: %s"
 xen_device_backend_state(const char *type, char *name, const char *state) 
"type: %s name: %s -> %s"
+xen_device_backend_online(const char *type, char *name, bool online) "type: %s 
name: %s -> %u"
 xen_device_frontend_state(const char *type, char *name, const char *state) 
"type: %s name: %s -> %s"
+xen_device_backend_changed(const char *type, char *name) "type: %s name: %s"
 xen_device_frontend_changed(const char *type, char *name) "type: %s name: %s"
 
 # include/hw/xen/xen-bus-helper.c
diff --git a/hw/xen/xen-backend.c b/hw/xen/xen-backend.c
new file mode 100644
index 000..d87e6ec
--- /dev/null
+++ b/hw/xen/xen-backend.c
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2018  Citrix Systems Inc.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/error-report.h"
+#include "hw/xen/xen-backend.h"
+
+typedef struct XenBackendImpl {
+const char *type;
+XenBackendDeviceCreate create;
+} XenBackendImpl;
+
+static GHashTable *xen_backend_table_get(void)
+{
+static GHashTable *table;
+
+if (table == NULL) {
+table = g_hash_table_new(g_str_hash, g_str_equal);
+}
+
+return table;
+}
+
+static void xen_backend_table_add(XenBackendImpl *impl)
+{
+g_hash_table_insert(xen_backend_table_get(), (void *)impl->type, impl);
+}
+
+static XenBackendImpl *xen_backend_table_lookup(const char *type)
+{
+return g_hash_table_lookup(xen_backend_table_get(), type);
+}
+
+void xen_backend_register(const XenBackendInfo *info)
+{
+XenBackendImpl *impl = g_new0(XenBackendImpl, 1);
+
+g_assert(info->type);
+
+if (xen_backend_table_lookup(info->type)) {
+error_report("attempt to register duplicate Xen backend type '%s'",
+ info->type);
+abort();
+}
+
+if (!info->create) {
+error_report("backend type '%s' has no creator", info->type);
+abort();
+}
+
+impl->type = info->type;
+impl->create = info->create;
+
+xen_backend_table_add(impl);
+}
+
+void xen_backend_device_create(BusState *bus, const char *type,
+   const char *name, QDict *opts, Error **err

[Qemu-block] [PATCH v2 16/18] xen: automatically create XenBlockDevice-s

2018-12-06 Thread Paul Durrant

This patch adds a creator function for XenBlockDevice-s so that they can
be created automatically when the Xen toolstack instantiates a new
PV backend. When the XenBlockDevice is created this way it is also
necessary to create a drive which matches the configuration that the Xen
toolstack has written into xenstore. This drive is marked 'auto_del' so
that it will be removed when the XenBlockDevice is destroyed. Also, for
compatibility with the legacy 'xen_disk' implementation, an iothread
is automatically created for the new XenBlockDevice. This will also be
removed when the XenBlockDevice is destroyed.

Correspondingly the legacy backend scan for 'qdisk' is removed.

After this patch is applied the legacy 'xen_disk' code is redundant. It
will be removed by a subsequent patch.

Signed-off-by: Paul Durrant 
---
Cc: Kevin Wolf 
Cc: Max Reitz 
Cc: Stefano Stabellini 
Cc: Anthony Perard 

v2:
 - Get rid of error_abort
 - Don't use qdev_init_nofail
 - Explain why file locking needs to be off
---
 hw/block/trace-events   |   1 +
 hw/block/xen-block.c| 262 +++-
 hw/xen/xen-bus.c|   2 +-
 hw/xen/xen-legacy-backend.c |   1 -
 include/hw/xen/xen-block.h  |   1 +
 5 files changed, 264 insertions(+), 3 deletions(-)

diff --git a/hw/block/trace-events b/hw/block/trace-events
index 89e2583..a89c8a6 100644
--- a/hw/block/trace-events
+++ b/hw/block/trace-events
@@ -137,3 +137,4 @@ xen_disk_realize(void) ""
 xen_disk_unrealize(void) ""
 xen_cdrom_realize(void) ""
 xen_cdrom_unrealize(void) ""
+xen_block_device_create(const char *name) "name: %s"
diff --git a/hw/block/xen-block.c b/hw/block/xen-block.c
index fc64aaf..2430dae 100644
--- a/hw/block/xen-block.c
+++ b/hw/block/xen-block.c
@@ -7,12 +7,15 @@
 
 #include "qemu/osdep.h"
 #include "qemu/cutils.h"
+#include "qemu/option.h"
 #include "qapi/error.h"
 #include "qapi/visitor.h"
+#include "qapi/qmp/qdict.h"
 #include "hw/hw.h"
 #include "hw/xen/xen_common.h"
 #include "hw/block/xen_blkif.h"
 #include "hw/xen/xen-block.h"
+#include "hw/xen/xen-backend.h"
 #include "sysemu/blockdev.h"
 #include "sysemu/block-backend.h"
 #include "sysemu/iothread.h"
@@ -135,6 +138,11 @@ static void xen_block_unrealize(XenDevice *xendev, Error 
**errp)
 xen_block_dataplane_destroy(blockdev->dataplane);
 blockdev->dataplane = NULL;
 
+if (blockdev->auto_iothread) {
+iothread_destroy(blockdev->auto_iothread);
+blockdev->auto_iothread = NULL;
+}
+
 if (blockdev_class->unrealize) {
 blockdev_class->unrealize(blockdev, &local_err);
 if (local_err) {
@@ -152,6 +160,8 @@ static void xen_block_realize(XenDevice *xendev, Error 
**errp)
 XenBlockVdev *vdev = &blockdev->vdev;
 Error *local_err = NULL;
 BlockConf *conf = &blockdev->conf;
+IOThread *iothread = blockdev->auto_iothread ?
+blockdev->auto_iothread : blockdev->iothread;
 
 if (vdev->type == XEN_BLOCK_VDEV_TYPE_INVALID) {
 error_setg(errp, "vdev property not set");
@@ -218,7 +228,7 @@ static void xen_block_realize(XenDevice *xendev, Error 
**errp)
   conf->logical_block_size);
 
 blockdev->dataplane = xen_block_dataplane_create(xendev, conf,
- blockdev->iothread);
+ iothread);
 }
 
 static void xen_block_frontend_changed(XenDevice *xendev,
@@ -480,6 +490,8 @@ static void xen_block_class_init(ObjectClass *class, void 
*data)
 DeviceClass *dev_class = DEVICE_CLASS(class);
 XenDeviceClass *xendev_class = XEN_DEVICE_CLASS(class);
 
+xendev_class->backend = "qdisk";
+xendev_class->device = "vbd";
 xendev_class->get_name = xen_block_get_name;
 xendev_class->realize = xen_block_realize;
 xendev_class->frontend_changed = xen_block_frontend_changed;
@@ -591,3 +603,251 @@ static void xen_block_register_types(void)
 }
 
 type_init(xen_block_register_types)
+
+static void xen_block_drive_create(const char *id, const char *device_type,
+   QDict *opts, Error **errp)
+{
+const char *params = qdict_get_try_str(opts, "params");
+const char *mode = qdict_get_try_str(opts, "mode");
+const char *direct_io_safe = qdict_get_try_str(opts, "direct-io-safe");
+const char *discard_enable = qdict_get_try_str(opts, "discard-enable");
+char *format = NULL;
+char *file = NULL;
+char *drive_optstr = NULL;
+QemuOpts *drive_opts;
+Error *local_err = NULL;
+
+if (params) {
+char **v = g_strsplit(params, ":", 2);
+
+if (v[1] == NULL) {
+file = g_strdup(v[0]);
+} else {
+if (strcmp(v[0], "aio") == 0) {
+format = g_strdup("raw");
+} else if (strcmp(v[0], "vhd") == 0) {
+format = g_strdup("vpc");
+} else {
+format = g_strdup(v[0]);
+}
+file = g_strdup(v[1]);

[Qemu-block] [PATCH v2 18/18] xen: remove the legacy 'xen_disk' backend

2018-12-06 Thread Paul Durrant

This backend has now been replaced by the 'xen-qdisk' XenDevice.

Signed-off-by: Paul Durrant 
---
Cc: Kevin Wolf 
Cc: Max Reitz 
Cc: Anthony Perard 
Cc: Stefano Stabellini 
---
 hw/block/Makefile.objs |1 -
 hw/block/xen_disk.c| 1011 
 2 files changed, 1012 deletions(-)
 delete mode 100644 hw/block/xen_disk.c

diff --git a/hw/block/Makefile.objs b/hw/block/Makefile.objs
index f34813a..e206b8e 100644
--- a/hw/block/Makefile.objs
+++ b/hw/block/Makefile.objs
@@ -5,7 +5,6 @@ common-obj-$(CONFIG_NAND) += nand.o
 common-obj-$(CONFIG_PFLASH_CFI01) += pflash_cfi01.o
 common-obj-$(CONFIG_PFLASH_CFI02) += pflash_cfi02.o
 common-obj-$(CONFIG_XEN) += xen-block.o
-common-obj-$(CONFIG_XEN) += xen_disk.o
 common-obj-$(CONFIG_ECC) += ecc.o
 common-obj-$(CONFIG_ONENAND) += onenand.o
 common-obj-$(CONFIG_NVME_PCI) += nvme.o
diff --git a/hw/block/xen_disk.c b/hw/block/xen_disk.c
deleted file mode 100644
index 75fe55f..000
--- a/hw/block/xen_disk.c
+++ /dev/null
@@ -1,1011 +0,0 @@
-/*
- *  xen paravirt block device backend
- *
- *  (c) Gerd Hoffmann 
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation; under version 2 of the License.
- *
- *  This program is distributed in the hope that it will be useful,
- *  but WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *  GNU General Public License for more details.
- *
- *  You should have received a copy of the GNU General Public License along
- *  with this program; if not, see .
- *
- *  Contributions after 2012-01-13 are licensed under the terms of the
- *  GNU GPL, version 2 or (at your option) any later version.
- */
-
-#include "qemu/osdep.h"
-#include "qemu/units.h"
-#include 
-#include 
-
-#include "hw/hw.h"
-#include "hw/xen/xen-legacy-backend.h"
-#include "xen_blkif.h"
-#include "sysemu/blockdev.h"
-#include "sysemu/iothread.h"
-#include "sysemu/block-backend.h"
-#include "qapi/error.h"
-#include "qapi/qmp/qdict.h"
-#include "qapi/qmp/qstring.h"
-#include "trace.h"
-
-/* - */
-
-#define BLOCK_SIZE  512
-#define IOCB_COUNT  (BLKIF_MAX_SEGMENTS_PER_REQUEST + 2)
-
-struct ioreq {
-blkif_request_t req;
-int16_t status;
-
-/* parsed request */
-off_t   start;
-QEMUIOVectorv;
-void*buf;
-size_t  size;
-int presync;
-
-/* aio status */
-int aio_inflight;
-int aio_errors;
-
-struct XenBlkDev*blkdev;
-QLIST_ENTRY(ioreq)   list;
-BlockAcctCookie acct;
-};
-
-#define MAX_RING_PAGE_ORDER 4
-
-struct XenBlkDev {
-struct XenLegacyDevicexendev;  /* must be first */
-char*params;
-char*mode;
-char*type;
-char*dev;
-char*devtype;
-booldirectiosafe;
-const char  *fileproto;
-const char  *filename;
-unsigned intring_ref[1 << MAX_RING_PAGE_ORDER];
-unsigned intnr_ring_ref;
-void*sring;
-int64_t file_blk;
-int64_t file_size;
-int protocol;
-blkif_back_rings_t  rings;
-int more_work;
-
-/* request lists */
-QLIST_HEAD(inflight_head, ioreq) inflight;
-QLIST_HEAD(finished_head, ioreq) finished;
-QLIST_HEAD(freelist_head, ioreq) freelist;
-int requests_total;
-int requests_inflight;
-int requests_finished;
-unsigned intmax_requests;
-
-gbooleanfeature_discard;
-
-/* qemu block driver */
-DriveInfo   *dinfo;
-BlockBackend*blk;
-QEMUBH  *bh;
-
-IOThread*iothread;
-AioContext  *ctx;
-};
-
-/* - */
-
-static void ioreq_reset(struct ioreq *ioreq)
-{
-memset(&ioreq->req, 0, sizeof(ioreq->req));
-ioreq->status = 0;
-ioreq->start = 0;
-ioreq->buf = NULL;
-ioreq->size = 0;
-ioreq->presync = 0;
-
-ioreq->aio_inflight = 0;
-ioreq->aio_errors = 0;
-
-ioreq->blkdev = NULL;
-memset(&ioreq->list, 0, sizeof(ioreq->list));
-memset(&ioreq->acct, 0, sizeof(ioreq->acct));
-
-qemu_iovec_reset(&ioreq->v);
-}
-
-static struct ioreq *ioreq_start(struct XenBlkDev *blkdev)
-{
-struct ioreq *ioreq = NULL;
-
-if (QLIST_EMPTY(&blkdev->freelist)) {
-if (blkdev->requests_total >= blkdev->max_requests) {
-goto out;
-}
-/* allocate new struct */
-ioreq = g_malloc0(sizeof(*ioreq));
-ioreq->blkde

[Qemu-block] [PATCH v2 14/18] xen: add implementations of xen-block connect and disconnect functions...

2018-12-06 Thread Paul Durrant

...and wire in the dataplane.

This patch adds the remaining code to make the xen-block XenDevice
functional. The parameters that a block frontend expects to find are
populated in the backend xenstore area, and the 'ring-ref' and
'event-channel' values specified in the frontend xenstore area are
mapped/bound and used to set up the dataplane.

Signed-off-by: Paul Durrant 
---
Cc: Stefano Stabellini 
Cc: Anthony Perard 
Cc: Kevin Wolf 
Cc: Max Reitz 

v2:
 - Tidy up header inclusions
 - Stop leaking ring_ref on error
 - Auto-create drive for CDRom devices
---
 hw/block/xen-block.c   | 164 +
 hw/xen/xen-bus.c   |  12 ++--
 include/hw/xen/xen-block.h |   9 +++
 include/hw/xen/xen-bus.h   |  10 +++
 4 files changed, 189 insertions(+), 6 deletions(-)

diff --git a/hw/block/xen-block.c b/hw/block/xen-block.c
index d2334ef..fc64aaf 100644
--- a/hw/block/xen-block.c
+++ b/hw/block/xen-block.c
@@ -10,7 +10,13 @@
 #include "qapi/error.h"
 #include "qapi/visitor.h"
 #include "hw/hw.h"
+#include "hw/xen/xen_common.h"
+#include "hw/block/xen_blkif.h"
 #include "hw/xen/xen-block.h"
+#include "sysemu/blockdev.h"
+#include "sysemu/block-backend.h"
+#include "sysemu/iothread.h"
+#include "dataplane/xen-block.h"
 #include "trace.h"
 
 static char *xen_block_get_name(XenDevice *xendev, Error **errp)
@@ -28,6 +34,8 @@ static void xen_block_disconnect(XenDevice *xendev, Error 
**errp)
 XenBlockVdev *vdev = &blockdev->vdev;
 
 trace_xen_block_disconnect(type, vdev->disk, vdev->partition);
+
+xen_block_dataplane_stop(blockdev->dataplane);
 }
 
 static void xen_block_connect(XenDevice *xendev, Error **errp)
@@ -35,8 +43,72 @@ static void xen_block_connect(XenDevice *xendev, Error 
**errp)
 XenBlockDevice *blockdev = XEN_BLOCK_DEVICE(xendev);
 const char *type = object_get_typename(OBJECT(blockdev));
 XenBlockVdev *vdev = &blockdev->vdev;
+unsigned int order, nr_ring_ref, *ring_ref, event_channel, protocol;
+char *str;
 
 trace_xen_block_connect(type, vdev->disk, vdev->partition);
+
+if (xen_device_frontend_scanf(xendev, "ring-page-order", "%u",
+  &order) != 1) {
+nr_ring_ref = 1;
+ring_ref = g_new(unsigned int, nr_ring_ref);
+
+if (xen_device_frontend_scanf(xendev, "ring-ref", "%u",
+  &ring_ref[0]) != 1) {
+error_setg(errp, "failed to read ring-ref");
+g_free(ring_ref);
+return;
+}
+} else if (order <= blockdev->max_ring_page_order) {
+unsigned int i;
+
+nr_ring_ref = 1 << order;
+ring_ref = g_new(unsigned int, nr_ring_ref);
+
+for (i = 0; i < nr_ring_ref; i++) {
+const char *key = g_strdup_printf("ring-ref%u", i);
+
+if (xen_device_frontend_scanf(xendev, key, "%u",
+  &ring_ref[i]) != 1) {
+error_setg(errp, "failed to read %s", key);
+g_free((gpointer)key);
+g_free(ring_ref);
+return;
+}
+
+g_free((gpointer)key);
+}
+} else {
+error_setg(errp, "invalid ring-page-order (%d)", order);
+return;
+}
+
+if (xen_device_frontend_scanf(xendev, "event-channel", "%u",
+  &event_channel) != 1) {
+error_setg(errp, "failed to read event-channel");
+g_free(ring_ref);
+return;
+}
+
+if (xen_device_frontend_scanf(xendev, "protocol", "%ms",
+  &str) != 1) {
+protocol = BLKIF_PROTOCOL_NATIVE;
+} else {
+if (strcmp(str, XEN_IO_PROTO_ABI_X86_32) == 0) {
+protocol = BLKIF_PROTOCOL_X86_32;
+} else if (strcmp(str, XEN_IO_PROTO_ABI_X86_64) == 0) {
+protocol = BLKIF_PROTOCOL_X86_64;
+} else {
+protocol = BLKIF_PROTOCOL_NATIVE;
+}
+
+free(str);
+}
+
+xen_block_dataplane_start(blockdev->dataplane, ring_ref, nr_ring_ref,
+  event_channel, protocol, errp);
+
+g_free(ring_ref);
 }
 
 static void xen_block_unrealize(XenDevice *xendev, Error **errp)
@@ -60,6 +132,9 @@ static void xen_block_unrealize(XenDevice *xendev, Error 
**errp)
 error_propagate(errp, local_err);
 }
 
+xen_block_dataplane_destroy(blockdev->dataplane);
+blockdev->dataplane = NULL;
+
 if (blockdev_class->unrealize) {
 blockdev_class->unrealize(blockdev, &local_err);
 if (local_err) {
@@ -76,6 +151,7 @@ static void xen_block_realize(XenDevice *xendev, Error 
**errp)
 const char *type = object_get_typename(OBJECT(blockdev));
 XenBlockVdev *vdev = &blockdev->vdev;
 Error *local_err = NULL;
+BlockConf *conf = &blockdev->conf;
 
 if (vdev->type == XEN_BLOCK_VDEV_TYPE_INVALID) {
 error_setg(errp, "vdev property not set");
@@ -90,6 +166,59 @@ static void xen_block_

[Qemu-block] [PATCH v2 07/18] xen: add event channel interface for XenDevice-s

2018-12-06 Thread Paul Durrant

The legacy PV backend infrastructure provides functions to bind, unbind
and send notifications to event channnels. Similar functionality will be
required by XenDevice implementations so this patch adds the necessary
support.

Signed-off-by: Paul Durrant 
---
Cc: Stefano Stabellini 
Cc: Anthony Perard 

v2:
 - Added error pointers to notify and unbind
---
 hw/xen/xen-bus.c | 101 +++
 include/hw/xen/xen-bus.h |  18 +
 2 files changed, 119 insertions(+)

diff --git a/hw/xen/xen-bus.c b/hw/xen/xen-bus.c
index b40dc83..0e6f194 100644
--- a/hw/xen/xen-bus.c
+++ b/hw/xen/xen-bus.c
@@ -621,6 +621,81 @@ done:
 g_free(xengnttab_segs);
 }
 
+struct XenEventChannel {
+unsigned int local_port;
+XenEventHandler handler;
+void *opaque;
+Notifier notifier;
+};
+
+static void event_notify(Notifier *n, void *data)
+{
+XenEventChannel *channel = container_of(n, XenEventChannel, notifier);
+unsigned long port = (unsigned long)data;
+
+if (port == channel->local_port) {
+channel->handler(channel->opaque);
+}
+}
+
+XenEventChannel *xen_device_bind_event_channel(XenDevice *xendev,
+   unsigned int port,
+   XenEventHandler handler,
+   void *opaque, Error **errp)
+{
+XenEventChannel *channel = g_new0(XenEventChannel, 1);
+
+channel->local_port = xenevtchn_bind_interdomain(xendev->xeh,
+ xendev->frontend_id,
+ port);
+if (xendev->local_port < 0) {
+error_setg_errno(errp, errno, "xenevtchn_bind_interdomain failed");
+
+g_free(channel);
+return NULL;
+}
+
+channel->handler = handler;
+channel->opaque = opaque;
+channel->notifier.notify = event_notify;
+
+notifier_list_add(&xendev->event_notifiers, &channel->notifier);
+
+return channel;
+}
+
+void xen_device_notify_event_channel(XenDevice *xendev,
+ XenEventChannel *channel,
+ Error **errp)
+{
+if (!channel) {
+error_setg(errp, "bad channel");
+return;
+}
+
+if (xenevtchn_notify(xendev->xeh, channel->local_port) < 0) {
+error_setg_errno(errp, errno, "xenevtchn_notify failed");
+}
+}
+
+void xen_device_unbind_event_channel(XenDevice *xendev,
+ XenEventChannel *channel,
+ Error **errp)
+{
+if (!channel) {
+error_setg(errp, "bad channel");
+return;
+}
+
+notifier_remove(&channel->notifier);
+
+if (xenevtchn_unbind(xendev->xeh, channel->local_port) < 0) {
+error_setg_errno(errp, errno, "xenevtchn_unbind failed");
+}
+
+g_free(channel);
+}
+
 static void xen_device_unrealize(DeviceState *dev, Error **errp)
 {
 XenDevice *xendev = XEN_DEVICE(dev);
@@ -649,6 +724,12 @@ static void xen_device_unrealize(DeviceState *dev, Error 
**errp)
 xen_device_frontend_destroy(xendev);
 xen_device_backend_destroy(xendev);
 
+if (xendev->xeh) {
+qemu_set_fd_handler(xenevtchn_fd(xendev->xeh), NULL, NULL, NULL);
+xenevtchn_close(xendev->xeh);
+xendev->xeh = NULL;
+}
+
 if (xendev->xgth) {
 xengnttab_close(xendev->xgth);
 xendev->xgth = NULL;
@@ -665,6 +746,16 @@ static void xen_device_exit(Notifier *n, void *data)
 xen_device_unrealize(DEVICE(xendev), &error_abort);
 }
 
+static void xen_device_event(void *opaque)
+{
+XenDevice *xendev = opaque;
+unsigned long port = xenevtchn_pending(xendev->xeh);
+
+notifier_list_notify(&xendev->event_notifiers, (void *)port);
+
+xenevtchn_unmask(xendev->xeh, port);
+}
+
 static void xen_device_realize(DeviceState *dev, Error **errp)
 {
 XenDevice *xendev = XEN_DEVICE(dev);
@@ -705,6 +796,16 @@ static void xen_device_realize(DeviceState *dev, Error 
**errp)
 xendev->feature_grant_copy =
 (xengnttab_grant_copy(xendev->xgth, 0, NULL) == 0);
 
+xendev->xeh = xenevtchn_open(NULL, 0);
+if (!xendev->xeh) {
+error_setg_errno(errp, errno, "failed xenevtchn_open");
+goto unrealize;
+}
+
+notifier_list_init(&xendev->event_notifiers);
+qemu_set_fd_handler(xenevtchn_fd(xendev->xeh), xen_device_event, NULL,
+xendev);
+
 xen_device_backend_create(xendev, &local_err);
 if (local_err) {
 error_propagate(errp, local_err);
diff --git a/include/hw/xen/xen-bus.h b/include/hw/xen/xen-bus.h
index 63a09b6..f83a95c 100644
--- a/include/hw/xen/xen-bus.h
+++ b/include/hw/xen/xen-bus.h
@@ -26,6 +26,9 @@ typedef struct XenDevice {
 XenWatch *frontend_state_watch;
 xengnttab_handle *xgth;
 bool feature_grant_copy;
+xenevtchn_handle *xeh;
+xenevtchn_port_or_error_t local_port;
+Notifi

[Qemu-block] [PATCH v2 06/18] xen: add grant table interface for XenDevice-s

2018-12-06 Thread Paul Durrant

The legacy PV backend infrastructure provides functions to map, unmap and
copy pages granted by frontends. Similar functionality will be required
by XenDevice implementations so this patch adds the necessary support.

Signed-off-by: Paul Durrant 
Reviewed-by: Anthony Perard 
---
Cc: Stefano Stabellini 
---
 hw/xen/xen-bus.c | 146 +++
 include/hw/xen/xen-bus.h |  25 
 2 files changed, 171 insertions(+)

diff --git a/hw/xen/xen-bus.c b/hw/xen/xen-bus.c
index f0732f8..b40dc83 100644
--- a/hw/xen/xen-bus.c
+++ b/hw/xen/xen-bus.c
@@ -489,6 +489,138 @@ static void xen_device_frontend_destroy(XenDevice *xendev)
 }
 }
 
+void xen_device_set_max_grant_refs(XenDevice *xendev, unsigned int nr_refs,
+   Error **errp)
+{
+if (xengnttab_set_max_grants(xendev->xgth, nr_refs)) {
+error_setg_errno(errp, errno, "xengnttab_set_max_grants failed");
+}
+}
+
+void *xen_device_map_grant_refs(XenDevice *xendev, uint32_t *refs,
+unsigned int nr_refs, int prot,
+Error **errp)
+{
+void *map = xengnttab_map_domain_grant_refs(xendev->xgth, nr_refs,
+xendev->frontend_id, refs,
+prot);
+
+if (!map) {
+error_setg_errno(errp, errno,
+ "xengnttab_map_domain_grant_refs failed");
+}
+
+return map;
+}
+
+void xen_device_unmap_grant_refs(XenDevice *xendev, void *map,
+ unsigned int nr_refs, Error **errp)
+{
+if (xengnttab_unmap(xendev->xgth, map, nr_refs)) {
+error_setg_errno(errp, errno, "xengnttab_unmap failed");
+}
+}
+
+static void compat_copy_grant_refs(XenDevice *xendev, bool to_domain,
+   XenDeviceGrantCopySegment segs[],
+   unsigned int nr_segs, Error **errp)
+{
+uint32_t *refs = g_new(uint32_t, nr_segs);
+int prot = to_domain ? PROT_WRITE : PROT_READ;
+void *map;
+unsigned int i;
+
+for (i = 0; i < nr_segs; i++) {
+XenDeviceGrantCopySegment *seg = &segs[i];
+
+refs[i] = to_domain ? seg->dest.foreign.ref :
+seg->source.foreign.ref;
+}
+
+map = xengnttab_map_domain_grant_refs(xendev->xgth, nr_segs,
+  xendev->frontend_id, refs,
+  prot);
+if (!map) {
+error_setg_errno(errp, errno,
+ "xengnttab_map_domain_grant_refs failed");
+goto done;
+}
+
+for (i = 0; i < nr_segs; i++) {
+XenDeviceGrantCopySegment *seg = &segs[i];
+void *page = map + (i * XC_PAGE_SIZE);
+
+if (to_domain) {
+memcpy(page + seg->dest.foreign.offset, seg->source.virt,
+   seg->len);
+} else {
+memcpy(seg->dest.virt, page + seg->source.foreign.offset,
+   seg->len);
+}
+}
+
+if (xengnttab_unmap(xendev->xgth, map, nr_segs)) {
+error_setg_errno(errp, errno, "xengnttab_unmap failed");
+}
+
+done:
+g_free(refs);
+}
+
+void xen_device_copy_grant_refs(XenDevice *xendev, bool to_domain,
+XenDeviceGrantCopySegment segs[],
+unsigned int nr_segs, Error **errp)
+{
+xengnttab_grant_copy_segment_t *xengnttab_segs;
+unsigned int i;
+
+if (!xendev->feature_grant_copy) {
+compat_copy_grant_refs(xendev, to_domain, segs, nr_segs, errp);
+return;
+}
+
+xengnttab_segs = g_new0(xengnttab_grant_copy_segment_t, nr_segs);
+
+for (i = 0; i < nr_segs; i++) {
+XenDeviceGrantCopySegment *seg = &segs[i];
+xengnttab_grant_copy_segment_t *xengnttab_seg = &xengnttab_segs[i];
+
+if (to_domain) {
+xengnttab_seg->flags = GNTCOPY_dest_gref;
+xengnttab_seg->dest.foreign.domid = xendev->frontend_id;
+xengnttab_seg->dest.foreign.ref = seg->dest.foreign.ref;
+xengnttab_seg->dest.foreign.offset = seg->dest.foreign.offset;
+xengnttab_seg->source.virt = seg->source.virt;
+} else {
+xengnttab_seg->flags = GNTCOPY_source_gref;
+xengnttab_seg->source.foreign.domid = xendev->frontend_id;
+xengnttab_seg->source.foreign.ref = seg->source.foreign.ref;
+xengnttab_seg->source.foreign.offset =
+seg->source.foreign.offset;
+xengnttab_seg->dest.virt = seg->dest.virt;
+}
+
+xengnttab_seg->len = seg->len;
+}
+
+if (xengnttab_grant_copy(xendev->xgth, nr_segs, xengnttab_segs)) {
+error_setg_errno(errp, errno, "xengnttab_grant_copy failed");
+goto done;
+}
+
+for (i = 0; i < nr_segs; i++) {
+xengnttab_grant_copy_segment_t *xengnttab_seg = &xengnttab_segs[i];
+
+if (xengntta

[Qemu-block] [PATCH v2 01/18] xen: re-name XenDevice to XenLegacyDevice...

2018-12-06 Thread Paul Durrant

...and xen_backend.h to xen-legacy-backend.h

Rather than attempting to convert the existing backend infrastructure to
be QOM compliant (which would be hard to do in an incremental fashion),
subsequent patches will introduce a completely new framework for Xen PV
backends. Hence it is necessary to re-name parts of existing code to avoid
name clashes. The re-named 'legacy' infrastructure will be removed once all
backends have been ported to the new framework.

This patch is purely cosmetic. No functional change.

Signed-off-by: Paul Durrant 
Acked-by: Anthony Perard 
---
Cc: Stefano Stabellini 
Cc: Greg Kurz 
Cc: Kevin Wolf 
Cc: Max Reitz 
Cc: "Marc-André Lureau" 
Cc: Paolo Bonzini 
Cc: Richard Henderson 
Cc: Eduardo Habkost 
Cc: "Michael S. Tsirkin" 
Cc: Marcel Apfelbaum 
Cc: Jason Wang 
Cc: Gerd Hoffmann 
---
 hw/9pfs/xen-9p-backend.c|  16 +-
 hw/block/xen_disk.c |  24 +-
 hw/char/xen_console.c   |  12 +-
 hw/display/xenfb.c  |  25 +-
 hw/i386/xen/xen-hvm.c   |   2 +-
 hw/i386/xen/xen-mapcache.c  |   2 +-
 hw/i386/xen/xen_platform.c  |   2 +-
 hw/net/xen_nic.c|  14 +-
 hw/usb/xen-usb.c|  25 +-
 hw/xen/Makefile.objs|   2 +-
 hw/xen/xen-common.c |   2 +-
 hw/xen/xen-legacy-backend.c | 854 
 hw/xen/xen_backend.c| 845 ---
 hw/xen/xen_devconfig.c  |   2 +-
 hw/xen/xen_pt.c |   2 +-
 hw/xen/xen_pt_config_init.c |   2 +-
 hw/xen/xen_pt_graphics.c|   2 +-
 hw/xen/xen_pt_msi.c |   2 +-
 hw/xen/xen_pvdev.c  |  20 +-
 hw/xenpv/xen_domainbuild.c  |   2 +-
 hw/xenpv/xen_machine_pv.c   |   2 +-
 include/hw/xen/xen-legacy-backend.h | 104 +
 include/hw/xen/xen_backend.h|  99 -
 include/hw/xen/xen_pvdev.h  |  38 +-
 24 files changed, 1059 insertions(+), 1041 deletions(-)
 create mode 100644 hw/xen/xen-legacy-backend.c
 delete mode 100644 hw/xen/xen_backend.c
 create mode 100644 include/hw/xen/xen-legacy-backend.h
 delete mode 100644 include/hw/xen/xen_backend.h

diff --git a/hw/9pfs/xen-9p-backend.c b/hw/9pfs/xen-9p-backend.c
index 3f54a21..3859a06 100644
--- a/hw/9pfs/xen-9p-backend.c
+++ b/hw/9pfs/xen-9p-backend.c
@@ -12,7 +12,7 @@
 
 #include "hw/hw.h"
 #include "hw/9pfs/9p.h"
-#include "hw/xen/xen_backend.h"
+#include "hw/xen/xen-legacy-backend.h"
 #include "hw/9pfs/xen-9pfs.h"
 #include "qapi/error.h"
 #include "qemu/config-file.h"
@@ -45,7 +45,7 @@ typedef struct Xen9pfsRing {
 } Xen9pfsRing;
 
 typedef struct Xen9pfsDev {
-struct XenDevice xendev;  /* must be first */
+struct XenLegacyDevice xendev;  /* must be first */
 V9fsState state;
 char *path;
 char *security_model;
@@ -56,7 +56,7 @@ typedef struct Xen9pfsDev {
 Xen9pfsRing *rings;
 } Xen9pfsDev;
 
-static void xen_9pfs_disconnect(struct XenDevice *xendev);
+static void xen_9pfs_disconnect(struct XenLegacyDevice *xendev);
 
 static void xen_9pfs_in_sg(Xen9pfsRing *ring,
struct iovec *in_sg,
@@ -243,7 +243,7 @@ static const V9fsTransport xen_9p_transport = {
 .push_and_notify = xen_9pfs_push_and_notify,
 };
 
-static int xen_9pfs_init(struct XenDevice *xendev)
+static int xen_9pfs_init(struct XenLegacyDevice *xendev)
 {
 return 0;
 }
@@ -305,7 +305,7 @@ static void xen_9pfs_evtchn_event(void *opaque)
 qemu_bh_schedule(ring->bh);
 }
 
-static void xen_9pfs_disconnect(struct XenDevice *xendev)
+static void xen_9pfs_disconnect(struct XenLegacyDevice *xendev)
 {
 Xen9pfsDev *xen_9pdev = container_of(xendev, Xen9pfsDev, xendev);
 int i;
@@ -321,7 +321,7 @@ static void xen_9pfs_disconnect(struct XenDevice *xendev)
 }
 }
 
-static int xen_9pfs_free(struct XenDevice *xendev)
+static int xen_9pfs_free(struct XenLegacyDevice *xendev)
 {
 Xen9pfsDev *xen_9pdev = container_of(xendev, Xen9pfsDev, xendev);
 int i;
@@ -354,7 +354,7 @@ static int xen_9pfs_free(struct XenDevice *xendev)
 return 0;
 }
 
-static int xen_9pfs_connect(struct XenDevice *xendev)
+static int xen_9pfs_connect(struct XenLegacyDevice *xendev)
 {
 Error *err = NULL;
 int i;
@@ -467,7 +467,7 @@ out:
 return -1;
 }
 
-static void xen_9pfs_alloc(struct XenDevice *xendev)
+static void xen_9pfs_alloc(struct XenLegacyDevice *xendev)
 {
 xenstore_write_be_str(xendev, "versions", VERSIONS);
 xenstore_write_be_int(xendev, "max-rings", MAX_RINGS);
diff --git a/hw/block/xen_disk.c b/hw/block/xen_disk.c
index 36eff94..75fe55f 100644
--- a/hw/block/xen_disk.c
+++ b/hw/block/xen_disk.c
@@ -25,7 +25,7 @@
 #include 
 
 #include "hw/hw.h"
-#include "hw/xen/xen_backend.h"
+#include "hw/xen/xen-legacy-backend.h"
 #include "xen_blkif.h"
 #include "sysemu/blockdev.h"
 #include "sysemu/iothread.h"
@@ -63,7 +63,7 @@ struct ioreq {
 #define MAX_RING_PAGE_OR

[Qemu-block] [PATCH v2 04/18] xen: create xenstore areas for XenDevice-s

2018-12-06 Thread Paul Durrant

This patch adds a new source module, xen-bus-helper.c, which builds on
basic libxenstore primitives to provide functions to create (setting
permissions appropriately) and destroy xenstore areas, and functions to
'printf' and 'scanf' nodes therein. The main xen-bus code then uses
these primitives [1] to initialize and destroy the frontend and backend
areas for a XenDevice during realize and unrealize respectively.

The 'xen-block' implementation is extended with a 'get_name' method that
returns the VBD number. This number is required to 'name' the xenstore
areas.

NOTE: An exit handler is also added to make sure the xenstore areas are
  cleaned up if QEMU terminates without devices being unrealized.

[1] The 'scanf' functions are actually not yet needed, but they will be
needed by code delivered in subsequent patches.

Signed-off-by: Paul Durrant 
---
Cc: Stefano Stabellini 
Cc: Anthony Perard 
Cc: Kevin Wolf 
Cc: Max Reitz 

v2:
 - Fix boilerplate
 - Add error pointers to all xs_node... helpers
 - Add GCC_FMT_ATTR to declarations of printf-like helpers
---
 hw/block/xen-block.c|   9 ++
 hw/xen/Makefile.objs|   2 +-
 hw/xen/trace-events |  12 +-
 hw/xen/xen-bus-helper.c | 147 ++
 hw/xen/xen-bus.c| 319 +++-
 include/hw/xen/xen-bus-helper.h |  34 +
 include/hw/xen/xen-bus.h|  12 ++
 7 files changed, 530 insertions(+), 5 deletions(-)
 create mode 100644 hw/xen/xen-bus-helper.c
 create mode 100644 include/hw/xen/xen-bus-helper.h

diff --git a/hw/block/xen-block.c b/hw/block/xen-block.c
index 78f4218..440bec2 100644
--- a/hw/block/xen-block.c
+++ b/hw/block/xen-block.c
@@ -13,6 +13,14 @@
 #include "hw/xen/xen-block.h"
 #include "trace.h"
 
+static char *xen_block_get_name(XenDevice *xendev, Error **errp)
+{
+XenBlockDevice *blockdev = XEN_BLOCK_DEVICE(xendev);
+XenBlockVdev *vdev = &blockdev->vdev;
+
+return g_strdup_printf("%lu", vdev->number);
+}
+
 static void xen_block_unrealize(XenDevice *xendev, Error **errp)
 {
 XenBlockDevice *blockdev = XEN_BLOCK_DEVICE(xendev);
@@ -266,6 +274,7 @@ static void xen_block_class_init(ObjectClass *class, void 
*data)
 DeviceClass *dev_class = DEVICE_CLASS(class);
 XenDeviceClass *xendev_class = XEN_DEVICE_CLASS(class);
 
+xendev_class->get_name = xen_block_get_name;
 xendev_class->realize = xen_block_realize;
 xendev_class->unrealize = xen_block_unrealize;
 
diff --git a/hw/xen/Makefile.objs b/hw/xen/Makefile.objs
index d9d6d7b..77c0868 100644
--- a/hw/xen/Makefile.objs
+++ b/hw/xen/Makefile.objs
@@ -1,5 +1,5 @@
 # xen backend driver support
-common-obj-$(CONFIG_XEN) += xen-legacy-backend.o xen_devconfig.o xen_pvdev.o 
xen-common.o xen-bus.o
+common-obj-$(CONFIG_XEN) += xen-legacy-backend.o xen_devconfig.o xen_pvdev.o 
xen-common.o xen-bus.o xen-bus-helper.o
 
 obj-$(CONFIG_XEN_PCI_PASSTHROUGH) += xen-host-pci-device.o
 obj-$(CONFIG_XEN_PCI_PASSTHROUGH) += xen_pt.o xen_pt_config_init.o 
xen_pt_graphics.o xen_pt_msi.o
diff --git a/hw/xen/trace-events b/hw/xen/trace-events
index 0172cd4..75dc226 100644
--- a/hw/xen/trace-events
+++ b/hw/xen/trace-events
@@ -16,5 +16,13 @@ xen_domid_restrict(int err) "err: %u"
 # include/hw/xen/xen-bus.c
 xen_bus_realize(void) ""
 xen_bus_unrealize(void) ""
-xen_device_realize(const char *type) "type: %s"
-xen_device_unrealize(const char *type) "type: %s"
+xen_device_realize(const char *type, char *name) "type: %s name: %s"
+xen_device_unrealize(const char *type, char *name) "type: %s name: %s"
+xen_device_backend_state(const char *type, char *name, const char *state) 
"type: %s name: %s -> %s"
+xen_device_frontend_state(const char *type, char *name, const char *state) 
"type: %s name: %s -> %s"
+
+# include/hw/xen/xen-bus-helper.c
+xs_node_create(const char *node) "%s"
+xs_node_destroy(const char *node) "%s"
+xs_node_vprintf(char *path, char *value) "%s %s"
+xs_node_vscanf(char *path, char *value) "%s %s"
diff --git a/hw/xen/xen-bus-helper.c b/hw/xen/xen-bus-helper.c
new file mode 100644
index 000..2304f8e
--- /dev/null
+++ b/hw/xen/xen-bus-helper.c
@@ -0,0 +1,147 @@
+/*
+ * Copyright (c) 2018  Citrix Systems Inc.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "hw/hw.h"
+#include "hw/sysbus.h"
+#include "hw/xen/xen.h"
+#include "hw/xen/xen-bus.h"
+#include "hw/xen/xen-bus-helper.h"
+#include "qapi/error.h"
+
+#include 
+
+struct xs_state {
+enum xenbus_state statenum;
+const char *statestr;
+};
+#define XS_STATE(state) { state, #state }
+
+static struct xs_state xs_state[] = {
+XS_STATE(XenbusStateUnknown),
+XS_STATE(XenbusStateInitialising),
+XS_STATE(XenbusStateInitWait),
+XS_STATE(XenbusStateInitialised),
+XS_STATE(XenbusStateConnected),
+XS_STATE(XenbusStateClosing),
+XS_STATE(XenbusStateClosed),
+XS_ST

[Qemu-block] [PATCH v2 00/18] Xen PV backend 'qdevification'

2018-12-06 Thread Paul Durrant

This series introduces a new QOM compliant framework for Xen PV backends.
This is achieved by first moving the current non-compliant framework aside,
before building up a new framework incrementally.

This series was prompted by a thread [1] started by Kevin Wolf in response
to patches against xen_disk.c posted by Tim Smith. Therefore, alongside
the patches introducing the new framework, other patches build up a QOM
compliant replacement for 'xen_disk', called 'xen-qdisk'. Patch #16 swaps
this new device into place (having establisheda mechanism to auto-
instantiate devices that is compliant with existing Xen toolstacks in
patch #15) and patch #18 then removes the old xen_disk code.

Subsequent series will port other Xen PV backends across to the new
framework.

The series is also available as a repository branch [2] on xenbits.xen.org.

[1] https://lists.gnu.org/archive/html/qemu-devel/2018-11/msg00259.html
[2] 
http://xenbits.xen.org/gitweb/?p=people/pauldu/qemu.git;a=shortlog;h=refs/heads/qom27

Paul Durrant (18):
  xen: re-name XenDevice to XenLegacyDevice...
  xen: introduce new 'XenBus' and 'XenDevice' object hierarchy
  xen: introduce 'xen-block', 'xen-disk' and 'xen-cdrom'
  xen: create xenstore areas for XenDevice-s
  xen: add xenstore watcher infrastructure
  xen: add grant table interface for XenDevice-s
  xen: add event channel interface for XenDevice-s
  xen: duplicate xen_disk.c as basis of dataplane/xen-block.c
  xen: remove unnecessary code from dataplane/xen-block.c
  xen: add header and build dataplane/xen-block.c
  xen: remove 'XenBlkDev' and 'blkdev' names from dataplane/xen-block
  xen: remove 'ioreq' struct/varable/field names from
dataplane/xen-block.c
  xen: purge 'blk' and 'ioreq' from function names in
dataplane/xen-block.c
  xen: add implementations of xen-block connect and disconnect
functions...
  xen: add a mechanism to automatically create XenDevice-s...
  xen: automatically create XenBlockDevice-s
  MAINTAINERS: add myself as a Xen maintainer
  xen: remove the legacy 'xen_disk' backend

 MAINTAINERS |5 +-
 hw/9pfs/xen-9p-backend.c|   16 +-
 hw/block/Makefile.objs  |2 +-
 hw/block/dataplane/Makefile.objs|1 +
 hw/block/dataplane/xen-block.c  |  814 ++
 hw/block/dataplane/xen-block.h  |   29 +
 hw/block/trace-events   |   11 +
 hw/block/xen-block.c|  853 +++
 hw/block/xen_disk.c | 1011 
 hw/char/xen_console.c   |   12 +-
 hw/display/xenfb.c  |   25 +-
 hw/i386/xen/xen-hvm.c   |5 +-
 hw/i386/xen/xen-mapcache.c  |2 +-
 hw/i386/xen/xen_platform.c  |2 +-
 hw/net/xen_nic.c|   14 +-
 hw/usb/xen-usb.c|   25 +-
 hw/xen/Makefile.objs|2 +-
 hw/xen/trace-events |   25 +
 hw/xen/xen-backend.c|   69 +++
 hw/xen/xen-bus-helper.c |  181 ++
 hw/xen/xen-bus.c| 1094 +++
 hw/xen/xen-common.c |2 +-
 hw/xen/xen-legacy-backend.c |  853 +++
 hw/xen/xen_backend.c|  845 ---
 hw/xen/xen_devconfig.c  |2 +-
 hw/xen/xen_pt.c |2 +-
 hw/xen/xen_pt_config_init.c |2 +-
 hw/xen/xen_pt_graphics.c|2 +-
 hw/xen/xen_pt_msi.c |2 +-
 hw/xen/xen_pvdev.c  |   20 +-
 hw/xenpv/xen_domainbuild.c  |2 +-
 hw/xenpv/xen_machine_pv.c   |5 +-
 include/hw/xen/xen-backend.h|   26 +
 include/hw/xen/xen-block.h  |   79 +++
 include/hw/xen/xen-bus-helper.h |   40 ++
 include/hw/xen/xen-bus.h|  138 +
 include/hw/xen/xen-legacy-backend.h |  104 
 include/hw/xen/xen_backend.h|   99 
 include/hw/xen/xen_pvdev.h  |   38 +-
 include/qemu/module.h   |3 +
 40 files changed, 4420 insertions(+), 2042 deletions(-)
 create mode 100644 hw/block/dataplane/xen-block.c
 create mode 100644 hw/block/dataplane/xen-block.h
 create mode 100644 hw/block/xen-block.c
 delete mode 100644 hw/block/xen_disk.c
 create mode 100644 hw/xen/xen-backend.c
 create mode 100644 hw/xen/xen-bus-helper.c
 create mode 100644 hw/xen/xen-bus.c
 create mode 100644 hw/xen/xen-legacy-backend.c
 delete mode 100644 hw/xen/xen_backend.c
 create mode 100644 include/hw/xen/xen-backend.h
 create mode 100644 include/hw/xen/xen-block.h
 create mode 100644 include/hw/xen/xen-bus-helper.h
 create mode 100644 include/hw/xen/xen-bus.h
 create mode 100644 include/hw/xen/xen-legacy-backend.h
 delete mode 100644 include/hw/xen/xen_backend.h
---
Cc: Anthony Perard 
Cc: Eduardo Habkost 
Cc: Gerd Hoffmann 
Cc: Greg Kurz 
Cc: Jason Wang 
Cc: Kevin Wolf 
Cc: "Marc-André Lureau"

[Qemu-block] [PATCH v2 05/18] xen: add xenstore watcher infrastructure

2018-12-06 Thread Paul Durrant

A Xen PV frontend communicates its state to the PV backend by writing to
the 'state' key in the frontend area in xenstore. It is therefore
necessary for a XenDevice implementation to be notified whenever the
value of this key changes.

This patch adds code to do this as follows:

- an 'fd handler' is registered on the libxenstore handle which will be
  triggered whenever a 'watch' event occurs
- primitives are added to xen-bus-helper to add or remove watch events
- a list of Notifier objects is added to XenBus to provide a mechanism
  to call the appropriate 'watch handler' when its associated event
  occurs

The xen-block implementation is extended with a 'frontend_changed' method,
which calls as-yet stub 'connect' and 'disconnect' functions when the
relevant frontend state transitions occur. A subsequent patch will supply
a full implementation for these functions.

Signed-off-by: Paul Durrant 
---
Cc: Kevin Wolf 
Cc: Max Reitz 
Cc: Stefano Stabellini 
Cc: Anthony Perard 

v2:
 - Don't crash when xen_block_disconnect() fails
 - Check xs_unwatch() for error
 - Add new_watch() and free_watch() utility functions
 - Use xs_check_watch() rather than xs_read_watch()
---
 hw/block/trace-events   |   2 +
 hw/block/xen-block.c|  73 ++
 hw/xen/trace-events |   6 ++
 hw/xen/xen-bus-helper.c |  34 +++
 hw/xen/xen-bus.c| 217 +++-
 include/hw/xen/xen-bus-helper.h |   6 ++
 include/hw/xen/xen-bus.h|  15 +++
 7 files changed, 351 insertions(+), 2 deletions(-)

diff --git a/hw/block/trace-events b/hw/block/trace-events
index 4afbd62..89e2583 100644
--- a/hw/block/trace-events
+++ b/hw/block/trace-events
@@ -130,6 +130,8 @@ xen_disk_free(char *name) "%s"
 
 # hw/block/xen-block.c
 xen_block_realize(const char *type, uint32_t disk, uint32_t partition) "%s 
d%up%u"
+xen_block_connect(const char *type, uint32_t disk, uint32_t partition) "%s 
d%up%u"
+xen_block_disconnect(const char *type, uint32_t disk, uint32_t partition) "%s 
d%up%u"
 xen_block_unrealize(const char *type, uint32_t disk, uint32_t partition) "%s 
d%up%u"
 xen_disk_realize(void) ""
 xen_disk_unrealize(void) ""
diff --git a/hw/block/xen-block.c b/hw/block/xen-block.c
index 440bec2..d2334ef 100644
--- a/hw/block/xen-block.c
+++ b/hw/block/xen-block.c
@@ -21,6 +21,24 @@ static char *xen_block_get_name(XenDevice *xendev, Error 
**errp)
 return g_strdup_printf("%lu", vdev->number);
 }
 
+static void xen_block_disconnect(XenDevice *xendev, Error **errp)
+{
+XenBlockDevice *blockdev = XEN_BLOCK_DEVICE(xendev);
+const char *type = object_get_typename(OBJECT(blockdev));
+XenBlockVdev *vdev = &blockdev->vdev;
+
+trace_xen_block_disconnect(type, vdev->disk, vdev->partition);
+}
+
+static void xen_block_connect(XenDevice *xendev, Error **errp)
+{
+XenBlockDevice *blockdev = XEN_BLOCK_DEVICE(xendev);
+const char *type = object_get_typename(OBJECT(blockdev));
+XenBlockVdev *vdev = &blockdev->vdev;
+
+trace_xen_block_connect(type, vdev->disk, vdev->partition);
+}
+
 static void xen_block_unrealize(XenDevice *xendev, Error **errp)
 {
 XenBlockDevice *blockdev = XEN_BLOCK_DEVICE(xendev);
@@ -36,6 +54,12 @@ static void xen_block_unrealize(XenDevice *xendev, Error 
**errp)
 
 trace_xen_block_unrealize(type, vdev->disk, vdev->partition);
 
+/* Disconnect from the frontend in case this has not already happened */
+xen_block_disconnect(xendev, &local_err);
+if (local_err) {
+error_propagate(errp, local_err);
+}
+
 if (blockdev_class->unrealize) {
 blockdev_class->unrealize(blockdev, &local_err);
 if (local_err) {
@@ -68,6 +92,54 @@ static void xen_block_realize(XenDevice *xendev, Error 
**errp)
 }
 }
 
+static void xen_block_frontend_changed(XenDevice *xendev,
+   enum xenbus_state frontend_state,
+   Error **errp)
+{
+enum xenbus_state backend_state = xen_device_backend_get_state(xendev);
+Error *local_err = NULL;
+
+switch (frontend_state) {
+case XenbusStateInitialised:
+case XenbusStateConnected:
+if (backend_state == XenbusStateConnected) {
+break;
+}
+
+xen_block_disconnect(xendev, &local_err);
+if (local_err) {
+error_propagate(errp, local_err);
+break;
+}
+
+xen_block_connect(xendev, &local_err);
+if (local_err) {
+error_propagate(errp, local_err);
+break;
+}
+
+xen_device_backend_set_state(xendev, XenbusStateConnected);
+break;
+
+case XenbusStateClosing:
+xen_device_backend_set_state(xendev, XenbusStateClosing);
+break;
+
+case XenbusStateClosed:
+xen_block_disconnect(xendev, &local_err);
+if (local_err) {
+error_propagate(errp, local_err);
+break;
+}
+
+xen_dev

[Qemu-block] [PATCH v2 02/18] xen: introduce new 'XenBus' and 'XenDevice' object hierarchy

2018-12-06 Thread Paul Durrant

This patch adds the basic boilerplate for a 'XenBus' object that will act
as a parent to 'XenDevice' PV backends.
A new 'XenBridge' object is also added to connect XenBus to the system bus.

The XenBus object is instantiated by a new xen_bus_init() function called
from the same sites as the legacy xen_be_init() function.

Subsequent patches will flesh-out the functionality of these objects.

Signed-off-by: Paul Durrant 
---
Cc: Stefano Stabellini 
Cc: Anthony Perard 
Cc: "Michael S. Tsirkin" 
Cc: Marcel Apfelbaum 
Cc: Paolo Bonzini 
Cc: Richard Henderson 
Cc: Eduardo Habkost 

v2:
 - Fix boilerplate
 - Make xen-bus hotplug capable
---
 hw/i386/xen/xen-hvm.c |   3 ++
 hw/xen/Makefile.objs  |   2 +-
 hw/xen/trace-events   |   6 +++
 hw/xen/xen-bus.c  | 131 ++
 hw/xenpv/xen_machine_pv.c |   3 ++
 include/hw/xen/xen-bus.h  |  55 +++
 6 files changed, 199 insertions(+), 1 deletion(-)
 create mode 100644 hw/xen/xen-bus.c
 create mode 100644 include/hw/xen/xen-bus.h

diff --git a/hw/i386/xen/xen-hvm.c b/hw/i386/xen/xen-hvm.c
index 1d63763..4497f75 100644
--- a/hw/i386/xen/xen-hvm.c
+++ b/hw/i386/xen/xen-hvm.c
@@ -17,6 +17,7 @@
 #include "hw/i386/apic-msidef.h"
 #include "hw/xen/xen_common.h"
 #include "hw/xen/xen-legacy-backend.h"
+#include "hw/xen/xen-bus.h"
 #include "qapi/error.h"
 #include "qapi/qapi-commands-misc.h"
 #include "qemu/error-report.h"
@@ -1479,6 +1480,8 @@ void xen_hvm_init(PCMachineState *pcms, MemoryRegion 
**ram_memory)
 QLIST_INIT(&state->dev_list);
 device_listener_register(&state->device_listener);
 
+xen_bus_init();
+
 /* Initialize backend core & drivers */
 if (xen_be_init() != 0) {
 error_report("xen backend core setup failed");
diff --git a/hw/xen/Makefile.objs b/hw/xen/Makefile.objs
index 3f64a44..d9d6d7b 100644
--- a/hw/xen/Makefile.objs
+++ b/hw/xen/Makefile.objs
@@ -1,5 +1,5 @@
 # xen backend driver support
-common-obj-$(CONFIG_XEN) += xen-legacy-backend.o xen_devconfig.o xen_pvdev.o 
xen-common.o
+common-obj-$(CONFIG_XEN) += xen-legacy-backend.o xen_devconfig.o xen_pvdev.o 
xen-common.o xen-bus.o
 
 obj-$(CONFIG_XEN_PCI_PASSTHROUGH) += xen-host-pci-device.o
 obj-$(CONFIG_XEN_PCI_PASSTHROUGH) += xen_pt.o xen_pt_config_init.o 
xen_pt_graphics.o xen_pt_msi.o
diff --git a/hw/xen/trace-events b/hw/xen/trace-events
index c7e7a3b..0172cd4 100644
--- a/hw/xen/trace-events
+++ b/hw/xen/trace-events
@@ -12,3 +12,9 @@ xen_unmap_portio_range(uint32_t id, uint64_t start_addr, 
uint64_t end_addr) "id:
 xen_map_pcidev(uint32_t id, uint8_t bus, uint8_t dev, uint8_t func) "id: %u 
bdf: %02x.%02x.%02x"
 xen_unmap_pcidev(uint32_t id, uint8_t bus, uint8_t dev, uint8_t func) "id: %u 
bdf: %02x.%02x.%02x"
 xen_domid_restrict(int err) "err: %u"
+
+# include/hw/xen/xen-bus.c
+xen_bus_realize(void) ""
+xen_bus_unrealize(void) ""
+xen_device_realize(const char *type) "type: %s"
+xen_device_unrealize(const char *type) "type: %s"
diff --git a/hw/xen/xen-bus.c b/hw/xen/xen-bus.c
new file mode 100644
index 000..1385bab
--- /dev/null
+++ b/hw/xen/xen-bus.c
@@ -0,0 +1,131 @@
+/*
+ * Copyright (c) 2018  Citrix Systems Inc.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "hw/hw.h"
+#include "hw/sysbus.h"
+#include "hw/xen/xen-bus.h"
+#include "qapi/error.h"
+#include "trace.h"
+
+static void xen_bus_unrealize(BusState *bus, Error **errp)
+{
+trace_xen_bus_unrealize();
+}
+
+static void xen_bus_realize(BusState *bus, Error **errp)
+{
+trace_xen_bus_realize();
+}
+
+static void xen_bus_class_init(ObjectClass *class, void *data)
+{
+BusClass *bus_class = BUS_CLASS(class);
+
+bus_class->realize = xen_bus_realize;
+bus_class->unrealize = xen_bus_unrealize;
+}
+
+static const TypeInfo xen_bus_type_info = {
+.name = TYPE_XEN_BUS,
+.parent = TYPE_BUS,
+.instance_size = sizeof(XenBus),
+.class_size = sizeof(XenBusClass),
+.class_init = xen_bus_class_init,
+.interfaces = (InterfaceInfo[]) {
+{ TYPE_HOTPLUG_HANDLER },
+{ }
+},
+};
+
+static void xen_device_unrealize(DeviceState *dev, Error **errp)
+{
+XenDevice *xendev = XEN_DEVICE(dev);
+XenDeviceClass *xendev_class = XEN_DEVICE_GET_CLASS(xendev);
+const char *type = object_get_typename(OBJECT(xendev));
+Error *local_err = NULL;
+
+trace_xen_device_unrealize(type);
+
+if (xendev_class->unrealize) {
+xendev_class->unrealize(xendev, &local_err);
+if (local_err) {
+error_propagate(errp, local_err);
+}
+}
+}
+
+static void xen_device_realize(DeviceState *dev, Error **errp)
+{
+XenDevice *xendev = XEN_DEVICE(dev);
+XenDeviceClass *xendev_class = XEN_DEVICE_GET_CLASS(xendev);
+const char *type = object_get_typename(OBJECT(xendev));
+Error *local_err = NULL;
+
+trace_xen_device_realize(type);
+
+

[Qemu-block] [PATCH v3 1/4] vmdk: Refactor vmdk_create_extent

2018-12-06 Thread Kevin Wolf

From: Fam Zheng 

The extracted vmdk_init_extent takes a BlockBackend object and
initializes the format metadata. It is the common part between "qemu-img
create" and "blockdev-create".

Add a "BlockBackend *pbb" parameter to vmdk_create_extent, to return the
opened BB to the caller in the next patch.

Signed-off-by: Fam Zheng 
Signed-off-by: Kevin Wolf 
---
 block/vmdk.c | 69 
 1 file changed, 43 insertions(+), 26 deletions(-)

diff --git a/block/vmdk.c b/block/vmdk.c
index 2c9e86d98f..32fc2c84b3 100644
--- a/block/vmdk.c
+++ b/block/vmdk.c
@@ -1741,35 +1741,17 @@ static int coroutine_fn 
vmdk_co_pwrite_zeroes(BlockDriverState *bs,
 return ret;
 }
 
-static int vmdk_create_extent(const char *filename, int64_t filesize,
-  bool flat, bool compress, bool zeroed_grain,
-  QemuOpts *opts, Error **errp)
+static int vmdk_init_extent(BlockBackend *blk,
+int64_t filesize, bool flat,
+bool compress, bool zeroed_grain,
+Error **errp)
 {
 int ret, i;
-BlockBackend *blk = NULL;
 VMDK4Header header;
-Error *local_err = NULL;
 uint32_t tmp, magic, grains, gd_sectors, gt_size, gt_count;
 uint32_t *gd_buf = NULL;
 int gd_buf_size;
 
-ret = bdrv_create_file(filename, opts, &local_err);
-if (ret < 0) {
-error_propagate(errp, local_err);
-goto exit;
-}
-
-blk = blk_new_open(filename, NULL, NULL,
-   BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL,
-   &local_err);
-if (blk == NULL) {
-error_propagate(errp, local_err);
-ret = -EIO;
-goto exit;
-}
-
-blk_set_allow_write_beyond_eof(blk, true);
-
 if (flat) {
 ret = blk_truncate(blk, filesize, PREALLOC_MODE_OFF, errp);
 goto exit;
@@ -1863,15 +1845,50 @@ static int vmdk_create_extent(const char *filename, 
int64_t filesize,
  gd_buf, gd_buf_size, 0);
 if (ret < 0) {
 error_setg(errp, QERR_IO_ERROR);
-goto exit;
 }
 
 ret = 0;
+exit:
+g_free(gd_buf);
+return ret;
+}
+
+static int vmdk_create_extent(const char *filename, int64_t filesize,
+  bool flat, bool compress, bool zeroed_grain,
+  BlockBackend **pbb,
+  QemuOpts *opts, Error **errp)
+{
+int ret;
+BlockBackend *blk = NULL;
+Error *local_err = NULL;
+
+ret = bdrv_create_file(filename, opts, &local_err);
+if (ret < 0) {
+error_propagate(errp, local_err);
+goto exit;
+}
+
+blk = blk_new_open(filename, NULL, NULL,
+   BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL,
+   &local_err);
+if (blk == NULL) {
+error_propagate(errp, local_err);
+ret = -EIO;
+goto exit;
+}
+
+blk_set_allow_write_beyond_eof(blk, true);
+
+ret = vmdk_init_extent(blk, filesize, flat, compress, zeroed_grain, errp);
 exit:
 if (blk) {
-blk_unref(blk);
+if (pbb) {
+*pbb = blk;
+} else {
+blk_unref(blk);
+blk = NULL;
+}
 }
-g_free(gd_buf);
 return ret;
 }
 
@@ -2094,7 +2111,7 @@ static int coroutine_fn vmdk_co_create_opts(const char 
*filename, QemuOpts *opts
 snprintf(ext_filename, PATH_MAX, "%s%s", path, desc_filename);
 
 if (vmdk_create_extent(ext_filename, size,
-   flat, compress, zeroed_grain, opts, errp)) {
+   flat, compress, zeroed_grain, NULL, opts, 
errp)) {
 ret = -EINVAL;
 goto exit;
 }
-- 
2.19.2

[Qemu-block] [PATCH v3 3/4] iotests: Filter cid numbers in VMDK extent info

2018-12-06 Thread Kevin Wolf

From: Fam Zheng 

Signed-off-by: Fam Zheng 
Signed-off-by: Kevin Wolf 
---
 tests/qemu-iotests/common.filter | 1 +
 tests/qemu-iotests/iotests.py| 1 +
 2 files changed, 2 insertions(+)

diff --git a/tests/qemu-iotests/common.filter b/tests/qemu-iotests/common.filter
index 2031e353a5..1aa7d57140 100644
--- a/tests/qemu-iotests/common.filter
+++ b/tests/qemu-iotests/common.filter
@@ -165,6 +165,7 @@ _filter_img_info()
 -e "/table_size: [0-9]\\+/d" \
 -e "/compat: '[^']*'/d" \
 -e "/compat6: \\(on\\|off\\)/d" \
+-e "s/cid: [0-9]\+/cid: XX/" \
 -e "/static: \\(on\\|off\\)/d" \
 -e "/zeroed_grain: \\(on\\|off\\)/d" \
 -e "/subformat: '[^']*'/d" \
diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py
index d537538ba0..4142937239 100644
--- a/tests/qemu-iotests/iotests.py
+++ b/tests/qemu-iotests/iotests.py
@@ -248,6 +248,7 @@ def filter_img_info(output, filename):
.replace(imgfmt, 'IMGFMT')
 line = re.sub('iters: [0-9]+', 'iters: XXX', line)
 line = re.sub('uuid: [-a-f0-9]+', 'uuid: 
----', line)
+line = re.sub('cid: [0-9]+', 'cid: XX', line)
 lines.append(line)
 return '\n'.join(lines)
 
-- 
2.19.2

[Qemu-block] [PATCH v3 2/4] vmdk: Implement .bdrv_co_create callback

2018-12-06 Thread Kevin Wolf

From: Fam Zheng 

This makes VMDK support blockdev-create. The implementation reuses the
image creation code in vmdk_co_create_opts which now acceptes a callback
pointer to "retrieve" BlockBackend pointers from the caller. This way we
separate the logic between file/extent acquisition and initialization.

The QAPI command parameters are mostly the same as the old create_opts
except the dropped legacy @compat6 switch, which is redundant with
@hwversion.

Signed-off-by: Fam Zheng 
Signed-off-by: Kevin Wolf 
---
 qapi/block-core.json  |  70 +++
 qapi/qapi-schema.json |   1 +
 block/vmdk.c  | 452 ++
 3 files changed, 396 insertions(+), 127 deletions(-)

diff --git a/qapi/block-core.json b/qapi/block-core.json
index d4fe710836..4778f88dd8 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -4021,6 +4021,75 @@
 'size': 'size',
 '*cluster-size' :   'size' } }
 
+##
+# @BlockdevVmdkSubformat:
+#
+# Subformat options for VMDK images
+#
+# @monolithicSparse: Single file image with sparse cluster allocation
+#
+# @monolithicFlat:   Single flat data image and a descriptor file
+#
+# @twoGbMaxExtentSparse: Data is split into 2GB (per virtual LBA) sparse extent
+#files, in addition to a descriptor file
+#
+# @twoGbMaxExtentFlat:   Data is split into 2GB (per virtual LBA) flat extent
+#files, in addition to a descriptor file
+#
+# @streamOptimized:  Single file image sparse cluster allocation, optimized
+#for streaming over network.
+#
+# Since: 4.0
+##
+{ 'enum': 'BlockdevVmdkSubformat',
+  'data': [ 'monolithicSparse', 'monolithicFlat', 'twoGbMaxExtentSparse',
+'twoGbMaxExtentFlat', 'streamOptimized'] }
+
+##
+# @BlockdevVmdkAdapterType:
+#
+# Adapter type info for VMDK images
+#
+# Since: 4.0
+##
+{ 'enum': 'BlockdevVmdkAdapterType',
+  'data': [ 'ide', 'buslogic', 'lsilogic', 'legacyesx'] }
+
+##
+# @BlockdevCreateOptionsVmdk:
+#
+# Driver specific image creation options for VMDK.
+#
+# @file Where to store the new image file. This refers to the image
+#   file for monolithcSparse and streamOptimized format, or the
+#   descriptor file for other formats.
+# @size Size of the virtual disk in bytes
+# @extents  Where to store the data extents. Required for monolithcflat,
+#   twoGbMaxExtentSparse and twoGbMaxExtentFlat formats. For
+#   monolithicflat, only one entry is required; for
+#   twoGbMaxExtent* formats, the number of entries required is
+#   calculated as extent_number = virtual_size / 2GB.
+# @subformatThe subformat of the VMDK image. Default: "monolithicsparse".
+# @backing-file The path of backing file. Default: no backing file is used.
+# @adapter-type The adapter type used to fill in the descriptor. Default: ide.
+# @hwversionHardware version. The meaningful options are "4" or "6".
+#   Defaulted to "4".
+# @zeroed-grain Whether to enable zeroed-grain feature for sparse subformats.
+#   Default: false.
+#
+# Since: 4.0
+##
+{ 'struct': 'BlockdevCreateOptionsVmdk',
+  'data': { 'file': 'BlockdevRef',
+'size': 'size',
+'*extents':  ['BlockdevRef'],
+'*subformat':   'BlockdevVmdkSubformat',
+'*backing-file':'str',
+'*adapter-type':'BlockdevVmdkAdapterType',
+'*hwversion':   'str',
+'*zeroed-grain':'bool' } }
+
+
 ##
 # @SheepdogRedundancyType:
 #
@@ -4215,6 +4284,7 @@
   'ssh':'BlockdevCreateOptionsSsh',
   'vdi':'BlockdevCreateOptionsVdi',
   'vhdx':   'BlockdevCreateOptionsVhdx',
+  'vmdk':   'BlockdevCreateOptionsVmdk',
   'vpc':'BlockdevCreateOptionsVpc'
   } }
 
diff --git a/qapi/qapi-schema.json b/qapi/qapi-schema.json
index 65b6dc2f6f..78e8bcd561 100644
--- a/qapi/qapi-schema.json
+++ b/qapi/qapi-schema.json
@@ -66,6 +66,7 @@
 'ACPISlotType', # DIMM, visible through query-acpi-ospm-status
 'CpuInfoMIPS',  # PC, visible through query-cpu
 'CpuInfoTricore',   # PC, visible through query-cpu
+'BlockdevVmdkSubformat',# all members, to match VMDK spec spellings
 'QapiErrorClass',   # all members, visible through errors
 'UuidInfo', # UUID, visible through query-uuid
 'X86CPURegister32', # all members, visible indirectly through 
qom-get
diff --git a/block/vmdk.c b/block/vmdk.c
index 32fc2c84b3..16f86457d7 100644
--- a/block/vmdk.c
+++ b/block/vmdk.c
@@ -1932,33 +1932,56 @@ static int filename_decompose(const char *filename, 
char *path, char *prefix,
 return VMDK_OK;
 }
 
-static int coroutine_fn vmdk_co_create_opts(const char *filename, QemuOpts 
*opts,
-

[Qemu-block] [PATCH v2 03/18] xen: introduce 'xen-block', 'xen-disk' and 'xen-cdrom'

2018-12-06 Thread Paul Durrant

This patch adds new XenDevice-s: 'xen-disk' and 'xen-cdrom', both derived
from a common 'xen-block' parent type. These will eventually replace the
'xen_disk' (note the underscore rather than hyphen) legacy PV backend but
it is illustrative to build up the implementation incrementally, along with
the XenBus/XenDevice framework. Subsequent patches will therefore add to
these devices' implementation as new features are added to the framework.

After this patch has been applied it is possible to instantiate new
'xen-disk' or 'xen-cdrom' devices with a single 'vdev' parameter, which
accepts values adhering to the Xen VBD naming scheme [1]. For example, a
command-line instantiation of a xen-disk can be done with an argument
similar to the following:

-device xen-disk,vdev=hda

The implementation of the vdev parameter formulates the appropriate VBD
number for use in the PV protocol.

[1] https://xenbits.xen.org/docs/unstable/man/xen-vbd-interface.7.html

Signed-off-by: Paul Durrant 
---
Cc: Kevin Wolf 
Cc: Max Reitz 
Cc: Stefano Stabellini 
Cc: Anthony Perard 

v2:
 - Fix boilerplate
 - Fix vdev parsing
 - Change name from 'xen-qdisk' to 'xen-block', make abstract, and split
   off 'xen-disk' and 'xen-cdrom' as concrete sub-types
---
 MAINTAINERS|   2 +-
 hw/block/Makefile.objs |   1 +
 hw/block/trace-events  |   8 ++
 hw/block/xen-block.c   | 347 +
 include/hw/xen/xen-block.h |  69 +
 5 files changed, 426 insertions(+), 1 deletion(-)
 create mode 100644 hw/block/xen-block.c
 create mode 100644 include/hw/xen/xen-block.h

diff --git a/MAINTAINERS b/MAINTAINERS
index 63effdc..dd728c3 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -403,7 +403,7 @@ F: hw/9pfs/xen-9p-backend.c
 F: hw/char/xen_console.c
 F: hw/display/xenfb.c
 F: hw/net/xen_nic.c
-F: hw/block/xen_*
+F: hw/block/xen*
 F: hw/xen/
 F: hw/xenpv/
 F: hw/i386/xen/
diff --git a/hw/block/Makefile.objs b/hw/block/Makefile.objs
index 53ce575..f34813a 100644
--- a/hw/block/Makefile.objs
+++ b/hw/block/Makefile.objs
@@ -4,6 +4,7 @@ common-obj-$(CONFIG_SSI_M25P80) += m25p80.o
 common-obj-$(CONFIG_NAND) += nand.o
 common-obj-$(CONFIG_PFLASH_CFI01) += pflash_cfi01.o
 common-obj-$(CONFIG_PFLASH_CFI02) += pflash_cfi02.o
+common-obj-$(CONFIG_XEN) += xen-block.o
 common-obj-$(CONFIG_XEN) += xen_disk.o
 common-obj-$(CONFIG_ECC) += ecc.o
 common-obj-$(CONFIG_ONENAND) += onenand.o
diff --git a/hw/block/trace-events b/hw/block/trace-events
index 335c092..4afbd62 100644
--- a/hw/block/trace-events
+++ b/hw/block/trace-events
@@ -127,3 +127,11 @@ xen_disk_init(char *name) "%s"
 xen_disk_connect(char *name) "%s"
 xen_disk_disconnect(char *name) "%s"
 xen_disk_free(char *name) "%s"
+
+# hw/block/xen-block.c
+xen_block_realize(const char *type, uint32_t disk, uint32_t partition) "%s 
d%up%u"
+xen_block_unrealize(const char *type, uint32_t disk, uint32_t partition) "%s 
d%up%u"
+xen_disk_realize(void) ""
+xen_disk_unrealize(void) ""
+xen_cdrom_realize(void) ""
+xen_cdrom_unrealize(void) ""
diff --git a/hw/block/xen-block.c b/hw/block/xen-block.c
new file mode 100644
index 000..78f4218
--- /dev/null
+++ b/hw/block/xen-block.c
@@ -0,0 +1,347 @@
+/*
+ * Copyright (c) 2018  Citrix Systems Inc.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/cutils.h"
+#include "qapi/error.h"
+#include "qapi/visitor.h"
+#include "hw/hw.h"
+#include "hw/xen/xen-block.h"
+#include "trace.h"
+
+static void xen_block_unrealize(XenDevice *xendev, Error **errp)
+{
+XenBlockDevice *blockdev = XEN_BLOCK_DEVICE(xendev);
+XenBlockDeviceClass *blockdev_class =
+XEN_BLOCK_DEVICE_GET_CLASS(xendev);
+const char *type = object_get_typename(OBJECT(blockdev));
+XenBlockVdev *vdev = &blockdev->vdev;
+Error *local_err = NULL;
+
+if (vdev->type == XEN_BLOCK_VDEV_TYPE_INVALID) {
+return;
+}
+
+trace_xen_block_unrealize(type, vdev->disk, vdev->partition);
+
+if (blockdev_class->unrealize) {
+blockdev_class->unrealize(blockdev, &local_err);
+if (local_err) {
+error_propagate(errp, local_err);
+}
+}
+}
+
+static void xen_block_realize(XenDevice *xendev, Error **errp)
+{
+XenBlockDevice *blockdev = XEN_BLOCK_DEVICE(xendev);
+XenBlockDeviceClass *blockdev_class =
+XEN_BLOCK_DEVICE_GET_CLASS(xendev);
+const char *type = object_get_typename(OBJECT(blockdev));
+XenBlockVdev *vdev = &blockdev->vdev;
+Error *local_err = NULL;
+
+if (vdev->type == XEN_BLOCK_VDEV_TYPE_INVALID) {
+error_setg(errp, "vdev property not set");
+return;
+}
+
+trace_xen_block_realize(type, vdev->disk, vdev->partition);
+
+if (blockdev_class->realize) {
+blockdev_class->realize(blockdev, &local_err);
+if (local_err) {
+error_propagate(errp, local_err);
+}
+

[Qemu-block] [PATCH v2 09/18] xen: remove unnecessary code from dataplane/xen-block.c

2018-12-06 Thread Paul Durrant

Not all of the code duplicated from xen_disk.c is required as the basis for
the new dataplane implementation so this patch removes extraneous code,
along with the legacy #includes and calls to the legacy xen_pv_printf()
function. Error messages are changed to be reported using error_report().

NOTE: The code is still not yet built. Further transformations will be
  required to make it correctly interface to the new XenBus/XenDevice
  framework. They will be delivered in a subsequent patch.

Signed-off-by: Paul Durrant 
Acked-by: Anthony Perard 
---
Cc: Stefano Stabellini 
Cc: Stefan Hajnoczi 
Cc: Kevin Wolf 
Cc: Max Reitz 

v2:
 - Leave existing boilerplate alone, other than removing the now-incorrect
   description
---
 hw/block/dataplane/xen-block.c | 409 ++---
 1 file changed, 16 insertions(+), 393 deletions(-)

diff --git a/hw/block/dataplane/xen-block.c b/hw/block/dataplane/xen-block.c
index 9fae505..98f987d 100644
--- a/hw/block/dataplane/xen-block.c
+++ b/hw/block/dataplane/xen-block.c
@@ -1,6 +1,4 @@
 /*
- *  xen paravirt block device backend
- *
  *  (c) Gerd Hoffmann 
  *
  *  This program is free software; you can redistribute it and/or modify
@@ -19,26 +17,12 @@
  *  GNU GPL, version 2 or (at your option) any later version.
  */
 
-#include "qemu/osdep.h"
-#include "qemu/units.h"
-#include 
-#include 
-
-#include "hw/hw.h"
-#include "hw/xen/xen_backend.h"
-#include "xen_blkif.h"
-#include "sysemu/blockdev.h"
-#include "sysemu/iothread.h"
-#include "sysemu/block-backend.h"
-#include "qapi/error.h"
-#include "qapi/qmp/qdict.h"
-#include "qapi/qmp/qstring.h"
-#include "trace.h"
-
-/* - */
-
-#define BLOCK_SIZE  512
-#define IOCB_COUNT  (BLKIF_MAX_SEGMENTS_PER_REQUEST + 2)
+/*
+ * Copyright (c) 2018  Citrix Systems Inc.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
 
 struct ioreq {
 blkif_request_t req;
@@ -101,8 +85,6 @@ struct XenBlkDev {
 AioContext  *ctx;
 };
 
-/* - */
-
 static void ioreq_reset(struct ioreq *ioreq)
 {
 memset(&ioreq->req, 0, sizeof(ioreq->req));
@@ -183,11 +165,6 @@ static int ioreq_parse(struct ioreq *ioreq)
 size_t len;
 int i;
 
-xen_pv_printf(
-xendev, 3,
-"op %d, nr %d, handle %d, id %" PRId64 ", sector %" PRId64 "\n",
-ioreq->req.operation, ioreq->req.nr_segments,
-ioreq->req.handle, ioreq->req.id, ioreq->req.sector_number);
 switch (ioreq->req.operation) {
 case BLKIF_OP_READ:
 break;
@@ -202,28 +179,27 @@ static int ioreq_parse(struct ioreq *ioreq)
 case BLKIF_OP_DISCARD:
 return 0;
 default:
-xen_pv_printf(xendev, 0, "error: unknown operation (%d)\n",
-  ioreq->req.operation);
+error_report("error: unknown operation (%d)", ioreq->req.operation);
 goto err;
 };
 
 if (ioreq->req.operation != BLKIF_OP_READ && blkdev->mode[0] != 'w') {
-xen_pv_printf(xendev, 0, "error: write req for ro device\n");
+error_report("error: write req for ro device");
 goto err;
 }
 
 ioreq->start = ioreq->req.sector_number * blkdev->file_blk;
 for (i = 0; i < ioreq->req.nr_segments; i++) {
 if (i == BLKIF_MAX_SEGMENTS_PER_REQUEST) {
-xen_pv_printf(xendev, 0, "error: nr_segments too big\n");
+error_report("error: nr_segments too big");
 goto err;
 }
 if (ioreq->req.seg[i].first_sect > ioreq->req.seg[i].last_sect) {
-xen_pv_printf(xendev, 0, "error: first > last sector\n");
+error_report("error: first > last sector");
 goto err;
 }
 if (ioreq->req.seg[i].last_sect * BLOCK_SIZE >= XC_PAGE_SIZE) {
-xen_pv_printf(xendev, 0, "error: page crossing\n");
+error_report("error: page crossing");
 goto err;
 }
 
@@ -232,7 +208,7 @@ static int ioreq_parse(struct ioreq *ioreq)
 ioreq->size += len;
 }
 if (ioreq->start + ioreq->size > blkdev->file_size) {
-xen_pv_printf(xendev, 0, "error: access beyond end of file\n");
+error_report("error: access beyond end of file");
 goto err;
 }
 return 0;
@@ -278,8 +254,7 @@ static int ioreq_grant_copy(struct ioreq *ioreq)
 rc = xen_be_copy_grant_refs(xendev, to_domain, segs, count);
 
 if (rc) {
-xen_pv_printf(xendev, 0,
-  "failed to copy data %d\n", rc);
+error_report("failed to copy data %d", rc);
 ioreq->aio_errors++;
 return -1;
 }
@@ -298,8 +273,9 @@ static void qemu_aio_complete(void *opaque, int ret)
 aio_context_acquire(blkdev->ctx);
 
 if (ret != 0) {
-xen_pv_printf(xendev, 0, "%s I/O error\n",
-  ioreq->req.operation ==

[Qemu-block] [PATCH v2 08/18] xen: duplicate xen_disk.c as basis of dataplane/xen-block.c

2018-12-06 Thread Paul Durrant

The new xen-block XenDevice implementation requires the same core
dataplane as the legacy xen_disk implementation it will eventually replace.
This patch therefore copies the legacy xen_disk.c source module into a new
dataplane/xen-block.c source module as the basis for the new dataplane and
adjusts the MAINTAINERS file accordingly.

NOTE: The duplicated code is not yet built. It is simply put into place by
  this patch (just fixing style violations) such that the
  modifications that will need to be made to the code are not
  conflated with code movement, thus making review harder.

Signed-off-by: Paul Durrant 
Acked-by: Anthony Perard 
---
Cc: Stefano Stabellini 
Cc: Stefan Hajnoczi 
Cc: Kevin Wolf 
Cc: Max Reitz 
---
 MAINTAINERS|1 +
 hw/block/dataplane/xen-block.c | 1019 
 2 files changed, 1020 insertions(+)
 create mode 100644 hw/block/dataplane/xen-block.c

diff --git a/MAINTAINERS b/MAINTAINERS
index dd728c3..ab62ad4 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -404,6 +404,7 @@ F: hw/char/xen_console.c
 F: hw/display/xenfb.c
 F: hw/net/xen_nic.c
 F: hw/block/xen*
+F: hw/block/dataplane/xen*
 F: hw/xen/
 F: hw/xenpv/
 F: hw/i386/xen/
diff --git a/hw/block/dataplane/xen-block.c b/hw/block/dataplane/xen-block.c
new file mode 100644
index 000..9fae505
--- /dev/null
+++ b/hw/block/dataplane/xen-block.c
@@ -0,0 +1,1019 @@
+/*
+ *  xen paravirt block device backend
+ *
+ *  (c) Gerd Hoffmann 
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; under version 2 of the License.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with this program; if not, see .
+ *
+ *  Contributions after 2012-01-13 are licensed under the terms of the
+ *  GNU GPL, version 2 or (at your option) any later version.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/units.h"
+#include 
+#include 
+
+#include "hw/hw.h"
+#include "hw/xen/xen_backend.h"
+#include "xen_blkif.h"
+#include "sysemu/blockdev.h"
+#include "sysemu/iothread.h"
+#include "sysemu/block-backend.h"
+#include "qapi/error.h"
+#include "qapi/qmp/qdict.h"
+#include "qapi/qmp/qstring.h"
+#include "trace.h"
+
+/* - */
+
+#define BLOCK_SIZE  512
+#define IOCB_COUNT  (BLKIF_MAX_SEGMENTS_PER_REQUEST + 2)
+
+struct ioreq {
+blkif_request_t req;
+int16_t status;
+
+/* parsed request */
+off_t   start;
+QEMUIOVectorv;
+void*buf;
+size_t  size;
+int presync;
+
+/* aio status */
+int aio_inflight;
+int aio_errors;
+
+struct XenBlkDev*blkdev;
+QLIST_ENTRY(ioreq)   list;
+BlockAcctCookie acct;
+};
+
+#define MAX_RING_PAGE_ORDER 4
+
+struct XenBlkDev {
+struct XenLegacyDevicexendev;  /* must be first */
+char*params;
+char*mode;
+char*type;
+char*dev;
+char*devtype;
+booldirectiosafe;
+const char  *fileproto;
+const char  *filename;
+unsigned intring_ref[1 << MAX_RING_PAGE_ORDER];
+unsigned intnr_ring_ref;
+void*sring;
+int64_t file_blk;
+int64_t file_size;
+int protocol;
+blkif_back_rings_t  rings;
+int more_work;
+
+/* request lists */
+QLIST_HEAD(inflight_head, ioreq) inflight;
+QLIST_HEAD(finished_head, ioreq) finished;
+QLIST_HEAD(freelist_head, ioreq) freelist;
+int requests_total;
+int requests_inflight;
+int requests_finished;
+unsigned intmax_requests;
+
+gbooleanfeature_discard;
+
+/* qemu block driver */
+DriveInfo   *dinfo;
+BlockBackend*blk;
+QEMUBH  *bh;
+
+IOThread*iothread;
+AioContext  *ctx;
+};
+
+/* - */
+
+static void ioreq_reset(struct ioreq *ioreq)
+{
+memset(&ioreq->req, 0, sizeof(ioreq->req));
+ioreq->status = 0;
+ioreq->start = 0;
+ioreq->buf = NULL;
+ioreq->size = 0;
+ioreq->presync = 0;
+
+ioreq->aio_inflight = 0;
+ioreq->aio_errors = 0;
+
+ioreq->blkdev = NULL;
+memset(&ioreq->list, 0, sizeof(ioreq->list));
+memset(&ioreq->acct, 0, sizeof(ioreq->acct))

[Qemu-block] [PATCH v3 4/4] iotests: Add VMDK tests for blockdev-create

2018-12-06 Thread Kevin Wolf

Signed-off-by: Fam Zheng 
Signed-off-by: Kevin Wolf 
---
 tests/qemu-iotests/237 | 201 
 tests/qemu-iotests/237.out | 309 +
 tests/qemu-iotests/group   |   1 +
 3 files changed, 511 insertions(+)
 create mode 100755 tests/qemu-iotests/237
 create mode 100644 tests/qemu-iotests/237.out

diff --git a/tests/qemu-iotests/237 b/tests/qemu-iotests/237
new file mode 100755
index 00..08e575bea2
--- /dev/null
+++ b/tests/qemu-iotests/237
@@ -0,0 +1,201 @@
+#!/usr/bin/env python
+#
+# Test vmdk and file image creation
+#
+# Copyright (C) 2018 Red Hat, Inc.
+#
+# Creator/Owner: Kevin Wolf 
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see .
+#
+
+import iotests
+from iotests import imgfmt
+
+iotests.verify_image_format(supported_fmts=['vmdk'])
+
+def blockdev_create(vm, options):
+result = vm.qmp_log('blockdev-create', job_id='job0', options=options)
+
+if 'return' in result:
+assert result['return'] == {}
+vm.run_job('job0')
+iotests.log("")
+
+with iotests.FilePath('t.vmdk') as disk_path, \
+ iotests.FilePath('t.vmdk.1') as extent1_path, \
+ iotests.FilePath('t.vmdk.2') as extent2_path, \
+ iotests.FilePath('t.vmdk.3') as extent3_path, \
+ iotests.VM() as vm:
+
+#
+# Successful image creation (defaults)
+#
+iotests.log("=== Successful image creation (defaults) ===")
+iotests.log("")
+
+size = 5 * 1024 * 1024 * 1024
+
+vm.launch()
+blockdev_create(vm, { 'driver': 'file',
+  'filename': disk_path,
+  'size': 0 })
+
+vm.qmp_log('blockdev-add', driver='file', filename=disk_path,
+   node_name='imgfile')
+
+blockdev_create(vm, { 'driver': imgfmt,
+  'file': 'imgfile',
+  'size': size })
+vm.shutdown()
+
+iotests.img_info_log(disk_path)
+
+#
+# Successful image creation (inline blockdev-add, explicit defaults)
+#
+iotests.log("=== Successful image creation (inline blockdev-add, explicit 
defaults) ===")
+iotests.log("")
+
+# Choose a different size to show that we got a new image
+size = 64 * 1024 * 1024
+
+vm.launch()
+blockdev_create(vm, { 'driver': 'file',
+  'filename': disk_path,
+  'size': 0 })
+
+blockdev_create(vm, { 'driver': imgfmt,
+  'file': {
+  'driver': 'file',
+  'filename': disk_path,
+  },
+  'size': size,
+  'extents': [],
+  'subformat': 'monolithicSparse',
+  'adapter-type': 'ide',
+  'hwversion': '4',
+  'zeroed-grain': False })
+vm.shutdown()
+
+iotests.img_info_log(disk_path)
+
+#
+# Successful image creation (non-default options)
+#
+iotests.log("=== Successful image creation (with non-default options) ===")
+iotests.log("")
+
+# Choose a different size to show that we got a new image
+size = 32 * 1024 * 1024
+
+vm.launch()
+blockdev_create(vm, { 'driver': 'file',
+  'filename': disk_path,
+  'size': 0 })
+
+blockdev_create(vm, { 'driver': imgfmt,
+  'file': {
+  'driver': 'file',
+  'filename': disk_path,
+  },
+  'size': size,
+  'extents': [],
+  'subformat': 'monolithicSparse',
+  'adapter-type': 'buslogic',
+  'zeroed-grain': True })
+vm.shutdown()
+
+iotests.img_info_log(disk_path)
+
+#
+# Invalid BlockdevRef
+#
+iotests.log("=== Invalid BlockdevRef ===")
+iotests.log("")
+
+vm.launch()
+blockdev_create(vm, { 'driver': imgfmt,
+  'file': "this doesn't exist",
+  'size': size })
+vm.shutdown()
+
+#
+# Other subformats
+#
+iotests.log("=== Other subformats ===")
+iotests.log("")
+
+for path in [ extent1_path, extent2_path, extent3_path ]:
+msg = iotests.qemu_img_pipe('create', '-

[Qemu-block] [PATCH v3 0/4] vmdk: Implement blockdev-create

2018-12-06 Thread Kevin Wolf

I picked up the patch series from Fam and rebased it to current master
(which involved a major rework on the test case) and tried to address
Markus' review comments for v2. I did not do any further review of the
actual code, but it passes the tests, so I guess having it in tree is
better than continuing to let it bitrot.

Fam Zheng (3):
  vmdk: Refactor vmdk_create_extent
  vmdk: Implement .bdrv_co_create callback
  iotests: Filter cid numbers in VMDK extent info

Kevin Wolf (1):
  iotests: Add VMDK tests for blockdev-create

 qapi/block-core.json |  70 +
 qapi/qapi-schema.json|   1 +
 block/vmdk.c | 519 ++-
 tests/qemu-iotests/237   | 201 
 tests/qemu-iotests/237.out   | 309 ++
 tests/qemu-iotests/common.filter |   1 +
 tests/qemu-iotests/group |   1 +
 tests/qemu-iotests/iotests.py|   1 +
 8 files changed, 951 insertions(+), 152 deletions(-)
 create mode 100755 tests/qemu-iotests/237
 create mode 100644 tests/qemu-iotests/237.out

-- 
2.19.2

Re: [Qemu-block] [PATCH 10/14] nbd/client: Split handshake into two functions

2018-12-06 Thread Vladimir Sementsov-Ogievskiy

01.12.2018 1:03, Eric Blake wrote:
> An upcoming patch will add the ability for qemu-nbd to list
> the services provided by an NBD server.  Share the common
> code of the TLS handshake by splitting the initial exchange
> into a separate function, leaving only the export handling
> in the original function.  Functionally, there should be no
> change in behavior in this patch, although some of the code
> motion may be difficult to follow due to indentation changes
> (view with 'git diff -w' for a smaller changeset).
> 
> Signed-off-by: Eric Blake 

Reviewed-by: Vladimir Sementsov-Ogievskiy 

> ---
>   nbd/client.c | 142 ++-
>   nbd/trace-events |   2 +-
>   2 files changed, 92 insertions(+), 52 deletions(-)
> 
> diff --git a/nbd/client.c b/nbd/client.c
> index 1ed5009642e..a282712724d 100644
> --- a/nbd/client.c
> +++ b/nbd/client.c
> @@ -768,21 +768,22 @@ static int nbd_negotiate_simple_meta_context(QIOChannel 
> *ioc,
>   return received || opt == NBD_OPT_LIST_META_CONTEXT;
>   }
> 
> -int nbd_receive_negotiate(QIOChannel *ioc, QCryptoTLSCreds *tlscreds,
> -  const char *hostname, QIOChannel **outioc,
> -  NBDExportInfo *info, Error **errp)
> +/* Start the handshake to the server.  After a positive return, the server
> + * is ready to accept additional NBD_OPT requests.
> + * Returns: negative errno: failure talking to server
> + *  0: server is oldstyle, client must still parse export size
> + *  1: server is newstyle, but can only accept EXPORT_NAME
> + *  2: server is newstyle, but lacks structured replies
> + *  3: server is newstyle and set up for structured replies
> + */

hmm, may be, introduce enum of named constants, instead of raw numbers?

NBD_STARTED_OLD_STYLE
NBD_STARTED_NEW_STYLE
NBD_STARTED_NEW_STYLE_FIXED
NBD_STARTED_STRUCTURED_REPLIES

or, at least, add short comment after each return statement, to not scroll up
every time (like you gracefully do after each case: statement).

However, I'm okay with either way.


-- 
Best regards,
Vladimir

Re: [Qemu-block] [PATCH 15/18] xen: add a mechanism to automatically create XenDevice-s...

2018-12-06 Thread Anthony PERARD

On Thu, Dec 06, 2018 at 12:36:52PM +, Paul Durrant wrote:
> > -Original Message-
> > From: Anthony PERARD [mailto:anthony.per...@citrix.com]
> > Sent: 04 December 2018 15:35
> > 
> > On Wed, Nov 21, 2018 at 03:12:08PM +, Paul Durrant wrote:
> > > +xenbus->backend_watch =
> > > +xen_bus_add_watch(xenbus, "", /* domain root node */
> > > +  "backend", xen_bus_enumerate, xenbus,
> > &local_err);
> > > +if (local_err) {
> > > +error_propagate(errp, local_err);
> > > +error_prepend(errp, "failed to set up enumeration watch: ");
> > 
> > You should use error_propagate_prepend instead
> > error_propagate;error_prepend. And it looks like there is the same
> > mistake in other patches that I haven't notice.
> > 
> 
> Oh, I didn't know about that one either... I've only seen the separate calls 
> used elsewhere.

That information is all in "include/qapi/error.h", if you which to know
more on how to use Error.

> > Also you probably want goto fail here.
> > 
> 
> Not sure about that. Whilst the bus scan won't happen, it doesn't mean 
> devices can't be added via QMP.

In that case, don't modify errp, and use error_reportf_err instead, or
warn_reportf_err (then local_err = NULL, in case it is reused in a
future modification of the function).

Setting errp (with error_propagate) means that the function failed, and
QEMU is going to exit(1), because of qdev_init_nofail call in
xen_bus_init.

> > > +static void xen_device_backend_changed(void *opaque)
> > > +{
> > > +XenDevice *xendev = opaque;
> > > +const char *type = object_get_typename(OBJECT(xendev));
> > > +enum xenbus_state state;
> > > +unsigned int online;
> > > +
> > > +trace_xen_device_backend_changed(type, xendev->name);
> > > +
> > > +if (xen_device_backend_scanf(xendev, "state", "%u", &state) != 1) {
> > > +state = XenbusStateUnknown;
> > > +}
> > > +
> > > +xen_device_backend_set_state(xendev, state);
> > 
> > It's kind of weird to set the internal state base on the external one
> > that something else may have modified. Shouldn't we check that it is
> > fine for something else to modify the state and that it is a correct
> > transition?
> 
> The only thing (apart from this code) that's going to have perms to write the 
> backend state is the toolstack... which is, of course, be definition trusted.

"trusted" doesn't mean that there isn't a bug somewhere else :-). But I
guess it's good enough for now.

> > Also aren't we going in a loop by having QEMU set the state, then the
> > watch fires again? (Not really a loop since the function _set_state
> > check for changes.
> 
> No. It's de-bounced inside the set_state function.
> 
> > 
> > Also maybe we should watch for the state changes only when something
> > else like libxl creates (ask for) the backend, and ignore changes when
> > QEMU did it itself.
> 
> I don't think it's necessary to add that complexity.

Ok.

-- 
Anthony PERARD

Re: [Qemu-block] [PATCH 09/14] nbd/client: Refactor return of nbd_receive_negotiate()

2018-12-06 Thread Vladimir Sementsov-Ogievskiy

01.12.2018 1:03, Eric Blake wrote:
> The function could only ever return 0 or -EINVAL; make this
> clearer by dropping a useless 'fail:' label.
> 
> Signed-off-by: Eric Blake 

Reviewed-by: Vladimir Sementsov-Ogievskiy 

> ---
>   nbd/client.c | 51 +++
>   1 file changed, 23 insertions(+), 28 deletions(-)
> 
> diff --git a/nbd/client.c b/nbd/client.c
> index 27785c55d0a..1ed5009642e 100644
> --- a/nbd/client.c
> +++ b/nbd/client.c

[...]

>   trace_nbd_receive_negotiate_size_flags(info->size, info->flags);
>   if (zeroes && nbd_drop(ioc, 124, errp) < 0) {
>   error_prepend(errp, "Failed to read reserved block: ");
> -goto fail;
> +return -EINVAL;
>   }
> -rc = 0;
> -

hmm, personally I like this empty line

> -fail:
> -return rc;
> +return 0;
>   }
> 
>   #ifdef __linux__
> 


-- 
Best regards,
Vladimir

Re: [Qemu-block] [PATCH 08/14] nbd/client: Refactor nbd_receive_list()

2018-12-06 Thread Vladimir Sementsov-Ogievskiy

01.12.2018 1:03, Eric Blake wrote:
> Add some parameters to make this function reusable in upcoming
> export listing, where we will want to capture the name and
> description rather than compare against a user-supplied name.
> No change in semantics to the existing caller.
> 
> Signed-off-by: Eric Blake 
> ---
>   nbd/client.c | 66 +---
>   1 file changed, 47 insertions(+), 19 deletions(-)
> 
> diff --git a/nbd/client.c b/nbd/client.c
> index 1dc8f83e19a..27785c55d0a 100644
> --- a/nbd/client.c
> +++ b/nbd/client.c
> @@ -232,18 +232,21 @@ static int nbd_handle_reply_err(QIOChannel *ioc, 
> NBDOptionReply *reply,
>   return result;
>   }
> 
> -/* Process another portion of the NBD_OPT_LIST reply.  Set *@match if
> - * the current reply matches @want or if the server does not support
> - * NBD_OPT_LIST, otherwise leave @match alone.  Return 0 if iteration
> - * is complete, positive if more replies are expected, or negative
> - * with @errp set if an unrecoverable error occurred. */
> +/* Process another portion of the NBD_OPT_LIST reply.  If @want, then
> + * set *@match if the current reply matches @want or if the server
> + * does not support NBD_OPT_LIST, otherwise leave @match alone.
> + * Otherwise, @nameout and @description are malloc'd to contain

what this "otherwise" referred to?

upd: aha, after rereading, I understood that it relates to the very first
If, and the whole thing became clear. Hmm, I'm okay with this now, but it
may be still worth some simplifying.

> + * NUL-terminated copies of the reply.

You didn't say anything about @match in "Otherwise" branch

   Return 0 if iteration is

however, "iterations is complete" may differ for @want/!@want cases

> + * complete, positive if more replies are expected, or negative with
> + * @errp set if an unrecoverable error occurred. */
>   static int nbd_receive_list(QIOChannel *ioc, const char *want, bool *match,
> -Error **errp)
> +char **nameout, char **description, Error **errp)
>   {
>   NBDOptionReply reply;
>   uint32_t len;
>   uint32_t namelen;
> -char name[NBD_MAX_NAME_SIZE + 1];
> +char array[NBD_MAX_NAME_SIZE + 1];
> +char *name = array;
>   int error;
> 
>   if (nbd_receive_option_reply(ioc, NBD_OPT_LIST, &reply, errp) < 0) {
> @@ -253,7 +256,12 @@ static int nbd_receive_list(QIOChannel *ioc, const char 
> *want, bool *match,
>   if (error <= 0) {
>   /* The server did not support NBD_OPT_LIST, so set *match on
>* the assumption that any name will be accepted.  */
> -*match = true;
> +if (want) {
> +*match = true;
> +} else if (!error) {
> +error_setg(errp, "Server does not support export lists");
> +error = -1;
> +}
>   return error;
>   }
>   len = reply.length;
> @@ -290,30 +298,49 @@ static int nbd_receive_list(QIOChannel *ioc, const char 
> *want, bool *match,
>   nbd_send_opt_abort(ioc);
>   return -1;
>   }
> -if (namelen != strlen(want)) {
> -if (nbd_drop(ioc, len, errp) < 0) {
> -error_prepend(errp,
> -  "failed to skip export name with wrong length: ");
> -nbd_send_opt_abort(ioc);
> -return -1;
> +if (want) {
> +if (namelen != strlen(want)) {
> +if (nbd_drop(ioc, len, errp) < 0) {
> +error_prepend(errp,
> +  "failed to skip export name with wrong length: 
> ");
> +nbd_send_opt_abort(ioc);
> +return -1;
> +}
> +return 1;
>   }
> -return 1;
> +assert(namelen < sizeof(array));
> +} else {
> +assert(nameout);

this assert looks a bit redundant, if nameout is 0, next line will abort not 
less clearly

> +*nameout = name = g_new(char, namelen + 1);

We should check namelen <= NBD_MAX_NAME_SIZE before it, I think.

>   }
> 
> -assert(namelen < sizeof(name));
>   if (nbd_read(ioc, name, namelen, errp) < 0) {
>   error_prepend(errp, "failed to read export name: ");
>   nbd_send_opt_abort(ioc);
> +if (!want) {
> +free(name);

g_free

> +}
>   return -1;
>   }
>   name[namelen] = '\0';
>   len -= namelen;
> -if (nbd_drop(ioc, len, errp) < 0) {
> +if (!want) {
> +assert(description);

NBD_MAX_NAME_SIZE

> +*description = g_new(char, len + 1);
> +if (nbd_read(ioc, *description, len, errp) < 0) {
> +error_prepend(errp, "failed to read export description: ");
> +nbd_send_opt_abort(ioc);
> +free(name);
> +free(*description);

g_free

> +return -1;
> +}
> +(*description)[len] = '\0';
> +} else if (nbd_drop(ioc, len, errp) < 0) {
>   error_prepend(errp,

[Qemu-block] [PATCH v3] qemu-img info lists bitmap directory entries

2018-12-06 Thread Andrey Shinkevich

The 'Format specific information' of qemu-img info command will show
the name, flags and granularity for every QCOW2 bitmap as follows:

image: /vz/vmprivate/VM1/harddisk.hdd
file format: qcow2
virtual size: 64G (68719476736 bytes)
disk size: 3.0M
cluster_size: 1048576
Format specific information:
compat: 1.1
lazy refcounts: true
bitmaps:
[0]:
flags:
[0]: in-use
[1]: auto
name: back-up1
unknown flags: 4
granularity: 65536
[1]:
flags:
[0]: in-use
[1]: auto
name: back-up2
unknown flags: 8
granularity: 65536
refcount bits: 16
corrupt: false

Signed-off-by: Andrey Shinkevich 
---
v3:
Now, qcow2_get_bitmap_info_list() is invoked under the condition of QCOW
version #3 to avoid memory leaks in case of QCOW version #2.
Furthermore, qcow2_get_bitmap_info_list() checks the number of existing bitmaps.
So, if no bitmap exists, no bitmap error message is printed in the output.
The data type of the bitmap 'granularity' parameter was left as 'uint32'
because bitmap_list_load() returns error if granularity_bits is grater than 31.

v2:
'[PATCH v2] qemu-img info lists bitmap directory entries'
The targeted version of the release at 'Since' word of the comments to the new
structures changed to 4.0 in the file qapi/block-core.json.
A comment to the 'bitmaps' new member was supplied.
The 'unknown flags' parameter was introduced to indicate presence of QCOW2
bitmap unknown flags, if any.
The word 'dirty' was removed from the code and from the comments as we list all
the bitmaps.
The 'bitmaps' printed parameter was removed for the release versions earlier
than 3.x.
The example of the output was moved above the 'Signed-off-by' line.

The first version was '[PATCH] qemu-img info lists bitmap directory entries'

 block/qcow2-bitmap.c | 56 
 block/qcow2.c|  8 
 block/qcow2.h|  2 ++
 qapi/block-core.json | 40 -
 4 files changed, 105 insertions(+), 1 deletion(-)

diff --git a/block/qcow2-bitmap.c b/block/qcow2-bitmap.c
index accebef..00b33c8 100644
--- a/block/qcow2-bitmap.c
+++ b/block/qcow2-bitmap.c
@@ -1008,6 +1008,62 @@ fail:
 return false;
 }
 
+static Qcow2BitmapInfoFlagsList *get_bitmap_info_flags(uint32_t flags)
+{
+Qcow2BitmapInfoFlagsList *list = NULL;
+Qcow2BitmapInfoFlagsList **plist = &list;
+
+if (flags & BME_FLAG_IN_USE) {
+Qcow2BitmapInfoFlagsList *entry = g_new0(Qcow2BitmapInfoFlagsList, 1);
+entry->value = QCOW2_BITMAP_INFO_FLAGS_IN_USE;
+*plist = entry;
+plist = &entry->next;
+}
+if (flags & BME_FLAG_AUTO) {
+Qcow2BitmapInfoFlagsList *entry = g_new0(Qcow2BitmapInfoFlagsList, 1);
+entry->value = QCOW2_BITMAP_INFO_FLAGS_AUTO;
+*plist = entry;
+}
+return list;
+}
+
+Qcow2BitmapInfoList *qcow2_get_bitmap_info_list(BlockDriverState *bs,
+Error **errp)
+{
+BDRVQcow2State *s = bs->opaque;
+Qcow2BitmapList *bm_list;
+Qcow2Bitmap *bm;
+Qcow2BitmapInfoList *list = NULL;
+Qcow2BitmapInfoList **plist = &list;
+
+if (s->nb_bitmaps == 0) {
+return NULL;
+}
+
+bm_list = bitmap_list_load(bs, s->bitmap_directory_offset,
+   s->bitmap_directory_size, errp);
+if (bm_list == NULL) {
+return NULL;
+}
+
+QSIMPLEQ_FOREACH(bm, bm_list, entry) {
+Qcow2BitmapInfo *info = g_new0(Qcow2BitmapInfo, 1);
+Qcow2BitmapInfoList *obj = g_new0(Qcow2BitmapInfoList, 1);
+info->granularity = 1U << bm->granularity_bits;
+info->name = g_strdup(bm->name);
+info->flags = get_bitmap_info_flags(bm->flags);
+info->unknown_flags = bm->flags & ~(BME_FLAG_IN_USE | BME_FLAG_AUTO);
+info->has_unknown_flags = !!info->unknown_flags;
+obj->value = info;
+*plist = obj;
+plist = &obj->next;
+}
+
+bitmap_list_free(bm_list);
+
+return list;
+}
+
 int qcow2_reopen_bitmaps_rw_hint(BlockDriverState *bs, bool *header_updated,
  Error **errp)
 {
diff --git a/block/qcow2.c b/block/qcow2.c
index 991d6ac..a023856 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -4254,6 +4254,8 @@ static ImageInfoSpecific 
*qcow2_get_specific_info(BlockDriverState *bs)
 BDRVQcow2State *s = bs->opaque;
 ImageInfoSpecific *spec_info;
 QCryptoBlockInfo *encrypt_info = NULL;
+Error *local_err = NULL;
+Qcow2BitmapInfoList *bitmaps;
 
 if (s->crypto != NULL) {
 encrypt_info = qcrypto_block_get_info(s->crypto, &error_abort);
@@ -4270,6 +4272,10 @@ static ImageInfoSpecific 
*qcow2_get_specific_info(BlockDriverState *bs)
 .refcount_bits  = s->refcount_bits,
 };
 } else if (s->qcow_version == 3) {
+

Re: [Qemu-block] [PATCH 07/14] nbd/client: Refactor nbd_negotiate_simple_meta_context()

2018-12-06 Thread Vladimir Sementsov-Ogievskiy

01.12.2018 1:03, Eric Blake wrote:
> Change the signature to make it easier for a future patch to
> reuse this function for calling NBD_OPT_LIST_META_CONTEXT with
> 0 or 1 queries.  Also, always allocate space for the received
> name, even if it doesn't match expected lengths (no point
> trying to optimize the unlikely error case, and tracing the
> received rather than expected name can help debug a server
> implementation).
> 
> While there are now slightly different traces, and the error
> message for a server replying with too many contexts is
> different, there are no runtime-observable changes in behavior
> for the more common case of the lone caller interacting with a
> compliant server.
> 
> Signed-off-by: Eric Blake 
> ---
>   nbd/client.c | 105 +++
>   nbd/trace-events |   2 +-
>   2 files changed, 61 insertions(+), 46 deletions(-)
> 
> diff --git a/nbd/client.c b/nbd/client.c
> index b5818a99d21..1dc8f83e19a 100644
> --- a/nbd/client.c
> +++ b/nbd/client.c
> @@ -603,49 +603,57 @@ static QIOChannel *nbd_receive_starttls(QIOChannel *ioc,
>   }
> 
>   /* nbd_negotiate_simple_meta_context:
> - * Set one meta context. Simple means that reply must contain zero (not
> - * negotiated) or one (negotiated) contexts. More contexts would be 
> considered
> - * as a protocol error. It's also implied that meta-data query equals queried
> - * context name, so, if server replies with something different than 
> @context,
> - * it is considered an error too.
> - * return 1 for successful negotiation, context_id is set
> - *0 if operation is unsupported,
> + * List or set meta context data for export @info->name, based on @opt.

hm, just list or set meta context? What is "data" about?

> + * For list, leave @context NULL for 0 queries, or supplied for a single
> + * query; all replies are ignored, and the call merely traces server 
> behavior.
> + * For set, @context must result in at most one matching server reply, in
> + * which case @info->meta_base_allocation_id is set to the resulting id.

Hmm, looks too cheating. Then it should be renamed to
nbd_negotiate_base_allocation

and parameter @context should be renamed to @x_dirty_bitmap,

and if it is unset, we'll use "base:allocation" here.

but in this case, it still weird about opt=list case.. So, it should be named
like nbd_negotiation_helper, as it is doing several different things, which
I can't describe in one word.

> + * return 1 for successful negotiation,
> + *0 if operation is unsupported or context unavailable,
>*-1 with errp set for any other error

this return value description looks not very related to opt=list case

>*/
>   static int nbd_negotiate_simple_meta_context(QIOChannel *ioc,
> - const char *export,
> + int32_t opt,
>const char *context,
> - uint32_t *context_id,
> + NBDExportInfo *info,
>Error **errp)
>   {
>   int ret;
>   NBDOptionReply reply;
>   uint32_t received_id = 0;
>   bool received = false;
> -uint32_t export_len = strlen(export);
> -uint32_t context_len = strlen(context);
> -uint32_t data_len = sizeof(export_len) + export_len +
> -sizeof(uint32_t) + /* number of queries */
> -sizeof(context_len) + context_len;
> -char *data = g_malloc(data_len);
> -char *p = data;
> +uint32_t export_len = strlen(info->name);
> +uint32_t context_len;
> +uint32_t data_len = sizeof(export_len) + export_len + sizeof(uint32_t);

I'd prefer to leave the comment /* number of queries */

> +char *data;
> +char *p;
> 
> -trace_nbd_opt_meta_request(context, export);
> +if (!context) {
> +assert(opt == NBD_OPT_LIST_META_CONTEXT);
> +} else {
> +context_len = strlen(context);
> +data_len += sizeof(context_len) + context_len;
> +}
> +data = g_malloc(data_len);
> +p = data;
> +
> +trace_nbd_opt_meta_request(nbd_opt_lookup(opt), context ?: "(all)",
> +   info->name);
>   stl_be_p(p, export_len);
> -memcpy(p += sizeof(export_len), export, export_len);
> -stl_be_p(p += export_len, 1);
> -stl_be_p(p += sizeof(uint32_t), context_len);
> -memcpy(p += sizeof(context_len), context, context_len);
> +memcpy(p += sizeof(export_len), info->name, export_len);
> +stl_be_p(p += export_len, !!context);
> +if (context) {
> +stl_be_p(p += sizeof(uint32_t), context_len);
> +memcpy(p += sizeof(context_len), context, context_len);
> +}
> 
> -ret = nbd_send_option_request(ioc, NBD_OPT_SET_META_CONTEXT, data_len, 
> data,
> -  errp);
> +ret = nbd_send_option_requ

Re: [Qemu-block] [PATCH 16/18] xen: automatically create XenQdiskDevice-s

2018-12-06 Thread Paul Durrant

> -Original Message-
> From: Anthony PERARD [mailto:anthony.per...@citrix.com]
> Sent: 04 December 2018 16:41
> To: Paul Durrant 
> Cc: qemu-block@nongnu.org; qemu-de...@nongnu.org; xen-
> de...@lists.xenproject.org; Kevin Wolf ; Max Reitz
> ; Stefano Stabellini 
> Subject: Re: [PATCH 16/18] xen: automatically create XenQdiskDevice-s
> 
> On Wed, Nov 21, 2018 at 03:12:09PM +, Paul Durrant wrote:
> > This patch adds a creator function for XenQdiskDevice-s so that they can
> > be created automatically when the Xen toolstack instantiates a new
> > PV backend. When the XenQdiskDevice is created this way it is also
> > necessary to create a drive which matches the configuration that the Xen
> > toolstack has written into xenstore. This drive is marked 'auto_del' so
> > that it will be removed when the XenQdiskDevice is destroyed. Also, for
> > compatibilitye with the legacy 'xen_disk' implementation, an iothread
> > is automatically created for the new XenQdiskDevice. This will also be
> > removed when he XenQdiskDevice is destroyed.
> 
> "the XenQdiskDevice"
> 
> [...]
> > +qemu_opt_set(drive_opts, "file.locking", "off", &local_err);
> 
> That looks new, compared to the xen_disk.c implementation. Maybe that
> should be mention in the commit message.
> 

It's necessary to avoid problems when an emulated device is also present. The 
xen_disk code avoided the issue by basically bypassing the checks and stooging 
into the middle of the block code. I'll add a comment to the code saying why 
locking needs to be off.

> 
> [..]
> 
> > +static void xen_qdisk_device_create(BusState *bus, const char *name,
> > +QDict *opts, Error **errp)
> > +{
> [...]
> > +iothread = iothread_create(vdev, &error_abort);
> 
> I would just propagate the error, since iothread could fail for external
> reason. No need to crash qemu while a VM is running.

True.

> 
> > +
> > +dev = qdev_create(bus, TYPE_XEN_QDISK_DEVICE);
> > +
> > +qdev_prop_set_string(dev, "vdev", vdev);
> > +
> > +if (XEN_QDISK_DEVICE(dev)->vdev.number != number) {
> > +error_setg(errp, "invalid dev parameter '%s'", vdev);
> > +goto unref;
> > +}
> > +
> > +qdev_prop_set_drive(dev, "drive", blk, &local_err);
> > +if (local_err) {
> > +error_propagate(errp, local_err);
> > +error_prepend(errp, "failed to set 'drive': ");
> > +goto unref;
> > +}
> > +
> > +XEN_QDISK_DEVICE(dev)->auto_iothread = iothread;
> > +
> > +qdev_init_nofail(dev);
> 
> That function shouldn't be use during hotplug. But I'm not sure what
> should be done instead, probably object_property_set_bool(..., true
> "realized") and check for error.

Ok, I'll do that.

  Paul

> 
> 
> Thanks,
> 
> --
> Anthony PERARD

Re: [Qemu-block] [PATCH 14/18] xen: add implementations of xen-qdisk connect and disconnect functions...

2018-12-06 Thread Paul Durrant

> -Original Message-
> From: Anthony PERARD [mailto:anthony.per...@citrix.com]
> Sent: 04 December 2018 12:34
> To: Paul Durrant 
> Cc: qemu-block@nongnu.org; qemu-de...@nongnu.org; xen-
> de...@lists.xenproject.org; Stefano Stabellini ;
> Kevin Wolf ; Max Reitz 
> Subject: Re: [PATCH 14/18] xen: add implementations of xen-qdisk connect
> and disconnect functions...
> 
> On Wed, Nov 21, 2018 at 03:12:07PM +, Paul Durrant wrote:
> > diff --git a/hw/block/xen-qdisk.c b/hw/block/xen-qdisk.c
> > index 35f7b70480..8c88393832 100644
> > --- a/hw/block/xen-qdisk.c
> > +++ b/hw/block/xen-qdisk.c
> >  static void xen_qdisk_connect(XenQdiskDevice *qdiskdev, Error **errp)
> >  {
> >  XenQdiskVdev *vdev = &qdiskdev->vdev;
> > +XenDevice *xendev = XEN_DEVICE(qdiskdev);
> > +unsigned int order, nr_ring_ref, *ring_ref, event_channel,
> protocol;
> > +char *str;
> >
> >  trace_xen_qdisk_connect(vdev->disk, vdev->partition);
> > +
> > +if (xen_device_frontend_scanf(xendev, "ring-page-order", "%u",
> > +  &order) != 1) {
> > +nr_ring_ref = 1;
> > +ring_ref = g_new(unsigned int, nr_ring_ref);
> > +
> > +if (xen_device_frontend_scanf(xendev, "ring-ref", "%u",
> > +  &ring_ref[0]) != 1) {
> > +error_setg(errp, "failed to read ring-ref");
> 
> Don't you need to free `ring_ref`?

Yes.

> 
> > +return;
> > +}
> [...]
> 
> > diff --git a/include/hw/xen/xen-qdisk.h b/include/hw/xen/xen-qdisk.h
> > index ade0866037..d7dd2bf0ee 100644
> > --- a/include/hw/xen/xen-qdisk.h
> > +++ b/include/hw/xen/xen-qdisk.h
> > @@ -6,7 +6,15 @@
> >  #ifndef HW_XEN_QDISK_H
> >  #define HW_XEN_QDISK_H
> >
> > +#include "hw/xen/xen.h"
> >  #include "hw/xen/xen-bus.h"
> > +#include "hw/block/block.h"
> > +#include "hw/block/xen_blkif.h"
> > +#include "hw/block/dataplane/xen-qdisk.h"
> > +#include "sysemu/blockdev.h"
> > +#include "sysemu/iothread.h"
> > +#include "sysemu/block-backend.h"
> > +#include "sysemu/iothread.h"
> 
> You don't need that many includes, especially not iothread.h twice ;-).
> 

Oops.

> I think those new includes would be enough:
> #include "hw/block/block.h"; for BlockConf
> #include "sysemu/iothread.h"
> #include "hw/block/dataplane/xen-qdisk.h"
> 

Yes, those seem to be enough.

  Paul

> >
> >  typedef enum XenQdiskVdevType {
> >  XEN_QDISK_VDEV_TYPE_DP,
> > @@ -33,6 +41,10 @@ typedef struct XenQdiskDevice XenQdiskDevice;
> >  struct XenQdiskDevice {
> >  XenDevice xendev;
> >  XenQdiskVdev vdev;
> > +BlockConf conf;
> > +unsigned int max_ring_page_order;
> > +IOThread *iothread;
> > +XenQdiskDataPlane *dataplane;
> >  };
> >
> >  #endif /* HW_XEN_QDISK_H */
> 
> --
> Anthony PERARD

Re: [Qemu-block] [PATCH 15/18] xen: add a mechanism to automatically create XenDevice-s...

2018-12-06 Thread Paul Durrant

> -Original Message-
> From: Anthony PERARD [mailto:anthony.per...@citrix.com]
> Sent: 04 December 2018 15:35
> To: Paul Durrant 
> Cc: qemu-block@nongnu.org; qemu-de...@nongnu.org; xen-
> de...@lists.xenproject.org; Stefano Stabellini 
> Subject: Re: [PATCH 15/18] xen: add a mechanism to automatically create
> XenDevice-s...
> 
> On Wed, Nov 21, 2018 at 03:12:08PM +, Paul Durrant wrote:
> > +xen_backend_device_create(BUS(xenbus), type, name, opts,
> &local_err);
> > +qobject_unref(opts);
> > +
> > +if (local_err) {
> > +const char *msg = error_get_pretty(local_err);
> > +
> > +error_report("failed to create '%s' device '%s': %s", type,
> name,
> > + msg);
> > +error_free(local_err);
> 
> You can use error_reportf_err() instead of those three calls. I may have
> only suggest error_report_err in a previous patch, but error_reportf_err
> does the error_prepend as well.
> 

Ah. I'll go back over the patches and use that where necessary.

> > +}
> > +}
> > +
> > +static void xen_bus_type_enumerate(XenBus *xenbus, const char *type)
> > +{
> > +char *domain_path = g_strdup_printf("backend/%s/%u", type,
> xen_domid);
> > +char **backend;
> > +unsigned int i, n;
> > +
> > +trace_xen_bus_type_enumerate(type);
> > +
> > +backend = xs_directory(xenbus->xsh, XBT_NULL, domain_path, &n);
> > +if (!backend) {
> 
> domain_path isn't free here, you probably want a `goto out` which would
> free everything.

Ok.

> 
> > +return;
> > +}
> > +
> > @@ -193,6 +302,17 @@ static void xen_bus_realize(BusState *bus, Error
> **errp)
> >  notifier_list_init(&xenbus->watch_notifiers);
> >  qemu_set_fd_handler(xs_fileno(xenbus->xsh), xen_bus_watch, NULL,
> >  xenbus);
> > +
> > +module_call_init(MODULE_INIT_XEN_BACKEND);
> > +
> > +xenbus->backend_watch =
> > +xen_bus_add_watch(xenbus, "", /* domain root node */
> > +  "backend", xen_bus_enumerate, xenbus,
> &local_err);
> > +if (local_err) {
> > +error_propagate(errp, local_err);
> > +error_prepend(errp, "failed to set up enumeration watch: ");
> 
> You should use error_propagate_prepend instead
> error_propagate;error_prepend. And it looks like there is the same
> mistake in other patches that I haven't notice.
> 

Oh, I didn't know about that one either... I've only seen the separate calls 
used elsewhere.

> Also you probably want goto fail here.
> 

Not sure about that. Whilst the bus scan won't happen, it doesn't mean devices 
can't be added via QMP.

> 
> > +static void xen_device_backend_changed(void *opaque)
> > +{
> > +XenDevice *xendev = opaque;
> > +const char *type = object_get_typename(OBJECT(xendev));
> > +enum xenbus_state state;
> > +unsigned int online;
> > +
> > +trace_xen_device_backend_changed(type, xendev->name);
> > +
> > +if (xen_device_backend_scanf(xendev, "state", "%u", &state) != 1) {
> > +state = XenbusStateUnknown;
> > +}
> > +
> > +xen_device_backend_set_state(xendev, state);
> 
> It's kind of weird to set the internal state base on the external one
> that something else may have modified. Shouldn't we check that it is
> fine for something else to modify the state and that it is a correct
> transition?

The only thing (apart from this code) that's going to have perms to write the 
backend state is the toolstack... which is, of course, be definition trusted.

> 
> Also aren't we going in a loop by having QEMU set the state, then the
> watch fires again? (Not really a loop since the function _set_state
> check for changes.

No. It's de-bounced inside the set_state function.

> 
> Also maybe we should watch for the state changes only when something
> else like libxl creates (ask for) the backend, and ignore changes when
> QEMU did it itself.

I don't think it's necessary to add that complexity.

> 
> > +
> > +if (xen_device_backend_scanf(xendev, "online", "%u", &online) != 1)
> {
> > +online = 0;
> > +}
> > +
> > +xen_device_backend_set_online(xendev, !!online);
> > +
> > +/*
> > + * If a backend is still 'online' then its state should be cycled
> > + * back round to InitWait in order for a new frontend instance to
> > + * connect. This may happen when, for example, a frontend driver is
> > + * re-installed or updated.
> > + * If a backend id not 'online' then the device should be
> destroyed.
> 
> s/id/is/

Ok.

> 
> > + */
> > +if (xendev->backend_online &&
> > +xendev->backend_state == XenbusStateClosed) {
> > +xen_device_backend_set_state(xendev, XenbusStateInitWait);
> > +} else if (!xendev->backend_online &&
> > +   (xendev->backend_state == XenbusStateClosed ||
> > +xendev->backend_state == XenbusStateInitialising ||
> > +xendev->backend_state == XenbusStateInitWait ||
> > +xendev->backe

Re: [Qemu-block] [qemu-s390x] [Qemu-devel] [PULL 2/2] iotests: simple mirror test with kvm on 1G image

2018-12-06 Thread Christian Borntraeger




On 05.12.2018 17:09, Vladimir Sementsov-Ogievskiy wrote:
> 05.12.2018 18:52, Christian Borntraeger wrote:
>>
>>
>> On 05.12.2018 14:39, Vladimir Sementsov-Ogievskiy wrote:
>>> 05.12.2018 15:35, Christian Borntraeger wrote:


 On 05.12.2018 13:00, Vladimir Sementsov-Ogievskiy wrote:
> 05.12.2018 12:01, Christian Borntraeger wrote:
>>
>>
>> On 05.12.2018 09:46, Kevin Wolf wrote:
>>> Am 05.12.2018 um 09:23 hat Christian Borntraeger geschrieben:
>>> +# prepare source image
>>> +qemu_img_create('-f', iotests.imgfmt, '-o', 
>>> 'preallocation=metadata', disk,
>>> +str(size))
>>> +
>>> +vm = QEMUMachine(iotests.qemu_prog)
>>> +vm.add_args('-machine', 'pc,accel=kvm')

 This (pc) clearly does not work on other architectures.
 In addition to that, I also need to add -no-shutdown on s390 (see 068 
 for a similar case)
>>>
>>> Leaving out pc definitely makes sense, and the bug still reproduces for
>>> me without it.
>>>
>>> I don't understand the -no-shutdown, though. Already for 068, neither
>>> the code nor the commit message when it was added explain why this is
>>> needed.
>>>
>>> Can you turn this into a proper patch and add a comment why -no-shutdown
>>> is needed?
>>
>> I already sent this patch. The reason is that there is no BIOS in a 
>> classical sense
>> on s390x. If no bootable image (external kernel or from disk) is found, 
>> the small boot
>> bios loads a disabled wait PSW. The default action for that is then 
>> shutdown.
>>
>
> Is it an option for you just drop the whole line "vm.add_args('-machine', 
> 'pc,accel=kvm')"?
> The problem without it for me was that gdb failed to produce full and 
> nice backtrace, but
> test worked anyway

 In the commid message Vladimir said that kvm is necessary to trigger the 
 problem.

>>>
>>> No, I didn't)
>>>
>>> and it's in the comment:
>>> # 3. drop kvm and use iotests.VM() (maybe, because of qtest) (however, it 
>>> still
>>> #reproduces, if just drop kvm, but gdb failed to produce full backtraces
>>> #for me)
>>
>> Ok, so I would be fine with completely dropping that line.
>>
>> the patch would then be
>>
>>
>>
>> "-machine pc" will not work all architectures. Lets fall back to the
>> default machine by not specifying anything for the machine.
>>
>> In addition we also need to specify -no-shutdown on s390 as qemu will
>> exit on guest shutdown. This happens when there is no kernel or bootable
>> disk on s390.
>>
>> Signed-off-by: Christian Borntraeger 
>> ---
>>   tests/qemu-iotests/235 | 3 ++-
>>   1 file changed, 2 insertions(+), 1 deletion(-)
>>
>> diff --git a/tests/qemu-iotests/235 b/tests/qemu-iotests/235
>> index da044ed34e..329da8f0c2 100755
>> --- a/tests/qemu-iotests/235
>> +++ b/tests/qemu-iotests/235
>> @@ -49,7 +49,8 @@ qemu_img_create('-f', iotests.imgfmt, '-o', 
>> 'preallocation=metadata', disk,
>>   str(size))
>>   
>>   vm = QEMUMachine(iotests.qemu_prog)
>> -vm.add_args('-machine', 'pc,accel=kvm')
>> +if iotests.qemu_default_machine == 's390-ccw-virtio':
>> +vm.add_args('-no-shutdown')
>>   vm.add_args('-drive', 'id=src,file=' + disk)
>>   vm.launch()
>>   
>>
>>
>> Shall I resend a v2?
>>
> 
> so, we need -no-shutdown even if we drop kvm? I hoped that not.. Hmm. grep 
> points only to one iotest doing the same about no-shutdown - 068..

Kevin, shall I send the above patch as v2?

Re: [Qemu-block] [PATCH v2 5/5] crypto: support multiple threads accessing one QCryptoBlock

2018-12-06 Thread Daniel P . Berrangé

On Wed, Dec 05, 2018 at 05:47:00PM +0300, Vladimir Sementsov-Ogievskiy wrote:
> The two thing that should be handled are cipher and ivgen. For ivgen
> the solution is just mutex, as iv calculations should not be long in
> comparison with encryption/decryption. And for cipher let's just keep
> per-thread ciphers.
> 
> Signed-off-by: Vladimir Sementsov-Ogievskiy 
> ---
>  crypto/blockpriv.h|  16 -
>  include/crypto/block.h|   3 +
>  block/crypto.c|   1 +
>  block/qcow.c  |   2 +-
>  block/qcow2.c |   4 +-
>  crypto/block-luks.c   |  25 +++
>  crypto/block-qcow.c   |  20 +++---
>  crypto/block.c| 146 +-
>  tests/test-crypto-block.c |   3 +
>  9 files changed, 176 insertions(+), 44 deletions(-)
> 
> diff --git a/crypto/blockpriv.h b/crypto/blockpriv.h
> index e9fe3e5687..86dae49210 100644
> --- a/crypto/blockpriv.h
> +++ b/crypto/blockpriv.h
> @@ -22,6 +22,7 @@
>  #define QCRYPTO_BLOCKPRIV_H
>  
>  #include "crypto/block.h"
> +#include "qemu/thread.h"
>  
>  typedef struct QCryptoBlockDriver QCryptoBlockDriver;
>  
> @@ -31,8 +32,12 @@ struct QCryptoBlock {
>  const QCryptoBlockDriver *driver;
>  void *opaque;
>  
> -QCryptoCipher *cipher;
> +QCryptoCipher **ciphers;
> +int n_ciphers;
> +int n_free_ciphers;

size_t for both of these since they're effectively array indexes.

>  QCryptoIVGen *ivgen;
> +QemuMutex mutex;
> +
>  QCryptoHashAlgorithm kdfhash;
>  size_t niv;
>  uint64_t payload_offset; /* In bytes */
> @@ -46,6 +51,7 @@ struct QCryptoBlockDriver {
>  QCryptoBlockReadFunc readfunc,
>  void *opaque,
>  unsigned int flags,
> +int n_threads,

unsigned int, and more below which I won't repeat...


> diff --git a/include/crypto/block.h b/include/crypto/block.h
> index cd18f46d56..866a89b06a 100644
> --- a/include/crypto/block.h
> +++ b/include/crypto/block.h
> @@ -75,6 +75,8 @@ typedef enum {
>   * @readfunc: callback for reading data from the volume
>   * @opaque: data to pass to @readfunc
>   * @flags: bitmask of QCryptoBlockOpenFlags values
> + * @n_threads: prepare block to multi tasking with up to
> + * @n_threads threads

The description is a little awkward, Lets say

  @n_threads: allow concurrent I/O from up to @n_threads threads


> diff --git a/crypto/block-luks.c b/crypto/block-luks.c
> index bbffd80fff..41fb0aa2e7 100644
> --- a/crypto/block-luks.c
> +++ b/crypto/block-luks.c
> @@ -863,7 +863,7 @@ qcrypto_block_luks_open(QCryptoBlock *block,
>  
>   fail:
>  g_free(masterkey);
> -qcrypto_cipher_free(block->cipher);
> +qcrypto_block_free_cipher(block);
>  qcrypto_ivgen_free(block->ivgen);
>  g_free(luks);
>  g_free(password);
> @@ -888,6 +888,7 @@ qcrypto_block_luks_create(QCryptoBlock *block,
>void *opaque,
>Error **errp)
>  {
> +int ret;
>  QCryptoBlockLUKS *luks;
>  QCryptoBlockCreateOptionsLUKS luks_opts;
>  Error *local_err = NULL;
> @@ -1030,11 +1031,11 @@ qcrypto_block_luks_create(QCryptoBlock *block,
>  
>  
>  /* Setup the block device payload encryption objects */
> -block->cipher = qcrypto_cipher_new(luks_opts.cipher_alg,
> -   luks_opts.cipher_mode,
> -   masterkey, luks->header.key_bytes,
> -   errp);
> -if (!block->cipher) {
> +ret = qcrypto_block_init_cipher(block, luks_opts.cipher_alg,
> +luks_opts.cipher_mode,
> +masterkey, luks->header.key_bytes,
> +1, errp);
> +if (ret < 0) {

No need for ret, since we're only using it for a failure check. Just
put the method call in the if ().

> diff --git a/crypto/block.c b/crypto/block.c
> index 3edd9ec251..43426268e6 100644
> --- a/crypto/block.c
> +++ b/crypto/block.c
> @@ -52,10 +52,13 @@ QCryptoBlock *qcrypto_block_open(QCryptoBlockOpenOptions 
> *options,
>   QCryptoBlockReadFunc readfunc,
>   void *opaque,
>   unsigned int flags,
> + int n_threads,
>   Error **errp)
>  {
>  QCryptoBlock *block = g_new0(QCryptoBlock, 1);
>  
> +qemu_mutex_init(&block->mutex);

We need a corresponding qemu_mutex_destroy() when free'ing the QCryptoBlock.

> @@ -148,12 +154,94 @@ int qcrypto_block_encrypt(QCryptoBlock *block,
>  
>  QCryptoCipher *qcrypto_block_get_cipher(QCryptoBlock *block)
>  {
> -return block->cipher;
> +/* ivgen should be accessed under mutex. However, this function is used 
> only
> + * in test with one thread, so it's enough to assert it here:
> + */

Comment is wrong since this is about ciphers,

Re: [Qemu-block] [PATCH v2 3/5] crypto/block: rename qcrypto_block_*crypt_helper

2018-12-06 Thread Daniel P . Berrangé

On Wed, Dec 05, 2018 at 05:46:58PM +0300, Vladimir Sementsov-Ogievskiy wrote:
> Rename qcrypto_block_*crypt_helper to qcrypto_cipher_*crypt_helper, as
> it's not about QCryptoBlock. This is needed to introduce
> qcrypto_block_*crypt_helper in the next commit, which will have
> QCryptoBlock pointer and than will be able to use additional fields of
> it, which in turn will be used to implement thread-safe QCryptoBlock
> operations.

The naming convention I'm following is that methods defined in a file
should all use the name of the file as their prefix. Hence everything
in crypto/block.* should be named with a qcrypto_block_ prefix

> 
> Signed-off-by: Vladimir Sementsov-Ogievskiy 
> ---
>  crypto/blockpriv.h  | 34 +-
>  crypto/block-luks.c | 44 +-
>  crypto/block-qcow.c | 16 ++---
>  crypto/block.c  | 58 ++---
>  4 files changed, 76 insertions(+), 76 deletions(-)
> 
> diff --git a/crypto/blockpriv.h b/crypto/blockpriv.h
> index 41840abcec..347a7c010a 100644
> --- a/crypto/blockpriv.h
> +++ b/crypto/blockpriv.h
> @@ -78,22 +78,22 @@ struct QCryptoBlockDriver {
>  };
>  
>  
> -int qcrypto_block_decrypt_helper(QCryptoCipher *cipher,
> - size_t niv,
> - QCryptoIVGen *ivgen,
> - int sectorsize,
> - uint64_t offset,
> - uint8_t *buf,
> - size_t len,
> - Error **errp);
> -
> -int qcrypto_block_encrypt_helper(QCryptoCipher *cipher,
> - size_t niv,
> - QCryptoIVGen *ivgen,
> - int sectorsize,
> - uint64_t offset,
> - uint8_t *buf,
> - size_t len,
> - Error **errp);
> +int qcrypto_cipher_decrypt_helper(QCryptoCipher *cipher,
> +  size_t niv,
> +  QCryptoIVGen *ivgen,
> +  int sectorsize,
> +  uint64_t offset,
> +  uint8_t *buf,
> +  size_t len,
> +  Error **errp);
> +
> +int qcrypto_cipher_encrypt_helper(QCryptoCipher *cipher,
> +  size_t niv,
> +  QCryptoIVGen *ivgen,
> +  int sectorsize,
> +  uint64_t offset,
> +  uint8_t *buf,
> +  size_t len,
> +  Error **errp);

Rather than dropping 'block' from the method name, just add
'cipher'. eg

  qcrypto_block_cipher_decrypt_helper
  qcrypto_block_cipher_encrypt_helper


Regards,
Daniel
-- 
|: https://berrange.com  -o-https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o-https://fstop138.berrange.com :|
|: https://entangle-photo.org-o-https://www.instagram.com/dberrange :|

Re: [Qemu-block] [PATCH v2 4/5] crypto/block: introduce qcrypto_block_*crypt_helper functions

2018-12-06 Thread Daniel P . Berrangé

On Wed, Dec 05, 2018 at 05:46:59PM +0300, Vladimir Sementsov-Ogievskiy wrote:
> Introduce QCryptoBlock-based functions and use them where possible.
> This is needed to implement thread-safe encrypt/decrypt operations.
> 
> Signed-off-by: Vladimir Sementsov-Ogievskiy 
> ---
>  crypto/blockpriv.h  | 14 ++
>  crypto/block-luks.c | 14 ++
>  crypto/block-qcow.c | 14 ++
>  crypto/block.c  | 26 ++
>  4 files changed, 52 insertions(+), 16 deletions(-)

Reviewed-by: Daniel P. Berrangé 


Regards,
Daniel
-- 
|: https://berrange.com  -o-https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o-https://fstop138.berrange.com :|
|: https://entangle-photo.org-o-https://www.instagram.com/dberrange :|

Re: [Qemu-block] [PATCH v2 1/5] crypto/block-luks: fix memory leak in qcrypto_block_luks_create

2018-12-06 Thread Daniel P . Berrangé

On Wed, Dec 05, 2018 at 05:46:56PM +0300, Vladimir Sementsov-Ogievskiy wrote:
> Free block->cipher and block->ivgen on error path.
> 
> Signed-off-by: Vladimir Sementsov-Ogievskiy 
> ---
>  crypto/block-luks.c | 3 +++
>  1 file changed, 3 insertions(+)

Reviewed-by: Daniel P. Berrangé 


Regards,
Daniel
-- 
|: https://berrange.com  -o-https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o-https://fstop138.berrange.com :|
|: https://entangle-photo.org-o-https://www.instagram.com/dberrange :|

Re: [Qemu-block] [PATCH v2 2/5] crypto/block: refactor qcrypto_block_*crypt_helper functions

2018-12-06 Thread Daniel P . Berrangé

On Wed, Dec 05, 2018 at 05:46:57PM +0300, Vladimir Sementsov-Ogievskiy wrote:
> qcrypto_block_encrypt_helper and qcrypto_block_decrypt_helper are
> almost identical, let's reduce code duplication and simplify further
> improvements.
> 
> Signed-off-by: Vladimir Sementsov-Ogievskiy 
> ---
>  crypto/block.c | 81 +++---
>  1 file changed, 31 insertions(+), 50 deletions(-)
> 
> diff --git a/crypto/block.c b/crypto/block.c
> index e59d1140fe..f4101f0841 100644
> --- a/crypto/block.c
> +++ b/crypto/block.c
> @@ -190,14 +190,21 @@ void qcrypto_block_free(QCryptoBlock *block)
>  }
>  
>  
> -int qcrypto_block_decrypt_helper(QCryptoCipher *cipher,
> - size_t niv,
> - QCryptoIVGen *ivgen,
> - int sectorsize,
> - uint64_t offset,
> - uint8_t *buf,
> - size_t len,
> - Error **errp)
> +typedef int (*QCryptoCipherEncryptFunc)(QCryptoCipher *cipher,
> +const void *in,
> +void *out,
> +size_t len,
> +Error **errp);
> +
> +static int do_qcrypto_block_encrypt(QCryptoCipher *cipher,

Can we call this functuon 'encdec', since it is misleading to call
it just 'encrypt' when its used for decrypt too.

> +size_t niv,
> +QCryptoIVGen *ivgen,
> +int sectorsize,
> +uint64_t offset,
> +uint8_t *buf,
> +size_t len,
> +QCryptoCipherEncryptFunc func,

And call this 'EncDecFunc' too

> +Error **errp)
>  {
>  uint8_t *iv;
>  int ret = -1;
> @@ -226,8 +233,7 @@ int qcrypto_block_decrypt_helper(QCryptoCipher *cipher,
>  }
>  
>  nbytes = len > sectorsize ? sectorsize : len;
> -if (qcrypto_cipher_decrypt(cipher, buf, buf,
> -   nbytes, errp) < 0) {
> +if (func(cipher, buf, buf, nbytes, errp) < 0) {
>  goto cleanup;
>  }
>  
> @@ -243,7 +249,7 @@ int qcrypto_block_decrypt_helper(QCryptoCipher *cipher,
>  }
>  
>  
> -int qcrypto_block_encrypt_helper(QCryptoCipher *cipher,
> +int qcrypto_block_decrypt_helper(QCryptoCipher *cipher,
>   size_t niv,
>   QCryptoIVGen *ivgen,
>   int sectorsize,
> @@ -252,45 +258,20 @@ int qcrypto_block_encrypt_helper(QCryptoCipher *cipher,
>   size_t len,
>   Error **errp)
>  {
> -uint8_t *iv;
> -int ret = -1;
> -uint64_t startsector = offset / sectorsize;
> -
> -assert(QEMU_IS_ALIGNED(offset, sectorsize));
> -assert(QEMU_IS_ALIGNED(len, sectorsize));
> -
> -iv = niv ? g_new0(uint8_t, niv) : NULL;
> -
> -while (len > 0) {
> -size_t nbytes;
> -if (niv) {
> -if (qcrypto_ivgen_calculate(ivgen,
> -startsector,
> -iv, niv,
> -errp) < 0) {
> -goto cleanup;
> -}
> -
> -if (qcrypto_cipher_setiv(cipher,
> - iv, niv,
> - errp) < 0) {
> -goto cleanup;
> -}
> -}
> -
> -nbytes = len > sectorsize ? sectorsize : len;
> -if (qcrypto_cipher_encrypt(cipher, buf, buf,
> -   nbytes, errp) < 0) {
> -goto cleanup;
> -}
> +return do_qcrypto_block_encrypt(cipher, niv, ivgen, sectorsize, offset,
> +buf, len, qcrypto_cipher_decrypt, errp);
> +}
>  
> -startsector++;
> -buf += nbytes;
> -len -= nbytes;
> -}
>  
> -ret = 0;
> - cleanup:
> -g_free(iv);
> -return ret;
> +int qcrypto_block_encrypt_helper(QCryptoCipher *cipher,
> + size_t niv,
> + QCryptoIVGen *ivgen,
> + int sectorsize,
> + uint64_t offset,
> + uint8_t *buf,
> + size_t len,
> + Error **errp)
> +{
> +return do_qcrypto_block_encrypt(cipher, niv, ivgen, sectorsize, offset,
> +buf, len, qcrypto_cipher_encrypt, errp);
>  }
> -- 
> 2.18.0
> 

Regards,
Daniel
-- 
|: https://berrange.com  -o-https://www.fl

[Qemu-block] [QEMU-devel][PATCH v2] aio-posix: Fix concurrent aio_poll/set_fd_handler.

2018-12-06 Thread remy . noel

From: Remy Noel 

It is possible for an io_poll callback to be concurrently executed along
with an aio_set_fd_handlers. This can cause all sorts of problems, like
a NULL callback or a bad opaque pointer.

This changes set_fd_handlers so that it no longer modify existing handlers
entries and instead, always insert those after having proper initialisation.

Also, we do not call aio_epoll_update for deleted handlers as this has
no impact whatsoever.

Signed-off-by: Remy Noel 
---
 util/aio-posix.c | 85 
 util/aio-win32.c | 54 ++
 2 files changed, 74 insertions(+), 65 deletions(-)

diff --git a/util/aio-posix.c b/util/aio-posix.c
index 51c41ed3c9..b34d97292a 100644
--- a/util/aio-posix.c
+++ b/util/aio-posix.c
@@ -200,6 +200,31 @@ static AioHandler *find_aio_handler(AioContext *ctx, int 
fd)
 return NULL;
 }
 
+static bool aio_remove_fd_handler(AioContext *ctx, AioHandler *node)
+{
+/* If the GSource is in the process of being destroyed then
+ * g_source_remove_poll() causes an assertion failure.  Skip
+ * removal in that case, because glib cleans up its state during
+ * destruction anyway.
+ */
+if (!g_source_is_destroyed(&ctx->source)) {
+g_source_remove_poll(&ctx->source, &node->pfd);
+}
+
+/* If a read is in progress, just mark the node as deleted */
+if (qemu_lockcnt_count(&ctx->list_lock)) {
+node->deleted = 1;
+node->pfd.revents = 0;
+return false;
+}
+/* Otherwise, delete it for real.  We can't just mark it as
+ * deleted because deleted nodes are only cleaned up while
+ * no one is walking the handlers list.
+ */
+QLIST_REMOVE(node, node);
+return true;
+}
+
 void aio_set_fd_handler(AioContext *ctx,
 int fd,
 bool is_external,
@@ -209,6 +234,7 @@ void aio_set_fd_handler(AioContext *ctx,
 void *opaque)
 {
 AioHandler *node;
+AioHandler *new_node = NULL;
 bool is_new = false;
 bool deleted = false;
 int poll_disable_change;
@@ -223,50 +249,35 @@ void aio_set_fd_handler(AioContext *ctx,
 qemu_lockcnt_unlock(&ctx->list_lock);
 return;
 }
-
-/* If the GSource is in the process of being destroyed then
- * g_source_remove_poll() causes an assertion failure.  Skip
- * removal in that case, because glib cleans up its state during
- * destruction anyway.
- */
-if (!g_source_is_destroyed(&ctx->source)) {
-g_source_remove_poll(&ctx->source, &node->pfd);
-}
-
-/* If a read is in progress, just mark the node as deleted */
-if (qemu_lockcnt_count(&ctx->list_lock)) {
-node->deleted = 1;
-node->pfd.revents = 0;
-} else {
-/* Otherwise, delete it for real.  We can't just mark it as
- * deleted because deleted nodes are only cleaned up while
- * no one is walking the handlers list.
- */
-QLIST_REMOVE(node, node);
-deleted = true;
-}
+deleted = aio_remove_fd_handler(ctx, node);
 poll_disable_change = -!node->io_poll;
 } else {
 poll_disable_change = !io_poll - (node && !node->io_poll);
 if (node == NULL) {
-/* Alloc and insert if it's not already there */
-node = g_new0(AioHandler, 1);
-node->pfd.fd = fd;
-QLIST_INSERT_HEAD_RCU(&ctx->aio_handlers, node, node);
-
-g_source_add_poll(&ctx->source, &node->pfd);
 is_new = true;
 }
+/* Alloc and insert if it's not already there */
+new_node = g_new0(AioHandler, 1);
 
 /* Update handler with latest information */
-node->io_read = io_read;
-node->io_write = io_write;
-node->io_poll = io_poll;
-node->opaque = opaque;
-node->is_external = is_external;
+new_node->io_read = io_read;
+new_node->io_write = io_write;
+new_node->io_poll = io_poll;
+new_node->opaque = opaque;
+new_node->is_external = is_external;
+
+if (is_new) {
+new_node->pfd.fd = fd;
+} else {
+deleted = aio_remove_fd_handler(ctx, node);
+new_node->pfd = node->pfd;
+}
+g_source_add_poll(&ctx->source, &new_node->pfd);
 
-node->pfd.events = (io_read ? G_IO_IN | G_IO_HUP | G_IO_ERR : 0);
-node->pfd.events |= (io_write ? G_IO_OUT | G_IO_ERR : 0);
+new_node->pfd.events = (io_read ? G_IO_IN | G_IO_HUP | G_IO_ERR : 0);
+new_node->pfd.events |= (io_write ? G_IO_OUT | G_IO_ERR : 0);
+
+QLIST_INSERT_HEAD_RCU(&ctx->aio_handlers, new_node, node);
 }
 
 /* No need to order poll_disable_cnt writes against other updates;
@@ -278,7 +289,9 @@ void aio_set_fd_handler(AioContext *ctx,
 atomic_set(&ctx->poll_disable_c

Re: [Qemu-block] util/aio-posix: Use RCU for handler insertion.

2018-12-06 Thread Remy NOEL

I did some tests and noticed the second and third patch to incur some 
performance loss (on a scenario using virtio device)


I will therefore resubmit just the first patch alone.

On 11/16/18 8:02 PM, remy.n...@blade-group.com wrote:

From: Remy Noel 

get rid of the delete attribute.

We still need to get rid of the context list lock.

Signed-off-by: Remy Noel 
---
  util/aio-posix.c | 75 ++--
  util/aio-win32.c | 43 ++-
  2 files changed, 49 insertions(+), 69 deletions(-)

diff --git a/util/aio-posix.c b/util/aio-posix.c
index b34d97292a..83db3f65f4 100644
--- a/util/aio-posix.c
+++ b/util/aio-posix.c
@@ -16,6 +16,7 @@
  #include "qemu/osdep.h"
  #include "qemu-common.h"
  #include "block/block.h"
+#include "qemu/rcu.h"
  #include "qemu/rcu_queue.h"
  #include "qemu/sockets.h"
  #include "qemu/cutils.h"
@@ -26,13 +27,14 @@
  
  struct AioHandler

  {
+struct rcu_head rcu;
+
  GPollFD pfd;
  IOHandler *io_read;
  IOHandler *io_write;
  AioPollFn *io_poll;
  IOHandler *io_poll_begin;
  IOHandler *io_poll_end;
-int deleted;
  void *opaque;
  bool is_external;
  QLIST_ENTRY(AioHandler) node;
@@ -65,19 +67,25 @@ static bool aio_epoll_try_enable(AioContext *ctx)
  {
  AioHandler *node;
  struct epoll_event event;
+int r = 0;
+
  
+rcu_read_lock();

  QLIST_FOREACH_RCU(node, &ctx->aio_handlers, node) {
-int r;
-if (node->deleted || !node->pfd.events) {
+if (!node->pfd.events) {
  continue;
  }
  event.events = epoll_events_from_pfd(node->pfd.events);
  event.data.ptr = node;
  r = epoll_ctl(ctx->epollfd, EPOLL_CTL_ADD, node->pfd.fd, &event);
  if (r) {
-return false;
+break;
  }
  }
+rcu_read_unlock();
+if (r) {
+return false;
+}
  ctx->epoll_enabled = true;
  return true;
  }
@@ -193,14 +201,13 @@ static AioHandler *find_aio_handler(AioContext *ctx, int 
fd)
  
  QLIST_FOREACH(node, &ctx->aio_handlers, node) {

  if (node->pfd.fd == fd)
-if (!node->deleted)
-return node;
+return node;
  }
  
  return NULL;

  }
  
-static bool aio_remove_fd_handler(AioContext *ctx, AioHandler *node)

+static void aio_remove_fd_handler(AioContext *ctx, AioHandler *node)
  {
  /* If the GSource is in the process of being destroyed then
   * g_source_remove_poll() causes an assertion failure.  Skip
@@ -210,19 +217,7 @@ static bool aio_remove_fd_handler(AioContext *ctx, 
AioHandler *node)
  if (!g_source_is_destroyed(&ctx->source)) {
  g_source_remove_poll(&ctx->source, &node->pfd);
  }
-
-/* If a read is in progress, just mark the node as deleted */
-if (qemu_lockcnt_count(&ctx->list_lock)) {
-node->deleted = 1;
-node->pfd.revents = 0;
-return false;
-}
-/* Otherwise, delete it for real.  We can't just mark it as
- * deleted because deleted nodes are only cleaned up while
- * no one is walking the handlers list.
- */
-QLIST_REMOVE(node, node);
-return true;
+QLIST_REMOVE_RCU(node, node);
  }
  
  void aio_set_fd_handler(AioContext *ctx,

@@ -249,7 +244,8 @@ void aio_set_fd_handler(AioContext *ctx,
  qemu_lockcnt_unlock(&ctx->list_lock);
  return;
  }
-deleted = aio_remove_fd_handler(ctx, node);
+aio_remove_fd_handler(ctx, node);
+deleted = true;
  poll_disable_change = -!node->io_poll;
  } else {
  poll_disable_change = !io_poll - (node && !node->io_poll);
@@ -269,7 +265,8 @@ void aio_set_fd_handler(AioContext *ctx,
  if (is_new) {
  new_node->pfd.fd = fd;
  } else {
-deleted = aio_remove_fd_handler(ctx, node);
+aio_remove_fd_handler(ctx, node);
+deleted = true;
  new_node->pfd = node->pfd;
  }
  g_source_add_poll(&ctx->source, &new_node->pfd);
@@ -296,7 +293,7 @@ void aio_set_fd_handler(AioContext *ctx,
  aio_notify(ctx);
  
  if (deleted) {

-g_free(node);
+g_free_rcu(node, rcu);
  }
  }
  
@@ -345,13 +342,10 @@ static void poll_set_started(AioContext *ctx, bool started)

  ctx->poll_started = started;
  
  qemu_lockcnt_inc(&ctx->list_lock);

+rcu_read_lock();
  QLIST_FOREACH_RCU(node, &ctx->aio_handlers, node) {
  IOHandler *fn;
  
-if (node->deleted) {

-continue;
-}
-
  if (started) {
  fn = node->io_poll_begin;
  } else {
@@ -362,6 +356,7 @@ static void poll_set_started(AioContext *ctx, bool started)
  fn(node->opaque);
  }
  }
+rcu_read_unlock();
  qemu_lockcnt_dec(&ctx->list_lock);
  }
  
@@ -385,6 +380,7 @@ bool aio_pending(AioContext *ctx)

   */
  qemu_lockcnt_inc(&ctx->list_lock);
  
+rcu_re

62 matches

Mail list logo