Re: [PATCH v3 13/28] hw/xen: automatically assign device index to block devices

2023-10-27 Thread David Woodhouse
On Fri, 2023-10-27 at 11:32 +0100, Durrant, Paul wrote:
> On 27/10/2023 11:25, David Woodhouse wrote:
> > On Fri, 2023-10-27 at 10:01 +0100, Durrant, Paul wrote:
> > > 
> > > This code is allocating a name automatically so I think the onus is on
> > > it not create a needless clash which is likely to have unpredictable
> > > results depending on what the guest is. Just avoid any aliasing in the
> > > first place and things will be fine.
> > 
> > 
> > Yeah, fair enough. In which case I'll probably switch to using
> > xs_directory() and then processing those results to find a free slot,
> > instead of going out to XenStore for every existence check.
> > 
> > This isn't exactly fast path and I'm prepared to tolerate a little bit
> > of O(n²), but only within reason :)
> 
> Yes, doing an xs_directory() and then using the code 
> xen_block_get_vdev() to populate a list of existent disks will be neater.

diff --git a/hw/block/xen-block.c b/hw/block/xen-block.c
index 5011fe9430..9b7d7ef7e1 100644
--- a/hw/block/xen-block.c
+++ b/hw/block/xen-block.c
@@ -30,48 +30,105 @@
 #include "hw/xen/interface/io/xs_wire.h"
 #include "trace.h"
 
-static char *xen_block_get_name(XenDevice *xendev, Error **errp)
+#define XVDA_MAJOR 202
+#define XVDQ_MAJOR (1<<20)
+#define XVDBGQCV_MAJOR ((1<<21) - 1)
+#define HDA_MAJOR 3
+#define HDC_MAJOR 22
+#define SDA_MAJOR 8
+
+/*
+ * This is fairly arbitrary just to avoid a stupidly sized bitmap, but Linux
+ * as of v6.6 only supports up to /dev/xvdfan (disk 4095) anyway.
+ */
+#define MAX_AUTO_VDEV 4096
+
+static int vdev_to_diskno(unsigned int vdev_nr)
 {
-XenBlockDevice *blockdev = XEN_BLOCK_DEVICE(xendev);
+switch (vdev_nr >> 8) {
+case XVDA_MAJOR:
+case SDA_MAJOR:
+return (vdev_nr >> 4) & 0x15;
+
+case HDA_MAJOR:
+return (vdev_nr >> 6) & 1;
+
+case HDC_MAJOR:
+return ((vdev_nr >> 6) & 1) + 2;
+
+case XVDQ_MAJOR ... XVDBGQCV_MAJOR:
+return (vdev_nr >> 8) & 0xf;
+
+default:
+return -1;
+}
+}
+
+static bool xen_block_find_free_vdev(XenBlockDevice *blockdev, Error **errp)
+{
+XenBus *xenbus = XEN_BUS(qdev_get_parent_bus(DEVICE(blockdev)));
+unsigned long used_devs[BITS_TO_LONGS(MAX_AUTO_VDEV)];
 XenBlockVdev *vdev = >props.vdev;
+char fe_path[XENSTORE_ABS_PATH_MAX + 1];
+char **existing_frontends;
+unsigned int nr_existing = 0;
+unsigned int vdev_nr;
+int i, disk = 0;
+
+snprintf(fe_path, sizeof(fe_path), "/local/domain/%u/device/vbd",
+ blockdev->xendev.frontend_id);
+
+existing_frontends = qemu_xen_xs_directory(xenbus->xsh, XBT_NULL, fe_path,
+   _existing);
+if (!existing_frontends && errno != ENOENT) {
+error_setg_errno(errp, errno, "cannot read %s", fe_path);
+return false;
+}
 
-if (blockdev->props.vdev.type == XEN_BLOCK_VDEV_TYPE_INVALID) {
-XenBus *xenbus = XEN_BUS(qdev_get_parent_bus(DEVICE(xendev)));
-char fe_path[XENSTORE_ABS_PATH_MAX + 1];
-char *value;
-int disk = 0;
-unsigned long idx;
-
-/* Find an unoccupied device name */
-while (disk < (1 << 20)) {
-if (disk < (1 << 4)) {
-idx = (202 << 8) | (disk << 4);
-} else {
-idx = (1 << 28) | (disk << 8);
-}
-snprintf(fe_path, sizeof(fe_path),
- "/local/domain/%u/device/vbd/%lu",
- xendev->frontend_id, idx);
-value = qemu_xen_xs_read(xenbus->xsh, XBT_NULL, fe_path, NULL);
-if (!value) {
-if (errno == ENOENT) {
-vdev->type = XEN_BLOCK_VDEV_TYPE_XVD;
-vdev->partition = 0;
-vdev->disk = disk;
-vdev->number = idx;
-goto found;
-}
-error_setg(errp, "cannot read %s: %s", fe_path,
-   strerror(errno));
-return NULL;
-}
-free(value);
-disk++;
+memset(used_devs, 0, sizeof(used_devs));
+for (i = 0; i < nr_existing; i++) {
+if (qemu_strtoui(existing_frontends[i], NULL, 10, _nr)) {
+free(existing_frontends[i]);
+continue;
 }
+
+free(existing_frontends[i]);
+
+disk = vdev_to_diskno(vdev_nr);
+if (disk < 0 || disk >= MAX_AUTO_VDEV) {
+continue;
+}
+
+set_bit(disk, used_devs);
+}
+free(existing_frontends);
+
+disk = find_first_zero_bit(used_devs, MAX_AUTO_VDEV);
+if (disk == MAX_AUTO_VDEV) {
 error_setg(errp, "cannot find device vdev for block device");
+return false;
+}
+
+vdev->type = XEN_BLOCK_VDEV_TYPE_XVD;
+vdev->partition = 0;
+vdev->disk = disk;
+if (disk < (1 << 4)) {
+vdev->number = (XVDA_MAJOR << 8) | (disk << 4);
+} else {
+vdev->number = 

Re: [PATCH v3 13/28] hw/xen: automatically assign device index to block devices

2023-10-27 Thread Durrant, Paul

On 27/10/2023 11:25, David Woodhouse wrote:

On Fri, 2023-10-27 at 10:01 +0100, Durrant, Paul wrote:


This code is allocating a name automatically so I think the onus is on
it not create a needless clash which is likely to have unpredictable
results depending on what the guest is. Just avoid any aliasing in the
first place and things will be fine.



Yeah, fair enough. In which case I'll probably switch to using
xs_directory() and then processing those results to find a free slot,
instead of going out to XenStore for every existence check.

This isn't exactly fast path and I'm prepared to tolerate a little bit
of O(n²), but only within reason :)


Yes, doing an xs_directory() and then using the code 
xen_block_get_vdev() to populate a list of existent disks will be neater.





Re: [PATCH v3 13/28] hw/xen: automatically assign device index to block devices

2023-10-27 Thread David Woodhouse
On Fri, 2023-10-27 at 10:01 +0100, Durrant, Paul wrote:
> 
> This code is allocating a name automatically so I think the onus is on 
> it not create a needless clash which is likely to have unpredictable 
> results depending on what the guest is. Just avoid any aliasing in the 
> first place and things will be fine.


Yeah, fair enough. In which case I'll probably switch to using
xs_directory() and then processing those results to find a free slot,
instead of going out to XenStore for every existence check.

This isn't exactly fast path and I'm prepared to tolerate a little bit
of O(n²), but only within reason :)


smime.p7s
Description: S/MIME cryptographic signature


Re: [PATCH v3 13/28] hw/xen: automatically assign device index to block devices

2023-10-27 Thread Durrant, Paul

On 27/10/2023 09:45, David Woodhouse wrote:

On Fri, 2023-10-27 at 08:30 +0100, Durrant, Paul wrote:



+    if (blockdev->props.vdev.type == XEN_BLOCK_VDEV_TYPE_INVALID) {
+    XenBus *xenbus = XEN_BUS(qdev_get_parent_bus(DEVICE(xendev)));
+    char fe_path[XENSTORE_ABS_PATH_MAX + 1];
+    char *value;
+    int disk = 0;
+    unsigned long idx;
+
+    /* Find an unoccupied device name */


Not sure this is going to work is it? What happens if 'hda' or 'sda', or
'd0' exists? I think you need to use the core of the code in
xen_block_set_vdev() to generate names and search all possible encodings
for each disk.


Do we care? You're allowed to have *all* of "hda", "sda" and "xvda" at
the same time. If a user explicitly provides "sda" and then provides
another disk without giving it a name, we're allowed to use "xvda".



Maybe sda and xvda can co-exist, but

https://xenbits.xen.org/gitweb/?p=xen.git;a=blob;f=docs/man/xen-vbd-interface.7.pandoc;h=ba0d159dfa7eaf359922583ccd6d2b413acddb13;hb=HEAD#l125

says that you'll likely run into trouble if hda exists and you happen to 
create xvda.



Hell, you can also have *separate* backing stores provided as "hda1",
"sda1" and "xvda1". I *might* have tolerated a heckle that this
function should check for at least the latter of those, but when I was
first coding it up I was more inclined to argue "Don't Do That Then".


This code is allocating a name automatically so I think the onus is on 
it not create a needless clash which is likely to have unpredictable 
results depending on what the guest is. Just avoid any aliasing in the 
first place and things will be fine.


  Paul




Re: [PATCH v3 13/28] hw/xen: automatically assign device index to block devices

2023-10-27 Thread David Woodhouse
On Fri, 2023-10-27 at 08:30 +0100, Durrant, Paul wrote:
> 
> > +    if (blockdev->props.vdev.type == XEN_BLOCK_VDEV_TYPE_INVALID) {
> > +    XenBus *xenbus = XEN_BUS(qdev_get_parent_bus(DEVICE(xendev)));
> > +    char fe_path[XENSTORE_ABS_PATH_MAX + 1];
> > +    char *value;
> > +    int disk = 0;
> > +    unsigned long idx;
> > +
> > +    /* Find an unoccupied device name */
> 
> Not sure this is going to work is it? What happens if 'hda' or 'sda', or 
> 'd0' exists? I think you need to use the core of the code in 
> xen_block_set_vdev() to generate names and search all possible encodings 
> for each disk.

Do we care? You're allowed to have *all* of "hda", "sda" and "xvda" at
the same time. If a user explicitly provides "sda" and then provides
another disk without giving it a name, we're allowed to use "xvda".

Hell, you can also have *separate* backing stores provided as "hda1",
"sda1" and "xvda1". I *might* have tolerated a heckle that this
function should check for at least the latter of those, but when I was
first coding it up I was more inclined to argue "Don't Do That Then".


smime.p7s
Description: S/MIME cryptographic signature


Re: [PATCH v3 13/28] hw/xen: automatically assign device index to block devices

2023-10-27 Thread Durrant, Paul

On 25/10/2023 15:50, David Woodhouse wrote:

From: David Woodhouse 

There's no need to force the user to assign a vdev. We can automatically
assign one, starting at xvda and searching until we find the first disk
name that's unused.

This means we can now allow '-drive if=xen,file=xxx' to work without an
explicit separate -driver argument, just like if=virtio.

Rip out the legacy handling from the xenpv machine, which was scribbling
over any disks configured by the toolstack, and didn't work with anything
but raw images.

Signed-off-by: David Woodhouse 
Acked-by: Kevin Wolf 
---
  blockdev.c  | 15 +---
  hw/block/xen-block.c| 38 +
  hw/xen/xen_devconfig.c  | 28 -
  hw/xenpv/xen_machine_pv.c   |  9 ---
  include/hw/xen/xen-legacy-backend.h |  1 -
  5 files changed, 50 insertions(+), 41 deletions(-)

diff --git a/blockdev.c b/blockdev.c
index a01c62596b..5d9f2e5395 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -255,13 +255,13 @@ void drive_check_orphaned(void)
   * Ignore default drives, because we create certain default
   * drives unconditionally, then leave them unclaimed.  Not the
   * users fault.
- * Ignore IF_VIRTIO, because it gets desugared into -device,
- * so we can leave failing to -device.
+ * Ignore IF_VIRTIO or IF_XEN, because it gets desugared into
+ * -device, so we can leave failing to -device.
   * Ignore IF_NONE, because leaving unclaimed IF_NONE remains
   * available for device_add is a feature.
   */
  if (dinfo->is_default || dinfo->type == IF_VIRTIO
-|| dinfo->type == IF_NONE) {
+|| dinfo->type == IF_XEN || dinfo->type == IF_NONE) {
  continue;
  }
  if (!blk_get_attached_dev(blk)) {
@@ -977,6 +977,15 @@ DriveInfo *drive_new(QemuOpts *all_opts, 
BlockInterfaceType block_default_type,
  qemu_opt_set(devopts, "driver", "virtio-blk", _abort);
  qemu_opt_set(devopts, "drive", qdict_get_str(bs_opts, "id"),
   _abort);
+} else if (type == IF_XEN) {
+QemuOpts *devopts;
+devopts = qemu_opts_create(qemu_find_opts("device"), NULL, 0,
+   _abort);
+qemu_opt_set(devopts, "driver",
+ (media == MEDIA_CDROM) ? "xen-cdrom" : "xen-disk",
+ _abort);
+qemu_opt_set(devopts, "drive", qdict_get_str(bs_opts, "id"),
+ _abort);
  }
  
  filename = qemu_opt_get(legacy_opts, "file");

diff --git a/hw/block/xen-block.c b/hw/block/xen-block.c
index 64470fc579..5011fe9430 100644
--- a/hw/block/xen-block.c
+++ b/hw/block/xen-block.c
@@ -27,6 +27,7 @@
  #include "sysemu/block-backend.h"
  #include "sysemu/iothread.h"
  #include "dataplane/xen-block.h"
+#include "hw/xen/interface/io/xs_wire.h"
  #include "trace.h"
  
  static char *xen_block_get_name(XenDevice *xendev, Error **errp)

@@ -34,6 +35,43 @@ static char *xen_block_get_name(XenDevice *xendev, Error 
**errp)
  XenBlockDevice *blockdev = XEN_BLOCK_DEVICE(xendev);
  XenBlockVdev *vdev = >props.vdev;
  
+if (blockdev->props.vdev.type == XEN_BLOCK_VDEV_TYPE_INVALID) {

+XenBus *xenbus = XEN_BUS(qdev_get_parent_bus(DEVICE(xendev)));
+char fe_path[XENSTORE_ABS_PATH_MAX + 1];
+char *value;
+int disk = 0;
+unsigned long idx;
+
+/* Find an unoccupied device name */


Not sure this is going to work is it? What happens if 'hda' or 'sda', or 
'd0' exists? I think you need to use the core of the code in 
xen_block_set_vdev() to generate names and search all possible encodings 
for each disk.


  Paul


+while (disk < (1 << 20)) {
+if (disk < (1 << 4)) {
+idx = (202 << 8) | (disk << 4);
+} else {
+idx = (1 << 28) | (disk << 8);
+}
+snprintf(fe_path, sizeof(fe_path),
+ "/local/domain/%u/device/vbd/%lu",
+ xendev->frontend_id, idx);
+value = qemu_xen_xs_read(xenbus->xsh, XBT_NULL, fe_path, NULL);
+if (!value) {
+if (errno == ENOENT) {
+vdev->type = XEN_BLOCK_VDEV_TYPE_XVD;
+vdev->partition = 0;
+vdev->disk = disk;
+vdev->number = idx;
+goto found;
+}
+error_setg(errp, "cannot read %s: %s", fe_path,
+   strerror(errno));
+return NULL;
+}
+free(value);
+disk++;
+}
+error_setg(errp, "cannot find device vdev for block device");
+return NULL;
+}
+ found:
  return g_strdup_printf("%lu", vdev->number);
  }
  
diff --git a/hw/xen/xen_devconfig.c b/hw/xen/xen_devconfig.c

index