from:"Cédric Le Goater"


On 2/27/24 21:36, Caleb Schlossin wrote:

Big (SMT8) cores have a complicated function to map the core, thread ID
to pervasive topology (PIR). Fix this for power8, power9, and power10.

Signed-off-by: Caleb Schlossin 
---

Version 2 fixes the PIR calculation for core, thread ID
for power10 big cores (SMT8).


Looks good for SMT4, and this change prepares the ground for SMT8. We would
need a new CPU definition to activate big cores. It can come later.

Reviewed-by: Cédric Le Goater 

Thanks,

C.





  include/hw/ppc/pnv_chip.h |  2 +-
  include/hw/ppc/pnv_core.h |  1 +
  hw/ppc/pnv.c  | 71 ---
  hw/ppc/pnv_core.c |  8 ++---
  target/ppc/misc_helper.c  |  3 --
  5 files changed, 57 insertions(+), 28 deletions(-)

diff --git a/include/hw/ppc/pnv_chip.h b/include/hw/ppc/pnv_chip.h
index af4cd7a8b8..8589f3291e 100644
--- a/include/hw/ppc/pnv_chip.h
+++ b/include/hw/ppc/pnv_chip.h
@@ -147,7 +147,7 @@ struct PnvChipClass {
  
  DeviceRealize parent_realize;
  
-uint32_t (*core_pir)(PnvChip *chip, uint32_t core_id);

+uint32_t (*chip_pir)(PnvChip *chip, uint32_t core_id, uint32_t thread_id);
  void (*intc_create)(PnvChip *chip, PowerPCCPU *cpu, Error **errp);
  void (*intc_reset)(PnvChip *chip, PowerPCCPU *cpu);
  void (*intc_destroy)(PnvChip *chip, PowerPCCPU *cpu);
diff --git a/include/hw/ppc/pnv_core.h b/include/hw/ppc/pnv_core.h
index 4db21229a6..c6d62fd145 100644
--- a/include/hw/ppc/pnv_core.h
+++ b/include/hw/ppc/pnv_core.h
@@ -36,6 +36,7 @@ struct PnvCore {
  /*< public >*/
  PowerPCCPU **threads;
  uint32_t pir;
+uint32_t hwid;
  uint64_t hrmor;
  PnvChip *chip;
  
diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c

index 0b47b92baa..aa5aba60b4 100644
--- a/hw/ppc/pnv.c
+++ b/hw/ppc/pnv.c
@@ -141,8 +141,10 @@ static void pnv_dt_core(PnvChip *chip, PnvCore *pc, void 
*fdt)
  int smt_threads = CPU_CORE(pc)->nr_threads;
  CPUPPCState *env = >env;
  PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cs);
+PnvChipClass *pnv_cc = PNV_CHIP_GET_CLASS(chip);
  g_autofree uint32_t *servers_prop = g_new(uint32_t, smt_threads);
  int i;
+uint32_t pir;
  uint32_t segs[] = {cpu_to_be32(28), cpu_to_be32(40),
 0x, 0x};
  uint32_t tbfreq = PNV_TIMEBASE_FREQ;
@@ -158,15 +160,17 @@ static void pnv_dt_core(PnvChip *chip, PnvCore *pc, void 
*fdt)
  char *nodename;
  int cpus_offset = get_cpus_node(fdt);
  
-nodename = g_strdup_printf("%s@%x", dc->fw_name, pc->pir);

+pir = pnv_cc->chip_pir(chip, pc->hwid, 0);
+
+nodename = g_strdup_printf("%s@%x", dc->fw_name, pir);
  offset = fdt_add_subnode(fdt, cpus_offset, nodename);
  _FDT(offset);
  g_free(nodename);
  
  _FDT((fdt_setprop_cell(fdt, offset, "ibm,chip-id", chip->chip_id)));
  
-_FDT((fdt_setprop_cell(fdt, offset, "reg", pc->pir)));

-_FDT((fdt_setprop_cell(fdt, offset, "ibm,pir", pc->pir)));
+_FDT((fdt_setprop_cell(fdt, offset, "reg", pir)));
+_FDT((fdt_setprop_cell(fdt, offset, "ibm,pir", pir)));
  _FDT((fdt_setprop_string(fdt, offset, "device_type", "cpu")));
  
  _FDT((fdt_setprop_cell(fdt, offset, "cpu-version", env->spr[SPR_PVR])));

@@ -241,15 +245,17 @@ static void pnv_dt_core(PnvChip *chip, PnvCore *pc, void 
*fdt)
  
  /* Build interrupt servers properties */

  for (i = 0; i < smt_threads; i++) {
-servers_prop[i] = cpu_to_be32(pc->pir + i);
+servers_prop[i] = cpu_to_be32(pnv_cc->chip_pir(chip, pc->hwid, i));
  }
  _FDT((fdt_setprop(fdt, offset, "ibm,ppc-interrupt-server#s",
 servers_prop, sizeof(*servers_prop) * smt_threads)));
  }
  
-static void pnv_dt_icp(PnvChip *chip, void *fdt, uint32_t pir,

+static void pnv_dt_icp(PnvChip *chip, void *fdt, uint32_t hwid,
 uint32_t nr_threads)
  {
+PnvChipClass *pcc = PNV_CHIP_GET_CLASS(chip);
+uint32_t pir = pcc->chip_pir(chip, hwid, 0);
  uint64_t addr = PNV_ICP_BASE(chip) | (pir << 12);
  char *name;
  const char compat[] = "IBM,power8-icp\0IBM,ppc-xicp";
@@ -263,6 +269,7 @@ static void pnv_dt_icp(PnvChip *chip, void *fdt, uint32_t 
pir,
  rsize = sizeof(uint64_t) * 2 * nr_threads;
  reg = g_malloc(rsize);
  for (i = 0; i < nr_threads; i++) {
+/* We know P8 PIR is linear with thread id */
  reg[i * 2] = cpu_to_be64(addr | ((pir + i) * 0x1000));
  reg[i * 2 + 1] = cpu_to_be64(0x1000);
  }
@@ -315,7 +322,7 @@ static void pnv_chip_power8_dt_populate(PnvChip *chip, void 
*fdt)
  pnv_dt_core(chip, pnv_core, fdt);
  
  /* Interrupt Control Presenters (ICP). One per core. */

-pnv_dt_icp(chip, fdt, pnv_core->pir, CPU_CORE(pnv_core)->nr_threads);
+pnv_dt

[PATCH v2 08/21] memory: Add Error** argument to .log_global*() handlers

Modify all log_global*() handlers to take an Error** parameter and
return a bool. A new MEMORY_LISTENER_CALL_LOG_GLOBAL macro looping on
the listeners is introduced to handle a possible error, which
would interrupt the loop if necessary.

Note the change in memory_global_dirty_log_start() behavior: it
will now return as soon as an error is detected.

Cc: Stefano Stabellini 
Cc: Anthony Perard 
Cc: Paul Durrant 
Cc: Michael S. Tsirkin 
Cc: Paolo Bonzini 
Cc: David Hildenbrand 
Signed-off-by: Cédric Le Goater 
---
 include/exec/memory.h | 15 ++--
 hw/i386/xen/xen-hvm.c |  6 ++--
 hw/vfio/common.c  |  8 +++--
 hw/virtio/vhost.c |  6 ++--
 system/memory.c   | 83 +--
 system/physmem.c  |  5 +--
 6 files changed, 101 insertions(+), 22 deletions(-)

diff --git a/include/exec/memory.h b/include/exec/memory.h
index 
8626a355b310ed7b1a1db7978ba4b394032c2f15..4bc146c5ebdd377cd14a4e462f32cc945db5a0a8
 100644
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -998,8 +998,11 @@ struct MemoryListener {
  * active at that time.
  *
  * @listener: The #MemoryListener.
+ * @errp: pointer to Error*, to store an error if it happens.
+ *
+ * Return: true on success, else false setting @errp with error.
  */
-void (*log_global_start)(MemoryListener *listener);
+bool (*log_global_start)(MemoryListener *listener, Error **errp);
 
 /**
  * @log_global_stop:
@@ -1009,8 +1012,11 @@ struct MemoryListener {
  * the address space.
  *
  * @listener: The #MemoryListener.
+ * @errp: pointer to Error*, to store an error if it happens.
+ *
+ * Return: true on success, else false setting @errp with error.
  */
-void (*log_global_stop)(MemoryListener *listener);
+bool (*log_global_stop)(MemoryListener *listener, Error **errp);
 
 /**
  * @log_global_after_sync:
@@ -1019,8 +1025,11 @@ struct MemoryListener {
  * for any #MemoryRegionSection.
  *
  * @listener: The #MemoryListener.
+ * @errp: pointer to Error*, to store an error if it happens.
+ *
+ * Return: true on success, else false setting @errp with error.
  */
-void (*log_global_after_sync)(MemoryListener *listener);
+bool (*log_global_after_sync)(MemoryListener *listener, Error **errp);
 
 /**
  * @eventfd_add:
diff --git a/hw/i386/xen/xen-hvm.c b/hw/i386/xen/xen-hvm.c
index 
f42621e6742552035122ea58092c91c3458338ff..925a207b494b4eed52d5f360b554f18ac8a9806d
 100644
--- a/hw/i386/xen/xen-hvm.c
+++ b/hw/i386/xen/xen-hvm.c
@@ -446,16 +446,18 @@ static void xen_log_sync(MemoryListener *listener, 
MemoryRegionSection *section)
   int128_get64(section->size));
 }
 
-static void xen_log_global_start(MemoryListener *listener)
+static bool xen_log_global_start(MemoryListener *listener, Error **errp)
 {
 if (xen_enabled()) {
 xen_in_migration = true;
 }
+return true;
 }
 
-static void xen_log_global_stop(MemoryListener *listener)
+static bool xen_log_global_stop(MemoryListener *listener, Error **errp)
 {
 xen_in_migration = false;
+return true;
 }
 
 static const MemoryListener xen_memory_listener = {
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index 
059bfdc07a85e2eb908df828c1f42104d683e911..8bba95ba6a2010b78cae54c6905857686bbb6309
 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -1075,7 +1075,8 @@ out:
 return ret;
 }
 
-static void vfio_listener_log_global_start(MemoryListener *listener)
+static bool vfio_listener_log_global_start(MemoryListener *listener,
+   Error **errp)
 {
 VFIOContainerBase *bcontainer = container_of(listener, VFIOContainerBase,
  listener);
@@ -1092,9 +1093,11 @@ static void 
vfio_listener_log_global_start(MemoryListener *listener)
  ret, strerror(-ret));
 vfio_set_migration_error(ret);
 }
+return !!ret;
 }
 
-static void vfio_listener_log_global_stop(MemoryListener *listener)
+static bool vfio_listener_log_global_stop(MemoryListener *listener,
+  Error **errp)
 {
 VFIOContainerBase *bcontainer = container_of(listener, VFIOContainerBase,
  listener);
@@ -,6 +1114,7 @@ static void vfio_listener_log_global_stop(MemoryListener 
*listener)
  ret, strerror(-ret));
 vfio_set_migration_error(ret);
 }
+return !!ret;
 }
 
 static int vfio_device_dma_logging_report(VFIODevice *vbasedev, hwaddr iova,
diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
index 
2c9ac794680ea9b65eba6cc22e70cf141e90aa73..7a555f941934991a72a2817e5505fe0ce6d6fc64
 100644
--- a/hw/virtio/vhost.c
+++ b/hw/virtio/vhost.c
@@ -1044,7 +1044,7 @@ check_dev_state:
 return r;
 }
 
-static void vhost_log_global_start(MemoryListener *listener)
+static bool vhost_log_global_st

[PATCH v2 06/21] migration: Add Error** argument to .save_setup() handler

The purpose is to record a potential error in the migration stream if
qemu_savevm_state_setup() fails. Most of the current .save_setup()
handlers can be modified to use the Error argument instead of managing
their own and calling locally error_report(). The following patches
will introduce such changes for VFIO first.

Cc: Nicholas Piggin 
Cc: Harsh Prateek Bora 
Cc: Halil Pasic 
Cc: Thomas Huth 
Cc: Eric Blake 
Cc: Vladimir Sementsov-Ogievskiy 
Cc: John Snow 
Cc: Stefan Hajnoczi 
Reviewed-by: Philippe Mathieu-Daudé 
Reviewed-by: Peter Xu 
Signed-off-by: Cédric Le Goater 
---

Changes in v2: 

 - dropped qemu_file_set_error_obj(f, ret, local_err); 

include/migration/register.h   | 3 ++-
 hw/ppc/spapr.c | 2 +-
 hw/s390x/s390-stattrib.c   | 2 +-
 hw/vfio/migration.c| 2 +-
 migration/block-dirty-bitmap.c | 2 +-
 migration/block.c  | 2 +-
 migration/ram.c| 3 ++-
 migration/savevm.c | 2 +-
 8 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/include/migration/register.h b/include/migration/register.h
index 
2cc71ec45f65bf2884c9e7a823d2968752f15c20..96eae9dba2970552c379c732393e3ab6ef578a58
 100644
--- a/include/migration/register.h
+++ b/include/migration/register.h
@@ -60,10 +60,11 @@ typedef struct SaveVMHandlers {
  *
  * @f: QEMUFile where to send the data
  * @opaque: data pointer passed to register_savevm_live()
+ * @errp: pointer to Error*, to store an error if it happens.
  *
  * Returns zero to indicate success and negative for error
  */
-int (*save_setup)(QEMUFile *f, void *opaque);
+int (*save_setup)(QEMUFile *f, void *opaque, Error **errp);
 
 /**
  * @save_cleanup
diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 
55263f0815ed7671b32ea20b394ae71c82e616cb..045c024ffa76eacfc496bd486cb6cafbee2df73e
 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -2142,7 +2142,7 @@ static const VMStateDescription vmstate_spapr = {
 }
 };
 
-static int htab_save_setup(QEMUFile *f, void *opaque)
+static int htab_save_setup(QEMUFile *f, void *opaque, Error **errp)
 {
 SpaprMachineState *spapr = opaque;
 
diff --git a/hw/s390x/s390-stattrib.c b/hw/s390x/s390-stattrib.c
index 
c483b62a9b5f71772639fc180bdad15ecb6711cb..c934df424a555d83d2198f5ddfc0cbe0ea98e9ec
 100644
--- a/hw/s390x/s390-stattrib.c
+++ b/hw/s390x/s390-stattrib.c
@@ -166,7 +166,7 @@ static int cmma_load(QEMUFile *f, void *opaque, int 
version_id)
 return ret;
 }
 
-static int cmma_save_setup(QEMUFile *f, void *opaque)
+static int cmma_save_setup(QEMUFile *f, void *opaque, Error **errp)
 {
 S390StAttribState *sas = S390_STATTRIB(opaque);
 S390StAttribClass *sac = S390_STATTRIB_GET_CLASS(sas);
diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
index 
70e6b1a709f9b67e4c9eb41033d76347275cac42..8bcb4bc73cd5ba5338e3ffa4d907d0e6bfbb9485
 100644
--- a/hw/vfio/migration.c
+++ b/hw/vfio/migration.c
@@ -378,7 +378,7 @@ static int vfio_save_prepare(void *opaque, Error **errp)
 return 0;
 }
 
-static int vfio_save_setup(QEMUFile *f, void *opaque)
+static int vfio_save_setup(QEMUFile *f, void *opaque, Error **errp)
 {
 VFIODevice *vbasedev = opaque;
 VFIOMigration *migration = vbasedev->migration;
diff --git a/migration/block-dirty-bitmap.c b/migration/block-dirty-bitmap.c
index 
2708abf3d762de774ed294d3fdb8e56690d2974c..16f84e6c57c2403a8c2d6319f4e7b6360dade28c
 100644
--- a/migration/block-dirty-bitmap.c
+++ b/migration/block-dirty-bitmap.c
@@ -1213,7 +1213,7 @@ fail:
 return ret;
 }
 
-static int dirty_bitmap_save_setup(QEMUFile *f, void *opaque)
+static int dirty_bitmap_save_setup(QEMUFile *f, void *opaque, Error **errp)
 {
 DBMSaveState *s = &((DBMState *)opaque)->save;
 SaveBitmapState *dbms = NULL;
diff --git a/migration/block.c b/migration/block.c
index 
8c6ebafacc1ffe930d1d4f19d968817b14852c69..df15319ceab66201b043f15eac1b0a7d6522b60c
 100644
--- a/migration/block.c
+++ b/migration/block.c
@@ -708,7 +708,7 @@ static void block_migration_cleanup(void *opaque)
 blk_mig_unlock();
 }
 
-static int block_save_setup(QEMUFile *f, void *opaque)
+static int block_save_setup(QEMUFile *f, void *opaque, Error **errp)
 {
 int ret;
 
diff --git a/migration/ram.c b/migration/ram.c
index 
4649a8120492a03d331d660622e1a0a51adb0a96..745482899e18c86b73261b683c1bec04039a76d2
 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -2930,8 +2930,9 @@ void qemu_guest_free_page_hint(void *addr, size_t len)
  *
  * @f: QEMUFile where to send the data
  * @opaque: RAMState pointer
+ * @errp: pointer to Error*, to store an error if it happens.
  */
-static int ram_save_setup(QEMUFile *f, void *opaque)
+static int ram_save_setup(QEMUFile *f, void *opaque, Error **errp)
 {
 RAMState **rsp = opaque;
 RAMBlock *block;
diff --git a/migration/savevm.c b/migration/savevm.c
index 
bc168371a31acf85f29f2c284be181250db45df4..b5b3b51bad94dc4c04ae22cd687ba111299339aa
 100644
--- a/migration/save

[PATCH v2 04/21] migration: Do not call PRECOPY_NOTIFY_SETUP notifiers in case of error

When commit bd2270608fa0 ("migration/ram.c: add a notifier chain for
precopy") added PRECOPY_NOTIFY_SETUP notifiers at the end of
qemu_savevm_state_setup(), it didn't take into account a possible
error in the loop calling vmstate_save() or .save_setup() handlers.

Check ret value before calling the notifiers.

Signed-off-by: Cédric Le Goater 
---
 migration/savevm.c | 6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/migration/savevm.c b/migration/savevm.c
index 
d612c8a9020b204d5d078d5df85f0e6449c27645..51876f2ef674bb76c7e7ef96e1119a083883deac
 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -1316,7 +1316,7 @@ void qemu_savevm_state_setup(QEMUFile *f)
 MigrationState *ms = migrate_get_current();
 SaveStateEntry *se;
 Error *local_err = NULL;
-int ret;
+int ret = 0;
 
 json_writer_int64(ms->vmdesc, "page_size", qemu_target_page_size());
 json_writer_start_array(ms->vmdesc, "devices");
@@ -1350,6 +1350,10 @@ void qemu_savevm_state_setup(QEMUFile *f)
 }
 }
 
+if (ret) {
+return;
+}
+
 if (precopy_notify(PRECOPY_NOTIFY_SETUP, _err)) {
 error_report_err(local_err);
 }
-- 
2.43.2

[PATCH v2 16/21] vfio: Add Error** argument to .vfio_save_config() handler

Use vmstate_save_state_with_err() to improve error reporting in the
callers and store a reported error under the migration stream. Add
documentation while at it.

Reviewed-by: Philippe Mathieu-Daudé 
Signed-off-by: Cédric Le Goater 
---
 include/hw/vfio/vfio-common.h | 25 -
 hw/vfio/migration.c   | 18 --
 hw/vfio/pci.c |  5 +++--
 3 files changed, 39 insertions(+), 9 deletions(-)

diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
index 
9b7ef7d02b5a0ad5266bcc4d06cd6874178978e4..6d9dee626afc491645d2c2398f3e3210961f67e9
 100644
--- a/include/hw/vfio/vfio-common.h
+++ b/include/hw/vfio/vfio-common.h
@@ -133,7 +133,30 @@ struct VFIODeviceOps {
 int (*vfio_hot_reset_multi)(VFIODevice *vdev);
 void (*vfio_eoi)(VFIODevice *vdev);
 Object *(*vfio_get_object)(VFIODevice *vdev);
-void (*vfio_save_config)(VFIODevice *vdev, QEMUFile *f);
+
+/**
+ * @vfio_save_config
+ *
+ * Save device config state
+ *
+ * @vdev: #VFIODevice for which to save the config
+ * @f: #QEMUFile where to send the data
+ * @errp: pointer to Error*, to store an error if it happens.
+ *
+ * Returns zero to indicate success and negative for error
+ */
+int (*vfio_save_config)(VFIODevice *vdev, QEMUFile *f, Error **errp);
+
+/**
+ * @vfio_load_config
+ *
+ * Load device config state
+ *
+ * @vdev: #VFIODevice for which to load the config
+ * @f: #QEMUFile where to get the data
+ *
+ * Returns zero to indicate success and negative for error
+ */
 int (*vfio_load_config)(VFIODevice *vdev, QEMUFile *f);
 };
 
diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
index 
8bdc68c66516710c52443135284262580825e0b8..228e8854594f3714b7c6f4fcfc5468d6b56337cb
 100644
--- a/hw/vfio/migration.c
+++ b/hw/vfio/migration.c
@@ -190,14 +190,19 @@ static int vfio_load_buffer(QEMUFile *f, VFIODevice 
*vbasedev,
 return ret;
 }
 
-static int vfio_save_device_config_state(QEMUFile *f, void *opaque)
+static int vfio_save_device_config_state(QEMUFile *f, void *opaque,
+ Error **errp)
 {
 VFIODevice *vbasedev = opaque;
+int ret;
 
 qemu_put_be64(f, VFIO_MIG_FLAG_DEV_CONFIG_STATE);
 
 if (vbasedev->ops && vbasedev->ops->vfio_save_config) {
-vbasedev->ops->vfio_save_config(vbasedev, f);
+ret = vbasedev->ops->vfio_save_config(vbasedev, f, errp);
+if (ret) {
+return ret;
+}
 }
 
 qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE);
@@ -581,13 +586,14 @@ static int vfio_save_complete_precopy(QEMUFile *f, void 
*opaque)
 static void vfio_save_state(QEMUFile *f, void *opaque)
 {
 VFIODevice *vbasedev = opaque;
+Error *local_err = NULL;
 int ret;
 
-ret = vfio_save_device_config_state(f, opaque);
+ret = vfio_save_device_config_state(f, opaque, _err);
 if (ret) {
-error_report("%s: Failed to save device config space",
- vbasedev->name);
-qemu_file_set_error(f, ret);
+error_prepend(_err, "%s: Failed to save device config space",
+  vbasedev->name);
+qemu_file_set_error_obj(f, ret, local_err);
 }
 }
 
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 
4fa387f0430d62ca2ba1b5ae5b7037f8f06b33f9..99d86e1d40ef25133fc76ad6e58294b07bd20843
 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -2585,11 +2585,12 @@ const VMStateDescription vmstate_vfio_pci_config = {
 }
 };
 
-static void vfio_pci_save_config(VFIODevice *vbasedev, QEMUFile *f)
+static int vfio_pci_save_config(VFIODevice *vbasedev, QEMUFile *f, Error 
**errp)
 {
 VFIOPCIDevice *vdev = container_of(vbasedev, VFIOPCIDevice, vbasedev);
 
-vmstate_save_state(f, _vfio_pci_config, vdev, NULL);
+return vmstate_save_state_with_err(f, _vfio_pci_config, vdev, NULL,
+   errp);
 }
 
 static int vfio_pci_load_config(VFIODevice *vbasedev, QEMUFile *f)
-- 
2.43.2

[PATCH v2 11/21] migration: Fix migration termination

Handle migration termination when in SETUP state. This can happen if
qemu_savevm_state_setup() fails.

Signed-off-by: Cédric Le Goater 
---
 migration/migration.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/migration/migration.c b/migration/migration.c
index 
c1a62b696f62c0d5aca0505e58bc4dc0ff561fde..63294417ff9cae868ad8a167094a795fc30e4da0
 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -3161,6 +3161,8 @@ static void migration_iteration_finish(MigrationState *s)
 }
 }
 break;
+case MIGRATION_STATUS_SETUP:
+break;
 
 default:
 /* Should not reach here, but if so, forgive the VM. */
@@ -3192,6 +3194,8 @@ static void bg_migration_iteration_finish(MigrationState 
*s)
 case MIGRATION_STATUS_CANCELLED:
 case MIGRATION_STATUS_CANCELLING:
 break;
+case MIGRATION_STATUS_SETUP:
+break;
 
 default:
 /* Should not reach here, but if so, forgive the VM. */
-- 
2.43.2

[PATCH v2 12/21] vfio: Add Error** argument to .set_dirty_page_tracking() handler

We will use the Error object to improve error reporting in the
.log_global*() handlers of VFIO. Add documentation while at it.

Reviewed-by: Philippe Mathieu-Daudé 
Signed-off-by: Cédric Le Goater 
---
 include/hw/vfio/vfio-container-base.h | 18 --
 hw/vfio/common.c  |  4 ++--
 hw/vfio/container-base.c  |  4 ++--
 hw/vfio/container.c   |  6 +++---
 4 files changed, 23 insertions(+), 9 deletions(-)

diff --git a/include/hw/vfio/vfio-container-base.h 
b/include/hw/vfio/vfio-container-base.h
index 
b2813b0c117985425c842d91f011bb895955d738..dec2023eceb6c7d62b0ee35008cc58f8e695e190
 100644
--- a/include/hw/vfio/vfio-container-base.h
+++ b/include/hw/vfio/vfio-container-base.h
@@ -81,7 +81,7 @@ int vfio_container_add_section_window(VFIOContainerBase 
*bcontainer,
 void vfio_container_del_section_window(VFIOContainerBase *bcontainer,
MemoryRegionSection *section);
 int vfio_container_set_dirty_page_tracking(VFIOContainerBase *bcontainer,
-   bool start);
+   bool start, Error **errp);
 int vfio_container_query_dirty_bitmap(const VFIOContainerBase *bcontainer,
   VFIOBitmap *vbmap,
   hwaddr iova, hwaddr size);
@@ -120,9 +120,23 @@ struct VFIOIOMMUClass {
 int (*attach_device)(const char *name, VFIODevice *vbasedev,
  AddressSpace *as, Error **errp);
 void (*detach_device)(VFIODevice *vbasedev);
+
 /* migration feature */
+
+/**
+ * @set_dirty_page_tracking
+ *
+ * Start or stop dirty pages tracking on VFIO container
+ *
+ * @bcontainer: #VFIOContainerBase on which to de/activate dirty
+ *  pages tracking
+ * @start: indicates whether to start or stop dirty pages tracking
+ * @errp: pointer to Error*, to store an error if it happens.
+ *
+ * Returns zero to indicate success and negative for error
+ */
 int (*set_dirty_page_tracking)(const VFIOContainerBase *bcontainer,
-   bool start);
+   bool start, Error **errp);
 int (*query_dirty_bitmap)(const VFIOContainerBase *bcontainer,
   VFIOBitmap *vbmap,
   hwaddr iova, hwaddr size);
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index 
8bba95ba6a2010b78cae54c6905857686bbb6309..560f4bc38499f7f4a3bc84ef7e4184fd6dc89935
 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -1085,7 +1085,7 @@ static bool vfio_listener_log_global_start(MemoryListener 
*listener,
 if (vfio_devices_all_device_dirty_tracking(bcontainer)) {
 ret = vfio_devices_dma_logging_start(bcontainer);
 } else {
-ret = vfio_container_set_dirty_page_tracking(bcontainer, true);
+ret = vfio_container_set_dirty_page_tracking(bcontainer, true, NULL);
 }
 
 if (ret) {
@@ -1106,7 +1106,7 @@ static bool vfio_listener_log_global_stop(MemoryListener 
*listener,
 if (vfio_devices_all_device_dirty_tracking(bcontainer)) {
 vfio_devices_dma_logging_stop(bcontainer);
 } else {
-ret = vfio_container_set_dirty_page_tracking(bcontainer, false);
+ret = vfio_container_set_dirty_page_tracking(bcontainer, false, NULL);
 }
 
 if (ret) {
diff --git a/hw/vfio/container-base.c b/hw/vfio/container-base.c
index 
913ae49077c4f09b7b27517c1231cfbe4befb7fb..7c0764121d24b02b6c4e66e368d7dff78a6d65aa
 100644
--- a/hw/vfio/container-base.c
+++ b/hw/vfio/container-base.c
@@ -53,14 +53,14 @@ void vfio_container_del_section_window(VFIOContainerBase 
*bcontainer,
 }
 
 int vfio_container_set_dirty_page_tracking(VFIOContainerBase *bcontainer,
-   bool start)
+   bool start, Error **errp)
 {
 if (!bcontainer->dirty_pages_supported) {
 return 0;
 }
 
 g_assert(bcontainer->ops->set_dirty_page_tracking);
-return bcontainer->ops->set_dirty_page_tracking(bcontainer, start);
+return bcontainer->ops->set_dirty_page_tracking(bcontainer, start, errp);
 }
 
 int vfio_container_query_dirty_bitmap(const VFIOContainerBase *bcontainer,
diff --git a/hw/vfio/container.c b/hw/vfio/container.c
index 
bd25b9fbad2e717e63c2ab0e331186e5f63cef49..f772ac79b9c413c86d7e60f6dc4e6699852d5aac
 100644
--- a/hw/vfio/container.c
+++ b/hw/vfio/container.c
@@ -210,7 +210,7 @@ static int vfio_legacy_dma_map(const VFIOContainerBase 
*bcontainer, hwaddr iova,
 
 static int
 vfio_legacy_set_dirty_page_tracking(const VFIOContainerBase *bcontainer,
-bool start)
+bool start, Error **errp)
 {
 const VFIOContainer *container = container_of(bcontainer, VFIOContainer,
   bcontain

[PATCH v2 21/21] vfio: Extend vfio_set_migration_error() with Error* argument

vfio_set_migration_error() sets the 'return' error on the migration
stream if a migration is in progress. To improve error reporting, add
a new Error* argument to also set the Error object on the migration
stream, if a migration is in progress.

Signed-off-by: Cédric Le Goater 
---
 hw/vfio/common.c | 36 +++-
 1 file changed, 19 insertions(+), 17 deletions(-)

diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index 
8fbf04e55d1b304bc80fdd9ef6f5f5089acd3360..5e6353ae468c885af0fa169b671902a518df4c75
 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -148,16 +148,18 @@ bool vfio_viommu_preset(VFIODevice *vbasedev)
 return vbasedev->bcontainer->space->as != _space_memory;
 }
 
-static void vfio_set_migration_error(int err)
+static void vfio_set_migration_error(int ret, Error *err)
 {
 MigrationState *ms = migrate_get_current();
 
 if (migration_is_setup_or_active(ms->state)) {
 WITH_QEMU_LOCK_GUARD(>qemu_file_lock) {
 if (ms->to_dst_file) {
-qemu_file_set_error(ms->to_dst_file, err);
+qemu_file_set_error_obj(ms->to_dst_file, ret, err);
 }
 }
+} else {
+error_report_err(err);
 }
 }
 
@@ -304,9 +306,10 @@ static void vfio_iommu_map_notify(IOMMUNotifier *n, 
IOMMUTLBEntry *iotlb)
 iova, iova + iotlb->addr_mask);
 
 if (iotlb->target_as != _space_memory) {
-error_report("Wrong target AS \"%s\", only system memory is allowed",
- iotlb->target_as->name ? iotlb->target_as->name : "none");
-vfio_set_migration_error(-EINVAL);
+error_setg(_err,
+   "Wrong target AS \"%s\", only system memory is allowed",
+   iotlb->target_as->name ? iotlb->target_as->name : "none");
+vfio_set_migration_error(-EINVAL, local_err);
 return;
 }
 
@@ -339,11 +342,12 @@ static void vfio_iommu_map_notify(IOMMUNotifier *n, 
IOMMUTLBEntry *iotlb)
 ret = vfio_container_dma_unmap(bcontainer, iova,
iotlb->addr_mask + 1, iotlb);
 if (ret) {
-error_report("vfio_container_dma_unmap(%p, 0x%"HWADDR_PRIx", "
- "0x%"HWADDR_PRIx") = %d (%s)",
- bcontainer, iova,
- iotlb->addr_mask + 1, ret, strerror(-ret));
-vfio_set_migration_error(ret);
+error_setg(_err,
+   "vfio_container_dma_unmap(%p, 0x%"HWADDR_PRIx", "
+   "0x%"HWADDR_PRIx") = %d (%s)",
+   bcontainer, iova,
+   iotlb->addr_mask + 1, ret, strerror(-ret));
+vfio_set_migration_error(ret, local_err);
 }
 }
 out:
@@ -1239,14 +1243,14 @@ static void vfio_iommu_map_dirty_notify(IOMMUNotifier 
*n, IOMMUTLBEntry *iotlb)
 trace_vfio_iommu_map_dirty_notify(iova, iova + iotlb->addr_mask);
 
 if (iotlb->target_as != _space_memory) {
-error_report("Wrong target AS \"%s\", only system memory is allowed",
- iotlb->target_as->name ? iotlb->target_as->name : "none");
+error_setg(_err,
+   "Wrong target AS \"%s\", only system memory is allowed",
+   iotlb->target_as->name ? iotlb->target_as->name : "none");
 goto out;
 }
 
 rcu_read_lock();
 if (!vfio_get_xlat_addr(iotlb, NULL, _addr, NULL, _err)) {
-error_report_err(local_err);
 goto out_lock;
 }
 
@@ -1257,7 +1261,6 @@ static void vfio_iommu_map_dirty_notify(IOMMUNotifier *n, 
IOMMUTLBEntry *iotlb)
   "vfio_iommu_map_dirty_notify(%p, 0x%"HWADDR_PRIx", "
   "0x%"HWADDR_PRIx") failed :", bcontainer, iova,
   iotlb->addr_mask + 1);
-error_report_err(local_err);
 }
 
 out_lock:
@@ -1265,7 +1268,7 @@ out_lock:
 
 out:
 if (ret) {
-vfio_set_migration_error(ret);
+vfio_set_migration_error(ret, local_err);
 }
 }
 
@@ -1385,8 +1388,7 @@ static void vfio_listener_log_sync(MemoryListener 
*listener,
 if (vfio_devices_all_dirty_tracking(bcontainer)) {
 ret = vfio_sync_dirty_bitmap(bcontainer, section, _err);
 if (ret) {
-error_report_err(local_err);
-vfio_set_migration_error(ret);
+vfio_set_migration_error(ret, local_err);
 }
 }
 }
-- 
2.43.2

[PATCH v2 19/21] vfio: Add Error** argument to .get_dirty_bitmap() handler

Let the callers do the error reporting. Add documentation while at it.

Signed-off-by: Cédric Le Goater 
---
 include/hw/vfio/vfio-common.h |  4 +-
 include/hw/vfio/vfio-container-base.h | 17 +++-
 hw/vfio/common.c  | 59 ++-
 hw/vfio/container-base.c  |  5 ++-
 hw/vfio/container.c   | 13 +++---
 5 files changed, 67 insertions(+), 31 deletions(-)

diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
index 
6d9dee626afc491645d2c2398f3e3210961f67e9..83ffad89f5cf434452332fe29fb752d9ec71b2f0
 100644
--- a/include/hw/vfio/vfio-common.h
+++ b/include/hw/vfio/vfio-common.h
@@ -271,9 +271,9 @@ bool
 vfio_devices_all_device_dirty_tracking(const VFIOContainerBase *bcontainer);
 int vfio_devices_query_dirty_bitmap(const VFIOContainerBase *bcontainer,
 VFIOBitmap *vbmap, hwaddr iova,
-hwaddr size);
+hwaddr size, Error **errp);
 int vfio_get_dirty_bitmap(const VFIOContainerBase *bcontainer, uint64_t iova,
-  uint64_t size, ram_addr_t ram_addr);
+  uint64_t size, ram_addr_t ram_addr, Error **errp);
 
 /* Returns 0 on success, or a negative errno. */
 int vfio_device_get_name(VFIODevice *vbasedev, Error **errp);
diff --git a/include/hw/vfio/vfio-container-base.h 
b/include/hw/vfio/vfio-container-base.h
index 
dec2023eceb6c7d62b0ee35008cc58f8e695e190..3ee713014cb414f18b34092641a17717983b5559
 100644
--- a/include/hw/vfio/vfio-container-base.h
+++ b/include/hw/vfio/vfio-container-base.h
@@ -84,7 +84,7 @@ int vfio_container_set_dirty_page_tracking(VFIOContainerBase 
*bcontainer,
bool start, Error **errp);
 int vfio_container_query_dirty_bitmap(const VFIOContainerBase *bcontainer,
   VFIOBitmap *vbmap,
-  hwaddr iova, hwaddr size);
+  hwaddr iova, hwaddr size, Error **errp);
 
 void vfio_container_init(VFIOContainerBase *bcontainer,
  VFIOAddressSpace *space,
@@ -137,9 +137,22 @@ struct VFIOIOMMUClass {
  */
 int (*set_dirty_page_tracking)(const VFIOContainerBase *bcontainer,
bool start, Error **errp);
+/**
+ * @query_dirty_bitmap
+ *
+ * Get list of dirty pages from container
+ *
+ * @bcontainer: #VFIOContainerBase from which to get dirty pages
+ * @vbmap: #VFIOBitmap internal bitmap structure
+ * @iova: iova base address
+ * @size: size of iova range
+ * @errp: pointer to Error*, to store an error if it happens.
+ *
+ * Returns zero to indicate success and negative for error
+ */
 int (*query_dirty_bitmap)(const VFIOContainerBase *bcontainer,
   VFIOBitmap *vbmap,
-  hwaddr iova, hwaddr size);
+  hwaddr iova, hwaddr size, Error **errp);
 /* PCI specific */
 int (*pci_hot_reset)(VFIODevice *vbasedev, bool single);
 
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index 
43f37447e3692ffa97788b02f83b81b44aaf301a..8fbf04e55d1b304bc80fdd9ef6f5f5089acd3360
 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -1154,7 +1154,7 @@ static int vfio_device_dma_logging_report(VFIODevice 
*vbasedev, hwaddr iova,
 
 int vfio_devices_query_dirty_bitmap(const VFIOContainerBase *bcontainer,
 VFIOBitmap *vbmap, hwaddr iova,
-hwaddr size)
+hwaddr size, Error **errp)
 {
 VFIODevice *vbasedev;
 int ret;
@@ -1163,10 +1163,10 @@ int vfio_devices_query_dirty_bitmap(const 
VFIOContainerBase *bcontainer,
 ret = vfio_device_dma_logging_report(vbasedev, iova, size,
  vbmap->bitmap);
 if (ret) {
-error_report("%s: Failed to get DMA logging report, iova: "
- "0x%" HWADDR_PRIx ", size: 0x%" HWADDR_PRIx
- ", err: %d (%s)",
- vbasedev->name, iova, size, ret, strerror(-ret));
+error_setg(errp, "%s: Failed to get DMA logging report, iova: "
+   "0x%" HWADDR_PRIx ", size: 0x%" HWADDR_PRIx
+   ", err: %d (%s)",
+   vbasedev->name, iova, size, ret, strerror(-ret));
 
 return ret;
 }
@@ -1176,7 +1176,7 @@ int vfio_devices_query_dirty_bitmap(const 
VFIOContainerBase *bcontainer,
 }
 
 int vfio_get_dirty_bitmap(const VFIOContainerBase *bcontainer, uint64_t iova,
-  uint64_t size, ram_addr_t ram_addr)
+  uint64_t size, ram_addr_t ram_addr, Error **errp)
 {
 bo

[PATCH v2 05/21] migration: Add Error** argument to qemu_savevm_state_setup()

This prepares the ground for the changes coming next, which add an Error**
argument to the .save_setup() handler. Callers of qemu_savevm_state_setup()
now handle the error and fail earlier. This is a functional change
that should be examined closely.

Signed-off-by: Cédric Le Goater 
---
 migration/savevm.h|  2 +-
 migration/migration.c | 20 ++--
 migration/savevm.c| 14 +++---
 3 files changed, 26 insertions(+), 10 deletions(-)

diff --git a/migration/savevm.h b/migration/savevm.h
index 
74669733dd63a080b765866c703234a5c4939223..9ec96a995c93a42aad621595f0ed58596c532328
 100644
--- a/migration/savevm.h
+++ b/migration/savevm.h
@@ -32,7 +32,7 @@
 bool qemu_savevm_state_blocked(Error **errp);
 void qemu_savevm_non_migratable_list(strList **reasons);
 int qemu_savevm_state_prepare(Error **errp);
-void qemu_savevm_state_setup(QEMUFile *f);
+int qemu_savevm_state_setup(QEMUFile *f, Error **errp);
 bool qemu_savevm_state_guest_unplug_pending(void);
 int qemu_savevm_state_resume_prepare(MigrationState *s);
 void qemu_savevm_state_header(QEMUFile *f);
diff --git a/migration/migration.c b/migration/migration.c
index 
5316bbe6704742e604ae55dc7b47a4e11e73c2a4..c1a62b696f62c0d5aca0505e58bc4dc0ff561fde
 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -3314,6 +3314,8 @@ static void *migration_thread(void *opaque)
 int64_t setup_start = qemu_clock_get_ms(QEMU_CLOCK_HOST);
 MigThrError thr_error;
 bool urgent = false;
+Error *local_err = NULL;
+int ret;
 
 thread = migration_threads_add("live_migration", qemu_get_thread_id());
 
@@ -3357,9 +3359,15 @@ static void *migration_thread(void *opaque)
 }
 
 bql_lock();
-qemu_savevm_state_setup(s->to_dst_file);
+ret = qemu_savevm_state_setup(s->to_dst_file, &local_err);
 bql_unlock();
 
+if (ret) {
+migrate_set_error(s, local_err);
+error_free(local_err);
+goto out;
+ }
+
 qemu_savevm_wait_unplug(s, MIGRATION_STATUS_SETUP,
MIGRATION_STATUS_ACTIVE);
 
@@ -3436,6 +3444,8 @@ static void *bg_migration_thread(void *opaque)
 MigThrError thr_error;
 QEMUFile *fb;
 bool early_fail = true;
+Error *local_err = NULL;
+int ret;
 
 rcu_register_thread();
 object_ref(OBJECT(s));
@@ -3469,9 +3479,15 @@ static void *bg_migration_thread(void *opaque)
 
 bql_lock();
 qemu_savevm_state_header(s->to_dst_file);
-qemu_savevm_state_setup(s->to_dst_file);
+ret = qemu_savevm_state_setup(s->to_dst_file, &local_err);
 bql_unlock();
 
+if (ret) {
+migrate_set_error(s, local_err);
+error_free(local_err);
+goto fail;
+}
+
 qemu_savevm_wait_unplug(s, MIGRATION_STATUS_SETUP,
MIGRATION_STATUS_ACTIVE);
 
diff --git a/migration/savevm.c b/migration/savevm.c
index 
51876f2ef674bb76c7e7ef96e1119a083883deac..bc168371a31acf85f29f2c284be181250db45df4
 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -1311,11 +1311,10 @@ int qemu_savevm_state_prepare(Error **errp)
 return 0;
 }
 
-void qemu_savevm_state_setup(QEMUFile *f)
+int qemu_savevm_state_setup(QEMUFile *f, Error **errp)
 {
 MigrationState *ms = migrate_get_current();
 SaveStateEntry *se;
-Error *local_err = NULL;
 int ret = 0;
 
 json_writer_int64(ms->vmdesc, "page_size", qemu_target_page_size());
@@ -1351,12 +1350,10 @@ void qemu_savevm_state_setup(QEMUFile *f)
 }
 
 if (ret) {
-return;
+return ret;
 }
 
-if (precopy_notify(PRECOPY_NOTIFY_SETUP, &local_err)) {
-error_report_err(local_err);
-}
+return precopy_notify(PRECOPY_NOTIFY_SETUP, errp);
 }
 
 int qemu_savevm_state_resume_prepare(MigrationState *s)
@@ -1725,7 +1722,10 @@ static int qemu_savevm_state(QEMUFile *f, Error **errp)
 ms->to_dst_file = f;
 
 qemu_savevm_state_header(f);
-qemu_savevm_state_setup(f);
+ret = qemu_savevm_state_setup(f, errp);
+if (ret) {
+return ret;
+}
 
 while (qemu_file_get_error(f) == 0) {
 if (qemu_savevm_state_iterate(f, false) > 0) {
-- 
2.43.2

[PATCH v2 20/21] vfio: Also trace event failures in vfio_save_complete_precopy()

vfio_save_complete_precopy() currently returns before doing the trace
event. Change that.

Signed-off-by: Cédric Le Goater 
---
 hw/vfio/migration.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
index 
228e8854594f3714b7c6f4fcfc5468d6b56337cb..f3b500dd1cab944722ccbc41575b15046c2420c9
 100644
--- a/hw/vfio/migration.c
+++ b/hw/vfio/migration.c
@@ -574,9 +574,6 @@ static int vfio_save_complete_precopy(QEMUFile *f, void 
*opaque)
 
 qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE);
 ret = qemu_file_get_error(f);
-if (ret) {
-return ret;
-}
 
 trace_vfio_save_complete_precopy(vbasedev->name, ret);
 
-- 
2.43.2

[PATCH v2 03/21] migration: Add documentation for SaveVMHandlers

The SaveVMHandlers structure is still in use for complex subsystems
and devices. Document the handlers since we are going to modify a few
later.

Signed-off-by: Cédric Le Goater 
---
 include/migration/register.h | 257 +++
 1 file changed, 231 insertions(+), 26 deletions(-)

diff --git a/include/migration/register.h b/include/migration/register.h
index 
2e6a7d766e62f64940086b7b511249c9ff21fa62..2cc71ec45f65bf2884c9e7a823d2968752f15c20
 100644
--- a/include/migration/register.h
+++ b/include/migration/register.h
@@ -16,30 +16,129 @@
 
 #include "hw/vmstate-if.h"
 
+/**
+ * struct SaveVMHandlers: handler structure to finely control
+ * migration of complex subsystems and devices, such as RAM, block and
+ * VFIO.
+ */
 typedef struct SaveVMHandlers {
-/* This runs inside the BQL.  */
+
+/* The following handlers run inside the BQL. */
+
+/**
+ * @save_state
+ *
+ * Saves state section on the source using the latest state format
+ * version.
+ *
+ * Legacy method. Should be deprecated when all users are ported
+ * to VMState.
+ *
+ * @f: QEMUFile where to send the data
+ * @opaque: data pointer passed to register_savevm_live()
+ */
 void (*save_state)(QEMUFile *f, void *opaque);
 
-/*
- * save_prepare is called early, even before migration starts, and can be
- * used to perform early checks.
+/**
+ * @save_prepare
+ *
+ * Called early, even before migration starts, and can be used to
+ * perform early checks.
+ *
+ * @opaque: data pointer passed to register_savevm_live()
+ * @errp: pointer to Error*, to store an error if it happens.
+ *
+ * Returns zero to indicate success and negative for error
  */
 int (*save_prepare)(void *opaque, Error **errp);
+
+/**
+ * @save_setup
+ *
+ * Initializes the data structures on the source and transmits
+ * first section containing information on the device
+ *
+ * @f: QEMUFile where to send the data
+ * @opaque: data pointer passed to register_savevm_live()
+ *
+ * Returns zero to indicate success and negative for error
+ */
 int (*save_setup)(QEMUFile *f, void *opaque);
+
+/**
+ * @save_cleanup
+ *
+ * Performs save related cleanup
+ *
+ * @opaque: data pointer passed to register_savevm_live()
+ *
+ * This handler returns no value.
+ */
 void (*save_cleanup)(void *opaque);
+
+/**
+ * @save_live_complete_postcopy
+ *
+ * Called at the end of postcopy for all postcopyable devices.
+ *
+ * @f: QEMUFile where to send the data
+ * @opaque: data pointer passed to register_savevm_live()
+ *
+ * Returns zero to indicate success and negative for error
+ */
 int (*save_live_complete_postcopy)(QEMUFile *f, void *opaque);
+
+/**
+ * @save_live_complete_precopy
+ *
+ * Transmits the last section for the device containing any
+ * remaining data.
+ *
+ * @f: QEMUFile where to send the data
+ * @opaque: data pointer passed to register_savevm_live()
+ *
+ * Returns zero to indicate success and negative for error
+ */
 int (*save_live_complete_precopy)(QEMUFile *f, void *opaque);
 
 /* This runs both outside and inside the BQL.  */
+
+/**
+ * @is_active
+ *
+ * Will skip a state section if not active
+ *
+ * @opaque: data pointer passed to register_savevm_live()
+ *
+ * Returns true if state section is active else false
+ */
 bool (*is_active)(void *opaque);
+
+/**
+ * @has_postcopy
+ *
+ * checks if a device supports postcopy
+ *
+ * @opaque: data pointer passed to register_savevm_live()
+ *
+ * Returns true for postcopy support else false
+ */
 bool (*has_postcopy)(void *opaque);
 
-/* is_active_iterate
- * If it is not NULL then qemu_savevm_state_iterate will skip iteration if
- * it returns false. For example, it is needed for only-postcopy-states,
- * which needs to be handled by qemu_savevm_state_setup and
- * qemu_savevm_state_pending, but do not need iterations until not in
- * postcopy stage.
+/**
+ * @is_active_iterate
+ *
+ * As #SaveVMHandlers.is_active(), will skip an inactive state
+ * section in qemu_savevm_state_iterate.
+ *
+ * For example, it is needed for only-postcopy-states, which needs
+ * to be handled by qemu_savevm_state_setup() and
+ * qemu_savevm_state_pending(), but do not need iterations until
+ * not in postcopy stage.
+ *
+ * @opaque: data pointer passed to register_savevm_live()
+ *
+ * Returns true if state section is active else false
  */
 bool (*is_active_iterate)(void *opaque);
 
@@ -48,44 +147,150 @@ typedef struct SaveVMHandlers {
  * use data that is local to the migration thread or protected
  * by o

[PATCH v2 10/21] migration: Modify ram_init_bitmaps() to report dirty tracking errors

The .save_setup() handler now has an Error** argument that we can use
to propagate errors reported by the .log_global_start() handler. Do
that for the RAM. The caller qemu_savevm_state_setup() will store the
error under the migration stream for later detection in the migration
sequence.

Signed-off-by: Cédric Le Goater 
---
 migration/ram.c | 18 +-
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/migration/ram.c b/migration/ram.c
index 
9fb1875aad73b2fa009199bdfa8960339df7287d..23f4df4779309bbbe164c56c1436b60d65749860
 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -2802,9 +2802,8 @@ static void 
migration_bitmap_clear_discarded_pages(RAMState *rs)
 }
 }
 
-static void ram_init_bitmaps(RAMState *rs)
+static bool ram_init_bitmaps(RAMState *rs, Error **errp)
 {
-Error *local_err = NULL;
 bool ret = true;
 
 qemu_mutex_lock_ramlist();
@@ -2813,10 +2812,8 @@ static void ram_init_bitmaps(RAMState *rs)
 ram_list_init_bitmaps();
 /* We don't use dirty log with background snapshots */
 if (!migrate_background_snapshot()) {
-ret = memory_global_dirty_log_start(GLOBAL_DIRTY_MIGRATION,
-&local_err);
+ret = memory_global_dirty_log_start(GLOBAL_DIRTY_MIGRATION, errp);
 if (!ret) {
-error_report_err(local_err);
 goto out_unlock;
 }
 migration_bitmap_sync_precopy(rs, false);
@@ -2826,7 +2823,7 @@ out_unlock:
 qemu_mutex_unlock_ramlist();
 
 if (!ret) {
-return;
+return false;
 }
 
 /*
@@ -2834,9 +2831,10 @@ out_unlock:
  * containing all 1s to exclude any discarded pages from migration.
  */
 migration_bitmap_clear_discarded_pages(rs);
+return true;
 }
 
-static int ram_init_all(RAMState **rsp)
+static int ram_init_all(RAMState **rsp, Error **errp)
 {
 if (ram_state_init(rsp)) {
 return -1;
@@ -2847,7 +2845,9 @@ static int ram_init_all(RAMState **rsp)
 return -1;
 }
 
-ram_init_bitmaps(*rsp);
+if (!ram_init_bitmaps(*rsp, errp)) {
+return -1;
+}
 
 return 0;
 }
@@ -2961,7 +2961,7 @@ static int ram_save_setup(QEMUFile *f, void *opaque, 
Error **errp)
 
 /* migration has already setup the bitmap, reuse it. */
 if (!migration_in_colo_state()) {
-if (ram_init_all(rsp) != 0) {
+if (ram_init_all(rsp, errp) != 0) {
 compress_threads_save_cleanup();
 return -1;
 }
-- 
2.43.2

[PATCH v2 00/21] migration: Improve error reporting

Hello,

The motivation behind these changes is to improve error reporting to
the upper management layer (libvirt) with a more detailed error, this
to let it decide, depending on the reported error, whether to try
migration again later. It would be useful in cases where migration
fails due to lack of HW resources on the host. For instance, some
adapters can only initiate a limited number of simultaneous dirty
tracking requests and this imposes a limit on the number of VMs
that can be migrated simultaneously.

We are not quite ready for such a mechanism but what we can do first is
to clean up the error reporting in the early save_setup sequence. This
is what the following changes propose, by adding an Error** argument to
various handlers and propagating it to the core migration subsystem.
 
Thanks,

C.

Changes in v2:

- Removed v1 patches addressing the return-path thread termination as
  they are now superseded by :  
  https://lore.kernel.org/qemu-devel/20240226203122.22894-1-faro...@suse.de/
- Documentation updates of handlers
- Removed call to PRECOPY_NOTIFY_SETUP notifiers in case of errors
- Modified routines taking an Error** argument to return a bool when
  possible and made adjustments in callers.
- new MEMORY_LISTENER_CALL_LOG_GLOBAL macro for .log_global*()
  handlers
- Handled SETUP state when migration terminates
- Modified memory_get_xlat_addr() to take an Error** argument
- Various refinements on error handling

Cédric Le Goater (21):
  migration: Report error when shutdown fails
  migration: Remove SaveStateHandler and LoadStateHandler typedefs
  migration: Add documentation for SaveVMHandlers
  migration: Do not call PRECOPY_NOTIFY_SETUP notifiers in case of error
  migration: Add Error** argument to qemu_savevm_state_setup()
  migration: Add Error** argument to .save_setup() handler
  migration: Add Error** argument to .load_setup() handler
  memory: Add Error** argument to .log_global*() handlers
  memory: Add Error** argument to the global_dirty_log routines
  migration: Modify ram_init_bitmaps() to report dirty tracking errors
  migration: Fix migration termination
  vfio: Add Error** argument to .set_dirty_page_tracking() handler
  vfio: Add Error** argument to vfio_devices_dma_logging_start()
  vfio: Add Error** argument to vfio_devices_dma_logging_stop()
  vfio: Use new Error** argument in vfio_save_setup()
  vfio: Add Error** argument to .vfio_save_config() handler
  vfio: Reverse test on vfio_get_dirty_bitmap()
  memory: Add Error** argument to memory_get_xlat_addr()
  vfio: Add Error** argument to .get_dirty_bitmap() handler
  vfio: Also trace event failures in vfio_save_complete_precopy()
  vfio: Extend vfio_set_migration_error() with Error* argument

 include/exec/memory.h |  40 +++-
 include/hw/vfio/vfio-common.h |  29 ++-
 include/hw/vfio/vfio-container-base.h |  35 +++-
 include/migration/register.h  | 267 +++---
 include/qemu/typedefs.h   |   2 -
 migration/savevm.h|   2 +-
 hw/i386/xen/xen-hvm.c |  10 +-
 hw/ppc/spapr.c|   2 +-
 hw/s390x/s390-stattrib.c  |   2 +-
 hw/vfio/common.c  | 160 +--
 hw/vfio/container-base.c  |   9 +-
 hw/vfio/container.c   |  19 +-
 hw/vfio/migration.c   |  89 ++---
 hw/vfio/pci.c |   5 +-
 hw/virtio/vhost-vdpa.c|   5 +-
 hw/virtio/vhost.c |   6 +-
 migration/block-dirty-bitmap.c|   2 +-
 migration/block.c |   2 +-
 migration/dirtyrate.c |  21 +-
 migration/migration.c |  24 ++-
 migration/qemu-file.c |   5 +-
 migration/ram.c   |  48 -
 migration/savevm.c|  28 +--
 system/memory.c   |  95 +++--
 system/physmem.c  |   5 +-
 25 files changed, 699 insertions(+), 213 deletions(-)

-- 
2.43.2

Re: [PATCH v5 3/4] hw: Set virtio-iommu aw-bits default value on pc_q35 and arm virt


Hello Eric,

On 2/15/24 09:42, Eric Auger wrote:

Currently the default input range can extend to 64 bits. On x86,
when the virtio-iommu protects vfio devices, the physical iommu
may support only 39 bits. Let's set the default to 39, as done
for the intel-iommu. On ARM we set 48b as a default (matching
SMMUv3 SMMU_IDR5.VAX == 0).

We use hw_compat_8_2 to handle the compatibility for machines
before 9.0 which used to have a virtio-iommu default input range
of 64 bits.

Of course if aw-bits is set from the command line, the default
is overridden.

Signed-off-by: Eric Auger 
Reviewed-by: Zhenzhong Duan 
Tested-by: Yanghang Liu


We need a property fixup for pseries also:

$ build/ppc64-softmmu/qemu-system-ppc64 -M pseries  -device 
virtio-iommu-pci,addr=04.0
qemu-system-ppc64: -device virtio-iommu-pci,addr=04.0: aw-bits must be within 
[32,64]


Thanks,

C.




---

v3 -> v4:
- update the qos test to relax the check on the max input IOVA

v2 -> v3:
- collected Zhenzhong's R-b
- use &error_abort instead of NULL error handle
   on object_property_get_uint() call (Cédric)
- use VTD_HOST_AW_39BIT (Cédric)

v1 -> v2:
- set aw-bits to 48b on ARM
- use hw_compat_8_2 to handle the compat for older machines
   which used 64b as a default
---
  hw/arm/virt.c   | 6 ++
  hw/core/machine.c   | 5 -
  hw/i386/pc.c| 6 ++
  hw/virtio/virtio-iommu.c| 2 +-
  tests/qtest/virtio-iommu-test.c | 2 +-
  5 files changed, 18 insertions(+), 3 deletions(-)

diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index 368c2a415a..0994f2a560 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -2716,10 +2716,16 @@ static void 
virt_machine_device_pre_plug_cb(HotplugHandler *hotplug_dev,
  } else if (object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_MD_PCI)) {
  virtio_md_pci_pre_plug(VIRTIO_MD_PCI(dev), MACHINE(hotplug_dev), 
errp);
  } else if (object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_IOMMU_PCI)) {
+uint8_t aw_bits = object_property_get_uint(OBJECT(dev),
+   "aw-bits", &error_abort);
  hwaddr db_start = 0, db_end = 0;
  QList *reserved_regions;
  char *resv_prop_str;
  
+if (!aw_bits) {

+qdev_prop_set_uint8(dev, "aw-bits", 48);
+}
+
  if (vms->iommu != VIRT_IOMMU_NONE) {
  error_setg(errp, "virt machine does not support multiple IOMMUs");
  return;
diff --git a/hw/core/machine.c b/hw/core/machine.c
index fb5afdcae4..70ac96954c 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -30,9 +30,12 @@
  #include "exec/confidential-guest-support.h"
  #include "hw/virtio/virtio-pci.h"
  #include "hw/virtio/virtio-net.h"
+#include "hw/virtio/virtio-iommu.h"
  #include "audio/audio.h"
  
-GlobalProperty hw_compat_8_2[] = {};

+GlobalProperty hw_compat_8_2[] = {
+{ TYPE_VIRTIO_IOMMU_PCI, "aw-bits", "64" },
+};
  const size_t hw_compat_8_2_len = G_N_ELEMENTS(hw_compat_8_2);
  
  GlobalProperty hw_compat_8_1[] = {

diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index 196827531a..ee2d379c90 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -1456,6 +1456,8 @@ static void pc_machine_device_pre_plug_cb(HotplugHandler 
*hotplug_dev,
  } else if (object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_MD_PCI)) {
  virtio_md_pci_pre_plug(VIRTIO_MD_PCI(dev), MACHINE(hotplug_dev), 
errp);
  } else if (object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_IOMMU_PCI)) {
+uint8_t aw_bits = object_property_get_uint(OBJECT(dev),
+   "aw-bits", &error_abort);
  /* Declare the APIC range as the reserved MSI region */
  char *resv_prop_str = g_strdup_printf("0xfee00000:0xfeefffff:%d",
VIRTIO_IOMMU_RESV_MEM_T_MSI);
@@ -1464,6 +1466,10 @@ static void pc_machine_device_pre_plug_cb(HotplugHandler 
*hotplug_dev,
  qlist_append_str(reserved_regions, resv_prop_str);
  qdev_prop_set_array(dev, "reserved-regions", reserved_regions);
  
+if (!aw_bits) {

+qdev_prop_set_uint8(dev, "aw-bits", VTD_HOST_AW_39BIT);
+}
+
  g_free(resv_prop_str);
  }
  
diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c

index 8b541de850..2ec5ef3cd1 100644
--- a/hw/virtio/virtio-iommu.c
+++ b/hw/virtio/virtio-iommu.c
@@ -1526,7 +1526,7 @@ static Property virtio_iommu_properties[] = {
  DEFINE_PROP_LINK("primary-bus", VirtIOIOMMU, primary_bus,
   TYPE_PCI_BUS, PCIBus *),
  DEFINE_PROP_BOOL("boot-bypass", VirtIOIOMMU, boot_bypass, true),
-DEFINE_PROP_UINT8("aw-bits", VirtIOIOMMU, aw_bits, 64),
+DEFINE_PROP_UINT8("aw-bits", VirtIOIOMMU, aw_bits, 0),
  DEFINE_PROP_END_OF_LIST(),
  };
  
diff --git a/tests/qtest/virtio-iommu-test.c b/tests/qtest/virtio-iommu-test.c

index 068e7a9e6c..0f36381acb 100644
--- a/tests/qtest/virtio-iommu-test.c
+++ b/tests/qtest/virtio-iommu-test.c
@@ -34,7 +34,7 @@

[PATCH v2 09/21] memory: Add Error** argument to the global_dirty_log routines

Now that the log_global*() handlers take an Error** parameter and
return a bool, do the same for memory_global_dirty_log_start() and
memory_global_dirty_log_stop(). The error is reported in the callers
for now and it will be propagated in the call stack in the next
changes.

Note a functional change in ram_init_bitmaps(): if the dirty
pages logger fails to start, there is no need to synchronize the dirty
pages bitmaps. colo_incoming_start_dirty_log() could be modified in a
similar way.

Cc: Stefano Stabellini 
Cc: Anthony Perard 
Cc: Paul Durrant 
Cc: Michael S. Tsirkin 
Cc: Paolo Bonzini 
Cc: David Hildenbrand 
Cc: Hyman Huang 
Signed-off-by: Cédric Le Goater 
---
 include/exec/memory.h | 10 --
 hw/i386/xen/xen-hvm.c |  4 ++--
 migration/dirtyrate.c | 21 +
 migration/ram.c   | 34 ++
 system/memory.c   | 30 --
 5 files changed, 69 insertions(+), 30 deletions(-)

diff --git a/include/exec/memory.h b/include/exec/memory.h
index 
4bc146c5ebdd377cd14a4e462f32cc945db5a0a8..8b019465ab13ce85c03075c80865a0865ea1feed
 100644
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -2576,15 +2576,21 @@ void memory_listener_unregister(MemoryListener 
*listener);
  * memory_global_dirty_log_start: begin dirty logging for all regions
  *
  * @flags: purpose of starting dirty log, migration or dirty rate
+ * @errp: pointer to Error*, to store an error if it happens.
+ *
+ * Return: true on success, else false setting @errp with error.
  */
-void memory_global_dirty_log_start(unsigned int flags);
+bool memory_global_dirty_log_start(unsigned int flags, Error **errp);
 
 /**
  * memory_global_dirty_log_stop: end dirty logging for all regions
  *
  * @flags: purpose of stopping dirty log, migration or dirty rate
+ * @errp: pointer to Error*, to store an error if it happens.
+ *
+ * Return: true on success, else false setting @errp with error.
  */
-void memory_global_dirty_log_stop(unsigned int flags);
+bool memory_global_dirty_log_stop(unsigned int flags, Error **errp);
 
 void mtree_info(bool flatview, bool dispatch_tree, bool owner, bool disabled);
 
diff --git a/hw/i386/xen/xen-hvm.c b/hw/i386/xen/xen-hvm.c
index 
925a207b494b4eed52d5f360b554f18ac8a9806d..286269b47572d90e57df5ff44835bb5f8e16c7ad
 100644
--- a/hw/i386/xen/xen-hvm.c
+++ b/hw/i386/xen/xen-hvm.c
@@ -655,9 +655,9 @@ void xen_hvm_modified_memory(ram_addr_t start, ram_addr_t 
length)
 void qmp_xen_set_global_dirty_log(bool enable, Error **errp)
 {
 if (enable) {
-memory_global_dirty_log_start(GLOBAL_DIRTY_MIGRATION);
+memory_global_dirty_log_start(GLOBAL_DIRTY_MIGRATION, errp);
 } else {
-memory_global_dirty_log_stop(GLOBAL_DIRTY_MIGRATION);
+memory_global_dirty_log_stop(GLOBAL_DIRTY_MIGRATION, errp);
 }
 }
 
diff --git a/migration/dirtyrate.c b/migration/dirtyrate.c
index 
1d2e85746fb7b10eb7f149976970f9a92125af8a..34f6d803ff5f4e6ccf2e06aaaed65a336c4be469
 100644
--- a/migration/dirtyrate.c
+++ b/migration/dirtyrate.c
@@ -90,11 +90,17 @@ static int64_t do_calculate_dirtyrate(DirtyPageRecord 
dirty_pages,
 
 void global_dirty_log_change(unsigned int flag, bool start)
 {
+Error *local_err = NULL;
+bool ret;
+
 bql_lock();
 if (start) {
-memory_global_dirty_log_start(flag);
+ret = memory_global_dirty_log_start(flag, &local_err);
 } else {
-memory_global_dirty_log_stop(flag);
+ret = memory_global_dirty_log_stop(flag, &local_err);
+}
+if (!ret) {
+error_report_err(local_err);
 }
 bql_unlock();
 }
@@ -106,10 +112,14 @@ void global_dirty_log_change(unsigned int flag, bool 
start)
  */
 static void global_dirty_log_sync(unsigned int flag, bool one_shot)
 {
+Error *local_err = NULL;
+
 bql_lock();
 memory_global_dirty_log_sync(false);
 if (one_shot) {
-memory_global_dirty_log_stop(flag);
+if (!memory_global_dirty_log_stop(flag, &local_err)) {
+error_report_err(local_err);
+}
 }
 bql_unlock();
 }
@@ -608,9 +618,12 @@ static void calculate_dirtyrate_dirty_bitmap(struct 
DirtyRateConfig config)
 {
 int64_t start_time;
 DirtyPageRecord dirty_pages;
+Error *local_err = NULL;
 
 bql_lock();
-memory_global_dirty_log_start(GLOBAL_DIRTY_DIRTY_RATE);
+if (!memory_global_dirty_log_start(GLOBAL_DIRTY_DIRTY_RATE, &local_err)) {
+error_report_err(local_err);
+}
 
 /*
  * 1'round of log sync may return all 1 bits with
diff --git a/migration/ram.c b/migration/ram.c
index 
d648134133fc22cd91c7b2064198a90287ee733d..9fb1875aad73b2fa009199bdfa8960339df7287d
 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -2391,6 +2391,7 @@ static void ram_save_cleanup(void *opaque)
 {
 RAMState **rsp = opaque;
 RAMBlock *block;
+Error *local_err = NULL;
 
 /* We don't use dirty log with background snapshots */
 if (!migrate_background_snapshot()) {
@@ -2403,7 +2404,10 @@ static

[PATCH v2 07/21] migration: Add Error** argument to .load_setup() handler

This will be useful to report errors at a higher level, mostly in VFIO
today.

Reviewed-by: Philippe Mathieu-Daudé 
Signed-off-by: Cédric Le Goater 
---
 include/migration/register.h |  3 ++-
 hw/vfio/migration.c  |  2 +-
 migration/ram.c  |  3 ++-
 migration/savevm.c   | 10 ++
 4 files changed, 11 insertions(+), 7 deletions(-)

diff --git a/include/migration/register.h b/include/migration/register.h
index 
96eae9dba2970552c379c732393e3ab6ef578a58..2cfc167f717de8e08c1ca8accdc3011c03eb1554
 100644
--- a/include/migration/register.h
+++ b/include/migration/register.h
@@ -231,10 +231,11 @@ typedef struct SaveVMHandlers {
  *
  * @f: QEMUFile where to receive the data
  * @opaque: data pointer passed to register_savevm_live()
+ * @errp: pointer to Error*, to store an error if it happens.
  *
  * Returns zero to indicate success and negative for error
  */
-int (*load_setup)(QEMUFile *f, void *opaque);
+int (*load_setup)(QEMUFile *f, void *opaque, Error **errp);
 
 /**
  * @load_cleanup
diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
index 
8bcb4bc73cd5ba5338e3ffa4d907d0e6bfbb9485..2dfbe671f6f45aa530c7341177bb532d8292cecd
 100644
--- a/hw/vfio/migration.c
+++ b/hw/vfio/migration.c
@@ -580,7 +580,7 @@ static void vfio_save_state(QEMUFile *f, void *opaque)
 }
 }
 
-static int vfio_load_setup(QEMUFile *f, void *opaque)
+static int vfio_load_setup(QEMUFile *f, void *opaque, Error **errp)
 {
 VFIODevice *vbasedev = opaque;
 
diff --git a/migration/ram.c b/migration/ram.c
index 
745482899e18c86b73261b683c1bec04039a76d2..d648134133fc22cd91c7b2064198a90287ee733d
 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -3498,8 +3498,9 @@ void colo_release_ram_cache(void)
  *
  * @f: QEMUFile where to receive the data
  * @opaque: RAMState pointer
+ * @errp: pointer to Error*, to store an error if it happens.
  */
-static int ram_load_setup(QEMUFile *f, void *opaque)
+static int ram_load_setup(QEMUFile *f, void *opaque, Error **errp)
 {
 xbzrle_load_setup();
 ramblock_recv_map_init();
diff --git a/migration/savevm.c b/migration/savevm.c
index 
b5b3b51bad94dc4c04ae22cd687ba111299339aa..a4ef41d3ff5b471a1cd4166c2dc5813e44ea3a5a
 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -2741,7 +2741,7 @@ static void 
qemu_loadvm_state_switchover_ack_needed(MigrationIncomingState *mis)
 trace_loadvm_state_switchover_ack_needed(mis->switchover_ack_pending_num);
 }
 
-static int qemu_loadvm_state_setup(QEMUFile *f)
+static int qemu_loadvm_state_setup(QEMUFile *f, Error **errp)
 {
 SaveStateEntry *se;
 int ret;
@@ -2757,10 +2757,11 @@ static int qemu_loadvm_state_setup(QEMUFile *f)
 }
 }
 
-ret = se->ops->load_setup(f, se->opaque);
+ret = se->ops->load_setup(f, se->opaque, errp);
 if (ret < 0) {
+error_prepend(errp, "Load state of device %s failed: ",
+  se->idstr);
 qemu_file_set_error(f, ret);
-error_report("Load state of device %s failed", se->idstr);
 return ret;
 }
 }
@@ -2941,7 +2942,8 @@ int qemu_loadvm_state(QEMUFile *f)
 return ret;
 }
 
-if (qemu_loadvm_state_setup(f) != 0) {
+if (qemu_loadvm_state_setup(f, &local_err) != 0) {
+error_report_err(local_err);
 return -EINVAL;
 }
 
-- 
2.43.2

[PATCH v2 01/21] migration: Report error when shutdown fails

This will help detect issues regarding I/O channels usage.

Reviewed-by: Philippe Mathieu-Daudé 
Reviewed-by: Peter Xu 
Signed-off-by: Cédric Le Goater 
---
 migration/qemu-file.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/migration/qemu-file.c b/migration/qemu-file.c
index 
94231ff2955c80b3d0fab11a40510d34c334a826..b69e0c62e2fcf21d346a3687df7eebee23791fdc
 100644
--- a/migration/qemu-file.c
+++ b/migration/qemu-file.c
@@ -62,6 +62,8 @@ struct QEMUFile {
  */
 int qemu_file_shutdown(QEMUFile *f)
 {
+Error *err = NULL;
+
 /*
  * We must set qemufile error before the real shutdown(), otherwise
  * there can be a race window where we thought IO all went though
@@ -90,7 +92,8 @@ int qemu_file_shutdown(QEMUFile *f)
 return -ENOSYS;
 }
 
-if (qio_channel_shutdown(f->ioc, QIO_CHANNEL_SHUTDOWN_BOTH, NULL) < 0) {
+if (qio_channel_shutdown(f->ioc, QIO_CHANNEL_SHUTDOWN_BOTH, &err) < 0) {
+error_report_err(err);
 return -EIO;
 }
 
-- 
2.43.2

[PATCH v2 18/21] memory: Add Error** argument to memory_get_xlat_addr()

Let the callers do the reporting. This will be useful in
vfio_iommu_map_dirty_notify().

Cc: Michael S. Tsirkin 
Cc: Paolo Bonzini 
Cc: David Hildenbrand 
Signed-off-by: Cédric Le Goater 
---
 include/exec/memory.h  | 15 ++-
 hw/vfio/common.c   | 13 +
 hw/virtio/vhost-vdpa.c |  5 -
 system/memory.c| 10 +-
 4 files changed, 32 insertions(+), 11 deletions(-)

diff --git a/include/exec/memory.h b/include/exec/memory.h
index 
8b019465ab13ce85c03075c80865a0865ea1feed..baca989023415b69be3b4b4e7a622f983182314b
 100644
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -771,9 +771,22 @@ void 
ram_discard_manager_register_listener(RamDiscardManager *rdm,
 void ram_discard_manager_unregister_listener(RamDiscardManager *rdm,
  RamDiscardListener *rdl);
 
+/**
+ * memory_get_xlat_addr: Extract addresses from a TLB entry
+ *
+ * @iotlb: pointer to an #IOMMUTLBEntry
+ * @vaddr: virtual address
+ * @ram_addr: RAM address
+ * @read_only: indicates if writes are allowed
+ * @mr_has_discard_manager: indicates memory is controlled by a
+ *  RamDiscardManager
+ * @errp: pointer to Error*, to store an error if it happens.
+ *
+ * Return: true on success, else false setting @errp with error.
+ */
 bool memory_get_xlat_addr(IOMMUTLBEntry *iotlb, void **vaddr,
   ram_addr_t *ram_addr, bool *read_only,
-  bool *mr_has_discard_manager);
+  bool *mr_has_discard_manager, Error **errp);
 
 typedef struct CoalescedMemoryRange CoalescedMemoryRange;
 typedef struct MemoryRegionIoeventfd MemoryRegionIoeventfd;
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index 
e51757e7d747c60b67deb966bb29b946a511b328..43f37447e3692ffa97788b02f83b81b44aaf301a
 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -262,12 +262,13 @@ static bool 
vfio_listener_skipped_section(MemoryRegionSection *section)
 
 /* Called with rcu_read_lock held.  */
 static bool vfio_get_xlat_addr(IOMMUTLBEntry *iotlb, void **vaddr,
-   ram_addr_t *ram_addr, bool *read_only)
+   ram_addr_t *ram_addr, bool *read_only,
+   Error **errp)
 {
 bool ret, mr_has_discard_manager;
 
 ret = memory_get_xlat_addr(iotlb, vaddr, ram_addr, read_only,
-   &mr_has_discard_manager);
+   &mr_has_discard_manager, errp);
 if (ret && mr_has_discard_manager) {
 /*
  * Malicious VMs might trigger discarding of IOMMU-mapped memory. The
@@ -297,6 +298,7 @@ static void vfio_iommu_map_notify(IOMMUNotifier *n, 
IOMMUTLBEntry *iotlb)
 hwaddr iova = iotlb->iova + giommu->iommu_offset;
 void *vaddr;
 int ret;
+Error *local_err = NULL;
 
 trace_vfio_iommu_map_notify(iotlb->perm == IOMMU_NONE ? "UNMAP" : "MAP",
 iova, iova + iotlb->addr_mask);
@@ -313,7 +315,8 @@ static void vfio_iommu_map_notify(IOMMUNotifier *n, 
IOMMUTLBEntry *iotlb)
 if ((iotlb->perm & IOMMU_RW) != IOMMU_NONE) {
 bool read_only;
 
-if (!vfio_get_xlat_addr(iotlb, &vaddr, NULL, &read_only)) {
+if (!vfio_get_xlat_addr(iotlb, &vaddr, NULL, &read_only, &local_err)) {
+error_report_err(local_err);
 goto out;
 }
 /*
@@ -1226,6 +1229,7 @@ static void vfio_iommu_map_dirty_notify(IOMMUNotifier *n, 
IOMMUTLBEntry *iotlb)
 VFIOContainerBase *bcontainer = giommu->bcontainer;
 hwaddr iova = iotlb->iova + giommu->iommu_offset;
 ram_addr_t translated_addr;
+Error *local_err = NULL;
 int ret = -EINVAL;
 
 trace_vfio_iommu_map_dirty_notify(iova, iova + iotlb->addr_mask);
@@ -1237,7 +1241,8 @@ static void vfio_iommu_map_dirty_notify(IOMMUNotifier *n, 
IOMMUTLBEntry *iotlb)
 }
 
 rcu_read_lock();
-if (!vfio_get_xlat_addr(iotlb, NULL, &translated_addr, NULL)) {
+if (!vfio_get_xlat_addr(iotlb, NULL, &translated_addr, NULL, &local_err)) {
+error_report_err(local_err);
 goto out_lock;
 }
 
diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
index 
ddae494ca8e8154ce03b88bc781fe9f1e639aceb..a6f06266cfc798b20b98001fa97ce771722175ec
 100644
--- a/hw/virtio/vhost-vdpa.c
+++ b/hw/virtio/vhost-vdpa.c
@@ -203,6 +203,7 @@ static void vhost_vdpa_iommu_map_notify(IOMMUNotifier *n, 
IOMMUTLBEntry *iotlb)
 void *vaddr;
 int ret;
 Int128 llend;
+Error *local_err = NULL;
 
 if (iotlb->target_as != &address_space_memory) {
 error_report("Wrong target AS \"%s\", only system memory is allowed",
@@ -222,7 +223,9 @@ static void vhost_vdpa_iommu_map_notify(IOMMUNotifier *n, 
IOMMUTLBEntry *iotlb)
 if ((iotlb->perm & IOMMU_RW) != IOMMU_NONE) {
 bool read_only;
 
-if (!memory_get_xlat_addr(iotlb, , NULL, _only, NULL)) {
+if (!memory_get_xlat_addr(iotlb, , NUL

[PATCH v2 15/21] vfio: Use new Error** argument in vfio_save_setup()

Add an Error** argument to vfio_migration_set_state() and adjust
callers, including vfio_save_setup(). The error will be propagated up
to qemu_savevm_state_setup() where the save_setup() handler is
executed.

Modify vfio_vmstate_change_prepare() and vfio_vmstate_change() to
store a reported error under the migration stream if a migration is in
progress.

Signed-off-by: Cédric Le Goater 
---
 hw/vfio/migration.c | 64 ++---
 1 file changed, 43 insertions(+), 21 deletions(-)

diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
index 
2dfbe671f6f45aa530c7341177bb532d8292cecd..8bdc68c66516710c52443135284262580825e0b8
 100644
--- a/hw/vfio/migration.c
+++ b/hw/vfio/migration.c
@@ -84,7 +84,8 @@ static const char *mig_state_to_str(enum 
vfio_device_mig_state state)
 
 static int vfio_migration_set_state(VFIODevice *vbasedev,
 enum vfio_device_mig_state new_state,
-enum vfio_device_mig_state recover_state)
+enum vfio_device_mig_state recover_state,
+Error **errp)
 {
 VFIOMigration *migration = vbasedev->migration;
 uint64_t buf[DIV_ROUND_UP(sizeof(struct vfio_device_feature) +
@@ -104,15 +105,15 @@ static int vfio_migration_set_state(VFIODevice *vbasedev,
 ret = -errno;
 
 if (recover_state == VFIO_DEVICE_STATE_ERROR) {
-error_report("%s: Failed setting device state to %s, err: %s. "
- "Recover state is ERROR. Resetting device",
- vbasedev->name, mig_state_to_str(new_state),
- strerror(errno));
+error_setg(errp, "%s: Failed setting device state to %s, err: %s. "
+   "Recover state is ERROR. Resetting device",
+   vbasedev->name, mig_state_to_str(new_state),
+   strerror(errno));
 
 goto reset_device;
 }
 
-error_report(
+error_setg(errp,
 "%s: Failed setting device state to %s, err: %s. Setting device in 
recover state %s",
  vbasedev->name, mig_state_to_str(new_state),
  strerror(errno), mig_state_to_str(recover_state));
@@ -120,7 +121,7 @@ static int vfio_migration_set_state(VFIODevice *vbasedev,
 mig_state->device_state = recover_state;
 if (ioctl(vbasedev->fd, VFIO_DEVICE_FEATURE, feature)) {
 ret = -errno;
-error_report(
+error_setg(errp,
 "%s: Failed setting device in recover state, err: %s. 
Resetting device",
  vbasedev->name, strerror(errno));
 
@@ -139,7 +140,7 @@ static int vfio_migration_set_state(VFIODevice *vbasedev,
  * This can happen if the device is asynchronously reset and
  * terminates a data transfer.
  */
-error_report("%s: data_fd out of sync", vbasedev->name);
+error_setg(errp, "%s: data_fd out of sync", vbasedev->name);
 close(mig_state->data_fd);
 
 return -EBADF;
@@ -170,10 +171,11 @@ reset_device:
  */
 static int
 vfio_migration_set_state_or_reset(VFIODevice *vbasedev,
-  enum vfio_device_mig_state new_state)
+  enum vfio_device_mig_state new_state,
+  Error **errp)
 {
 return vfio_migration_set_state(vbasedev, new_state,
-VFIO_DEVICE_STATE_ERROR);
+VFIO_DEVICE_STATE_ERROR, errp);
 }
 
 static int vfio_load_buffer(QEMUFile *f, VFIODevice *vbasedev,
@@ -391,8 +393,8 @@ static int vfio_save_setup(QEMUFile *f, void *opaque, Error 
**errp)
   stop_copy_size);
 migration->data_buffer = g_try_malloc0(migration->data_buffer_size);
 if (!migration->data_buffer) {
-error_report("%s: Failed to allocate migration data buffer",
- vbasedev->name);
+error_setg(errp, "%s: Failed to allocate migration data buffer",
+   vbasedev->name);
 return -ENOMEM;
 }
 
@@ -402,7 +404,7 @@ static int vfio_save_setup(QEMUFile *f, void *opaque, Error 
**errp)
 switch (migration->device_state) {
 case VFIO_DEVICE_STATE_RUNNING:
 ret = vfio_migration_set_state(vbasedev, 
VFIO_DEVICE_STATE_PRE_COPY,
-   VFIO_DEVICE_STATE_RUNNING);
+   VFIO_DEVICE_STATE_RUNNING, errp);
 if (ret) {
 return ret;
 }
@@ -429,13 +431,20 @@ static void vfio_save_cleanup(void *opaque)
 {
 VFIODevice *vbasedev = opaque;
 VFIOMigration *migration

[PATCH v2 17/21] vfio: Reverse test on vfio_get_dirty_bitmap()

It will simplify the changes coming after.

Signed-off-by: Cédric Le Goater 
---
 hw/vfio/common.c | 22 +-
 1 file changed, 13 insertions(+), 9 deletions(-)

diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index 
706e915a3ba5f8520deb3753d9bb450a986f207a..e51757e7d747c60b67deb966bb29b946a511b328
 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -1237,16 +1237,20 @@ static void vfio_iommu_map_dirty_notify(IOMMUNotifier 
*n, IOMMUTLBEntry *iotlb)
 }
 
 rcu_read_lock();
-if (vfio_get_xlat_addr(iotlb, NULL, _addr, NULL)) {
-ret = vfio_get_dirty_bitmap(bcontainer, iova, iotlb->addr_mask + 1,
-translated_addr);
-if (ret) {
-error_report("vfio_iommu_map_dirty_notify(%p, 0x%"HWADDR_PRIx", "
- "0x%"HWADDR_PRIx") = %d (%s)",
- bcontainer, iova, iotlb->addr_mask + 1, ret,
- strerror(-ret));
-}
+if (!vfio_get_xlat_addr(iotlb, NULL, _addr, NULL)) {
+goto out_lock;
 }
+
+ret = vfio_get_dirty_bitmap(bcontainer, iova, iotlb->addr_mask + 1,
+translated_addr);
+if (ret) {
+error_report("vfio_iommu_map_dirty_notify(%p, 0x%"HWADDR_PRIx", "
+ "0x%"HWADDR_PRIx") = %d (%s)",
+ bcontainer, iova, iotlb->addr_mask + 1, ret,
+ strerror(-ret));
+}
+
+out_lock:
 rcu_read_unlock();
 
 out:
-- 
2.43.2

[PATCH v2 14/21] vfio: Add Error** argument to vfio_devices_dma_logging_stop()

This improves error reporting in the log_global_stop() VFIO handler.

Reviewed-by: Philippe Mathieu-Daudé 
Signed-off-by: Cédric Le Goater 
---
 hw/vfio/common.c | 19 ++-
 1 file changed, 14 insertions(+), 5 deletions(-)

diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index 
a2d26cd08cb132d2b27c388bd75db3d9b8128407..706e915a3ba5f8520deb3753d9bb450a986f207a
 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -938,12 +938,14 @@ static void vfio_dirty_tracking_init(VFIOContainerBase 
*bcontainer,
 memory_listener_unregister();
 }
 
-static void vfio_devices_dma_logging_stop(VFIOContainerBase *bcontainer)
+static int vfio_devices_dma_logging_stop(VFIOContainerBase *bcontainer,
+  Error **errp)
 {
 uint64_t buf[DIV_ROUND_UP(sizeof(struct vfio_device_feature),
   sizeof(uint64_t))] = {};
 struct vfio_device_feature *feature = (struct vfio_device_feature *)buf;
 VFIODevice *vbasedev;
+int ret = 0;
 
 feature->argsz = sizeof(buf);
 feature->flags = VFIO_DEVICE_FEATURE_SET |
@@ -955,11 +957,17 @@ static void 
vfio_devices_dma_logging_stop(VFIOContainerBase *bcontainer)
 }
 
 if (ioctl(vbasedev->fd, VFIO_DEVICE_FEATURE, feature)) {
-warn_report("%s: Failed to stop DMA logging, err %d (%s)",
-vbasedev->name, -errno, strerror(errno));
+/* Keep first error */
+if (!ret) {
+ret = -errno;
+error_setg(errp, "%s: Failed to stop DMA logging, err %d (%s)",
+   vbasedev->name, -errno, strerror(errno));
+}
 }
 vbasedev->dirty_tracking = false;
 }
+
+return ret;
 }
 
 static struct vfio_device_feature *
@@ -1068,7 +1076,8 @@ static int 
vfio_devices_dma_logging_start(VFIOContainerBase *bcontainer,
 
 out:
 if (ret) {
-vfio_devices_dma_logging_stop(bcontainer);
+/* Ignore the potential errors when doing rollback */
+vfio_devices_dma_logging_stop(bcontainer, NULL);
 }
 
 vfio_device_feature_dma_logging_start_destroy(feature);
@@ -1103,7 +1112,7 @@ static bool vfio_listener_log_global_stop(MemoryListener 
*listener,
 int ret = 0;
 
 if (vfio_devices_all_device_dirty_tracking(bcontainer)) {
-vfio_devices_dma_logging_stop(bcontainer);
+ret = vfio_devices_dma_logging_stop(bcontainer, errp);
 } else {
 ret = vfio_container_set_dirty_page_tracking(bcontainer, false, errp);
 }
-- 
2.43.2

[PATCH v2 02/21] migration: Remove SaveStateHandler and LoadStateHandler typedefs

They are only used once.

Signed-off-by: Cédric Le Goater 
---
 include/migration/register.h | 4 ++--
 include/qemu/typedefs.h  | 2 --
 2 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/include/migration/register.h b/include/migration/register.h
index 
9ab1f79512c605f0c88a45b560c57486fa054441..2e6a7d766e62f64940086b7b511249c9ff21fa62
 100644
--- a/include/migration/register.h
+++ b/include/migration/register.h
@@ -18,7 +18,7 @@
 
 typedef struct SaveVMHandlers {
 /* This runs inside the BQL.  */
-SaveStateHandler *save_state;
+void (*save_state)(QEMUFile *f, void *opaque);
 
 /*
  * save_prepare is called early, even before migration starts, and can be
@@ -71,7 +71,7 @@ typedef struct SaveVMHandlers {
 /* This calculate the exact remaining data to transfer */
 void (*state_pending_exact)(void *opaque, uint64_t *must_precopy,
 uint64_t *can_postcopy);
-LoadStateHandler *load_state;
+int (*load_state)(QEMUFile *f, void *opaque, int version_id);
 int (*load_setup)(QEMUFile *f, void *opaque);
 int (*load_cleanup)(void *opaque);
 /* Called when postcopy migration wants to resume from failure */
diff --git a/include/qemu/typedefs.h b/include/qemu/typedefs.h
index 
d7c703b4ae9c91d9638111bcaafc656686e1dbb8..5fcba1c1b467826d4f7b6bd287690d33cdc48acf
 100644
--- a/include/qemu/typedefs.h
+++ b/include/qemu/typedefs.h
@@ -150,8 +150,6 @@ typedef struct IRQState *qemu_irq;
 /*
  * Function types
  */
-typedef void SaveStateHandler(QEMUFile *f, void *opaque);
-typedef int LoadStateHandler(QEMUFile *f, void *opaque, int version_id);
 typedef void (*qemu_irq_handler)(void *opaque, int n, int level);
 
 #endif /* QEMU_TYPEDEFS_H */
-- 
2.43.2

[PATCH v2 13/21] vfio: Add Error** argument to vfio_devices_dma_logging_start()

This allows updating the Error argument of the VFIO log_global_start()
handler. Errors detected when device level logging is started will be
propagated up to qemu_savevm_state_setup() when the ram save_setup()
handler is executed.

The vfio_set_migration_error() call becomes redundant. Remove it.

Reviewed-by: Philippe Mathieu-Daudé 
Signed-off-by: Cédric Le Goater 
---
 hw/vfio/common.c | 21 +
 1 file changed, 9 insertions(+), 12 deletions(-)

diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index 
560f4bc38499f7f4a3bc84ef7e4184fd6dc89935..a2d26cd08cb132d2b27c388bd75db3d9b8128407
 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -1036,7 +1036,8 @@ static void vfio_device_feature_dma_logging_start_destroy(
 g_free(feature);
 }
 
-static int vfio_devices_dma_logging_start(VFIOContainerBase *bcontainer)
+static int vfio_devices_dma_logging_start(VFIOContainerBase *bcontainer,
+  Error **errp)
 {
 struct vfio_device_feature *feature;
 VFIODirtyRanges ranges;
@@ -1058,8 +1059,8 @@ static int 
vfio_devices_dma_logging_start(VFIOContainerBase *bcontainer)
 ret = ioctl(vbasedev->fd, VFIO_DEVICE_FEATURE, feature);
 if (ret) {
 ret = -errno;
-error_report("%s: Failed to start DMA logging, err %d (%s)",
- vbasedev->name, ret, strerror(errno));
+error_setg(errp, "%s: Failed to start DMA logging, err %d (%s)",
+   vbasedev->name, ret, strerror(errno));
 goto out;
 }
 vbasedev->dirty_tracking = true;
@@ -1083,15 +1084,13 @@ static bool 
vfio_listener_log_global_start(MemoryListener *listener,
 int ret;
 
 if (vfio_devices_all_device_dirty_tracking(bcontainer)) {
-ret = vfio_devices_dma_logging_start(bcontainer);
+ret = vfio_devices_dma_logging_start(bcontainer, errp);
 } else {
-ret = vfio_container_set_dirty_page_tracking(bcontainer, true, NULL);
+ret = vfio_container_set_dirty_page_tracking(bcontainer, true, errp);
 }
 
 if (ret) {
-error_report("vfio: Could not start dirty page tracking, err: %d (%s)",
- ret, strerror(-ret));
-vfio_set_migration_error(ret);
+error_prepend(errp, "vfio: Could not start dirty page tracking - ");
 }
 return !!ret;
 }
@@ -1106,13 +1105,11 @@ static bool 
vfio_listener_log_global_stop(MemoryListener *listener,
 if (vfio_devices_all_device_dirty_tracking(bcontainer)) {
 vfio_devices_dma_logging_stop(bcontainer);
 } else {
-ret = vfio_container_set_dirty_page_tracking(bcontainer, false, NULL);
+ret = vfio_container_set_dirty_page_tracking(bcontainer, false, errp);
 }
 
 if (ret) {
-error_report("vfio: Could not stop dirty page tracking, err: %d (%s)",
- ret, strerror(-ret));
-vfio_set_migration_error(ret);
+error_prepend(errp, "vfio: Could not stop dirty page tracking - ");
 }
 return !!ret;
 }
-- 
2.43.2

Re: [PATCH 2/2] ppc/pnv: Fix pervasive topology calculation for P10


Hello Caleb,

On 2/27/24 15:48, Caleb Schlossin wrote:

Pervasive topology(PIR) calculation for core, thread ID was
wrong for big cores (SMT8). Fixing for P10.

Based on: <20240123195005.8965-1-cal...@linux.vnet.ibm.com>
Signed-off-by: Caleb Schlossin 


Since the initial patch [1] is not merged yet, you can simply send a v2
with the update. There is still some time before soft freeze [2].

The Subject of this patch [PATCH 2/2] seems to refer to a series. Is
there a patch 1/2 ?

Thanks,

C.



[1] https://lore.kernel.org/all/20240123195005.8965-1-cal...@linux.vnet.ibm.com/
[2] https://wiki.qemu.org/Planning/9.0


---
  hw/ppc/pnv.c | 15 +--
  1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c
index 2f53883916..aa5aba60b4 100644
--- a/hw/ppc/pnv.c
+++ b/hw/ppc/pnv.c
@@ -1068,12 +1068,23 @@ static uint32_t pnv_chip_pir_p9(PnvChip *chip, uint32_t 
core_id,
  }
  }
  
+/*

+ *0:48  Reserved - Read as zeroes
+ *   49:52  Node ID
+ *   53:55  Chip ID
+ *   56 Reserved - Read as zero
+ *   57:59  Quad ID
+ *   60 Core Chiplet Pair ID
+ *   61:63  Thread/Core Chiplet ID t0-t2
+ *
+ * We only care about the lower bits. uint32_t is fine for the moment.
+ */
  static uint32_t pnv_chip_pir_p10(PnvChip *chip, uint32_t core_id,
   uint32_t thread_id)
  {
  if (chip->nr_threads == 8) {
-return (chip->chip_id << 8) | ((thread_id & 1) << 2) | (core_id << 3) |
-   (thread_id >> 1);
+return (chip->chip_id << 8) | ((core_id / 4) << 4) |
+   ((core_id % 2) << 3) | thread_id;
  } else {
  return (chip->chip_id << 8) | (core_id << 2) | thread_id;
  }

[PULL 0/2] aspeed queue

The following changes since commit 1b330dafcdc34315f6837ff3af34dbb4b3106373:

  Merge tag 'edk2-stable202402-20240226-pull-request' of 
https://gitlab.com/kraxel/qemu into staging (2024-02-26 21:28:11 +)

are available in the Git repository at:

  https://github.com/legoater/qemu/ tags/pull-aspeed-20240227

for you to fetch changes up to db052d0eafe86c336d512dba99a1ec7c5c553f63:

  aspeed: fix hardcode boot address 0 (2024-02-27 13:47:05 +0100)


aspeed queue:

* Add support for UART0, in preparation of AST2700 models


Jamin Lin (2):
  aspeed: introduce a new UART0 device name
  aspeed: fix hardcode boot address 0

 include/hw/arm/aspeed_soc.h | 19 +--
 hw/arm/aspeed.c | 17 +++--
 hw/arm/aspeed_ast10x0.c |  1 +
 hw/arm/aspeed_ast2400.c |  6 --
 hw/arm/aspeed_ast2600.c |  3 ++-
 hw/arm/aspeed_soc_common.c  |  6 --
 6 files changed, 39 insertions(+), 13 deletions(-)

[PULL 1/2] aspeed: introduce a new UART0 device name

From: Jamin Lin 

The Aspeed datasheet refers to the UART controllers
as UART1 - UART13 for the ast10x0, ast2600, ast2500
and ast2400 SoCs and the Aspeed ast2700 introduces an UART0
and the UART controllers as UART0 - UART12.

To keep the naming in the QEMU models
in sync with the datasheet, let's introduce a new  UART0 device name
and do the required adjustments.

Signed-off-by: Troy Lee 
Signed-off-by: Jamin Lin 
Reviewed-by: Cédric Le Goater 
[ clg: - Kept original assert() in aspeed_soc_uart_set_chr()
   - Fixed 'i' range in connect_serial_hds_to_uarts() loop ]
Signed-off-by: Cédric Le Goater 
---
 include/hw/arm/aspeed_soc.h | 17 +
 hw/arm/aspeed.c | 13 -
 hw/arm/aspeed_ast10x0.c |  1 +
 hw/arm/aspeed_ast2400.c |  2 ++
 hw/arm/aspeed_ast2600.c |  1 +
 hw/arm/aspeed_soc_common.c  |  6 --
 6 files changed, 33 insertions(+), 7 deletions(-)

diff --git a/include/hw/arm/aspeed_soc.h b/include/hw/arm/aspeed_soc.h
index 9d0af84a8cff..e1a023be538b 100644
--- a/include/hw/arm/aspeed_soc.h
+++ b/include/hw/arm/aspeed_soc.h
@@ -140,6 +140,7 @@ struct AspeedSoCClass {
 int wdts_num;
 int macs_num;
 int uarts_num;
+int uarts_base;
 const int *irqmap;
 const hwaddr *memmap;
 uint32_t num_cpus;
@@ -151,6 +152,7 @@ const char *aspeed_soc_cpu_type(AspeedSoCClass *sc);
 enum {
 ASPEED_DEV_SPI_BOOT,
 ASPEED_DEV_IOMEM,
+ASPEED_DEV_UART0,
 ASPEED_DEV_UART1,
 ASPEED_DEV_UART2,
 ASPEED_DEV_UART3,
@@ -235,4 +237,19 @@ void aspeed_mmio_map_unimplemented(AspeedSoCState *s, 
SysBusDevice *dev,
 void aspeed_board_init_flashes(AspeedSMCState *s, const char *flashtype,
unsigned int count, int unit0);
 
+static inline int aspeed_uart_index(int uart_dev)
+{
+return uart_dev - ASPEED_DEV_UART0;
+}
+
+static inline int aspeed_uart_first(AspeedSoCClass *sc)
+{
+return aspeed_uart_index(sc->uarts_base);
+}
+
+static inline int aspeed_uart_last(AspeedSoCClass *sc)
+{
+return aspeed_uart_first(sc) + sc->uarts_num - 1;
+}
+
 #endif /* ASPEED_SOC_H */
diff --git a/hw/arm/aspeed.c b/hw/arm/aspeed.c
index 09b1e823ba1c..0af96afa16a6 100644
--- a/hw/arm/aspeed.c
+++ b/hw/arm/aspeed.c
@@ -342,7 +342,7 @@ static void connect_serial_hds_to_uarts(AspeedMachineState 
*bmc)
 int uart_chosen = bmc->uart_chosen ? bmc->uart_chosen : amc->uart_default;
 
 aspeed_soc_uart_set_chr(s, uart_chosen, serial_hd(0));
-for (int i = 1, uart = ASPEED_DEV_UART1; i < sc->uarts_num; i++, uart++) {
+for (int i = 1, uart = sc->uarts_base; i < sc->uarts_num; i++, uart++) {
 if (uart == uart_chosen) {
 continue;
 }
@@ -1094,7 +1094,7 @@ static char *aspeed_get_bmc_console(Object *obj, Error 
**errp)
 AspeedMachineClass *amc = ASPEED_MACHINE_GET_CLASS(bmc);
 int uart_chosen = bmc->uart_chosen ? bmc->uart_chosen : amc->uart_default;
 
-return g_strdup_printf("uart%d", uart_chosen - ASPEED_DEV_UART1 + 1);
+return g_strdup_printf("uart%d", aspeed_uart_index(uart_chosen));
 }
 
 static void aspeed_set_bmc_console(Object *obj, const char *value, Error 
**errp)
@@ -1103,6 +1103,8 @@ static void aspeed_set_bmc_console(Object *obj, const 
char *value, Error **errp)
 AspeedMachineClass *amc = ASPEED_MACHINE_GET_CLASS(bmc);
 AspeedSoCClass *sc = ASPEED_SOC_CLASS(object_class_by_name(amc->soc_name));
 int val;
+int uart_first = aspeed_uart_first(sc);
+int uart_last = aspeed_uart_last(sc);
 
 if (sscanf(value, "uart%u", ) != 1) {
 error_setg(errp, "Bad value for \"uart\" property");
@@ -1110,11 +1112,12 @@ static void aspeed_set_bmc_console(Object *obj, const 
char *value, Error **errp)
 }
 
 /* The number of UART depends on the SoC */
-if (val < 1 || val > sc->uarts_num) {
-error_setg(errp, "\"uart\" should be in range [1 - %d]", 
sc->uarts_num);
+if (val < uart_first || val > uart_last) {
+error_setg(errp, "\"uart\" should be in range [%d - %d]",
+   uart_first, uart_last);
 return;
 }
-bmc->uart_chosen = ASPEED_DEV_UART1 + val - 1;
+bmc->uart_chosen = val + ASPEED_DEV_UART0;
 }
 
 static void aspeed_machine_class_props_init(ObjectClass *oc)
diff --git a/hw/arm/aspeed_ast10x0.c b/hw/arm/aspeed_ast10x0.c
index c3b5116a6a9d..2634e0f6544e 100644
--- a/hw/arm/aspeed_ast10x0.c
+++ b/hw/arm/aspeed_ast10x0.c
@@ -436,6 +436,7 @@ static void aspeed_soc_ast1030_class_init(ObjectClass 
*klass, void *data)
 sc->wdts_num = 4;
 sc->macs_num = 1;
 sc->uarts_num = 13;
+sc->uarts_base = ASPEED_DEV_UART1;
 sc->irqmap = aspeed_soc_ast1030_irqmap;
 sc->memmap = aspeed_soc_ast1030_memmap;
 sc->num_cpus = 1;
diff --git a/hw/arm/aspeed_ast2400.c b/hw/arm/aspeed_ast2400.c
index 8829561

[PULL 2/2] aspeed: fix hardcode boot address 0

From: Jamin Lin 

In the previous design of ASPEED SOCs QEMU model, it set the boot
address at "0" which was the hardcode setting for ast10x0, ast2600,
ast2500 and ast2400.

According to the design of ast2700, it has a bootmcu(riscv-32) which
is used for executing SPL and initialize DRAM and copy u-boot image
from SPI/Flash to DRAM at address 0x4 at SPL boot stage.
Then, CPUs(cortex-a35) execute u-boot, kernel and rofs.

Currently, QEMU does not support emulating two CPU architectures
in the same machine. Therefore, QEMU will only support
emulating the CPU (cortex-a35) side for ast2700 and the boot
address is "0x4 ".

Fixed hardcode boot address "0" for future models using
a different mapping address.

Signed-off-by: Troy Lee 
Signed-off-by: Jamin Lin 
Reviewed-by: Cédric Le Goater 
Signed-off-by: Cédric Le Goater 
---
 include/hw/arm/aspeed_soc.h | 2 --
 hw/arm/aspeed.c | 4 +++-
 hw/arm/aspeed_ast2400.c | 4 ++--
 hw/arm/aspeed_ast2600.c | 2 +-
 4 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/include/hw/arm/aspeed_soc.h b/include/hw/arm/aspeed_soc.h
index e1a023be538b..c60fac900acb 100644
--- a/include/hw/arm/aspeed_soc.h
+++ b/include/hw/arm/aspeed_soc.h
@@ -224,8 +224,6 @@ enum {
 ASPEED_DEV_FSI2,
 };
 
-#define ASPEED_SOC_SPI_BOOT_ADDR 0x0
-
 qemu_irq aspeed_soc_get_irq(AspeedSoCState *s, int dev);
 bool aspeed_soc_uart_realize(AspeedSoCState *s, Error **errp);
 void aspeed_soc_uart_set_chr(AspeedSoCState *s, int dev, Chardev *chr);
diff --git a/hw/arm/aspeed.c b/hw/arm/aspeed.c
index 0af96afa16a6..8854581ca8de 100644
--- a/hw/arm/aspeed.c
+++ b/hw/arm/aspeed.c
@@ -289,12 +289,14 @@ static void aspeed_install_boot_rom(AspeedMachineState 
*bmc, BlockBackend *blk,
 uint64_t rom_size)
 {
 AspeedSoCState *soc = bmc->soc;
+AspeedSoCClass *sc = ASPEED_SOC_GET_CLASS(soc);
 
 memory_region_init_rom(>boot_rom, NULL, "aspeed.boot_rom", rom_size,
_abort);
 memory_region_add_subregion_overlap(>spi_boot_container, 0,
 >boot_rom, 1);
-write_boot_rom(blk, ASPEED_SOC_SPI_BOOT_ADDR, rom_size, _abort);
+write_boot_rom(blk, sc->memmap[ASPEED_DEV_SPI_BOOT],
+   rom_size, _abort);
 }
 
 void aspeed_board_init_flashes(AspeedSMCState *s, const char *flashtype,
diff --git a/hw/arm/aspeed_ast2400.c b/hw/arm/aspeed_ast2400.c
index 95da85fee029..d12588620751 100644
--- a/hw/arm/aspeed_ast2400.c
+++ b/hw/arm/aspeed_ast2400.c
@@ -26,7 +26,7 @@
 #define ASPEED_SOC_IOMEM_SIZE   0x0020
 
 static const hwaddr aspeed_soc_ast2400_memmap[] = {
-[ASPEED_DEV_SPI_BOOT]  =  ASPEED_SOC_SPI_BOOT_ADDR,
+[ASPEED_DEV_SPI_BOOT]  = 0x,
 [ASPEED_DEV_IOMEM]  = 0x1E60,
 [ASPEED_DEV_FMC]= 0x1E62,
 [ASPEED_DEV_SPI1]   = 0x1E63,
@@ -61,7 +61,7 @@ static const hwaddr aspeed_soc_ast2400_memmap[] = {
 };
 
 static const hwaddr aspeed_soc_ast2500_memmap[] = {
-[ASPEED_DEV_SPI_BOOT]  = ASPEED_SOC_SPI_BOOT_ADDR,
+[ASPEED_DEV_SPI_BOOT]  = 0x,
 [ASPEED_DEV_IOMEM]  = 0x1E60,
 [ASPEED_DEV_FMC]= 0x1E62,
 [ASPEED_DEV_SPI1]   = 0x1E63,
diff --git a/hw/arm/aspeed_ast2600.c b/hw/arm/aspeed_ast2600.c
index f74561ecdcd5..174be537709b 100644
--- a/hw/arm/aspeed_ast2600.c
+++ b/hw/arm/aspeed_ast2600.c
@@ -22,7 +22,7 @@
 #define ASPEED_SOC_DPMCU_SIZE   0x0004
 
 static const hwaddr aspeed_soc_ast2600_memmap[] = {
-[ASPEED_DEV_SPI_BOOT]  = ASPEED_SOC_SPI_BOOT_ADDR,
+[ASPEED_DEV_SPI_BOOT]  = 0x,
 [ASPEED_DEV_SRAM]  = 0x1000,
 [ASPEED_DEV_DPMCU] = 0x1800,
 /* 0x1600 0x17FF : AHB BUS do LPC Bus bridge */
-- 
2.43.2

Re: [PATCH V4 00/14] allow cpr-reboot for vfio

2024-02-26 Thread Cédric Le Goater


On 2/26/24 03:14, Peter Xu wrote:

On Thu, Feb 22, 2024 at 12:33:42PM -0500, Steven Sistare wrote:

Peter (and David if interested): these patches still need RB:
   migration: notifier error checking
   migration: stop vm for cpr
   migration: update cpr-reboot description
   migration: options incompatible with cpr


These all look fine to me.



Alex, these patches still need RB:
   vfio: register container for cpr
   vfio: allow cpr-reboot migration if suspended


I'll need to wait for comment from either Alex/Cedric on these.


Yes. It's on my list.


As I asked in the other thread, afaict cpr-reboot keeps changing behavior,
maybe I can merge migration patches first, 


Go ahead. It will help me for the changes I am doing on error reporting
for VFIO migration. I will rebase on top.

then keep vfio patches separately merged / discussed?  


Sure.

I always see cpr-reboot mode experimental from this regard.  


This makes sense to me also.

Thanks,

C.




Please consider adding a patch to declare cpr-reboot
mode experimental if that matches your expectation, until all relevant
patches are merged, to make sure the ABI becomes stable.

Thanks,

Re: [RFC PATCH 14/14] migration: Fix return-path thread exit

2024-02-26 Thread Cédric Le Goater


On 2/23/24 15:05, Fabiano Rosas wrote:

Peter Xu  writes:


On Fri, Feb 16, 2024 at 02:35:26PM -0300, Fabiano Rosas wrote:

Cédric Le Goater  writes:


Hello Fabiano

On 2/14/24 21:35, Fabiano Rosas wrote:

Cédric Le Goater  writes:


Hello Fabiano

On 2/8/24 14:29, Fabiano Rosas wrote:

Cédric Le Goater  writes:


In case of error, close_return_path_on_source() can perform a shutdown
to exit the return-path thread.  However, in migrate_fd_cleanup(),
'to_dst_file' is closed before calling close_return_path_on_source()
and the shutdown fails, leaving the source and destination waiting for
an event to occur.


Hi, Cédric

Are you sure this is not caused by patch 13?


It happens with upstream QEMU without any patch.


I might have taken that "shutdown fails" in the commit message too
literally. Anyway, I have a proposed solution:

-->8--
  From 729aa7b5b7f130f756d41649fdd0862bd2e90430 Mon Sep 17 00:00:00 2001
From: Fabiano Rosas 
Date: Wed, 14 Feb 2024 16:45:43 -0300
Subject: [PATCH] migration: Join the return path thread before releasing
   to_dst_file
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The return path thread might hang at a blocking system call. Before
joining the thread we might need to issue a shutdown() on the socket
file descriptor to release it. To determine whether the shutdown() is
necessary we look at the QEMUFile error.

Make sure we only clean up the QEMUFile after the return path has been
waited for.


Yes. That's the important part.


This fixes a hang when qemu_savevm_state_setup() produced an error
that was detected by migration_detect_error(). That skips
migration_completion() so close_return_path_on_source() would get
stuck waiting for the RP thread to terminate.

At migrate_fd_cleanup() I'm keeping the relative order of joining the
migration thread and the return path just in case.


That doesn't look necessary.


Indeed. But I don't trust the migration code, it's full of undocumented
dependencies like that.


What was the reason to join the migration thread only when
s->to_dst_file is valid ?


I didn't find any explicit reason looking through the history. It seems
we used to rely on to_dst_file before migration_thread_running was
introduced.

I wouldn't mind keeping that 'if' there.

Let's see what Peter thinks about it.


Frankly I don't have a strong opinion on current patch 14 or the new
proposal, but it seems we reached a consensus.

Fabiano, would you repost with a formal patch, with the proper tags?


Yes, I'll post it soon.



One thing I am still not sure is whether we should still have patch 13
altogether? Please see my other reply on whether it's possible to have
migrate_get_error() == true but qemu_file_get_error() == false.


I'll include it then.


Thanks for taking over.

I have included :

 [PATCH] migration: Join the return path thread before releasing to_dst_file

in my series and dropped 13-14. I hope to send a follow up on :

  https://lore.kernel.org/qemu-devel/20240207133347.1115903-1-...@redhat.com/

before we reach soft freeze. It's growing quite a lot.

C.

[PATCH] MAINTAINERS: Remove myself as reviewer from PPC

2024-02-20 Thread Cédric Le Goater

PPC maintainership has been a side activity for the last 2 years and
it is time to let go some of it now that Nick has taken over.

Signed-off-by: Cédric Le Goater 
---
 MAINTAINERS | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index a74d73960c0a..f5a4e4745c92 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -316,7 +316,6 @@ F: tests/tcg/openrisc/
 PowerPC TCG CPUs
 M: Nicholas Piggin 
 M: Daniel Henrique Barboza 
-R: Cédric Le Goater 
 L: qemu-...@nongnu.org
 S: Odd Fixes
 F: target/ppc/
@@ -468,7 +467,6 @@ F: target/mips/sysemu/
 PPC KVM CPUs
 M: Nicholas Piggin 
 R: Daniel Henrique Barboza 
-R: Cédric Le Goater 
 S: Odd Fixes
 F: target/ppc/kvm.c
 
@@ -1502,7 +1500,6 @@ F: tests/avocado/ppc_prep_40p.py
 sPAPR (pseries)
 M: Nicholas Piggin 
 R: Daniel Henrique Barboza 
-R: Cédric Le Goater 
 R: David Gibson 
 R: Harsh Prateek Bora 
 L: qemu-...@nongnu.org
-- 
2.43.0

Re: Trying to write data to i2c bus

2024-02-19 Thread Cédric Le Goater


On 2/19/24 18:14, Corey Minyard wrote:

On Mon, Feb 19, 2024 at 04:53:47PM +, Paz Offer wrote:

Thank you very much Corey,

I am simulating an external module that wants to communicate with the board 
management controller (BMC).
The real device will be connected to the board using i2c bus, and could 
initiate communication at any time, by sending bytes over the bus.


And you have a simulated BMC that can do this?  Or is the system running
in qemu the BMC?



I am not sure whether the 'Master-side' (the side initiating the communication) 
needs to simulate a full i2c-master device, or whether my code could 'simply' 
write directly to the appropriate registers of the guest OS.
Are there some examples or documentation on how to implement something like 
this?


The aspeed i2c controller is capable of having another bus master on an
I2C bus, but it is the only host that can currently do it.

It is doable, the code is there for multiple bus masters, but there is no
device currently that does it.  I assume that is coming at some point,
but no documentation exists on how to do it.

You can look at the git commits in hw/i2c around 37fa5ca42623 "hw/i2c:
support multiple masters" for the changes that were done to support
this.



There is an i2c-echo device implementing an I2C slave  and a test,
test_arm_ast2600_evb_buildroot in tests/avocado/ machine_aspeed.py,
using it on the ast2600-evb.


Thanks,

C.

Re: [PULL 22/49] hw/pci-host/raven.c: Mark raven_io_ops as implementing unaligned accesses

2024-02-19 Thread Cédric Le Goater


On 2/19/24 15:55, Peter Maydell wrote:

On Mon, 19 Feb 2024 at 14:53, Cédric Le Goater  wrote:


On 2/19/24 15:49, BALATON Zoltan wrote:

On Mon, 19 Feb 2024, Nicholas Piggin wrote:

From: Peter Maydell 

The raven_io_ops MemoryRegionOps is the only one in the source tree
which sets .valid.unaligned to indicate that it should support
unaligned accesses and which does not also set .impl.unaligned to
indicate that its read and write functions can do the unaligned
handling themselves.  This is a problem, because at the moment the
core memory system does not implement the support for handling
unaligned accesses by doing a series of aligned accesses and
combining them (system/memory.c:access_with_adjusted_size() has a
TODO comment noting this).

Fortunately raven_io_read() and raven_io_write() will correctly deal
with the case of being passed an unaligned address, so we can fix the
missing unaligned access support by setting .impl.unaligned in the
MemoryRegionOps struct.

Fixes: 9a1839164c9c8f06 ("raven: Implement non-contiguous I/O region")
Reviewed-by: Cédric Le Goater 
Tested-by: Cédric Le Goater 
Signed-off-by: Peter Maydell 
Signed-off-by: Nicholas Piggin 


Hm, this seems to be missing the actual patch.


It's merged already and git knows how to handle this.


Mmm, though this is the result of "rebased onto a tree that
already had the commit" rather than "two merges both contain
the commit", so we end up with a genuinely empty commit upstream,
which is a bit odd looking, though harmless.


git rebase -i db5f7f9e3ceb and dropping the first patch would
cleanup the empty patch.

C.

Re: [PULL 22/49] hw/pci-host/raven.c: Mark raven_io_ops as implementing unaligned accesses

2024-02-19 Thread Cédric Le Goater


On 2/19/24 15:49, BALATON Zoltan wrote:

On Mon, 19 Feb 2024, Nicholas Piggin wrote:

From: Peter Maydell 

The raven_io_ops MemoryRegionOps is the only one in the source tree
which sets .valid.unaligned to indicate that it should support
unaligned accesses and which does not also set .impl.unaligned to
indicate that its read and write functions can do the unaligned
handling themselves.  This is a problem, because at the moment the
core memory system does not implement the support for handling
unaligned accesses by doing a series of aligned accesses and
combining them (system/memory.c:access_with_adjusted_size() has a
TODO comment noting this).

Fortunately raven_io_read() and raven_io_write() will correctly deal
with the case of being passed an unaligned address, so we can fix the
missing unaligned access support by setting .impl.unaligned in the
MemoryRegionOps struct.

Fixes: 9a1839164c9c8f06 ("raven: Implement non-contiguous I/O region")
Reviewed-by: Cédric Le Goater 
Tested-by: Cédric Le Goater 
Signed-off-by: Peter Maydell 
Signed-off-by: Nicholas Piggin 


Hm, this seems to be missing the actual patch.


It's merged already and git knows how to handle this.

Thanks,

C.

Re: [PATCH 13/14] migration: Use migrate_has_error() in close_return_path_on_source()


On 2/14/24 17:00, Fabiano Rosas wrote:

Cédric Le Goater  writes:


On 2/8/24 14:57, Fabiano Rosas wrote:

Cédric Le Goater  writes:


On 2/8/24 14:07, Fabiano Rosas wrote:

Cédric Le Goater  writes:


close_return_path_on_source() retrieves the migration error from the
QEMUFile '->to_dst_file' to know if a shutdown is required. This
shutdown is required to exit the return-path thread. However, in
migrate_fd_cleanup(), '->to_dst_file' is cleaned up before calling
close_return_path_on_source() and the shutdown is never performed,
leaving the source and destination waiting for an event to occur.

Avoid relying on '->to_dst_file' and use migrate_has_error() instead.

Suggested-by: Peter Xu 
Signed-off-by: Cédric Le Goater 
---
migration/migration.c | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/migration/migration.c b/migration/migration.c
index 
d5f705ceef4c925589aa49335969672c0d761fa2..5f55af3d7624750ca416c4177781241b3e291e5d
 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -2372,8 +2372,7 @@ static bool close_return_path_on_source(MigrationState 
*ms)
 * cause it to unblock if it's stuck waiting for the destination.
 */
WITH_QEMU_LOCK_GUARD(>qemu_file_lock) {
-if (ms->to_dst_file && ms->rp_state.from_dst_file &&
-qemu_file_get_error(ms->to_dst_file)) {
+if (migrate_has_error(ms) && ms->rp_state.from_dst_file) {
qemu_file_shutdown(ms->rp_state.from_dst_file);
}
}


Hm, maybe Peter can help defend this, but this assumes that every
function that takes an 'f' and sets the file error also sets
migrate_set_error(). I'm not sure we have determined that, have we?


How could we check all the code path ? I agree it is difficult when
looking at the code :/


It would help if the thing wasn't called 'f' for the most part of the
code to begin with.

Whenever there's a file error at to_dst_file there's the chance that the
rp_state.from_dst_file got stuck. So we cannot ignore the file error.

Would it work if we checked it earlier during cleanup as you did
previously and then set the migration error?


Do you mean doing something similar to what is done in
source_return_path_thread() ?

  if (qemu_file_get_error(s->to_dst_file)) {
  qemu_file_get_error_obj(s->to_dst_file, );
if (err) {
migrate_set_error(ms, err);
error_free(err);
...

Yes. That would be safer I think.


Yes, something like that.

I wish we could make that return path cleanup more deterministic, but
currently it's just: "if something hangs, call shutdown()". We don't
have a way to detect a hang, we just look at the file error and hope it
works.

A crucial aspect here is that calling qemu_file_shutdown() itself sets
the file error. So there's not even a guarantee that an error is
actually an error.




Nevertheless, I am struggling to understand how qemu_file_set_error()
and migrate_set_error() fit together. I was expecting some kind of
synchronization routine but there isn't one, it seems. Are they completely
orthogonal? When should we use these routines and when not?


We're trying to phase out the QEMUFile usage altogether. One thing that
is getting in the way is this dependency on the qemu_file_*_error
functions.


OK. the other changes, which add an Error** argument to various handlers,
reduce the use of qemu_file_*_error routines in VFIO.


While we're not there yet, a good pattern is to find a
qemu_file_set|get_error() pair and replace it with
migrate_set|has_error(). 


OK. I will keep that in mind for the other changes.

Thanks,

C.




Unfortunately the return path does not fit in
this, because we don't have a matching qemu_file_set_error, it could be
anywhere. As I said above, we're using that error as a heuristic for: "a
recvmsg() might be hanging".



My initial goal was to modify some of the memory handlers (log_global*)
and migration handlers to propagate errors at the QMP level and then
report them to the management layer. This is growing into something bigger
and currently, I don't find a good approach to the problem.

The last two patches of this series try to fix the return-path thread
termination. Let's keep that for after.


I'll try to figure that out. I see you provided a reproducer.



Thanks,

C.

Re: [RFC PATCH 14/14] migration: Fix return-path thread exit


Hello Fabiano

On 2/14/24 21:35, Fabiano Rosas wrote:

Cédric Le Goater  writes:


Hello Fabiano

On 2/8/24 14:29, Fabiano Rosas wrote:

Cédric Le Goater  writes:


In case of error, close_return_path_on_source() can perform a shutdown
to exit the return-path thread.  However, in migrate_fd_cleanup(),
'to_dst_file' is closed before calling close_return_path_on_source()
and the shutdown fails, leaving the source and destination waiting for
an event to occur.


Hi, Cédric

Are you sure this is not caused by patch 13?


It happens with upstream QEMU without any patch.


I might have taken that "shutdown fails" in the commit message too
literally. Anyway, I have a proposed solution:

-->8--
 From 729aa7b5b7f130f756d41649fdd0862bd2e90430 Mon Sep 17 00:00:00 2001
From: Fabiano Rosas 
Date: Wed, 14 Feb 2024 16:45:43 -0300
Subject: [PATCH] migration: Join the return path thread before releasing
  to_dst_file
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The return path thread might hang at a blocking system call. Before
joining the thread we might need to issue a shutdown() on the socket
file descriptor to release it. To determine whether the shutdown() is
necessary we look at the QEMUFile error.

Make sure we only clean up the QEMUFile after the return path has been
waited for.


Yes. That's the important part.


This fixes a hang when qemu_savevm_state_setup() produced an error
that was detected by migration_detect_error(). That skips
migration_completion() so close_return_path_on_source() would get
stuck waiting for the RP thread to terminate.

At migrate_fd_cleanup() I'm keeping the relative order of joining the
migration thread and the return path just in case.


That doesn't look necessary. What was the reason to join the migration
thread only when s->to_dst_file is valid ?



Reported-by: Cédric Le Goater 
Signed-off-by: Fabiano Rosas 


LGTM, it fixes the hang when an error is detected, the migration is
aborted and the VM resumes execution. FWIW,

Tested-by: Cédric Le Goater 

It requires more thorough testing though.

Thanks,

C.





---
  migration/migration.c | 36 
  1 file changed, 16 insertions(+), 20 deletions(-)

diff --git a/migration/migration.c b/migration/migration.c
index ab21de2cad..f0b70e8a9d 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -1326,17 +1326,19 @@ static void migrate_fd_cleanup(MigrationState *s)
  
  qemu_savevm_state_cleanup();
  
+bql_unlock();

+if (s->migration_thread_running) {
+qemu_thread_join(>thread);
+s->migration_thread_running = false;
+}
+bql_lock();
+
+close_return_path_on_source(s);
+
  if (s->to_dst_file) {
  QEMUFile *tmp;
  
  trace_migrate_fd_cleanup();

-bql_unlock();
-if (s->migration_thread_running) {
-qemu_thread_join(>thread);
-s->migration_thread_running = false;
-}
-bql_lock();
-
  multifd_send_shutdown();
  qemu_mutex_lock(>qemu_file_lock);
  tmp = s->to_dst_file;
@@ -1350,12 +1352,6 @@ static void migrate_fd_cleanup(MigrationState *s)
  qemu_fclose(tmp);
  }
  
-/*

- * We already cleaned up to_dst_file, so errors from the return
- * path might be due to that, ignore them.
- */
-close_return_path_on_source(s);
-
  assert(!migration_is_active(s));
  
  if (s->state == MIGRATION_STATUS_CANCELLING) {

@@ -2874,6 +2870,13 @@ static MigThrError postcopy_pause(MigrationState *s)
  while (true) {
  QEMUFile *file;
  
+/*

+ * We're already pausing, so ignore any errors on the return
+ * path and just wait for the thread to finish. It will be
+ * re-created when we resume.
+ */
+close_return_path_on_source(s);
+
  /*
   * Current channel is possibly broken. Release it.  Note that this is
   * guaranteed even without lock because to_dst_file should only be
@@ -2893,13 +2896,6 @@ static MigThrError postcopy_pause(MigrationState *s)
  qemu_file_shutdown(file);
  qemu_fclose(file);
  
-/*

- * We're already pausing, so ignore any errors on the return
- * path and just wait for the thread to finish. It will be
- * re-created when we resume.
- */
-close_return_path_on_source(s);
-
  migrate_set_state(>state, s->state,
MIGRATION_STATUS_POSTCOPY_PAUSED);

Re: [PATCH 11/14] vfio: Extend vfio_set_migration_error() with Error* argument


Hello Avihai,

On 2/12/24 10:35, Avihai Horon wrote:

Hi Cedric,

On 07/02/2024 15:33, Cédric Le Goater wrote:

External email: Use caution opening links or attachments


vfio_set_migration_error() sets the 'return' error on the migration
stream if a migration is in progress. To improve error reporting, add
a new Error* argument to also set the Error object on the migration
stream.

Signed-off-by: Cédric Le Goater 
---
  hw/vfio/common.c | 50 +---
  1 file changed, 30 insertions(+), 20 deletions(-)

diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index 
82173b039c47150f5edd05d329192c5b9c8a9a0f..afe8b6bd294fd5904f394a5db48aae3fd718b14e
 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -148,16 +148,18 @@ bool vfio_viommu_preset(VFIODevice *vbasedev)
  return vbasedev->bcontainer->space->as != _space_memory;
  }

-static void vfio_set_migration_error(int err)
+static void vfio_set_migration_error(int ret, Error *err)
  {
  MigrationState *ms = migrate_get_current();

  if (migration_is_setup_or_active(ms->state)) {
  WITH_QEMU_LOCK_GUARD(>qemu_file_lock) {
  if (ms->to_dst_file) {
-    qemu_file_set_error(ms->to_dst_file, err);
+    qemu_file_set_error_obj(ms->to_dst_file, ret, err);
  }
  }
+    } else {
+    error_report_err(err);
  }
  }

@@ -296,15 +298,17 @@ static void vfio_iommu_map_notify(IOMMUNotifier *n, 
IOMMUTLBEntry *iotlb)
  VFIOContainerBase *bcontainer = giommu->bcontainer;
  hwaddr iova = iotlb->iova + giommu->iommu_offset;
  void *vaddr;
+    Error *local_err = NULL;
  int ret;

  trace_vfio_iommu_map_notify(iotlb->perm == IOMMU_NONE ? "UNMAP" : "MAP",
  iova, iova + iotlb->addr_mask);

  if (iotlb->target_as != _space_memory) {
-    error_report("Wrong target AS \"%s\", only system memory is allowed",
- iotlb->target_as->name ? iotlb->target_as->name : "none");
-    vfio_set_migration_error(-EINVAL);
+    error_setg(_err,
+   "Wrong target AS \"%s\", only system memory is allowed",
+   iotlb->target_as->name ? iotlb->target_as->name : "none");
+    vfio_set_migration_error(-EINVAL, local_err);
  return;
  }

@@ -336,11 +340,12 @@ static void vfio_iommu_map_notify(IOMMUNotifier *n, 
IOMMUTLBEntry *iotlb)
  ret = vfio_container_dma_unmap(bcontainer, iova,
 iotlb->addr_mask + 1, iotlb);
  if (ret) {
-    error_report("vfio_container_dma_unmap(%p, 0x%"HWADDR_PRIx", "
- "0x%"HWADDR_PRIx") = %d (%s)",
- bcontainer, iova,
- iotlb->addr_mask + 1, ret, strerror(-ret));
-    vfio_set_migration_error(ret);
+    error_setg(_err,
+   "vfio_container_dma_unmap(%p, 0x%"HWADDR_PRIx", "
+   "0x%"HWADDR_PRIx") = %d (%s)",
+   bcontainer, iova,
+   iotlb->addr_mask + 1, ret, strerror(-ret));
+    vfio_set_migration_error(ret, local_err);
  }
  }
  out:
@@ -1224,13 +1229,15 @@ static void vfio_iommu_map_dirty_notify(IOMMUNotifier 
*n, IOMMUTLBEntry *iotlb)
  VFIOContainerBase *bcontainer = giommu->bcontainer;
  hwaddr iova = iotlb->iova + giommu->iommu_offset;
  ram_addr_t translated_addr;
+    Error *local_err = NULL;
  int ret = -EINVAL;

  trace_vfio_iommu_map_dirty_notify(iova, iova + iotlb->addr_mask);

  if (iotlb->target_as != _space_memory) {
-    error_report("Wrong target AS \"%s\", only system memory is allowed",
- iotlb->target_as->name ? iotlb->target_as->name : "none");
+    error_setg(_err,
+   "Wrong target AS \"%s\", only system memory is allowed",
+   iotlb->target_as->name ? iotlb->target_as->name : "none");
  goto out;
  }

@@ -1239,17 +1246,18 @@ static void vfio_iommu_map_dirty_notify(IOMMUNotifier 
*n, IOMMUTLBEntry *iotlb)
  ret = vfio_get_dirty_bitmap(bcontainer, iova, iotlb->addr_mask + 1,
  translated_addr);


If vfio_get_xlat_addr() above (it's not shown here) returns false, we will pass 
a NULL local_err to vfio_set_migration_error() and it may de-reference NULL ptr 
in error_report_err().


Ah yes. Thanks for spotting this.



Should we refactor vfio_get_xlat_addr() to get errp, 


I think we should add an Error** parameter to vfio_get_xlat_addr() and
memory_get_xlat_addr(). It shoul

Re: [PATCH 05/21] hw/ppc/pnv_bmc: Use qdev_new() instead of QOM API


On 2/16/24 12:02, Philippe Mathieu-Daudé wrote:

Prefer QDev API for QDev objects, avoid the underlying QOM layer.

Signed-off-by: Philippe Mathieu-Daudé 



Reviewed-by: Cédric Le Goater 

Thanks,

C.



---
  hw/ppc/pnv_bmc.c | 10 +-
  1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/hw/ppc/pnv_bmc.c b/hw/ppc/pnv_bmc.c
index 99f1e8d7f9..0c1274df21 100644
--- a/hw/ppc/pnv_bmc.c
+++ b/hw/ppc/pnv_bmc.c
@@ -269,13 +269,13 @@ void pnv_bmc_set_pnor(IPMIBmc *bmc, PnvPnor *pnor)
   */
  IPMIBmc *pnv_bmc_create(PnvPnor *pnor)
  {
-Object *obj;
+DeviceState *dev;
  
-obj = object_new(TYPE_IPMI_BMC_SIMULATOR);

-qdev_realize(DEVICE(obj), NULL, _fatal);
-pnv_bmc_set_pnor(IPMI_BMC(obj), pnor);
+dev = qdev_new(TYPE_IPMI_BMC_SIMULATOR);
+qdev_realize(dev, NULL, _fatal);
+pnv_bmc_set_pnor(IPMI_BMC(dev), pnor);
  
-return IPMI_BMC(obj);

+return IPMI_BMC(dev);
  }
  
  typedef struct ForeachArgs {

Re: [PATCH 5/6] hw/vfio/common: Use RCU_READ macros


On 2/16/24 09:49, Philippe Mathieu-Daudé wrote:

On 24/1/24 15:09, Philippe Mathieu-Daudé wrote:

On 24/1/24 10:25, Manos Pitsidianakis wrote:

On Wed, 24 Jan 2024 09:42, Philippe Mathieu-Daudé  wrote:

Replace the manual rcu_read_(un)lock calls by the
*RCU_READ_LOCK_GUARD macros (See commit ef46ae67ba
"docs/style: call out the use of GUARD macros").

Signed-off-by: Philippe Mathieu-Daudé 
---
hw/vfio/common.c | 34 --
1 file changed, 16 insertions(+), 18 deletions(-)

diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index 4aa86f563c..09878a3603 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -308,13 +308,13 @@ static void vfio_iommu_map_notify(IOMMUNotifier *n, 
IOMMUTLBEntry *iotlb)
    return;
    }

-    rcu_read_lock();
+    RCU_READ_LOCK_GUARD();

    if ((iotlb->perm & IOMMU_RW) != IOMMU_NONE) {
    bool read_only;

    if (!vfio_get_xlat_addr(iotlb, , NULL, _only)) {
-    goto out;
+    return;


Since this is the only early return, we could alternatively do:

- if (!vfio_get_xlat_addr(iotlb, , NULL, _only)) {
+ if (vfio_get_xlat_addr(iotlb, , NULL, _only)) {

remove the goto/return, and wrap the rest of the codeflow in this if's 
brackets. And then we could use WITH_RCU_READ_LOCK_GUARD instead. That'd 
increase the code indentation however.


If the maintainer agrees with the style & code churn, I don't
mind respining.


Alex, Cédric, any preference?


my choice would be to keep the 'goto' statement and protect
the vfio_get_xlat_addr() call with :

+WITH_RCU_READ_LOCK_GUARD() {
+if (vfio_get_xlat_addr(iotlb, NULL, _addr, NULL)) {
+ret = vfio_get_dirty_bitmap(bcontainer, iova,
+iotlb->addr_mask + 1,
+translated_addr);
+if (ret) {
+error_report("vfio_iommu_map_dirty_notify(%p,"
+ " 0x%"HWADDR_PRIx
+ ", 0x%"HWADDR_PRIx") = %d (%s)",
+ bcontainer, iova, iotlb->addr_mask + 1, ret,
+ strerror(-ret));
+}
+}
 }



Thanks,

C.

Re: [PATCH v3 1/2] aspeed: introduce a new UART0 device name


On 2/15/24 08:59, Jamin Lin wrote:

The Aspeed datasheet refers to the UART controllers
as UART1 - UART13 for the ast10x0, ast2600, ast2500
and ast2400 SoCs and the Aspeed ast2700 introduces an UART0
and the UART controllers as UART0 - UART12.

To keep the naming in the QEMU models
in sync with the datasheet, let's introduce a new UART0 device name
and do the required adjustments.

Signed-off-by: Troy Lee 
Signed-off-by: Jamin Lin 


Reviewed-by: Cédric Le Goater 

One comment below,


---
  hw/arm/aspeed.c | 13 -
  hw/arm/aspeed_ast10x0.c |  1 +
  hw/arm/aspeed_ast2400.c |  2 ++
  hw/arm/aspeed_ast2600.c |  1 +
  hw/arm/aspeed_soc_common.c  | 10 ++
  include/hw/arm/aspeed_soc.h | 17 +
  6 files changed, 35 insertions(+), 9 deletions(-)

diff --git a/hw/arm/aspeed.c b/hw/arm/aspeed.c
index 09b1e823ba..aa165d583b 100644
--- a/hw/arm/aspeed.c
+++ b/hw/arm/aspeed.c
@@ -342,7 +342,7 @@ static void connect_serial_hds_to_uarts(AspeedMachineState 
*bmc)
  int uart_chosen = bmc->uart_chosen ? bmc->uart_chosen : amc->uart_default;
  
  aspeed_soc_uart_set_chr(s, uart_chosen, serial_hd(0));

-for (int i = 1, uart = ASPEED_DEV_UART1; i < sc->uarts_num; i++, uart++) {
+for (int i = 0, uart = sc->uarts_base; i < sc->uarts_num; i++, uart++) {
  if (uart == uart_chosen) {
  continue;
  }
@@ -1094,7 +1094,7 @@ static char *aspeed_get_bmc_console(Object *obj, Error 
**errp)
  AspeedMachineClass *amc = ASPEED_MACHINE_GET_CLASS(bmc);
  int uart_chosen = bmc->uart_chosen ? bmc->uart_chosen : amc->uart_default;
  
-return g_strdup_printf("uart%d", uart_chosen - ASPEED_DEV_UART1 + 1);

+return g_strdup_printf("uart%d", aspeed_uart_index(uart_chosen));
  }
  
  static void aspeed_set_bmc_console(Object *obj, const char *value, Error **errp)

@@ -1103,6 +1103,8 @@ static void aspeed_set_bmc_console(Object *obj, const 
char *value, Error **errp)
  AspeedMachineClass *amc = ASPEED_MACHINE_GET_CLASS(bmc);
  AspeedSoCClass *sc = 
ASPEED_SOC_CLASS(object_class_by_name(amc->soc_name));
  int val;
+int uart_first = aspeed_uart_first(sc);
+int uart_last = aspeed_uart_last(sc);
  
  if (sscanf(value, "uart%u", ) != 1) {

  error_setg(errp, "Bad value for \"uart\" property");
@@ -1110,11 +1112,12 @@ static void aspeed_set_bmc_console(Object *obj, const 
char *value, Error **errp)
  }
  
  /* The number of UART depends on the SoC */

-if (val < 1 || val > sc->uarts_num) {
-error_setg(errp, "\"uart\" should be in range [1 - %d]", 
sc->uarts_num);
+if (val < uart_first || val > uart_last) {
+error_setg(errp, "\"uart\" should be in range [%d - %d]",
+   uart_first, uart_last);
  return;
  }
-bmc->uart_chosen = ASPEED_DEV_UART1 + val - 1;
+bmc->uart_chosen = val + ASPEED_DEV_UART0;
  }
  
  static void aspeed_machine_class_props_init(ObjectClass *oc)

diff --git a/hw/arm/aspeed_ast10x0.c b/hw/arm/aspeed_ast10x0.c
index c3b5116a6a..2634e0f654 100644
--- a/hw/arm/aspeed_ast10x0.c
+++ b/hw/arm/aspeed_ast10x0.c
@@ -436,6 +436,7 @@ static void aspeed_soc_ast1030_class_init(ObjectClass 
*klass, void *data)
  sc->wdts_num = 4;
  sc->macs_num = 1;
  sc->uarts_num = 13;
+sc->uarts_base = ASPEED_DEV_UART1;
  sc->irqmap = aspeed_soc_ast1030_irqmap;
  sc->memmap = aspeed_soc_ast1030_memmap;
  sc->num_cpus = 1;
diff --git a/hw/arm/aspeed_ast2400.c b/hw/arm/aspeed_ast2400.c
index 8829561bb6..95da85fee0 100644
--- a/hw/arm/aspeed_ast2400.c
+++ b/hw/arm/aspeed_ast2400.c
@@ -523,6 +523,7 @@ static void aspeed_soc_ast2400_class_init(ObjectClass *oc, 
void *data)
  sc->wdts_num = 2;
  sc->macs_num = 2;
  sc->uarts_num= 5;
+sc->uarts_base   = ASPEED_DEV_UART1;
  sc->irqmap   = aspeed_soc_ast2400_irqmap;
  sc->memmap   = aspeed_soc_ast2400_memmap;
  sc->num_cpus = 1;
@@ -551,6 +552,7 @@ static void aspeed_soc_ast2500_class_init(ObjectClass *oc, 
void *data)
  sc->wdts_num = 3;
  sc->macs_num = 2;
  sc->uarts_num= 5;
+sc->uarts_base   = ASPEED_DEV_UART1;
  sc->irqmap   = aspeed_soc_ast2500_irqmap;
  sc->memmap   = aspeed_soc_ast2500_memmap;
  sc->num_cpus = 1;
diff --git a/hw/arm/aspeed_ast2600.c b/hw/arm/aspeed_ast2600.c
index 4ee32ea99d..f74561ecdc 100644
--- a/hw/arm/aspeed_ast2600.c
+++ b/hw/arm/aspeed_ast2600.c
@@ -666,6 +666,7 @@ static void aspeed_soc_ast2600_class_init(ObjectClass *oc, 
void *data)
  sc->wdts_num = 4;
  sc->macs_num = 4;
  sc->uarts_num= 13;
+sc->uarts_base   = ASPEED_DEV_UART1;
  sc->irqmap   = aspeed_soc_ast2600_irqmap;

Re: [PATCH v3 2/2] aspeed: fix hardcode boot address 0


On 2/15/24 08:59, Jamin Lin wrote:

In the previous design of ASPEED SOCs QEMU model, it set the boot
address at "0" which was the hardcode setting for ast10x0, ast2600,
ast2500 and ast2400.

According to the design of ast2700, it has a bootmcu(riscv-32) which
is used for executing SPL and initialize DRAM and copy u-boot image
from SPI/Flash to DRAM at address 0x4 at SPL boot stage.
Then, CPUs(cortex-a35) execute u-boot, kernel and rofs.

Currently, QEMU does not support emulating two CPU architectures
in the same machine. Therefore, QEMU will only support
emulating the CPU (cortex-a35) side for ast2700 and the boot
address is "0x4 ".

Fixed hardcode boot address "0" for future models using
a different mapping address.

Signed-off-by: Troy Lee 
Signed-off-by: Jamin Lin 



Reviewed-by: Cédric Le Goater 

Thanks,

C.



---
  hw/arm/aspeed.c | 4 +++-
  hw/arm/aspeed_ast2400.c | 4 ++--
  hw/arm/aspeed_ast2600.c | 2 +-
  include/hw/arm/aspeed_soc.h | 2 --
  4 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/hw/arm/aspeed.c b/hw/arm/aspeed.c
index aa165d583b..9fec245e4e 100644
--- a/hw/arm/aspeed.c
+++ b/hw/arm/aspeed.c
@@ -289,12 +289,14 @@ static void aspeed_install_boot_rom(AspeedMachineState 
*bmc, BlockBackend *blk,
  uint64_t rom_size)
  {
  AspeedSoCState *soc = bmc->soc;
+AspeedSoCClass *sc = ASPEED_SOC_GET_CLASS(soc);
  
  memory_region_init_rom(>boot_rom, NULL, "aspeed.boot_rom", rom_size,

 _abort);
  memory_region_add_subregion_overlap(>spi_boot_container, 0,
  >boot_rom, 1);
-write_boot_rom(blk, ASPEED_SOC_SPI_BOOT_ADDR, rom_size, _abort);
+write_boot_rom(blk, sc->memmap[ASPEED_DEV_SPI_BOOT],
+   rom_size, _abort);
  }
  
  void aspeed_board_init_flashes(AspeedSMCState *s, const char *flashtype,

diff --git a/hw/arm/aspeed_ast2400.c b/hw/arm/aspeed_ast2400.c
index 95da85fee0..d125886207 100644
--- a/hw/arm/aspeed_ast2400.c
+++ b/hw/arm/aspeed_ast2400.c
@@ -26,7 +26,7 @@
  #define ASPEED_SOC_IOMEM_SIZE   0x0020
  
  static const hwaddr aspeed_soc_ast2400_memmap[] = {

-[ASPEED_DEV_SPI_BOOT]  =  ASPEED_SOC_SPI_BOOT_ADDR,
+[ASPEED_DEV_SPI_BOOT]  = 0x,
  [ASPEED_DEV_IOMEM]  = 0x1E60,
  [ASPEED_DEV_FMC]= 0x1E62,
  [ASPEED_DEV_SPI1]   = 0x1E63,
@@ -61,7 +61,7 @@ static const hwaddr aspeed_soc_ast2400_memmap[] = {
  };
  
  static const hwaddr aspeed_soc_ast2500_memmap[] = {

-[ASPEED_DEV_SPI_BOOT]  = ASPEED_SOC_SPI_BOOT_ADDR,
+[ASPEED_DEV_SPI_BOOT]  = 0x,
  [ASPEED_DEV_IOMEM]  = 0x1E60,
  [ASPEED_DEV_FMC]= 0x1E62,
  [ASPEED_DEV_SPI1]   = 0x1E63,
diff --git a/hw/arm/aspeed_ast2600.c b/hw/arm/aspeed_ast2600.c
index f74561ecdc..174be53770 100644
--- a/hw/arm/aspeed_ast2600.c
+++ b/hw/arm/aspeed_ast2600.c
@@ -22,7 +22,7 @@
  #define ASPEED_SOC_DPMCU_SIZE   0x0004
  
  static const hwaddr aspeed_soc_ast2600_memmap[] = {

-[ASPEED_DEV_SPI_BOOT]  = ASPEED_SOC_SPI_BOOT_ADDR,
+[ASPEED_DEV_SPI_BOOT]  = 0x,
  [ASPEED_DEV_SRAM]  = 0x1000,
  [ASPEED_DEV_DPMCU] = 0x1800,
  /* 0x1600 0x17FF : AHB BUS do LPC Bus bridge */
diff --git a/include/hw/arm/aspeed_soc.h b/include/hw/arm/aspeed_soc.h
index e1a023be53..c60fac900a 100644
--- a/include/hw/arm/aspeed_soc.h
+++ b/include/hw/arm/aspeed_soc.h
@@ -224,8 +224,6 @@ enum {
  ASPEED_DEV_FSI2,
  };
  
-#define ASPEED_SOC_SPI_BOOT_ADDR 0x0

-
  qemu_irq aspeed_soc_get_irq(AspeedSoCState *s, int dev);
  bool aspeed_soc_uart_realize(AspeedSoCState *s, Error **errp);
  void aspeed_soc_uart_set_chr(AspeedSoCState *s, int dev, Chardev *chr);

Re: [PATCH RFCv2 0/8] vfio/iommufd: IOMMUFD Dirty Tracking

2024-02-14 Thread Cédric Le Goater

Hello Joao,

On 2/13/24 12:59, Joao Martins wrote:

On 12/02/2024 13:56, Joao Martins wrote:

This small series adds support for Dirty Tracking in IOMMUFD backend.
The sole reason I still made it RFC is because of the second patch,
where we are implementing user-managed auto domains.

In essence it is quite similar to the original IOMMUFD series where we
would allocate a HWPT, until we switched later on into a IOAS attach.
Patch 2 goes into more detail, but the gist is that there are two modes of
using IOMMUFD and by continuing to use kernel managed auto domains we would end
up duplicating the same flags we have in HWPT but into the VFIO IOAS
attach. While it is true that just adding a flag is simpler, it also
creates duplication and motivates duplicating what hwpt-alloc already has.
But there's a chance I have the wrong expectation here, so any feedback
welcome.

The series is divided into:

* Patch 1: Adds a simple helper to get device capabilities;

* Patches 2 - 5: IOMMUFD backend support for dirty tracking;

The workflow is relatively simple:

1) Probe device and allow dirty tracking in the HWPT
2) Toggling dirty tracking on/off
3) Read-and-clear of Dirty IOVAs

The heuristics selected for (1) were to enable it *if* device supports
migration but doesn't support VF dirty tracking or IOMMU dirty tracking
is supported. The latter is for the hotplug case where we can add a device
without a tracker and thus still support migration.

The unmap case is deferred until further vIOMMU support with migration
is added[3] which will then introduce the usage of
IOMMU_HWPT_GET_DIRTY_BITMAP_NO_CLEAR in GET_DIRTY_BITMAP ioctl in the
dma unmap bitmap flow.

* Patches 6-8: Add disabling of hugepages to allow tracking at base
page; avoid blocking live migration where there's no VF dirty
tracker, considering that we have IOMMU dirty tracking. And allow
disabling VF dirty tracker via qemu command line.

This series builds on top of Zhengzhong series[0], but only requires the
first 9 patches i.e. up to ("vfio/pci: Initialize host iommu device
instance after attachment")[1] that are more generic IOMMUFD device
plumbing, and doesn't require the nesting counterpart.

I need to add that this series doesn't *need* to be based on Zhengzhong series.
Though given that he is consolidating how an IOMMUFD device info is represented
it felt the correct thing to do. For dirty tracking we mainly need the
dev_id/iommufd available when we are going to attach, that's it.

I've pushed this series version that doesn't have such dependency, let me know
if you want me to pursue this version instead going forward:

https://github.com/jpemartins/qemu/commits/iommufd-v5.nodeps

I feel I have lost track of all the different patchsets.

To recap, there is yours :

* vfio/iommufd: IOMMUFD Dirty Tracking

https://lore.kernel.org/qemu-devel/20240212135643.5858-1-joao.m.mart...@oracle.com/

Zhengzhong's :

* [PATCH rfcv2 00/18] Check and sync host IOMMU cap/ecap with vIOMMU

https://lore.kernel.org/qemu-devel/20240201072818.327930-1-zhenzhong.d...@intel.com/

Eric's :

* [RFC 0/7] VIRTIO-IOMMU/VFIO: Fix host iommu geometry handling for hotplugged
devices

https://lore.kernel.org/qemu-devel/20240117080414.316890-1-eric.au...@redhat.com/

Steve's:

* [PATCH V3 00/13] allow cpr-reboot for vfio

https://lore.kernel.org/qemu-devel/1707418446-134863-1-git-send-email-steven.sist...@oracle.com/

Mine, which should be an RFC :

* [PATCH 00/14] migration: Improve error reporting
https://lore.kernel.org/qemu-devel/20240207133347.1115903-1-...@redhat.com/

Anything else ?

Thanks,

Re: [PATCH v4 4/4] qemu-options.hx: Add an entry for virtio-iommu-pci and document aw-bits

2024-02-13 Thread Cédric Le Goater


On 2/13/24 19:28, Eric Auger wrote:

We are missing an entry for the virtio-iommu-pci device. Add the
information on which machine it is currently supported and document
the new aw-bits option.

Signed-off-by: Eric Auger 
---
  qemu-options.hx | 8 
  1 file changed, 8 insertions(+)

diff --git a/qemu-options.hx b/qemu-options.hx
index 8547254dbf..6a8c970640 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -1172,6 +1172,14 @@ SRST
  Please also refer to the wiki page for general scenarios of VT-d
  emulation in QEMU: https://wiki.qemu.org/Features/VT-d.
  
+``-device virtio-iommu-pci[,option=...]``

+This is only supported by ``-machine q35`` and ``-machine virt``.
+It supports below options:
+
+``aw-bits=val`` (val between 32 and 64, default depends on machine)
+This decides the address width of IOVA address space. With
+q35 it defaults to 39 bits. On arm virt it defaults to 48 bits.



Minor improvement :

 "It defaults to 39 bits on q35 machines and 48 bits on ARM virt machines."

Anyhow,

Reviewed-by: Cédric Le Goater 

Thanks,

C.

Re: [PATCH] target/ppc: Fix lxv/stxv MSR facility check

2024-02-13 Thread Cédric Le Goater


On 2/13/24 09:39, Nicholas Piggin wrote:

The move to decodetree flipped the inequality test for the VEC / VSX
MSR facility check.

This caused application crashes under Linux, where these facility
unavailable interrupts are used for lazy-switching of VEC/VSX register
sets. Getting the incorrect interrupt would result in wrong registers
being loaded, potentially overwriting live values and/or exposing
stale ones.

Cc: qemu-sta...@nongnu.org
Reported-by: Joel Stanley 
Fixes: 70426b5bb738 ("target/ppc: moved stxvx and lxvx from legacy to 
decodtree")
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1769
Tested-by: Harsh Prateek Bora 
Signed-off-by: Nicholas Piggin 


Reviewed-by: Cédric Le Goater 
Tested-by: Cédric Le Goater 

with a RHEL9 image.

Thanks,

C.



---
  target/ppc/translate/vsx-impl.c.inc | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/target/ppc/translate/vsx-impl.c.inc 
b/target/ppc/translate/vsx-impl.c.inc
index 6db87ab336..0266f09119 100644
--- a/target/ppc/translate/vsx-impl.c.inc
+++ b/target/ppc/translate/vsx-impl.c.inc
@@ -2268,7 +2268,7 @@ static bool do_lstxv(DisasContext *ctx, int ra, TCGv 
displ,
  
  static bool do_lstxv_D(DisasContext *ctx, arg_D *a, bool store, bool paired)

  {
-if (paired || a->rt >= 32) {
+if (paired || a->rt < 32) {
  REQUIRE_VSX(ctx);
  } else {
  REQUIRE_VECTOR(ctx);

Re: [PATCH 4/4] hw/arm/stellaris: Add missing QOM 'SoC' parent

2024-02-13 Thread Cédric Le Goater


On 2/13/24 16:36, Philippe Mathieu-Daudé wrote:

On 1/2/24 17:46, Peter Maydell wrote:

On Tue, 30 Jan 2024 at 19:03, Philippe Mathieu-Daudé  wrote:


QDev objects created with qdev_new() need to manually add
their parent relationship with object_property_add_child().

Since we don't model the SoC, just use a QOM container.

Signed-off-by: Philippe Mathieu-Daudé 
---


Ah, this is where the other qdev_new() calls are sorted.

Reviewed-by: Peter Maydell 

I wonder if we should add a variant on qdev_new() that
you can pass in the parent object to?


Yes, this is what we discussed with Markus. In order to
stop using the "/unattached" container from pre-QOM,
qdev_new() must take a QOM parent. I tried to do it but hit
some problem with some odd use in PPC or S390 (discussed
with Cédric so likely PPC, I need to go back to it).


Can you remind what this was about ?


Thanks,

C.

Re: [PATCH 08/14] vfio: Use new Error** argument in vfio_save_setup()


On 2/12/24 10:17, Avihai Horon wrote:

Hi Cedric,

On 07/02/2024 15:33, Cédric Le Goater wrote:

External email: Use caution opening links or attachments


Add an Error** argument to vfio_migration_set_state() and adjust
callers, including vfio_save_setup(). The error will be propagated up
to qemu_savevm_state_setup() where the save_setup() handler is
executed.

Signed-off-by: Cédric Le Goater 
---
  hw/vfio/migration.c | 62 +
  1 file changed, 40 insertions(+), 22 deletions(-)

diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
index 
2dfbe671f6f45aa530c7341177bb532d8292cecd..2e0a79967cc97f44d9be5575c3cfe18c9f349dab
 100644
--- a/hw/vfio/migration.c
+++ b/hw/vfio/migration.c
@@ -84,7 +84,8 @@ static const char *mig_state_to_str(enum 
vfio_device_mig_state state)

  static int vfio_migration_set_state(VFIODevice *vbasedev,
  enum vfio_device_mig_state new_state,
-    enum vfio_device_mig_state recover_state)
+    enum vfio_device_mig_state recover_state,
+    Error **errp)
  {
  VFIOMigration *migration = vbasedev->migration;
  uint64_t buf[DIV_ROUND_UP(sizeof(struct vfio_device_feature) +
@@ -104,15 +105,15 @@ static int vfio_migration_set_state(VFIODevice *vbasedev,
  ret = -errno;

  if (recover_state == VFIO_DEVICE_STATE_ERROR) {
-    error_report("%s: Failed setting device state to %s, err: %s. "
- "Recover state is ERROR. Resetting device",
- vbasedev->name, mig_state_to_str(new_state),
- strerror(errno));
+    error_setg(errp, "%s: Failed setting device state to %s, err: %s. "
+   "Recover state is ERROR. Resetting device",
+   vbasedev->name, mig_state_to_str(new_state),
+   strerror(errno));

  goto reset_device;
  }

-    error_report(
+    error_setg(errp,
  "%s: Failed setting device state to %s, err: %s. Setting device in 
recover state %s",
   vbasedev->name, mig_state_to_str(new_state),
   strerror(errno), mig_state_to_str(recover_state));
@@ -120,7 +121,7 @@ static int vfio_migration_set_state(VFIODevice *vbasedev,
  mig_state->device_state = recover_state;
  if (ioctl(vbasedev->fd, VFIO_DEVICE_FEATURE, feature)) {
  ret = -errno;
-    error_report(
+    error_setg(errp,
  "%s: Failed setting device in recover state, err: %s. Resetting 
device",
   vbasedev->name, strerror(errno));


I think here we will assert because errp is already set.

Adding an error_append() API would be useful here I guess.


yes.


Otherwise, we need to move the first error_setg() below, to before we return 
from a successful recover state change, and construct the error message 
differently (e.g., provide a full error message for the recover state fail case 
containing also the first error).

Do you have other ideas?


Errors for :

if (ioctl(vbasedev->fd, VFIO_DEVICE_RESET)) {

should be treated as the others with an error_append() and not
hw_error(). This needs a rework before any new changes.

I also wonder why we have twice :

migration->device_state = recover_state;

It looks redundant. The ioctl VFIO_DEVICE_FEATURE should leave the
state unmodified.

Thanks,

C.

Re: [PATCH 04/14] migration: Modify ram_init_bitmaps() to report dirty tracking errors


On 2/12/24 09:51, Avihai Horon wrote:

Hi Cedric,

On 07/02/2024 15:33, Cédric Le Goater wrote:

External email: Use caution opening links or attachments


The .save_setup() handler has now an Error** argument that we can use
to propagate errors reported by the .log_global_start() handler. Do
that for the RAM. qemu_savevm_state_setup() will store the error under
the migration stream for later detection in the migration sequence.

Signed-off-by: Cédric Le Goater 
---
  migration/ram.c | 19 ++-
  1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/migration/ram.c b/migration/ram.c
index 
d86626bb1c704b2d3497b323a702ca6ca8939a79..b87245466bb46937fd0358d0c66432bcc6280018
 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -2802,19 +2802,17 @@ static void 
migration_bitmap_clear_discarded_pages(RAMState *rs)
  }
  }

-static void ram_init_bitmaps(RAMState *rs)
+static void ram_init_bitmaps(RAMState *rs, Error **errp)
  {
-    Error *local_err = NULL;
-
  qemu_mutex_lock_ramlist();

  WITH_RCU_READ_LOCK_GUARD() {
  ram_list_init_bitmaps();
  /* We don't use dirty log with background snapshots */
  if (!migrate_background_snapshot()) {
-    memory_global_dirty_log_start(GLOBAL_DIRTY_MIGRATION, &local_err);
-    if (local_err) {
-    error_report_err(local_err);
+    memory_global_dirty_log_start(GLOBAL_DIRTY_MIGRATION, errp);
+    if (*errp) {


I think we should use ERRP_GUARD() or a local error here and also below at 
ram_init_bitmaps() (or return bool like Philippe suggested).


yes. I will rework that part.

Thanks,

C.




Thanks.


+    break;
  }
  migration_bitmap_sync_precopy(rs, false);
  }
@@ -2828,7 +2826,7 @@ static void ram_init_bitmaps(RAMState *rs)
  migration_bitmap_clear_discarded_pages(rs);
  }

-static int ram_init_all(RAMState **rsp)
+static int ram_init_all(RAMState **rsp, Error **errp)
  {
  if (ram_state_init(rsp)) {
  return -1;
@@ -2839,7 +2837,10 @@ static int ram_init_all(RAMState **rsp)
  return -1;
  }

-    ram_init_bitmaps(*rsp);
+    ram_init_bitmaps(*rsp, errp);
+    if (*errp) {
+    return -1;
+    }

  return 0;
  }
@@ -2952,7 +2953,7 @@ static int ram_save_setup(QEMUFile *f, void *opaque, 
Error **errp)

  /* migration has already setup the bitmap, reuse it. */
  if (!migration_in_colo_state()) {
-    if (ram_init_all(rsp) != 0) {
+    if (ram_init_all(rsp, errp) != 0) {
  compress_threads_save_cleanup();
  return -1;
  }
--
2.43.0

Re: [PATCH 03/14] memory: Add Error** argument to .log_global*() handlers


On 2/12/24 09:43, Avihai Horon wrote:

Hi Cedric,

On 09/02/2024 12:14, Cédric Le Goater wrote:

External email: Use caution opening links or attachments


On 2/8/24 06:48, Peter Xu wrote:

On Wed, Feb 07, 2024 at 02:33:36PM +0100, Cédric Le Goater wrote:

@@ -2936,14 +2940,14 @@ void memory_global_dirty_log_start(unsigned int flags)
  trace_global_dirty_changed(global_dirty_tracking);

  if (!old_flags) {
-    MEMORY_LISTENER_CALL_GLOBAL(log_global_start, Forward);
+    MEMORY_LISTENER_CALL_GLOBAL(log_global_start, Forward, errp);
  memory_region_transaction_begin();
  memory_region_update_pending = true;
  memory_region_transaction_commit();
  }
  }

-static void memory_global_dirty_log_do_stop(unsigned int flags)
+static void memory_global_dirty_log_do_stop(unsigned int flags, Error **errp)
  {
  assert(flags && !(flags & (~GLOBAL_DIRTY_MASK)));
  assert((global_dirty_tracking & flags) == flags);
@@ -2955,7 +2959,7 @@ static void memory_global_dirty_log_do_stop(unsigned int 
flags)
  memory_region_transaction_begin();
  memory_region_update_pending = true;
  memory_region_transaction_commit();
-    MEMORY_LISTENER_CALL_GLOBAL(log_global_stop, Reverse);
+    MEMORY_LISTENER_CALL_GLOBAL(log_global_stop, Reverse, errp);
  }
  }


I'm a little bit surprised to see that MEMORY_LISTENER_CALL_GLOBAL()
already allows >2 args, with the ability to conditionally pass over errp
with such oneliner change; even if all callers were only using 2 args
before this patch..

yes. The proposal takes the easy path.

Should we change all memory listener global handlers :

  begin
  commit
  log_global_after_sync
  log_global_start
  log_global_stop

to take an extra Error **errp argument ?

I think we should distinguish begin + commit handlers from the log_global_*
with a new macro. In which case, we could also change the handler to return
a bool and fail at the first error in MEMORY_LISTENER_CALL_GLOBAL(...).


I think we must fail at first error in any case. Otherwise, if two handlers 
error and call error_setg() with errp, the second handler will assert IIUC.


Good point. I will respin with a new MEMORY_LISTENER_CALL_GLOBAL_ERR macro
exiting the loop at first error.

Thanks,

C.

Re: [RFC PATCH 14/14] migration: Fix return-path thread exit


Hello Peter

On 2/8/24 06:57, Peter Xu wrote:

On Wed, Feb 07, 2024 at 02:33:47PM +0100, Cédric Le Goater wrote:

In case of error, close_return_path_on_source() can perform a shutdown
to exit the return-path thread.  However, in migrate_fd_cleanup(),
'to_dst_file' is closed before calling close_return_path_on_source()
and the shutdown fails, leaving the source and destination waiting for
an event to occur.

Close the file after calling close_return_path_on_source() so that the
shutdown succeeds and the return-path thread exits.

Signed-off-by: Cédric Le Goater 
---

  This is an RFC because the correct fix implies reworking the QEMUFile
  construct, built on top of the QEMU I/O channel.

  migration/migration.c | 13 ++---
  1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/migration/migration.c b/migration/migration.c
index 
5f55af3d7624750ca416c4177781241b3e291e5d..de329f2c553288935d824748286e79e535929b8b
 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -1313,6 +1313,8 @@ void migrate_set_state(int *state, int old_state, int 
new_state)
  
  static void migrate_fd_cleanup(MigrationState *s)

  {
+QEMUFile *tmp = NULL;
+
  g_free(s->hostname);
  s->hostname = NULL;
  json_writer_free(s->vmdesc);
@@ -1321,8 +1323,6 @@ static void migrate_fd_cleanup(MigrationState *s)
  qemu_savevm_state_cleanup();
  
  if (s->to_dst_file) {

-QEMUFile *tmp;
-
  trace_migrate_fd_cleanup();
  bql_unlock();
  if (s->migration_thread_running) {
@@ -1341,15 +1341,14 @@ static void migrate_fd_cleanup(MigrationState *s)
   * critical section won't block for long.
   */
  migration_ioc_unregister_yank_from_file(tmp);
-qemu_fclose(tmp);
  }
  
-/*

- * We already cleaned up to_dst_file, so errors from the return
- * path might be due to that, ignore them.
- */
  close_return_path_on_source(s);
  
+if (tmp) {

+qemu_fclose(tmp);
+}
+
  assert(!migration_is_active(s));
  
  if (s->state == MIGRATION_STATUS_CANCELLING) {


I think this is okay to me for a short term plan.  I'll see how others
think, also add Dan into the loop.

If so, would you please add a rich comment explaining why tmp needs to be
closed later?  Especially, explicit comment on the ordering requirement
would be helpful: IMHO here it's an order that qemu_fclose() must happen
after close_return_path_on_source().  So when others work on this code we
don't easily break it without noticing.


Sure. I will when we have clarified with Fabiano what is the best
approach.


Also please feel free to post separately on migration patches if you'd like
us to merge the patches when repost.


This series is a collection of multiple (related) changes :

* extra Error** parameter to save_setup() migration handlers.
  This change has consequences on the various callers which are not
  fully analyzed.
* similar changes for memory logging handlers. These look more self-contained
  and I will see if I can send them separately.
* return-path thread termination

and then, in background we have open questions regarding :

* the QEMUfile implementation and its QIOChannel usage for migration
  streams
* qemu_file_set_error* vs. migrate_set_error. It is confusing, at least
  for me. Do we have some documentation on best practices ?

Thanks,

C.

Re: [RFC PATCH 14/14] migration: Fix return-path thread exit


Hello Fabiano

On 2/8/24 14:29, Fabiano Rosas wrote:

Cédric Le Goater  writes:


In case of error, close_return_path_on_source() can perform a shutdown
to exit the return-path thread.  However, in migrate_fd_cleanup(),
'to_dst_file' is closed before calling close_return_path_on_source()
and the shutdown fails, leaving the source and destination waiting for
an event to occur.


Hi, Cédric

Are you sure this is not caused by patch 13? 


It happens with upstream QEMU without any patch.

When vfio_listener_log_global_start() fails, it sets an error on the
QEMUFile. To reproduce without a VFIO device, you can inject an error
when dirty tracking is started. Something like below,

@@ -2817,6 +2817,8 @@ static void ram_init_bitmaps(RAMState *r
  * containing all 1s to exclude any discarded pages from migration.
  */
 migration_bitmap_clear_discarded_pages(rs);
+
+qemu_file_set_error(migrate_get_current()->to_dst_file, -EAGAIN);
 }
 
 static int ram_init_all(RAMState **rsp)


Activate return-path and migrate.


That 'if (ms->to_dst_file'
was there to avoid this sort of thing happening.

Is there some reordering possibility that I'm not spotting in the code
below? I think the data dependency on to_dst_file shouldn't allow it.

migrate_fd_cleanup:
 qemu_mutex_lock(&s->qemu_file_lock);
 tmp = s->to_dst_file;
 s->to_dst_file = NULL;
 qemu_mutex_unlock(&s->qemu_file_lock);
 ...
 qemu_fclose(tmp);

close_return_path_on_source:
 WITH_QEMU_LOCK_GUARD(&ms->qemu_file_lock) {
 if (ms->to_dst_file && ms->rp_state.from_dst_file &&
 qemu_file_get_error(ms->to_dst_file)) {
 qemu_file_shutdown(ms->rp_state.from_dst_file);
 }
 }


close_return_path_on_source() is called by migrate_fd_cleanup() in
the same thread. So, when we reach the locking section, ms->to_dst_file
is already NULL and the file has already been closed by qemu_fclose() :/

Maybe I misunderstood. Please try to reproduce with the little hack
above.

Thanks,

C.


I'm thinking maybe the culprit is the close_return_path_on_source() at
migration_completion(). It might be possible for it to race with the
migrate_fd_cleanup_bh from migration_iteration_finish().

If that's the case, then I think that one possible fix would be to hold
the BQL at migration_completion() so the BH doesn't get dispatched until
we properly close the return path.



Close the file after calling close_return_path_on_source() so that the
shutdown succeeds and the return-path thread exits.

Signed-off-by: Cédric Le Goater 
---

  This is an RFC because the correct fix implies reworking the QEMUFile
  construct, built on top of the QEMU I/O channel.

  migration/migration.c | 13 ++---
  1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/migration/migration.c b/migration/migration.c
index 
5f55af3d7624750ca416c4177781241b3e291e5d..de329f2c553288935d824748286e79e535929b8b
 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -1313,6 +1313,8 @@ void migrate_set_state(int *state, int old_state, int 
new_state)
  
  static void migrate_fd_cleanup(MigrationState *s)

  {
+QEMUFile *tmp = NULL;
+
  g_free(s->hostname);
  s->hostname = NULL;
  json_writer_free(s->vmdesc);
@@ -1321,8 +1323,6 @@ static void migrate_fd_cleanup(MigrationState *s)
  qemu_savevm_state_cleanup();
  
  if (s->to_dst_file) {

-QEMUFile *tmp;
-
  trace_migrate_fd_cleanup();
  bql_unlock();
  if (s->migration_thread_running) {
@@ -1341,15 +1341,14 @@ static void migrate_fd_cleanup(MigrationState *s)
   * critical section won't block for long.
   */
  migration_ioc_unregister_yank_from_file(tmp);
-qemu_fclose(tmp);
  }
  
-/*

- * We already cleaned up to_dst_file, so errors from the return
- * path might be due to that, ignore them.
- */
  close_return_path_on_source(s);
  
+if (tmp) {

+qemu_fclose(tmp);
+}
+
  assert(!migration_is_active(s));
  
  if (s->state == MIGRATION_STATUS_CANCELLING) {

Re: [PATCH 01/14] migration: Add Error** argument to .save_setup() handler


On 2/12/24 09:36, Avihai Horon wrote:

Hi, Cedric

On 07/02/2024 15:33, Cédric Le Goater wrote:

External email: Use caution opening links or attachments


The purpose is to record a potential error in the migration stream if
qemu_savevm_state_setup() fails. Most of the current .save_setup()
handlers can be modified to use the Error argument instead of managing
their own and calling locally error_report(). The following patches
will introduce such changes for VFIO first.

Signed-off-by: Cédric Le Goater 
---
  include/migration/register.h   | 2 +-
  hw/ppc/spapr.c | 2 +-
  hw/s390x/s390-stattrib.c   | 2 +-
  hw/vfio/migration.c    | 2 +-
  migration/block-dirty-bitmap.c | 2 +-
  migration/block.c  | 2 +-
  migration/ram.c    | 2 +-
  migration/savevm.c | 4 ++--
  8 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/include/migration/register.h b/include/migration/register.h
index 
9ab1f79512c605f0c88a45b560c57486fa054441..831600a00eae4efd0464b60925d65de4d9dbcff8
 100644
--- a/include/migration/register.h
+++ b/include/migration/register.h
@@ -25,7 +25,7 @@ typedef struct SaveVMHandlers {
   * used to perform early checks.
   */
  int (*save_prepare)(void *opaque, Error **errp);
-    int (*save_setup)(QEMUFile *f, void *opaque);
+    int (*save_setup)(QEMUFile *f, void *opaque, Error **errp);
  void (*save_cleanup)(void *opaque);
  int (*save_live_complete_postcopy)(QEMUFile *f, void *opaque);
  int (*save_live_complete_precopy)(QEMUFile *f, void *opaque);
diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 
0d72d286d80f0435122593555f79fae4d90acf81..a1b0aa02582ad2d68a13476c1859b18143da7bb8
 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -2142,7 +2142,7 @@ static const VMStateDescription vmstate_spapr = {
  }
  };

-static int htab_save_setup(QEMUFile *f, void *opaque)
+static int htab_save_setup(QEMUFile *f, void *opaque, Error **errp)
  {
  SpaprMachineState *spapr = opaque;

diff --git a/hw/s390x/s390-stattrib.c b/hw/s390x/s390-stattrib.c
index 
c483b62a9b5f71772639fc180bdad15ecb6711cb..c934df424a555d83d2198f5ddfc0cbe0ea98e9ec
 100644
--- a/hw/s390x/s390-stattrib.c
+++ b/hw/s390x/s390-stattrib.c
@@ -166,7 +166,7 @@ static int cmma_load(QEMUFile *f, void *opaque, int 
version_id)
  return ret;
  }

-static int cmma_save_setup(QEMUFile *f, void *opaque)
+static int cmma_save_setup(QEMUFile *f, void *opaque, Error **errp)
  {
  S390StAttribState *sas = S390_STATTRIB(opaque);
  S390StAttribClass *sac = S390_STATTRIB_GET_CLASS(sas);
diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
index 
70e6b1a709f9b67e4c9eb41033d76347275cac42..8bcb4bc73cd5ba5338e3ffa4d907d0e6bfbb9485
 100644
--- a/hw/vfio/migration.c
+++ b/hw/vfio/migration.c
@@ -378,7 +378,7 @@ static int vfio_save_prepare(void *opaque, Error **errp)
  return 0;
  }

-static int vfio_save_setup(QEMUFile *f, void *opaque)
+static int vfio_save_setup(QEMUFile *f, void *opaque, Error **errp)
  {
  VFIODevice *vbasedev = opaque;
  VFIOMigration *migration = vbasedev->migration;
diff --git a/migration/block-dirty-bitmap.c b/migration/block-dirty-bitmap.c
index 
2708abf3d762de774ed294d3fdb8e56690d2974c..16f84e6c57c2403a8c2d6319f4e7b6360dade28c
 100644
--- a/migration/block-dirty-bitmap.c
+++ b/migration/block-dirty-bitmap.c
@@ -1213,7 +1213,7 @@ fail:
  return ret;
  }

-static int dirty_bitmap_save_setup(QEMUFile *f, void *opaque)
+static int dirty_bitmap_save_setup(QEMUFile *f, void *opaque, Error **errp)
  {
  DBMSaveState *s = &((DBMState *)opaque)->save;
  SaveBitmapState *dbms = NULL;
diff --git a/migration/block.c b/migration/block.c
index 
8c6ebafacc1ffe930d1d4f19d968817b14852c69..df15319ceab66201b043f15eac1b0a7d6522b60c
 100644
--- a/migration/block.c
+++ b/migration/block.c
@@ -708,7 +708,7 @@ static void block_migration_cleanup(void *opaque)
  blk_mig_unlock();
  }

-static int block_save_setup(QEMUFile *f, void *opaque)
+static int block_save_setup(QEMUFile *f, void *opaque, Error **errp)
  {
  int ret;

diff --git a/migration/ram.c b/migration/ram.c
index 
d5b7cd5ac2f31aabf4a248b966153401c48912cf..136c237f4079f68d4e578cf1c72eec2efc815bc8
 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -2931,7 +2931,7 @@ void qemu_guest_free_page_hint(void *addr, size_t len)
   * @f: QEMUFile where to send the data
   * @opaque: RAMState pointer
   */
-static int ram_save_setup(QEMUFile *f, void *opaque)
+static int ram_save_setup(QEMUFile *f, void *opaque, Error **errp)
  {
  RAMState **rsp = opaque;
  RAMBlock *block;
diff --git a/migration/savevm.c b/migration/savevm.c
index 
d612c8a9020b204d5d078d5df85f0e6449c27645..f2ae799bad13e631bccf733a34c3a8fd22e8dd48
 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -1342,10 +1342,10 @@ void qemu_savevm_state_setup(QEMUFile *f)
  }
  save_section_header(f, se, QEMU_VM_SECTION_START);

-    ret = se->ops->

Re: [PATCH 13/14] migration: Use migrate_has_error() in close_return_path_on_source()


On 2/8/24 14:57, Fabiano Rosas wrote:

Cédric Le Goater  writes:


On 2/8/24 14:07, Fabiano Rosas wrote:

Cédric Le Goater  writes:


close_return_path_on_source() retrieves the migration error from the
QEMUFile '->to_dst_file' to know if a shutdown is required. This
shutdown is required to exit the return-path thread. However, in
migrate_fd_cleanup(), '->to_dst_file' is cleaned up before calling
close_return_path_on_source() and the shutdown is never performed,
leaving the source and destination waiting for an event to occur.

Avoid relying on '->to_dst_file' and use migrate_has_error() instead.

Suggested-by: Peter Xu 
Signed-off-by: Cédric Le Goater 
---
   migration/migration.c | 3 +--
   1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/migration/migration.c b/migration/migration.c
index 
d5f705ceef4c925589aa49335969672c0d761fa2..5f55af3d7624750ca416c4177781241b3e291e5d
 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -2372,8 +2372,7 @@ static bool close_return_path_on_source(MigrationState 
*ms)
* cause it to unblock if it's stuck waiting for the destination.
*/
   WITH_QEMU_LOCK_GUARD(&ms->qemu_file_lock) {
-if (ms->to_dst_file && ms->rp_state.from_dst_file &&
-qemu_file_get_error(ms->to_dst_file)) {
+if (migrate_has_error(ms) && ms->rp_state.from_dst_file) {
   qemu_file_shutdown(ms->rp_state.from_dst_file);
   }
   }


Hm, maybe Peter can help defend this, but this assumes that every
function that takes an 'f' and sets the file error also sets
migrate_set_error(). I'm not sure we have determined that, have we?


How could we check all the code paths? I agree it is difficult when
looking at the code :/


It would help if the thing wasn't called 'f' for the most part of the
code to begin with.

Whenever there's a file error at to_dst_file there's the chance that the
rp_state.from_dst_file got stuck. So we cannot ignore the file error.

Would it work if we checked it earlier during cleanup as you did
previously and then set the migration error?


Do you mean doing something similar to what is done in
source_return_path_thread() ?

if (qemu_file_get_error(s->to_dst_file)) {
qemu_file_get_error_obj(s->to_dst_file, &err);
if (err) {
migrate_set_error(ms, err);
error_free(err);
...

Yes. That would be safer I think.


Nevertheless, I am struggling to understand how qemu_file_set_error()
and migrate_set_error() fit together. I was expecting some kind of
synchronization routine, but there isn't one, it seems. Are they completely
orthogonal? When should we use these routines and when not?

My initial goal was to modify some of the memory handlers (log_global*)
and migration handlers to propagate errors at the QMP level and then
report them to the management layer. This is growing into something bigger
and currently, I don't find a good approach to the problem.

The last two patches of this series try to fix the return-path thread
termination. Let's keep that for after.

Thanks,

C.

Re: [PATCH v2 2/6] vfio: Avoid inspecting option QDict for rombar


On 2/12/24 09:04, Philippe Mathieu-Daudé wrote:

On 10/2/24 11:24, Akihiko Odaki wrote:

Use pci_rom_bar_explicitly_enabled() to determine if rombar is explicitly
enabled.

Signed-off-by: Akihiko Odaki 
---
  hw/vfio/pci.c | 3 +--
  1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index d7fe06715c4b..44178ac9355f 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -1010,7 +1010,6 @@ static void vfio_pci_size_rom(VFIOPCIDevice *vdev)
  {
  uint32_t orig, size = cpu_to_le32((uint32_t)PCI_ROM_ADDRESS_MASK);
  off_t offset = vdev->config_offset + PCI_ROM_ADDRESS;
-    DeviceState *dev = DEVICE(vdev);
  char *name;
  int fd = vdev->vbasedev.fd;
@@ -1044,7 +1043,7 @@ static void vfio_pci_size_rom(VFIOPCIDevice *vdev)
  }
  if (vfio_opt_rom_in_denylist(vdev)) {
-    if (dev->opts && qdict_haskey(dev->opts, "rombar")) {
+    if (pci_rom_bar_explicitly_enabled(&vdev->pdev)) {


"pdev" is considered internal field, please use the DEVICE() macro
to access it. 


Yes. I was just looking at  vfio_pci_size_rom(). There is a test at
the beginning of this routine which should be changed to use DEVICE()


if (vdev->pdev.romfile || !vdev->pdev.rom_bar) {
/* Since pci handles romfile, just print a message and return */
if (vfio_opt_rom_in_denylist(vdev) && vdev->pdev.romfile) {
...


Thanks,

C.

Re: [PATCH 03/14] memory: Add Error** argument to .log_global*() handlers


On 2/8/24 06:48, Peter Xu wrote:

On Wed, Feb 07, 2024 at 02:33:36PM +0100, Cédric Le Goater wrote:

@@ -2936,14 +2940,14 @@ void memory_global_dirty_log_start(unsigned int flags)
  trace_global_dirty_changed(global_dirty_tracking);
  
  if (!old_flags) {

-MEMORY_LISTENER_CALL_GLOBAL(log_global_start, Forward);
+MEMORY_LISTENER_CALL_GLOBAL(log_global_start, Forward, errp);
  memory_region_transaction_begin();
  memory_region_update_pending = true;
  memory_region_transaction_commit();
  }
  }
  
-static void memory_global_dirty_log_do_stop(unsigned int flags)

+static void memory_global_dirty_log_do_stop(unsigned int flags, Error **errp)
  {
  assert(flags && !(flags & (~GLOBAL_DIRTY_MASK)));
  assert((global_dirty_tracking & flags) == flags);
@@ -2955,7 +2959,7 @@ static void memory_global_dirty_log_do_stop(unsigned int 
flags)
  memory_region_transaction_begin();
  memory_region_update_pending = true;
  memory_region_transaction_commit();
-MEMORY_LISTENER_CALL_GLOBAL(log_global_stop, Reverse);
+MEMORY_LISTENER_CALL_GLOBAL(log_global_stop, Reverse, errp);
  }
  }


I'm a little bit surprised to see that MEMORY_LISTENER_CALL_GLOBAL()
already allows >2 args, with the ability to conditionally pass over errp
with such oneliner change; even if all callers were only using 2 args
before this patch..

yes. The proposal takes the easy path.

Should we change all memory listener global handlers :

  begin
  commit
  log_global_after_sync
  log_global_start
  log_global_stop

to take an extra Error **errp argument ?

I think we should distinguish begin + commit handlers from the log_global_*
with a new macro. In which case, we could also change the handler to return
a bool and fail at the first error in MEMORY_LISTENER_CALL_GLOBAL(...).

Thanks,

C.

Re: [PATCH 02/14] migration: Add Error** argument to .load_setup() handler


On 2/8/24 05:30, Peter Xu wrote:

On Wed, Feb 07, 2024 at 02:33:35PM +0100, Cédric Le Goater wrote:

diff --git a/migration/ram.c b/migration/ram.c
index 
136c237f4079f68d4e578cf1c72eec2efc815bc8..8dac9bac2fe8b8c19e102c771a7ef6e976252906
 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -3498,7 +3498,7 @@ void colo_release_ram_cache(void)
   * @f: QEMUFile where to receive the data
   * @opaque: RAMState pointer


Another one may need touch up..


   */
-static int ram_load_setup(QEMUFile *f, void *opaque)
+static int ram_load_setup(QEMUFile *f, void *opaque, Error **errp)
  {
  xbzrle_load_setup();
  ramblock_recv_map_init();
diff --git a/migration/savevm.c b/migration/savevm.c
index 
f2ae799bad13e631bccf733a34c3a8fd22e8dd48..990f4249a26d28117ee365d8b20fc5bbca0d43d6
 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -2737,7 +2737,7 @@ static void 
qemu_loadvm_state_switchover_ack_needed(MigrationIncomingState *mis)
  trace_loadvm_state_switchover_ack_needed(mis->switchover_ack_pending_num);
  }
  
-static int qemu_loadvm_state_setup(QEMUFile *f)

+static int qemu_loadvm_state_setup(QEMUFile *f, Error **errp)
  {
  SaveStateEntry *se;
  int ret;
@@ -2753,10 +2753,11 @@ static int qemu_loadvm_state_setup(QEMUFile *f)
  }
  }
  
-ret = se->ops->load_setup(f, se->opaque);

+ret = se->ops->load_setup(f, se->opaque, errp);
  if (ret < 0) {
+error_prepend(errp, "Load state of device %s failed: ",
+  se->idstr);
  qemu_file_set_error(f, ret);


Do we also want to switch to _set_error_obj()? 


yes. possible.

Or even use migrate_set_error() 


It seems so, and maybe even remove it completely.

What we could do first is add an Error **errp argument to qemu_loadvm_state()
which would improve qmp_xen_load_devices_state() and load_snapshot().
It is less obvious for process_incoming_migration_co().


(the latter may apply to previous patch too if it works)?


It seems safe to use migrate_set_error for both migration_thread() and
bg_migration_thread() because migration_detect_error() is called after
calling qemu_savevm_state_setup().

However, qemu_savevm_state() relies only on qemu_file_get_error() and
there would be a problem there I think.

Thanks,

C.





-error_report("Load state of device %s failed", se->idstr);
  return ret;
  }
  }
@@ -2937,7 +2938,8 @@ int qemu_loadvm_state(QEMUFile *f)
  return ret;
  }
  
-if (qemu_loadvm_state_setup(f) != 0) {

+if (qemu_loadvm_state_setup(f, &local_err) != 0) {
+error_report_err(local_err);
  return -EINVAL;
  }
  
--

2.43.0

Re: [PATCH v2 2/2] aspeed: fix hardcode boot address 0


On 2/7/24 20:52, Jamin Lin wrote:

In the previous design of ASPEED SOCs QEMU model, it set the boot
address at "0" which was the hardcode setting for ast10x0, ast2600,
ast2500 and ast2400.

According to the design of ast2700, it has bootmcu which is used for
executing SPL and initialize DRAM, then, CPUs(cortex-a35)
execute u-boot, kernel and rofs. QEMU will only support CPU(cortex-a35)
parts and the boot address is "0x4 " for ast2700.
Therefore, fixed hardcode boot address 0.

Signed-off-by: Troy Lee 
Signed-off-by: Jamin Lin 


I agree with Philippe that the justification could be simpler. This change
is just a cleanup preparing ground for future models using a different
mapping address.

Reviewed-by: Cédric Le Goater 

Thanks,

C.



---
  hw/arm/aspeed.c | 4 +++-
  hw/arm/aspeed_ast2400.c | 4 ++--
  hw/arm/aspeed_ast2600.c | 2 +-
  include/hw/arm/aspeed_soc.h | 2 --
  4 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/hw/arm/aspeed.c b/hw/arm/aspeed.c
index 06d863958b..39758557be 100644
--- a/hw/arm/aspeed.c
+++ b/hw/arm/aspeed.c
@@ -289,12 +289,14 @@ static void aspeed_install_boot_rom(AspeedMachineState 
*bmc, BlockBackend *blk,
  uint64_t rom_size)
  {
  AspeedSoCState *soc = bmc->soc;
+AspeedSoCClass *sc = ASPEED_SOC_GET_CLASS(soc);
  
  memory_region_init_rom(>boot_rom, NULL, "aspeed.boot_rom", rom_size,

 _abort);
  memory_region_add_subregion_overlap(>spi_boot_container, 0,
  >boot_rom, 1);
-write_boot_rom(blk, ASPEED_SOC_SPI_BOOT_ADDR, rom_size, _abort);
+write_boot_rom(blk, sc->memmap[ASPEED_DEV_SPI_BOOT],
+   rom_size, _abort);
  }
  
  void aspeed_board_init_flashes(AspeedSMCState *s, const char *flashtype,

diff --git a/hw/arm/aspeed_ast2400.c b/hw/arm/aspeed_ast2400.c
index 95da85fee0..d125886207 100644
--- a/hw/arm/aspeed_ast2400.c
+++ b/hw/arm/aspeed_ast2400.c
@@ -26,7 +26,7 @@
  #define ASPEED_SOC_IOMEM_SIZE   0x0020
  
  static const hwaddr aspeed_soc_ast2400_memmap[] = {

-[ASPEED_DEV_SPI_BOOT]  =  ASPEED_SOC_SPI_BOOT_ADDR,
+[ASPEED_DEV_SPI_BOOT]  = 0x,
  [ASPEED_DEV_IOMEM]  = 0x1E60,
  [ASPEED_DEV_FMC]= 0x1E62,
  [ASPEED_DEV_SPI1]   = 0x1E63,
@@ -61,7 +61,7 @@ static const hwaddr aspeed_soc_ast2400_memmap[] = {
  };
  
  static const hwaddr aspeed_soc_ast2500_memmap[] = {

-[ASPEED_DEV_SPI_BOOT]  = ASPEED_SOC_SPI_BOOT_ADDR,
+[ASPEED_DEV_SPI_BOOT]  = 0x,
  [ASPEED_DEV_IOMEM]  = 0x1E60,
  [ASPEED_DEV_FMC]= 0x1E62,
  [ASPEED_DEV_SPI1]   = 0x1E63,
diff --git a/hw/arm/aspeed_ast2600.c b/hw/arm/aspeed_ast2600.c
index f74561ecdc..174be53770 100644
--- a/hw/arm/aspeed_ast2600.c
+++ b/hw/arm/aspeed_ast2600.c
@@ -22,7 +22,7 @@
  #define ASPEED_SOC_DPMCU_SIZE   0x0004
  
  static const hwaddr aspeed_soc_ast2600_memmap[] = {

-[ASPEED_DEV_SPI_BOOT]  = ASPEED_SOC_SPI_BOOT_ADDR,
+[ASPEED_DEV_SPI_BOOT]  = 0x,
  [ASPEED_DEV_SRAM]  = 0x1000,
  [ASPEED_DEV_DPMCU] = 0x1800,
  /* 0x1600 0x17FF : AHB BUS do LPC Bus bridge */
diff --git a/include/hw/arm/aspeed_soc.h b/include/hw/arm/aspeed_soc.h
index 5ab0902da0..bf43ad8351 100644
--- a/include/hw/arm/aspeed_soc.h
+++ b/include/hw/arm/aspeed_soc.h
@@ -224,8 +224,6 @@ enum {
  ASPEED_DEV_FSI2,
  };
  
-#define ASPEED_SOC_SPI_BOOT_ADDR 0x0

-
  qemu_irq aspeed_soc_get_irq(AspeedSoCState *s, int dev);
  bool aspeed_soc_uart_realize(AspeedSoCState *s, Error **errp);
  void aspeed_soc_uart_set_chr(AspeedSoCState *s, int dev, Chardev *chr);

Re: [PATCH v2 1/2] aspeed: introduce a new UART0 device name


Hello Jamin,

On 2/7/24 21:02, Jamin Lin via wrote:

The Aspeed datasheet refers to the UART controllers
as UART1 - UART13 for the ast10x0, ast2600, ast2500
and ast2400 SoCs and the Aspeed ast2700 introduces an UART0
and the UART controllers as UART0 - UART12.

To keep the naming in the QEMU models
in sync with the datasheet, let's introduce a new UART0 device name
and do the required adjustments, etc ...


Please drop the etc...



Signed-off-by: Troy Lee 
Signed-off-by: Jamin Lin 
---
  hw/arm/aspeed.c | 13 -
  hw/arm/aspeed_ast10x0.c |  1 +
  hw/arm/aspeed_ast2400.c |  2 ++
  hw/arm/aspeed_ast2600.c |  1 +
  hw/arm/aspeed_soc_common.c  | 14 +-
  include/hw/arm/aspeed_soc.h |  2 ++
  6 files changed, 23 insertions(+), 10 deletions(-)

diff --git a/hw/arm/aspeed.c b/hw/arm/aspeed.c
index 09b1e823ba..06d863958b 100644
--- a/hw/arm/aspeed.c
+++ b/hw/arm/aspeed.c
@@ -342,7 +342,7 @@ static void connect_serial_hds_to_uarts(AspeedMachineState 
*bmc)
  int uart_chosen = bmc->uart_chosen ? bmc->uart_chosen : amc->uart_default;
  
  aspeed_soc_uart_set_chr(s, uart_chosen, serial_hd(0));

-for (int i = 1, uart = ASPEED_DEV_UART1; i < sc->uarts_num; i++, uart++) {
+for (int i = 0, uart = sc->uarts_base; i < sc->uarts_num; i++, uart++) {
  if (uart == uart_chosen) {
  continue;
  }
@@ -1094,7 +1094,7 @@ static char *aspeed_get_bmc_console(Object *obj, Error 
**errp)
  AspeedMachineClass *amc = ASPEED_MACHINE_GET_CLASS(bmc);
  int uart_chosen = bmc->uart_chosen ? bmc->uart_chosen : amc->uart_default;
  
-return g_strdup_printf("uart%d", uart_chosen - ASPEED_DEV_UART1 + 1);

+return g_strdup_printf("uart%d", uart_chosen - ASPEED_DEV_UART0);
  }
  
  static void aspeed_set_bmc_console(Object *obj, const char *value, Error **errp)

@@ -1103,6 +1103,8 @@ static void aspeed_set_bmc_console(Object *obj, const 
char *value, Error **errp)
  AspeedMachineClass *amc = ASPEED_MACHINE_GET_CLASS(bmc);
  AspeedSoCClass *sc = 
ASPEED_SOC_CLASS(object_class_by_name(amc->soc_name));
  int val;
+int start = sc->uarts_base - ASPEED_DEV_UART0;
+int end = start + sc->uarts_num;



To help the reader, I would introduce these helpers at the end of
aspeed_soc.h :

static inline int aspeed_uart_index(int uart_dev)
{
return uart_dev - ASPEED_DEV_UART0;
}

static inline int aspeed_uart_first(AspeedSoCClass *sc)

{
return aspeed_uart_index(sc->uarts_base);
}

static inline int aspeed_uart_last(AspeedSoCClass *sc)

{
return aspeed_uart_first(sc) + sc->uarts_num - 1;
}



  if (sscanf(value, "uart%u", ) != 1) {
  error_setg(errp, "Bad value for \"uart\" property");
@@ -1110,11 +1112,12 @@ static void aspeed_set_bmc_console(Object *obj, const 
char *value, Error **errp)
  }
  
  /* The number of UART depends on the SoC */

-if (val < 1 || val > sc->uarts_num) {
-error_setg(errp, "\"uart\" should be in range [1 - %d]", 
sc->uarts_num);
+if (val < start || val >= end) {
+error_setg(errp, "\"uart\" should be in range [%d - %d]",
+   start, end - 1);
  return;
  }
-bmc->uart_chosen = ASPEED_DEV_UART1 + val - 1;
+bmc->uart_chosen = val + ASPEED_DEV_UART0;
  }
  
  static void aspeed_machine_class_props_init(ObjectClass *oc)

diff --git a/hw/arm/aspeed_ast10x0.c b/hw/arm/aspeed_ast10x0.c
index c3b5116a6a..2634e0f654 100644
--- a/hw/arm/aspeed_ast10x0.c
+++ b/hw/arm/aspeed_ast10x0.c
@@ -436,6 +436,7 @@ static void aspeed_soc_ast1030_class_init(ObjectClass 
*klass, void *data)
  sc->wdts_num = 4;
  sc->macs_num = 1;
  sc->uarts_num = 13;
+sc->uarts_base = ASPEED_DEV_UART1;
  sc->irqmap = aspeed_soc_ast1030_irqmap;
  sc->memmap = aspeed_soc_ast1030_memmap;
  sc->num_cpus = 1;
diff --git a/hw/arm/aspeed_ast2400.c b/hw/arm/aspeed_ast2400.c
index 8829561bb6..95da85fee0 100644
--- a/hw/arm/aspeed_ast2400.c
+++ b/hw/arm/aspeed_ast2400.c
@@ -523,6 +523,7 @@ static void aspeed_soc_ast2400_class_init(ObjectClass *oc, 
void *data)
  sc->wdts_num = 2;
  sc->macs_num = 2;
  sc->uarts_num= 5;
+sc->uarts_base   = ASPEED_DEV_UART1;
  sc->irqmap   = aspeed_soc_ast2400_irqmap;
  sc->memmap   = aspeed_soc_ast2400_memmap;
  sc->num_cpus = 1;
@@ -551,6 +552,7 @@ static void aspeed_soc_ast2500_class_init(ObjectClass *oc, 
void *data)
  sc->wdts_num = 3;
  sc->macs_num = 2;
  sc->uarts_num= 5;
+sc->uarts_base   = ASPEED_DEV_UART1;
  sc->irqmap   = aspeed_soc_ast2500_irqmap;
  sc->memmap   = aspeed_soc_ast2500_memmap;
  sc->num_cpus = 1;
diff --git a/hw/arm/aspeed_ast2600.c b/hw/arm/aspeed_ast2600.c
index 4ee32ea99d..f74561ecdc 100644
--- a/hw/arm/aspeed_ast2600.c
+++ b/hw/arm/aspeed_ast2600.c
@@ -666,6 +666,7 @@ static void aspeed_soc_ast2600_class_init(ObjectClass

Re: [PATCH 08/13] hw/arm/mps3r: Initial skeleton for mps3-an536 board





+/*
+ * The MPS3 DDR is 3GiB, but on a 32-bit host QEMU doesn't permit
+ * emulation of that much guest RAM, so artificially make it smaller.
+ */
+#if HOST_LONG_BITS == 32
+#define MPS3_DDR_SIZE (1 * GiB)
+#else
+#define MPS3_DDR_SIZE (3 * GiB)
+#endif


Generically, can we migrate a VM started on a 32-bit host to a 64-bit
one?


I think it's one of those things that in theory is supposed
to be possible and in practice nobody tests so it might well
not work. At any rate, this is the same thing we do already
in mps2-tz.c for the 2GB DRAM those boards have.


We could have a common helper, maybe. Aspeed does:

  /* On 32-bit hosts, lower RAM to 1G because of the 2047 MB limit */
  #if HOST_LONG_BITS == 32
  #define ASPEED_RAM_SIZE(sz) MIN((sz), 1 * GiB)
  #else
  #define ASPEED_RAM_SIZE(sz) (sz)
  #endif

Thanks,

C.

Re: [PATCH 13/14] migration: Use migrate_has_error() in close_return_path_on_source()


On 2/8/24 14:07, Fabiano Rosas wrote:

Cédric Le Goater  writes:


close_return_path_on_source() retrieves the migration error from the
QEMUFile '->to_dst_file' to know if a shutdown is required. This
shutdown is required to exit the return-path thread. However, in
migrate_fd_cleanup(), '->to_dst_file' is cleaned up before calling
close_return_path_on_source() and the shutdown is never performed,
leaving the source and destination waiting for an event to occur.

Avoid relying on '->to_dst_file' and use migrate_has_error() instead.

Suggested-by: Peter Xu 
Signed-off-by: Cédric Le Goater 
---
  migration/migration.c | 3 +--
  1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/migration/migration.c b/migration/migration.c
index 
d5f705ceef4c925589aa49335969672c0d761fa2..5f55af3d7624750ca416c4177781241b3e291e5d
 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -2372,8 +2372,7 @@ static bool close_return_path_on_source(MigrationState 
*ms)
   * cause it to unblock if it's stuck waiting for the destination.
   */
  WITH_QEMU_LOCK_GUARD(>qemu_file_lock) {
-if (ms->to_dst_file && ms->rp_state.from_dst_file &&
-qemu_file_get_error(ms->to_dst_file)) {
+if (migrate_has_error(ms) && ms->rp_state.from_dst_file) {
  qemu_file_shutdown(ms->rp_state.from_dst_file);
  }
  }


Hm, maybe Peter can help defend this, but this assumes that every
function that takes an 'f' and sets the file error also sets
migrate_set_error(). I'm not sure we have determined that, have we?


How could we check all the code paths? I agree it is difficult when
looking at the code :/

Thanks,

C.

Re: [PATCH 01/14] migration: Add Error** argument to .save_setup() handler


On 2/7/24 21:11, Philippe Mathieu-Daudé wrote:

On 7/2/24 14:33, Cédric Le Goater wrote:

The purpose is to record a potential error in the migration stream if
qemu_savevm_state_setup() fails. Most of the current .save_setup()
handlers can be modified to use the Error argument instead of managing
their own and calling locally error_report(). The following patches
will introduce such changes for VFIO first.

Signed-off-by: Cédric Le Goater 
---
  include/migration/register.h   | 2 +-
  hw/ppc/spapr.c | 2 +-
  hw/s390x/s390-stattrib.c   | 2 +-
  hw/vfio/migration.c    | 2 +-
  migration/block-dirty-bitmap.c | 2 +-
  migration/block.c  | 2 +-
  migration/ram.c    | 2 +-
  migration/savevm.c | 4 ++--
  8 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/include/migration/register.h b/include/migration/register.h
index 
9ab1f79512c605f0c88a45b560c57486fa054441..831600a00eae4efd0464b60925d65de4d9dbcff8
 100644
--- a/include/migration/register.h
+++ b/include/migration/register.h
@@ -25,7 +25,7 @@ typedef struct SaveVMHandlers {
   * used to perform early checks.
   */
  int (*save_prepare)(void *opaque, Error **errp);
-    int (*save_setup)(QEMUFile *f, void *opaque);
+    int (*save_setup)(QEMUFile *f, void *opaque, Error **errp);


Since you change this, do you mind adding a docstring
describing this prototype?


I can send an initial patch adding the documentation tags and then
resend the same patch with the updates people will provide. I don't
have the knowledge to cover all of the SaveVMHandlers struct on my
own.

Thanks,

C.



Otherwise,
Reviewed-by: Philippe Mathieu-Daudé

Re: [PATCH v3 3/3] hw: Set virtio-iommu aw-bits default value on pc_q35 and arm virt


On 2/8/24 11:10, Eric Auger wrote:

Currently the default input range can extend to 64 bits. On x86,
when the virtio-iommu protects vfio devices, the physical iommu
may support only 39 bits. Let's set the default to 39, as done
for the intel-iommu. On ARM we set 48b as a default (matching
SMMUv3 SMMU_IDR5.VAX == 0).

We use hw_compat_8_2 to handle the compatibility for machines
before 9.0 which used to have a virtio-iommu default input range
of 64 bits.

Of course if aw-bits is set from the command line, the default
is overridden.

Signed-off-by: Eric Auger 
Reviewed-by: Zhenzhong Duan 
Tested-by: Yanghang Liu


Reviewed-by: Cédric Le Goater 

Thanks,

C.


---

v2 -> v3:
- collected Zhenzhong's R-b
- use &error_abort instead of NULL error handle
   on object_property_get_uint() call (Cédric)
- use VTD_HOST_AW_39BIT (Cédric)

v1 -> v2:
- set aw-bits to 48b on ARM
- use hw_compat_8_2 to handle the compat for older machines
   which used 64b as a default
---
  hw/arm/virt.c| 6 ++
  hw/core/machine.c| 5 -
  hw/i386/pc.c | 6 ++
  hw/virtio/virtio-iommu.c | 2 +-
  4 files changed, 17 insertions(+), 2 deletions(-)

diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index 368c2a415a..0994f2a560 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -2716,10 +2716,16 @@ static void 
virt_machine_device_pre_plug_cb(HotplugHandler *hotplug_dev,
  } else if (object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_MD_PCI)) {
  virtio_md_pci_pre_plug(VIRTIO_MD_PCI(dev), MACHINE(hotplug_dev), 
errp);
  } else if (object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_IOMMU_PCI)) {
+uint8_t aw_bits = object_property_get_uint(OBJECT(dev),
+   "aw-bits", _abort);
  hwaddr db_start = 0, db_end = 0;
  QList *reserved_regions;
  char *resv_prop_str;
  
+if (!aw_bits) {

+qdev_prop_set_uint8(dev, "aw-bits", 48);
+}
+
  if (vms->iommu != VIRT_IOMMU_NONE) {
  error_setg(errp, "virt machine does not support multiple IOMMUs");
  return;
diff --git a/hw/core/machine.c b/hw/core/machine.c
index fb5afdcae4..70ac96954c 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -30,9 +30,12 @@
  #include "exec/confidential-guest-support.h"
  #include "hw/virtio/virtio-pci.h"
  #include "hw/virtio/virtio-net.h"
+#include "hw/virtio/virtio-iommu.h"
  #include "audio/audio.h"
  
-GlobalProperty hw_compat_8_2[] = {};

+GlobalProperty hw_compat_8_2[] = {
+{ TYPE_VIRTIO_IOMMU_PCI, "aw-bits", "64" },
+};
  const size_t hw_compat_8_2_len = G_N_ELEMENTS(hw_compat_8_2);
  
  GlobalProperty hw_compat_8_1[] = {

diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index 196827531a..ee2d379c90 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -1456,6 +1456,8 @@ static void pc_machine_device_pre_plug_cb(HotplugHandler 
*hotplug_dev,
  } else if (object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_MD_PCI)) {
  virtio_md_pci_pre_plug(VIRTIO_MD_PCI(dev), MACHINE(hotplug_dev), 
errp);
  } else if (object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_IOMMU_PCI)) {
+uint8_t aw_bits = object_property_get_uint(OBJECT(dev),
+   "aw-bits", _abort);
  /* Declare the APIC range as the reserved MSI region */
  char *resv_prop_str = g_strdup_printf("0xfee0:0xfeef:%d",
VIRTIO_IOMMU_RESV_MEM_T_MSI);
@@ -1464,6 +1466,10 @@ static void pc_machine_device_pre_plug_cb(HotplugHandler 
*hotplug_dev,
  qlist_append_str(reserved_regions, resv_prop_str);
  qdev_prop_set_array(dev, "reserved-regions", reserved_regions);
  
+if (!aw_bits) {

+qdev_prop_set_uint8(dev, "aw-bits", VTD_HOST_AW_39BIT);
+}
+
  g_free(resv_prop_str);
  }
  
diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c

index 7870bdbeee..c468e9b13b 100644
--- a/hw/virtio/virtio-iommu.c
+++ b/hw/virtio/virtio-iommu.c
@@ -1529,7 +1529,7 @@ static Property virtio_iommu_properties[] = {
  DEFINE_PROP_LINK("primary-bus", VirtIOIOMMU, primary_bus,
   TYPE_PCI_BUS, PCIBus *),
  DEFINE_PROP_BOOL("boot-bypass", VirtIOIOMMU, boot_bypass, true),
-DEFINE_PROP_UINT8("aw-bits", VirtIOIOMMU, aw_bits, 64),
+DEFINE_PROP_UINT8("aw-bits", VirtIOIOMMU, aw_bits, 0),
  DEFINE_PROP_END_OF_LIST(),
  };

[PATCH 12/14] migration: Report error when shutdown fails

This will help detect issues regarding I/O channels usage.

Signed-off-by: Cédric Le Goater 
---
 migration/qemu-file.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/migration/qemu-file.c b/migration/qemu-file.c
index 
94231ff2955c80b3d0fab11a40510d34c334a826..b69e0c62e2fcf21d346a3687df7eebee23791fdc
 100644
--- a/migration/qemu-file.c
+++ b/migration/qemu-file.c
@@ -62,6 +62,8 @@ struct QEMUFile {
  */
 int qemu_file_shutdown(QEMUFile *f)
 {
+Error *err = NULL;
+
 /*
  * We must set qemufile error before the real shutdown(), otherwise
  * there can be a race window where we thought IO all went though
@@ -90,7 +92,8 @@ int qemu_file_shutdown(QEMUFile *f)
 return -ENOSYS;
 }
 
-if (qio_channel_shutdown(f->ioc, QIO_CHANNEL_SHUTDOWN_BOTH, NULL) < 0) {
+if (qio_channel_shutdown(f->ioc, QIO_CHANNEL_SHUTDOWN_BOTH, ) < 0) {
+error_report_err(err);
 return -EIO;
 }
 
-- 
2.43.0

[PATCH 08/14] vfio: Use new Error** argument in vfio_save_setup()

Add an Error** argument to vfio_migration_set_state() and adjust
callers, including vfio_save_setup(). The error will be propagated up
to qemu_savevm_state_setup() where the save_setup() handler is
executed.

Signed-off-by: Cédric Le Goater 
---
 hw/vfio/migration.c | 62 +
 1 file changed, 40 insertions(+), 22 deletions(-)

diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
index 
2dfbe671f6f45aa530c7341177bb532d8292cecd..2e0a79967cc97f44d9be5575c3cfe18c9f349dab
 100644
--- a/hw/vfio/migration.c
+++ b/hw/vfio/migration.c
@@ -84,7 +84,8 @@ static const char *mig_state_to_str(enum 
vfio_device_mig_state state)
 
 static int vfio_migration_set_state(VFIODevice *vbasedev,
 enum vfio_device_mig_state new_state,
-enum vfio_device_mig_state recover_state)
+enum vfio_device_mig_state recover_state,
+Error **errp)
 {
 VFIOMigration *migration = vbasedev->migration;
 uint64_t buf[DIV_ROUND_UP(sizeof(struct vfio_device_feature) +
@@ -104,15 +105,15 @@ static int vfio_migration_set_state(VFIODevice *vbasedev,
 ret = -errno;
 
 if (recover_state == VFIO_DEVICE_STATE_ERROR) {
-error_report("%s: Failed setting device state to %s, err: %s. "
- "Recover state is ERROR. Resetting device",
- vbasedev->name, mig_state_to_str(new_state),
- strerror(errno));
+error_setg(errp, "%s: Failed setting device state to %s, err: %s. "
+   "Recover state is ERROR. Resetting device",
+   vbasedev->name, mig_state_to_str(new_state),
+   strerror(errno));
 
 goto reset_device;
 }
 
-error_report(
+error_setg(errp,
 "%s: Failed setting device state to %s, err: %s. Setting device in 
recover state %s",
  vbasedev->name, mig_state_to_str(new_state),
  strerror(errno), mig_state_to_str(recover_state));
@@ -120,7 +121,7 @@ static int vfio_migration_set_state(VFIODevice *vbasedev,
 mig_state->device_state = recover_state;
 if (ioctl(vbasedev->fd, VFIO_DEVICE_FEATURE, feature)) {
 ret = -errno;
-error_report(
+error_setg(errp,
 "%s: Failed setting device in recover state, err: %s. 
Resetting device",
  vbasedev->name, strerror(errno));
 
@@ -139,7 +140,7 @@ static int vfio_migration_set_state(VFIODevice *vbasedev,
  * This can happen if the device is asynchronously reset and
  * terminates a data transfer.
  */
-error_report("%s: data_fd out of sync", vbasedev->name);
+error_setg(errp, "%s: data_fd out of sync", vbasedev->name);
 close(mig_state->data_fd);
 
 return -EBADF;
@@ -170,10 +171,11 @@ reset_device:
  */
 static int
 vfio_migration_set_state_or_reset(VFIODevice *vbasedev,
-  enum vfio_device_mig_state new_state)
+  enum vfio_device_mig_state new_state,
+  Error **errp)
 {
 return vfio_migration_set_state(vbasedev, new_state,
-VFIO_DEVICE_STATE_ERROR);
+VFIO_DEVICE_STATE_ERROR, errp);
 }
 
 static int vfio_load_buffer(QEMUFile *f, VFIODevice *vbasedev,
@@ -391,8 +393,8 @@ static int vfio_save_setup(QEMUFile *f, void *opaque, Error 
**errp)
   stop_copy_size);
 migration->data_buffer = g_try_malloc0(migration->data_buffer_size);
 if (!migration->data_buffer) {
-error_report("%s: Failed to allocate migration data buffer",
- vbasedev->name);
+error_setg(errp, "%s: Failed to allocate migration data buffer",
+   vbasedev->name);
 return -ENOMEM;
 }
 
@@ -402,7 +404,7 @@ static int vfio_save_setup(QEMUFile *f, void *opaque, Error 
**errp)
 switch (migration->device_state) {
 case VFIO_DEVICE_STATE_RUNNING:
 ret = vfio_migration_set_state(vbasedev, 
VFIO_DEVICE_STATE_PRE_COPY,
-   VFIO_DEVICE_STATE_RUNNING);
+   VFIO_DEVICE_STATE_RUNNING, errp);
 if (ret) {
 return ret;
 }
@@ -429,13 +431,18 @@ static void vfio_save_cleanup(void *opaque)
 {
 VFIODevice *vbasedev = opaque;
 VFIOMigration *migration = vbasedev->migration;
+Error *local_err = NULL;
 
 /*
  * Changing device state from STOP_COPY to STOP can take time. Do

[PATCH 04/14] migration: Modify ram_init_bitmaps() to report dirty tracking errors

The .save_setup() handler now has an Error** argument that we can use
to propagate errors reported by the .log_global_start() handler. Do
that for the RAM. qemu_savevm_state_setup() will store the error under
the migration stream for later detection in the migration sequence.

Signed-off-by: Cédric Le Goater 
---
 migration/ram.c | 19 ++-
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/migration/ram.c b/migration/ram.c
index 
d86626bb1c704b2d3497b323a702ca6ca8939a79..b87245466bb46937fd0358d0c66432bcc6280018
 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -2802,19 +2802,17 @@ static void 
migration_bitmap_clear_discarded_pages(RAMState *rs)
 }
 }
 
-static void ram_init_bitmaps(RAMState *rs)
+static void ram_init_bitmaps(RAMState *rs, Error **errp)
 {
-Error *local_err = NULL;
-
 qemu_mutex_lock_ramlist();
 
 WITH_RCU_READ_LOCK_GUARD() {
 ram_list_init_bitmaps();
 /* We don't use dirty log with background snapshots */
 if (!migrate_background_snapshot()) {
-memory_global_dirty_log_start(GLOBAL_DIRTY_MIGRATION, _err);
-if (local_err) {
-error_report_err(local_err);
+memory_global_dirty_log_start(GLOBAL_DIRTY_MIGRATION, errp);
+if (*errp) {
+break;
 }
 migration_bitmap_sync_precopy(rs, false);
 }
@@ -2828,7 +2826,7 @@ static void ram_init_bitmaps(RAMState *rs)
 migration_bitmap_clear_discarded_pages(rs);
 }
 
-static int ram_init_all(RAMState **rsp)
+static int ram_init_all(RAMState **rsp, Error **errp)
 {
 if (ram_state_init(rsp)) {
 return -1;
@@ -2839,7 +2837,10 @@ static int ram_init_all(RAMState **rsp)
 return -1;
 }
 
-ram_init_bitmaps(*rsp);
+ram_init_bitmaps(*rsp, errp);
+if (*errp) {
+return -1;
+}
 
 return 0;
 }
@@ -2952,7 +2953,7 @@ static int ram_save_setup(QEMUFile *f, void *opaque, 
Error **errp)
 
 /* migration has already setup the bitmap, reuse it. */
 if (!migration_in_colo_state()) {
-if (ram_init_all(rsp) != 0) {
+if (ram_init_all(rsp, errp) != 0) {
 compress_threads_save_cleanup();
 return -1;
 }
-- 
2.43.0

[PATCH 05/14] vfio: Add Error** argument to .set_dirty_page_tracking() handler

We will use the Error object to improve error reporting in the
.log_global*() handlers of VFIO.

Signed-off-by: Cédric Le Goater 
---
 include/hw/vfio/vfio-container-base.h | 4 ++--
 hw/vfio/common.c  | 4 ++--
 hw/vfio/container-base.c  | 4 ++--
 hw/vfio/container.c   | 6 +++---
 4 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/include/hw/vfio/vfio-container-base.h 
b/include/hw/vfio/vfio-container-base.h
index 
b2813b0c117985425c842d91f011bb895955d738..f22fcb5a214be2717b42815371346401bb7fce51
 100644
--- a/include/hw/vfio/vfio-container-base.h
+++ b/include/hw/vfio/vfio-container-base.h
@@ -81,7 +81,7 @@ int vfio_container_add_section_window(VFIOContainerBase 
*bcontainer,
 void vfio_container_del_section_window(VFIOContainerBase *bcontainer,
MemoryRegionSection *section);
 int vfio_container_set_dirty_page_tracking(VFIOContainerBase *bcontainer,
-   bool start);
+   bool start, Error **errp);
 int vfio_container_query_dirty_bitmap(const VFIOContainerBase *bcontainer,
   VFIOBitmap *vbmap,
   hwaddr iova, hwaddr size);
@@ -122,7 +122,7 @@ struct VFIOIOMMUClass {
 void (*detach_device)(VFIODevice *vbasedev);
 /* migration feature */
 int (*set_dirty_page_tracking)(const VFIOContainerBase *bcontainer,
-   bool start);
+   bool start, Error **errp);
 int (*query_dirty_bitmap)(const VFIOContainerBase *bcontainer,
   VFIOBitmap *vbmap,
   hwaddr iova, hwaddr size);
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index 
45af5c675584e1931dfba3b4f78469cc4c00014e..03f2059d903eca335b02f633b07cd35ef3dd6237
 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -1085,7 +1085,7 @@ static void vfio_listener_log_global_start(MemoryListener 
*listener,
 if (vfio_devices_all_device_dirty_tracking(bcontainer)) {
 ret = vfio_devices_dma_logging_start(bcontainer);
 } else {
-ret = vfio_container_set_dirty_page_tracking(bcontainer, true);
+ret = vfio_container_set_dirty_page_tracking(bcontainer, true, NULL);
 }
 
 if (ret) {
@@ -1105,7 +1105,7 @@ static void vfio_listener_log_global_stop(MemoryListener 
*listener,
 if (vfio_devices_all_device_dirty_tracking(bcontainer)) {
 vfio_devices_dma_logging_stop(bcontainer);
 } else {
-ret = vfio_container_set_dirty_page_tracking(bcontainer, false);
+ret = vfio_container_set_dirty_page_tracking(bcontainer, false, NULL);
 }
 
 if (ret) {
diff --git a/hw/vfio/container-base.c b/hw/vfio/container-base.c
index 
913ae49077c4f09b7b27517c1231cfbe4befb7fb..7c0764121d24b02b6c4e66e368d7dff78a6d65aa
 100644
--- a/hw/vfio/container-base.c
+++ b/hw/vfio/container-base.c
@@ -53,14 +53,14 @@ void vfio_container_del_section_window(VFIOContainerBase 
*bcontainer,
 }
 
 int vfio_container_set_dirty_page_tracking(VFIOContainerBase *bcontainer,
-   bool start)
+   bool start, Error **errp)
 {
 if (!bcontainer->dirty_pages_supported) {
 return 0;
 }
 
 g_assert(bcontainer->ops->set_dirty_page_tracking);
-return bcontainer->ops->set_dirty_page_tracking(bcontainer, start);
+return bcontainer->ops->set_dirty_page_tracking(bcontainer, start, errp);
 }
 
 int vfio_container_query_dirty_bitmap(const VFIOContainerBase *bcontainer,
diff --git a/hw/vfio/container.c b/hw/vfio/container.c
index 
bd25b9fbad2e717e63c2ab0e331186e5f63cef49..f772ac79b9c413c86d7e60f6dc4e6699852d5aac
 100644
--- a/hw/vfio/container.c
+++ b/hw/vfio/container.c
@@ -210,7 +210,7 @@ static int vfio_legacy_dma_map(const VFIOContainerBase 
*bcontainer, hwaddr iova,
 
 static int
 vfio_legacy_set_dirty_page_tracking(const VFIOContainerBase *bcontainer,
-bool start)
+bool start, Error **errp)
 {
 const VFIOContainer *container = container_of(bcontainer, VFIOContainer,
   bcontainer);
@@ -228,8 +228,8 @@ vfio_legacy_set_dirty_page_tracking(const VFIOContainerBase 
*bcontainer,
 ret = ioctl(container->fd, VFIO_IOMMU_DIRTY_PAGES, );
 if (ret) {
 ret = -errno;
-error_report("Failed to set dirty tracking flag 0x%x errno: %d",
- dirty.flags, errno);
+error_setg(errp, "Failed to set dirty tracking flag 0x%x errno: %d",
+   dirty.flags, errno);
 }
 
 return ret;
-- 
2.43.0

[PATCH 07/14] vfio: Add Error** argument to vfio_devices_dma_logging_stop()

This improves error reporting in the log_global_stop() VFIO handler.

Signed-off-by: Cédric Le Goater 
---
 hw/vfio/common.c | 19 ++-
 1 file changed, 14 insertions(+), 5 deletions(-)

diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index 
a5d53e67efaa921e89ad918390a22506c7b1ed66..82173b039c47150f5edd05d329192c5b9c8a9a0f
 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -938,12 +938,14 @@ static void vfio_dirty_tracking_init(VFIOContainerBase 
*bcontainer,
 memory_listener_unregister();
 }
 
-static void vfio_devices_dma_logging_stop(VFIOContainerBase *bcontainer)
+static int vfio_devices_dma_logging_stop(VFIOContainerBase *bcontainer,
+  Error **errp)
 {
 uint64_t buf[DIV_ROUND_UP(sizeof(struct vfio_device_feature),
   sizeof(uint64_t))] = {};
 struct vfio_device_feature *feature = (struct vfio_device_feature *)buf;
 VFIODevice *vbasedev;
+int ret = 0;
 
 feature->argsz = sizeof(buf);
 feature->flags = VFIO_DEVICE_FEATURE_SET |
@@ -955,11 +957,17 @@ static void 
vfio_devices_dma_logging_stop(VFIOContainerBase *bcontainer)
 }
 
 if (ioctl(vbasedev->fd, VFIO_DEVICE_FEATURE, feature)) {
-warn_report("%s: Failed to stop DMA logging, err %d (%s)",
-vbasedev->name, -errno, strerror(errno));
+/* Keep first error */
+if (!ret) {
+ret = -errno;
+error_setg(errp, "%s: Failed to stop DMA logging, err %d (%s)",
+   vbasedev->name, -errno, strerror(errno));
+}
 }
 vbasedev->dirty_tracking = false;
 }
+
+return ret;
 }
 
 static struct vfio_device_feature *
@@ -1068,7 +1076,8 @@ static int 
vfio_devices_dma_logging_start(VFIOContainerBase *bcontainer,
 
 out:
 if (ret) {
-vfio_devices_dma_logging_stop(bcontainer);
+/* Ignore the potential errors when doing rollback */
+vfio_devices_dma_logging_stop(bcontainer, NULL);
 }
 
 vfio_device_feature_dma_logging_start_destroy(feature);
@@ -1102,7 +,7 @@ static void vfio_listener_log_global_stop(MemoryListener 
*listener,
 int ret = 0;
 
 if (vfio_devices_all_device_dirty_tracking(bcontainer)) {
-vfio_devices_dma_logging_stop(bcontainer);
+ret = vfio_devices_dma_logging_stop(bcontainer, errp);
 } else {
 ret = vfio_container_set_dirty_page_tracking(bcontainer, false, errp);
 }
-- 
2.43.0

[PATCH 11/14] vfio: Extend vfio_set_migration_error() with Error* argument

vfio_set_migration_error() sets the 'return' error on the migration
stream if a migration is in progress. To improve error reporting, add
a new Error* argument to also set the Error object on the migration
stream.

Signed-off-by: Cédric Le Goater 
---
 hw/vfio/common.c | 50 +---
 1 file changed, 30 insertions(+), 20 deletions(-)

diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index 
82173b039c47150f5edd05d329192c5b9c8a9a0f..afe8b6bd294fd5904f394a5db48aae3fd718b14e
 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -148,16 +148,18 @@ bool vfio_viommu_preset(VFIODevice *vbasedev)
 return vbasedev->bcontainer->space->as != _space_memory;
 }
 
-static void vfio_set_migration_error(int err)
+static void vfio_set_migration_error(int ret, Error *err)
 {
 MigrationState *ms = migrate_get_current();
 
 if (migration_is_setup_or_active(ms->state)) {
 WITH_QEMU_LOCK_GUARD(>qemu_file_lock) {
 if (ms->to_dst_file) {
-qemu_file_set_error(ms->to_dst_file, err);
+qemu_file_set_error_obj(ms->to_dst_file, ret, err);
 }
 }
+} else {
+error_report_err(err);
 }
 }
 
@@ -296,15 +298,17 @@ static void vfio_iommu_map_notify(IOMMUNotifier *n, 
IOMMUTLBEntry *iotlb)
 VFIOContainerBase *bcontainer = giommu->bcontainer;
 hwaddr iova = iotlb->iova + giommu->iommu_offset;
 void *vaddr;
+Error *local_err = NULL;
 int ret;
 
 trace_vfio_iommu_map_notify(iotlb->perm == IOMMU_NONE ? "UNMAP" : "MAP",
 iova, iova + iotlb->addr_mask);
 
 if (iotlb->target_as != _space_memory) {
-error_report("Wrong target AS \"%s\", only system memory is allowed",
- iotlb->target_as->name ? iotlb->target_as->name : "none");
-vfio_set_migration_error(-EINVAL);
+error_setg(_err,
+   "Wrong target AS \"%s\", only system memory is allowed",
+   iotlb->target_as->name ? iotlb->target_as->name : "none");
+vfio_set_migration_error(-EINVAL, local_err);
 return;
 }
 
@@ -336,11 +340,12 @@ static void vfio_iommu_map_notify(IOMMUNotifier *n, 
IOMMUTLBEntry *iotlb)
 ret = vfio_container_dma_unmap(bcontainer, iova,
iotlb->addr_mask + 1, iotlb);
 if (ret) {
-error_report("vfio_container_dma_unmap(%p, 0x%"HWADDR_PRIx", "
- "0x%"HWADDR_PRIx") = %d (%s)",
- bcontainer, iova,
- iotlb->addr_mask + 1, ret, strerror(-ret));
-vfio_set_migration_error(ret);
+error_setg(_err,
+   "vfio_container_dma_unmap(%p, 0x%"HWADDR_PRIx", "
+   "0x%"HWADDR_PRIx") = %d (%s)",
+   bcontainer, iova,
+   iotlb->addr_mask + 1, ret, strerror(-ret));
+vfio_set_migration_error(ret, local_err);
 }
 }
 out:
@@ -1224,13 +1229,15 @@ static void vfio_iommu_map_dirty_notify(IOMMUNotifier 
*n, IOMMUTLBEntry *iotlb)
 VFIOContainerBase *bcontainer = giommu->bcontainer;
 hwaddr iova = iotlb->iova + giommu->iommu_offset;
 ram_addr_t translated_addr;
+Error *local_err = NULL;
 int ret = -EINVAL;
 
 trace_vfio_iommu_map_dirty_notify(iova, iova + iotlb->addr_mask);
 
 if (iotlb->target_as != _space_memory) {
-error_report("Wrong target AS \"%s\", only system memory is allowed",
- iotlb->target_as->name ? iotlb->target_as->name : "none");
+error_setg(_err,
+   "Wrong target AS \"%s\", only system memory is allowed",
+   iotlb->target_as->name ? iotlb->target_as->name : "none");
 goto out;
 }
 
@@ -1239,17 +1246,18 @@ static void vfio_iommu_map_dirty_notify(IOMMUNotifier 
*n, IOMMUTLBEntry *iotlb)
 ret = vfio_get_dirty_bitmap(bcontainer, iova, iotlb->addr_mask + 1,
 translated_addr);
 if (ret) {
-error_report("vfio_iommu_map_dirty_notify(%p, 0x%"HWADDR_PRIx", "
- "0x%"HWADDR_PRIx") = %d (%s)",
- bcontainer, iova, iotlb->addr_mask + 1, ret,
- strerror(-ret));
+error_setg(_err,
+   "vfio_iommu_map_dirty_notify(%p, 0x%"HWADDR_PRIx", "
+   "0x%"HWADDR_PRIx") = %d (%s)",
+   bcontainer, iova, iotlb->addr_mask + 1, ret,
+

[PATCH 01/14] migration: Add Error** argument to .save_setup() handler

The purpose is to record a potential error in the migration stream if
qemu_savevm_state_setup() fails. Most of the current .save_setup()
handlers can be modified to use the Error argument instead of managing
their own and calling locally error_report(). The following patches
will introduce such changes for VFIO first.

Signed-off-by: Cédric Le Goater 
---
 include/migration/register.h   | 2 +-
 hw/ppc/spapr.c | 2 +-
 hw/s390x/s390-stattrib.c   | 2 +-
 hw/vfio/migration.c| 2 +-
 migration/block-dirty-bitmap.c | 2 +-
 migration/block.c  | 2 +-
 migration/ram.c| 2 +-
 migration/savevm.c | 4 ++--
 8 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/include/migration/register.h b/include/migration/register.h
index 
9ab1f79512c605f0c88a45b560c57486fa054441..831600a00eae4efd0464b60925d65de4d9dbcff8
 100644
--- a/include/migration/register.h
+++ b/include/migration/register.h
@@ -25,7 +25,7 @@ typedef struct SaveVMHandlers {
  * used to perform early checks.
  */
 int (*save_prepare)(void *opaque, Error **errp);
-int (*save_setup)(QEMUFile *f, void *opaque);
+int (*save_setup)(QEMUFile *f, void *opaque, Error **errp);
 void (*save_cleanup)(void *opaque);
 int (*save_live_complete_postcopy)(QEMUFile *f, void *opaque);
 int (*save_live_complete_precopy)(QEMUFile *f, void *opaque);
diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 
0d72d286d80f0435122593555f79fae4d90acf81..a1b0aa02582ad2d68a13476c1859b18143da7bb8
 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -2142,7 +2142,7 @@ static const VMStateDescription vmstate_spapr = {
 }
 };
 
-static int htab_save_setup(QEMUFile *f, void *opaque)
+static int htab_save_setup(QEMUFile *f, void *opaque, Error **errp)
 {
 SpaprMachineState *spapr = opaque;
 
diff --git a/hw/s390x/s390-stattrib.c b/hw/s390x/s390-stattrib.c
index 
c483b62a9b5f71772639fc180bdad15ecb6711cb..c934df424a555d83d2198f5ddfc0cbe0ea98e9ec
 100644
--- a/hw/s390x/s390-stattrib.c
+++ b/hw/s390x/s390-stattrib.c
@@ -166,7 +166,7 @@ static int cmma_load(QEMUFile *f, void *opaque, int 
version_id)
 return ret;
 }
 
-static int cmma_save_setup(QEMUFile *f, void *opaque)
+static int cmma_save_setup(QEMUFile *f, void *opaque, Error **errp)
 {
 S390StAttribState *sas = S390_STATTRIB(opaque);
 S390StAttribClass *sac = S390_STATTRIB_GET_CLASS(sas);
diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
index 
70e6b1a709f9b67e4c9eb41033d76347275cac42..8bcb4bc73cd5ba5338e3ffa4d907d0e6bfbb9485
 100644
--- a/hw/vfio/migration.c
+++ b/hw/vfio/migration.c
@@ -378,7 +378,7 @@ static int vfio_save_prepare(void *opaque, Error **errp)
 return 0;
 }
 
-static int vfio_save_setup(QEMUFile *f, void *opaque)
+static int vfio_save_setup(QEMUFile *f, void *opaque, Error **errp)
 {
 VFIODevice *vbasedev = opaque;
 VFIOMigration *migration = vbasedev->migration;
diff --git a/migration/block-dirty-bitmap.c b/migration/block-dirty-bitmap.c
index 
2708abf3d762de774ed294d3fdb8e56690d2974c..16f84e6c57c2403a8c2d6319f4e7b6360dade28c
 100644
--- a/migration/block-dirty-bitmap.c
+++ b/migration/block-dirty-bitmap.c
@@ -1213,7 +1213,7 @@ fail:
 return ret;
 }
 
-static int dirty_bitmap_save_setup(QEMUFile *f, void *opaque)
+static int dirty_bitmap_save_setup(QEMUFile *f, void *opaque, Error **errp)
 {
 DBMSaveState *s = &((DBMState *)opaque)->save;
 SaveBitmapState *dbms = NULL;
diff --git a/migration/block.c b/migration/block.c
index 
8c6ebafacc1ffe930d1d4f19d968817b14852c69..df15319ceab66201b043f15eac1b0a7d6522b60c
 100644
--- a/migration/block.c
+++ b/migration/block.c
@@ -708,7 +708,7 @@ static void block_migration_cleanup(void *opaque)
 blk_mig_unlock();
 }
 
-static int block_save_setup(QEMUFile *f, void *opaque)
+static int block_save_setup(QEMUFile *f, void *opaque, Error **errp)
 {
 int ret;
 
diff --git a/migration/ram.c b/migration/ram.c
index 
d5b7cd5ac2f31aabf4a248b966153401c48912cf..136c237f4079f68d4e578cf1c72eec2efc815bc8
 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -2931,7 +2931,7 @@ void qemu_guest_free_page_hint(void *addr, size_t len)
  * @f: QEMUFile where to send the data
  * @opaque: RAMState pointer
  */
-static int ram_save_setup(QEMUFile *f, void *opaque)
+static int ram_save_setup(QEMUFile *f, void *opaque, Error **errp)
 {
 RAMState **rsp = opaque;
 RAMBlock *block;
diff --git a/migration/savevm.c b/migration/savevm.c
index 
d612c8a9020b204d5d078d5df85f0e6449c27645..f2ae799bad13e631bccf733a34c3a8fd22e8dd48
 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -1342,10 +1342,10 @@ void qemu_savevm_state_setup(QEMUFile *f)
 }
 save_section_header(f, se, QEMU_VM_SECTION_START);
 
-ret = se->ops->save_setup(f, se->opaque);
+ret = se->ops->save_setup(f, se->opaque, _err);
 save_section_footer(f, se);
 if (ret < 0) {
-

[PATCH 06/14] vfio: Add Error** argument to vfio_devices_dma_logging_start()

This allows to update the Error argument of the VFIO log_global_start()
handler. Errors detected when device level logging is started will be
propagated up to qemu_savevm_state_setup() when the ram save_setup()
handler is executed.

The vfio_set_migration_error() call becomes redundant. Remove it.

Signed-off-by: Cédric Le Goater 
---
 hw/vfio/common.c | 21 +
 1 file changed, 9 insertions(+), 12 deletions(-)

diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index 
03f2059d903eca335b02f633b07cd35ef3dd6237..a5d53e67efaa921e89ad918390a22506c7b1ed66
 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -1036,7 +1036,8 @@ static void vfio_device_feature_dma_logging_start_destroy(
 g_free(feature);
 }
 
-static int vfio_devices_dma_logging_start(VFIOContainerBase *bcontainer)
+static int vfio_devices_dma_logging_start(VFIOContainerBase *bcontainer,
+  Error **errp)
 {
 struct vfio_device_feature *feature;
 VFIODirtyRanges ranges;
@@ -1058,8 +1059,8 @@ static int 
vfio_devices_dma_logging_start(VFIOContainerBase *bcontainer)
 ret = ioctl(vbasedev->fd, VFIO_DEVICE_FEATURE, feature);
 if (ret) {
 ret = -errno;
-error_report("%s: Failed to start DMA logging, err %d (%s)",
- vbasedev->name, ret, strerror(errno));
+error_setg(errp, "%s: Failed to start DMA logging, err %d (%s)",
+   vbasedev->name, ret, strerror(errno));
 goto out;
 }
 vbasedev->dirty_tracking = true;
@@ -1083,15 +1084,13 @@ static void 
vfio_listener_log_global_start(MemoryListener *listener,
 int ret;
 
 if (vfio_devices_all_device_dirty_tracking(bcontainer)) {
-ret = vfio_devices_dma_logging_start(bcontainer);
+ret = vfio_devices_dma_logging_start(bcontainer, errp);
 } else {
-ret = vfio_container_set_dirty_page_tracking(bcontainer, true, NULL);
+ret = vfio_container_set_dirty_page_tracking(bcontainer, true, errp);
 }
 
 if (ret) {
-error_report("vfio: Could not start dirty page tracking, err: %d (%s)",
- ret, strerror(-ret));
-vfio_set_migration_error(ret);
+error_prepend(errp, "vfio: Could not start dirty page tracking - ");
 }
 }
 
@@ -1105,13 +1104,11 @@ static void 
vfio_listener_log_global_stop(MemoryListener *listener,
 if (vfio_devices_all_device_dirty_tracking(bcontainer)) {
 vfio_devices_dma_logging_stop(bcontainer);
 } else {
-ret = vfio_container_set_dirty_page_tracking(bcontainer, false, NULL);
+ret = vfio_container_set_dirty_page_tracking(bcontainer, false, errp);
 }
 
 if (ret) {
-error_report("vfio: Could not stop dirty page tracking, err: %d (%s)",
- ret, strerror(-ret));
-vfio_set_migration_error(ret);
+error_prepend(errp, "vfio: Could not stop dirty page tracking - ");
 }
 }
 
-- 
2.43.0

[PATCH 13/14] migration: Use migrate_has_error() in close_return_path_on_source()

close_return_path_on_source() retrieves the migration error from the
QEMUFile '->to_dst_file' to know if a shutdown is required. This
shutdown is required to exit the return-path thread. However, in
migrate_fd_cleanup(), '->to_dst_file' is cleaned up before calling
close_return_path_on_source() and the shutdown is never performed,
leaving the source and destination waiting for an event to occur.

Avoid relying on '->to_dst_file' and use migrate_has_error() instead.

Suggested-by: Peter Xu 
Signed-off-by: Cédric Le Goater 
---
 migration/migration.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/migration/migration.c b/migration/migration.c
index 
d5f705ceef4c925589aa49335969672c0d761fa2..5f55af3d7624750ca416c4177781241b3e291e5d
 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -2372,8 +2372,7 @@ static bool close_return_path_on_source(MigrationState 
*ms)
  * cause it to unblock if it's stuck waiting for the destination.
  */
 WITH_QEMU_LOCK_GUARD(>qemu_file_lock) {
-if (ms->to_dst_file && ms->rp_state.from_dst_file &&
-qemu_file_get_error(ms->to_dst_file)) {
+if (migrate_has_error(ms) && ms->rp_state.from_dst_file) {
 qemu_file_shutdown(ms->rp_state.from_dst_file);
 }
 }
-- 
2.43.0

[PATCH 10/14] vfio: Also trace event failures in vfio_save_complete_precopy()

vfio_save_complete_precopy() currently returns before doing the trace
event. Change that.

Signed-off-by: Cédric Le Goater 
---
 hw/vfio/migration.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
index 
fb264c1ef57bbbde4306901e5449e0dfbd0ce3b7..cc5b74f9563eca25d3c7285f106ed06f1eb2f519
 100644
--- a/hw/vfio/migration.c
+++ b/hw/vfio/migration.c
@@ -572,9 +572,6 @@ static int vfio_save_complete_precopy(QEMUFile *f, void 
*opaque)
 
 qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE);
 ret = qemu_file_get_error(f);
-if (ret) {
-return ret;
-}
 
 trace_vfio_save_complete_precopy(vbasedev->name, ret);
 
-- 
2.43.0

[PATCH 09/14] vfio: Add Error** argument to .vfio_save_config() handler

Use vmstate_save_state_with_err() to improve error reporting in the
callers.

Signed-off-by: Cédric Le Goater 
---
 include/hw/vfio/vfio-common.h |  2 +-
 hw/vfio/migration.c   | 18 --
 hw/vfio/pci.c |  5 +++--
 3 files changed, 16 insertions(+), 9 deletions(-)

diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
index 
9b7ef7d02b5a0ad5266bcc4d06cd6874178978e4..710e0d6a880b97848af6ddc2e7968a01054fa122
 100644
--- a/include/hw/vfio/vfio-common.h
+++ b/include/hw/vfio/vfio-common.h
@@ -133,7 +133,7 @@ struct VFIODeviceOps {
 int (*vfio_hot_reset_multi)(VFIODevice *vdev);
 void (*vfio_eoi)(VFIODevice *vdev);
 Object *(*vfio_get_object)(VFIODevice *vdev);
-void (*vfio_save_config)(VFIODevice *vdev, QEMUFile *f);
+int (*vfio_save_config)(VFIODevice *vdev, QEMUFile *f, Error **errp);
 int (*vfio_load_config)(VFIODevice *vdev, QEMUFile *f);
 };
 
diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
index 
2e0a79967cc97f44d9be5575c3cfe18c9f349dab..fb264c1ef57bbbde4306901e5449e0dfbd0ce3b7
 100644
--- a/hw/vfio/migration.c
+++ b/hw/vfio/migration.c
@@ -190,14 +190,19 @@ static int vfio_load_buffer(QEMUFile *f, VFIODevice 
*vbasedev,
 return ret;
 }
 
-static int vfio_save_device_config_state(QEMUFile *f, void *opaque)
+static int vfio_save_device_config_state(QEMUFile *f, void *opaque,
+ Error **errp)
 {
 VFIODevice *vbasedev = opaque;
+int ret = 0;
 
 qemu_put_be64(f, VFIO_MIG_FLAG_DEV_CONFIG_STATE);
 
 if (vbasedev->ops && vbasedev->ops->vfio_save_config) {
-vbasedev->ops->vfio_save_config(vbasedev, f);
+ret = vbasedev->ops->vfio_save_config(vbasedev, f, errp);
+if (ret) {
+return ret;
+}
 }
 
 qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE);
@@ -579,13 +584,14 @@ static int vfio_save_complete_precopy(QEMUFile *f, void 
*opaque)
 static void vfio_save_state(QEMUFile *f, void *opaque)
 {
 VFIODevice *vbasedev = opaque;
+Error *local_err = NULL;
 int ret;
 
-ret = vfio_save_device_config_state(f, opaque);
+ret = vfio_save_device_config_state(f, opaque, _err);
 if (ret) {
-error_report("%s: Failed to save device config space",
- vbasedev->name);
-qemu_file_set_error(f, ret);
+error_prepend(_err, "%s: Failed to save device config space",
+  vbasedev->name);
+qemu_file_set_error_obj(f, ret, local_err);
 }
 }
 
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 
4fa387f0430d62ca2ba1b5ae5b7037f8f06b33f9..99d86e1d40ef25133fc76ad6e58294b07bd20843
 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -2585,11 +2585,12 @@ const VMStateDescription vmstate_vfio_pci_config = {
 }
 };
 
-static void vfio_pci_save_config(VFIODevice *vbasedev, QEMUFile *f)
+static int vfio_pci_save_config(VFIODevice *vbasedev, QEMUFile *f, Error 
**errp)
 {
 VFIOPCIDevice *vdev = container_of(vbasedev, VFIOPCIDevice, vbasedev);
 
-vmstate_save_state(f, _vfio_pci_config, vdev, NULL);
+return vmstate_save_state_with_err(f, _vfio_pci_config, vdev, NULL,
+   errp);
 }
 
 static int vfio_pci_load_config(VFIODevice *vbasedev, QEMUFile *f)
-- 
2.43.0

[RFC PATCH 14/14] migration: Fix return-path thread exit

In case of error, close_return_path_on_source() can perform a shutdown
to exit the return-path thread.  However, in migrate_fd_cleanup(),
'to_dst_file' is closed before calling close_return_path_on_source()
and the shutdown fails, leaving the source and destination waiting for
an event to occur.

Close the file after calling close_return_path_on_source() so that the
shutdown succeeds and the return-path thread exits.

Signed-off-by: Cédric Le Goater 
---

 This is an RFC because the correct fix implies reworking the QEMUFile
 construct, built on top of the QEMU I/O channel.

 migration/migration.c | 13 ++---
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/migration/migration.c b/migration/migration.c
index 
5f55af3d7624750ca416c4177781241b3e291e5d..de329f2c553288935d824748286e79e535929b8b
 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -1313,6 +1313,8 @@ void migrate_set_state(int *state, int old_state, int 
new_state)
 
 static void migrate_fd_cleanup(MigrationState *s)
 {
+QEMUFile *tmp = NULL;
+
 g_free(s->hostname);
 s->hostname = NULL;
 json_writer_free(s->vmdesc);
@@ -1321,8 +1323,6 @@ static void migrate_fd_cleanup(MigrationState *s)
 qemu_savevm_state_cleanup();
 
 if (s->to_dst_file) {
-QEMUFile *tmp;
-
 trace_migrate_fd_cleanup();
 bql_unlock();
 if (s->migration_thread_running) {
@@ -1341,15 +1341,14 @@ static void migrate_fd_cleanup(MigrationState *s)
  * critical section won't block for long.
  */
 migration_ioc_unregister_yank_from_file(tmp);
-qemu_fclose(tmp);
 }
 
-/*
- * We already cleaned up to_dst_file, so errors from the return
- * path might be due to that, ignore them.
- */
 close_return_path_on_source(s);
 
+if (tmp) {
+qemu_fclose(tmp);
+}
+
 assert(!migration_is_active(s));
 
 if (s->state == MIGRATION_STATUS_CANCELLING) {
-- 
2.43.0

[PATCH 00/14] migration: Improve error reporting

Hello,

The motivation behind these changes is to improve error reporting to
the upper management layer (libvirt) with a more detailed error, this
to let it decide, depending on the reported error, whether to try
migration again later. It would be useful in cases where migration
fails due to lack of HW resources on the host. For instance, some
adapters can only initiate a limited number of simultaneous dirty
tracking requests and this imposes a limit on the number of VMs
that can be migrated simultaneously.

We are not quite ready for such a mechanism but what we can do first is
to clean up the error reporting in the early save_setup sequence. This
is what the following changes propose, by adding an Error argument to
various handlers and propagating it to the core migration subsystem.

The last patches try to address a related issue found on VMs with MLX5
VF assigned devices. These are one of those adapters with the HW
limitation described above. If dirty tracking setup fails and
return-path is in use, the return-path thread does not terminate,
leaving the source and destination VMs waiting for an event to occur.

The last patch is still an RFC because the correct fix is not obvious
and implies reworking the QEMUFile software construct, built on top of
the QEMU I/O channel.
 
Thanks,

C.

[1] https://lore.kernel.org/qemu-devel/20240201184853.890471-1-...@redhat.com/

Cédric Le Goater (14):
  migration: Add Error** argument to .save_setup() handler
  migration: Add Error** argument to .load_setup() handler
  memory: Add Error** argument to .log_global*() handlers
  migration: Modify ram_init_bitmaps() to report dirty tracking errors
  vfio: Add Error** argument to .set_dirty_page_tracking() handler
  vfio: Add Error** argument to vfio_devices_dma_logging_start()
  vfio: Add Error** argument to vfio_devices_dma_logging_stop()
  vfio: Use new Error** argument in vfio_save_setup()
  vfio: Add Error** argument to .vfio_save_config() handler
  vfio: Also trace event failures in vfio_save_complete_precopy()
  vfio: Extend vfio_set_migration_error() with Error* argument
  migration: Report error when shutdown fails
  migration: Use migrate_has_error() in close_return_path_on_source()
  migration: Fix return-path thread exit

 include/exec/memory.h | 12 ++--
 include/hw/vfio/vfio-common.h |  2 +-
 include/hw/vfio/vfio-container-base.h |  4 +-
 include/migration/register.h  |  4 +-
 hw/i386/xen/xen-hvm.c |  8 +--
 hw/ppc/spapr.c|  2 +-
 hw/s390x/s390-stattrib.c  |  2 +-
 hw/vfio/common.c  | 96 ---
 hw/vfio/container-base.c  |  4 +-
 hw/vfio/container.c   |  6 +-
 hw/vfio/migration.c   | 87 +++-
 hw/vfio/pci.c |  5 +-
 hw/virtio/vhost.c |  4 +-
 migration/block-dirty-bitmap.c|  2 +-
 migration/block.c |  2 +-
 migration/dirtyrate.c | 24 +--
 migration/migration.c | 16 ++---
 migration/qemu-file.c |  5 +-
 migration/ram.c   | 40 ---
 migration/savevm.c| 14 ++--
 system/memory.c   | 37 +++
 21 files changed, 236 insertions(+), 140 deletions(-)

-- 
2.43.0

[PATCH 03/14] memory: Add Error** argument to .log_global*() handlers

Modify memory_global_dirty_log_start() and memory_global_dirty_log_stop()
to also take an Error** parameter and report the error in the callers.
Aside from error reporting, there should be no functional changes.

Cc: Stefano Stabellini 
Cc: Anthony Perard 
Cc: Paul Durrant 
Cc: Michael S. Tsirkin 
Cc: Paolo Bonzini 
Cc: David Hildenbrand 
Signed-off-by: Cédric Le Goater 
---
 include/exec/memory.h | 12 
 hw/i386/xen/xen-hvm.c |  8 
 hw/vfio/common.c  |  6 --
 hw/virtio/vhost.c |  4 ++--
 migration/dirtyrate.c | 24 
 migration/ram.c   | 27 +++
 system/memory.c   | 37 +
 7 files changed, 86 insertions(+), 32 deletions(-)

diff --git a/include/exec/memory.h b/include/exec/memory.h
index 
177be23db709d8bab9cebfe6acbae57611073327..b348070dc8f17b3505196d3a92d8cfb2171b640f
 100644
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -998,8 +998,9 @@ struct MemoryListener {
  * active at that time.
  *
  * @listener: The #MemoryListener.
+ * @errp: pointer to Error*, to store an error if it happens.
  */
-void (*log_global_start)(MemoryListener *listener);
+void (*log_global_start)(MemoryListener *listener, Error **errp);
 
 /**
  * @log_global_stop:
@@ -1009,8 +1010,9 @@ struct MemoryListener {
  * the address space.
  *
  * @listener: The #MemoryListener.
+ * @errp: pointer to Error*, to store an error if it happens.
  */
-void (*log_global_stop)(MemoryListener *listener);
+void (*log_global_stop)(MemoryListener *listener, Error **errp);
 
 /**
  * @log_global_after_sync:
@@ -2567,15 +2569,17 @@ void memory_listener_unregister(MemoryListener 
*listener);
  * memory_global_dirty_log_start: begin dirty logging for all regions
  *
  * @flags: purpose of starting dirty log, migration or dirty rate
+ * @errp: pointer to Error*, to store an error if it happens.
  */
-void memory_global_dirty_log_start(unsigned int flags);
+void memory_global_dirty_log_start(unsigned int flags, Error **errp);
 
 /**
  * memory_global_dirty_log_stop: end dirty logging for all regions
  *
  * @flags: purpose of stopping dirty log, migration or dirty rate
+ * @errp: pointer to Error*, to store an error if it happens.
  */
-void memory_global_dirty_log_stop(unsigned int flags);
+void memory_global_dirty_log_stop(unsigned int flags, Error **errp);
 
 void mtree_info(bool flatview, bool dispatch_tree, bool owner, bool disabled);
 
diff --git a/hw/i386/xen/xen-hvm.c b/hw/i386/xen/xen-hvm.c
index 
f42621e6742552035122ea58092c91c3458338ff..d9c80416343b71311389563c7bdaa748829ada29
 100644
--- a/hw/i386/xen/xen-hvm.c
+++ b/hw/i386/xen/xen-hvm.c
@@ -446,14 +446,14 @@ static void xen_log_sync(MemoryListener *listener, 
MemoryRegionSection *section)
   int128_get64(section->size));
 }
 
-static void xen_log_global_start(MemoryListener *listener)
+static void xen_log_global_start(MemoryListener *listener, Error **errp)
 {
 if (xen_enabled()) {
 xen_in_migration = true;
 }
 }
 
-static void xen_log_global_stop(MemoryListener *listener)
+static void xen_log_global_stop(MemoryListener *listener, Error **errp)
 {
 xen_in_migration = false;
 }
@@ -653,9 +653,9 @@ void xen_hvm_modified_memory(ram_addr_t start, ram_addr_t 
length)
 void qmp_xen_set_global_dirty_log(bool enable, Error **errp)
 {
 if (enable) {
-memory_global_dirty_log_start(GLOBAL_DIRTY_MIGRATION);
+memory_global_dirty_log_start(GLOBAL_DIRTY_MIGRATION, errp);
 } else {
-memory_global_dirty_log_stop(GLOBAL_DIRTY_MIGRATION);
+memory_global_dirty_log_stop(GLOBAL_DIRTY_MIGRATION, errp);
 }
 }
 
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index 
059bfdc07a85e2eb908df828c1f42104d683e911..45af5c675584e1931dfba3b4f78469cc4c00014e
 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -1075,7 +1075,8 @@ out:
 return ret;
 }
 
-static void vfio_listener_log_global_start(MemoryListener *listener)
+static void vfio_listener_log_global_start(MemoryListener *listener,
+   Error **errp)
 {
 VFIOContainerBase *bcontainer = container_of(listener, VFIOContainerBase,
  listener);
@@ -1094,7 +1095,8 @@ static void vfio_listener_log_global_start(MemoryListener 
*listener)
 }
 }
 
-static void vfio_listener_log_global_stop(MemoryListener *listener)
+static void vfio_listener_log_global_stop(MemoryListener *listener,
+  Error **errp)
 {
 VFIOContainerBase *bcontainer = container_of(listener, VFIOContainerBase,
  listener);
diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
index 
2c9ac794680ea9b65eba6cc22e70cf141e90aa73..970f5951cc0b2113f91a3c640e27add5752b2944
 100644
--- a/hw/virtio/vhost.c
+++ b/hw/virtio/vhost.c
@@ -104

[PATCH 02/14] migration: Add Error** argument to .load_setup() handler

This will be useful to report errors at a higher level, mostly in VFIO
today.

Signed-off-by: Cédric Le Goater 
---
 include/migration/register.h |  2 +-
 hw/vfio/migration.c  |  2 +-
 migration/ram.c  |  2 +-
 migration/savevm.c   | 10 ++
 4 files changed, 9 insertions(+), 7 deletions(-)

diff --git a/include/migration/register.h b/include/migration/register.h
index 
831600a00eae4efd0464b60925d65de4d9dbcff8..e6bc226c98b27c1fb0f9e2b56d8aff491aa14d65
 100644
--- a/include/migration/register.h
+++ b/include/migration/register.h
@@ -72,7 +72,7 @@ typedef struct SaveVMHandlers {
 void (*state_pending_exact)(void *opaque, uint64_t *must_precopy,
 uint64_t *can_postcopy);
 LoadStateHandler *load_state;
-int (*load_setup)(QEMUFile *f, void *opaque);
+int (*load_setup)(QEMUFile *f, void *opaque, Error **errp);
 int (*load_cleanup)(void *opaque);
 /* Called when postcopy migration wants to resume from failure */
 int (*resume_prepare)(MigrationState *s, void *opaque);
diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
index 
8bcb4bc73cd5ba5338e3ffa4d907d0e6bfbb9485..2dfbe671f6f45aa530c7341177bb532d8292cecd
 100644
--- a/hw/vfio/migration.c
+++ b/hw/vfio/migration.c
@@ -580,7 +580,7 @@ static void vfio_save_state(QEMUFile *f, void *opaque)
 }
 }
 
-static int vfio_load_setup(QEMUFile *f, void *opaque)
+static int vfio_load_setup(QEMUFile *f, void *opaque, Error **errp)
 {
 VFIODevice *vbasedev = opaque;
 
diff --git a/migration/ram.c b/migration/ram.c
index 
136c237f4079f68d4e578cf1c72eec2efc815bc8..8dac9bac2fe8b8c19e102c771a7ef6e976252906
 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -3498,7 +3498,7 @@ void colo_release_ram_cache(void)
  * @f: QEMUFile where to receive the data
  * @opaque: RAMState pointer
  */
-static int ram_load_setup(QEMUFile *f, void *opaque)
+static int ram_load_setup(QEMUFile *f, void *opaque, Error **errp)
 {
 xbzrle_load_setup();
 ramblock_recv_map_init();
diff --git a/migration/savevm.c b/migration/savevm.c
index 
f2ae799bad13e631bccf733a34c3a8fd22e8dd48..990f4249a26d28117ee365d8b20fc5bbca0d43d6
 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -2737,7 +2737,7 @@ static void 
qemu_loadvm_state_switchover_ack_needed(MigrationIncomingState *mis)
 trace_loadvm_state_switchover_ack_needed(mis->switchover_ack_pending_num);
 }
 
-static int qemu_loadvm_state_setup(QEMUFile *f)
+static int qemu_loadvm_state_setup(QEMUFile *f, Error **errp)
 {
 SaveStateEntry *se;
 int ret;
@@ -2753,10 +2753,11 @@ static int qemu_loadvm_state_setup(QEMUFile *f)
 }
 }
 
-ret = se->ops->load_setup(f, se->opaque);
+ret = se->ops->load_setup(f, se->opaque, errp);
 if (ret < 0) {
+error_prepend(errp, "Load state of device %s failed: ",
+  se->idstr);
 qemu_file_set_error(f, ret);
-error_report("Load state of device %s failed", se->idstr);
 return ret;
 }
 }
@@ -2937,7 +2938,8 @@ int qemu_loadvm_state(QEMUFile *f)
 return ret;
 }
 
-if (qemu_loadvm_state_setup(f) != 0) {
+if (qemu_loadvm_state_setup(f, _err) != 0) {
+error_report_err(local_err);
 return -EINVAL;
 }
 
-- 
2.43.0

Re: [PATCH v0 1/2] aspeed: support uart controller both 0 and 1 base


Hello Jamin,

On 2/5/24 10:14, Jamin Lin wrote:

According to the design of ASPEED SOCS, the uart controller
is 1 base for ast10x0, ast2600, ast2500 and ast2400.


Please rephrase, saying something like:

the Aspeed datasheet refers to the UART controllers as UART1 - UART13
for the ast10x0, ast2600, ast2500 and ast2400 SoCs and the Aspeed
ast2700 introduces an UART0. To keep the naming in the QEMU models
in sync with the datasheet, let's introduce a new  UART0 device name
and do the required adjustments, etc ...


However, the uart controller is 0 base for ast2700.
To support uart controller both 0 and 1 base,
add a uarts_base parameter in AspeedSoCClass
and set the default uart controller 1 base
for ast10x0, ast2600, ast2500 and ast2400.

 From datasheet description
ast2700:
Base Address of UART0 = 0x14c33000
ast1030:
Base Address of UART1 = 0x7e783000
ast2600:
Base Address of UART1 = 0x1E78 3000
ast2500:
Base Address of UART1 = 0x1E78 3000


We should also introduce ASPEED_DEV_UART0 enum. See below.


Signed-off-by: Troy Lee 
Signed-off-by: Jamin Lin 
---
  hw/arm/aspeed.c | 8 +---
  hw/arm/aspeed_ast10x0.c | 1 +
  hw/arm/aspeed_ast2400.c | 2 ++
  hw/arm/aspeed_ast2600.c | 1 +
  hw/arm/aspeed_soc_common.c  | 4 ++--
  include/hw/arm/aspeed_soc.h | 1 +
  6 files changed, 12 insertions(+), 5 deletions(-)

diff --git a/hw/arm/aspeed.c b/hw/arm/aspeed.c
index 09b1e823ba..218b81298e 100644
--- a/hw/arm/aspeed.c
+++ b/hw/arm/aspeed.c
@@ -342,7 +342,7 @@ static void connect_serial_hds_to_uarts(AspeedMachineState 
*bmc)
  int uart_chosen = bmc->uart_chosen ? bmc->uart_chosen : amc->uart_default;
  
  aspeed_soc_uart_set_chr(s, uart_chosen, serial_hd(0));

-for (int i = 1, uart = ASPEED_DEV_UART1; i < sc->uarts_num; i++, uart++) {
+for (int i = 1, uart = sc->uarts_base; i < sc->uarts_num; i++, uart++) {
  if (uart == uart_chosen) {
  continue;
  }
@@ -1092,9 +1092,11 @@ static char *aspeed_get_bmc_console(Object *obj, Error 
**errp)
  {
  AspeedMachineState *bmc = ASPEED_MACHINE(obj);
  AspeedMachineClass *amc = ASPEED_MACHINE_GET_CLASS(bmc);
+AspeedSoCClass *sc = ASPEED_SOC_CLASS(obj);
+
  int uart_chosen = bmc->uart_chosen ? bmc->uart_chosen : amc->uart_default;
  
-return g_strdup_printf("uart%d", uart_chosen - ASPEED_DEV_UART1 + 1);

+return g_strdup_printf("uart%d", uart_chosen - sc->uarts_base + 1);


We didn't have an ASPEED_DEV_UART0 at the time. The calculation above should
be replaced with : "uart_chosen - ASPEED_DEV_UART0"


  }
  
  static void aspeed_set_bmc_console(Object *obj, const char *value, Error **errp)

@@ -1114,7 +1116,7 @@ static void aspeed_set_bmc_console(Object *obj, const 
char *value, Error **errp)
  error_setg(errp, "\"uart\" should be in range [1 - %d]", 
sc->uarts_num);


The range in the reported error above needs a fix. It's not "1" anymore
but "sc->uarts_base - ASPEED_DEV_UART0". Same for the test :

  if (val < 1 || val > sc->uarts_num) {



  return;
  }
-bmc->uart_chosen = ASPEED_DEV_UART1 + val - 1;
+bmc->uart_chosen = sc->uarts_base + val - 1;


Should be ASPEED_DEV_UART0 + val.


Thanks,

C.




  }
  
  static void aspeed_machine_class_props_init(ObjectClass *oc)

diff --git a/hw/arm/aspeed_ast10x0.c b/hw/arm/aspeed_ast10x0.c
index c3b5116a6a..2634e0f654 100644
--- a/hw/arm/aspeed_ast10x0.c
+++ b/hw/arm/aspeed_ast10x0.c
@@ -436,6 +436,7 @@ static void aspeed_soc_ast1030_class_init(ObjectClass 
*klass, void *data)
  sc->wdts_num = 4;
  sc->macs_num = 1;
  sc->uarts_num = 13;
+sc->uarts_base = ASPEED_DEV_UART1;
  sc->irqmap = aspeed_soc_ast1030_irqmap;
  sc->memmap = aspeed_soc_ast1030_memmap;
  sc->num_cpus = 1;
diff --git a/hw/arm/aspeed_ast2400.c b/hw/arm/aspeed_ast2400.c
index 8829561bb6..95da85fee0 100644
--- a/hw/arm/aspeed_ast2400.c
+++ b/hw/arm/aspeed_ast2400.c
@@ -523,6 +523,7 @@ static void aspeed_soc_ast2400_class_init(ObjectClass *oc, 
void *data)
  sc->wdts_num = 2;
  sc->macs_num = 2;
  sc->uarts_num= 5;
+sc->uarts_base   = ASPEED_DEV_UART1;
  sc->irqmap   = aspeed_soc_ast2400_irqmap;
  sc->memmap   = aspeed_soc_ast2400_memmap;
  sc->num_cpus = 1;
@@ -551,6 +552,7 @@ static void aspeed_soc_ast2500_class_init(ObjectClass *oc, 
void *data)
  sc->wdts_num = 3;
  sc->macs_num = 2;
  sc->uarts_num= 5;
+sc->uarts_base   = ASPEED_DEV_UART1;
  sc->irqmap   = aspeed_soc_ast2500_irqmap;
  sc->memmap   = aspeed_soc_ast2500_memmap;
  sc->num_cpus = 1;
diff --git a/hw/arm/aspeed_ast2600.c b/hw/arm/aspeed_ast2600.c
index 4ee32ea99d..f74561ecdc 100644
--- a/hw/arm/aspeed_ast2600.c
+++ b/hw/arm/aspeed_ast2600.c
@@ -666,6 +666,7 @@ static void aspeed_soc_ast2600_class_init(ObjectClass *oc, 
void *data)
  sc->wdts_num = 4;
  sc->macs_num = 4;
  sc->uarts_num= 13;
+sc->uarts_base   =

Re: [PATCH v0 1/2] aspeed: support uart controller both 0 and 1 base


On 2/6/24 04:29, Jamin Lin wrote:

-Original Message-
The uart definitions on the AST2700 are different :


https://github.com/AspeedTech-BMC/linux/blob/aspeed-master-v6.6/arch/arm
64/boot/dts/aspeed/aspeed-g7.dtsi

serial0 = 
serial1 = 
serial2 = 
serial3 = 
serial4 = 
serial5 = 
serial6 = 
serial7 = 
serial8 = 
  ...

I think the names in the DT (and consequently in the QEMU models) follow the
IP names in the datasheet.

I don't think we care in QEMU, so I would be inclined to change the indexing of
the device names in QEMU and start at 0, which would introduce a
discrepancy for the AST2400, AST2500, AST2600 SoCs.

Let's see what the other maintainers have to say.

Thanks,

C.

Hi Cedric,

Did you mean to change the naming of uart device to 0 base for all ASPEED SOCs?
If yes, it seems we need to do the following changes.
1. add ASPEED_DEV_UART0 in aspeed_soc.h
2. Re-defined uart memory map for ast2600, ast10x0, ast2500 and ast2400(uart0 
-> ASPEED_DEV_UART0)
Take ast2600 for example:
static const hwaddr aspeed_soc_ast2600_memmap[] = {
 [ASPEED_DEV_UART1] = 0x1E783000, ---> [ASPEED_DEV_UART0]
 [ASPEED_DEV_UART2] = 0x1E78D000, ---> [ASPEED_DEV_UART1]
 [ASPEED_DEV_UART3] = 0x1E78E000,
 [ASPEED_DEV_UART4] = 0x1E78F000,
 [ASPEED_DEV_UART5] = 0x1E784000,
 [ASPEED_DEV_UART6] = 0x1E79,
 [ASPEED_DEV_UART7] = 0x1E790100,
 [ASPEED_DEV_UART8] = 0x1E790200,
 [ASPEED_DEV_UART9] = 0x1E790300,
 [ASPEED_DEV_UART10]= 0x1E790400,
 [ASPEED_DEV_UART11]= 0x1E790500,
 [ASPEED_DEV_UART12]= 0x1E790600,
 [ASPEED_DEV_UART13]= 0x1E790700, ---> [ASPEED_DEV_UART12]
};
If no, could you please describe it in more detail? So, I can change it and 
re-send this patch series.


Let's keep the datasheet names. I had forgotten the reason initially
and from an HW POV it makes sense to keep them in sync. I will add
some more comments to the patch.
 

By the way, I will send a new patch series to support AST2700 in two weeks.
We encountered GIC issues. It seems that QEMU supports GICv3, but SPI is not
supported yet.

https://github.com/qemu/qemu/blob/master/hw/intc/arm_gicv3_dist.c#L383
https://github.com/AspeedTech-BMC/linux/blob/aspeed-master-v6.6/arch/arm64/boot/dts/aspeed/aspeed-g7.dtsi#L229


If you did any hacks or workarounds in the QEMU models, please keep them
separate from the other patches so that we can discuss.


I think that we can discuss it in a new AST2700 patch series.

Sure.

Thanks,

C.

Re: [PATCH v0 2/2] aspeed: fix hardcode boot address 0

On 2/6/24 02:48, Jamin Lin wrote:

-Original Message-
From: Philippe Mathieu-Daudé 
Sent: Monday, February 5, 2024 9:20 PM
To: Jamin Lin ; Cédric Le Goater ;
Peter Maydell ; Andrew Jeffery
; Joel Stanley ; open
list:ASPEED BMCs ; open list:All patches CC here

Cc: Troy Lee 
Subject: Re: [PATCH v0 2/2] aspeed: fix hardcode boot address 0

Hi Jamin,

On 5/2/24 10:14, Jamin Lin via wrote:

In the previous design of QEMU model for ASPEED SOCs, it set the boot
address at 0 which was the hardcode setting for ast10x0, ast2600,
ast2500 and ast2400.

According to the design of ast2700, it has bootmcu which is used for
executing SPL and initialize DRAM,

Out of curiosity, what architecture is this MCU?

MCU is riscv-ibex and its architecture is riscv-32.

then, CPUs(cortex-a35)
execute u-boot, kernel and rofs. QEMU will only support
CPU(cortex-a35) parts and the boot address is "0x4" for ast2700.

OK, but I don't get how you get from here ...

Our design make MCU execute SPL and copy u-boot image from SPI to DRAM at 
address 0x4 at SPL boot stage.
However, QEMU will only support to emulate CPU sides (cortex-a35) for ast2700, 

The fby35 is an example of a machine with two ARM SoCs : ast1030-a1
and ast2600-a3. There is work in progress for heterogeneous QEMU
machines and It might be possible to model RISC-V and ARM one day.

that was why we want to change the boot address at 0x4
And use the following start command by QEMU.

./qemu-system-aarch64 -M ast2750-evb -nographic -m 8G \
  -device loader,addr=0x4,file=${IMGDIR}/u-boot-nodtb.bin,force-raw=on \
  -device loader,addr=$((0x4 + 
${UBOOT_SIZE})),file=${IMGDIR}/u-boot.dtb,force-raw=on \
  ---
  ---

ok. Makes sense.

By the way, I will send a new patch series to support ast2700 in two weeks and
We set memory map for ast2700 as following.

static const hwaddr aspeed_soc_ast2700_memmap[] = {
 [ASPEED_DEV_SPI_BOOT]  =  0x4,
 [ASPEED_DEV_SRAM]  =  0x1000,

Excellent !

Thanks,

C.

Re: [v0 0/2] uart base and hardcode boot address 0


On 2/5/24 10:14, Jamin Lin wrote:

v0:


usually we start at v1, so the next version would be a v2. Indexing again :)


Thanks,

C.




1. support uart controller both 0 and 1 base
2. fix hardcode boot address 0

Jamin Lin (2):
   aspeed: support uart controller both 0 and 1 base
   aspeed: fix hardcode boot address 0

  hw/arm/aspeed.c | 12 
  hw/arm/aspeed_ast10x0.c |  1 +
  hw/arm/aspeed_ast2400.c |  2 ++
  hw/arm/aspeed_ast2600.c |  1 +
  hw/arm/aspeed_soc_common.c  |  4 ++--
  include/hw/arm/aspeed_soc.h |  1 +
  6 files changed, 15 insertions(+), 6 deletions(-)

Re: [PATCH v0 1/2] aspeed: support uart controller both 0 and 1 base


[ ... ]



As you said, uart12 mapped ASPEED_DEV_UART13.
The device naming will confuse users because the device name in qemu mismatch 
with ast2700 datasheet.

That was why we want to add ASPEED_DEV_UART0 and set the memory map of AST2700 
as following.
static const hwaddr aspeed_soc_ast2700_memmap[] = {
 [ASPEED_DEV_UART0] =  0X14C33000,
 [ASPEED_DEV_UART1] =  0X14C33100,
 [ASPEED_DEV_UART2] =  0X14C33200,
 [ASPEED_DEV_UART3] =  0X14C33300,
 [ASPEED_DEV_UART4] =  0X12C1A000,
 [ASPEED_DEV_UART5] =  0X14C33400,
 [ASPEED_DEV_UART6] =  0X14C33500,
 [ASPEED_DEV_UART7] =  0X14C33600,
 [ASPEED_DEV_UART8] =  0X14C33700,
 [ASPEED_DEV_UART9] =  0X14C33800,
 [ASPEED_DEV_UART10]=  0X14C33900,
 [ASPEED_DEV_UART11]=  0X14C33A00,
 [ASPEED_DEV_UART12]=  0X14C33B00,



So we would prefer to keep the QEMU IP names in sync with the datasheet,
and in that case your proposal makes sense.

I have a few comments that I will make on the patch.

Thanks,

C.

Re: [PATCH v0 1/2] aspeed: support uart controller both 0 and 1 base


On 2/5/24 11:46, Cédric Le Goater wrote:

Hello Jamin,

On 2/5/24 10:14, Jamin Lin wrote:

According to the design of ASPEED SOCS, the uart controller
is 1 base for ast10x0, ast2600, ast2500 and ast2400.

However, the uart controller is 0 base for ast2700.
To support uart controller both 0 and 1 base,
adds uarts_base parameter in AspeedSoCClass
and set the default uart controller 1 base
for ast10x0, ast2600, ast2500 and ast2400.


The board definition can set 'amc->uart_default' to choose a different
default serial port for the console, or use the "bmc-console" machine
option . Isn't it enough ? May be I am misunderstanding the need.

To clarify,

ASPEED_DEV_UART1 is in the first serial port on the boards.

I think we chose to start the indexing at 1 because the Aspeed QEMU
modeling began first with the UART model (console) and for simplicity,
we copied the definitions of the device tree from Linux :

     serial0 = 
     serial1 = 
     serial2 = 
     serial3 = 
     serial4 = 
     serial5 = 


The uart definitions on the AST2700 are different :
  
  https://github.com/AspeedTech-BMC/linux/blob/aspeed-master-v6.6/arch/arm64/boot/dts/aspeed/aspeed-g7.dtsi


serial0 = 
serial1 = 
serial2 = 
serial3 = 
serial4 = 
serial5 = 
serial6 = 
serial7 = 
serial8 = 
...

I think the names in the DT (and consequently in the QEMU models)
follow the IP names in the datasheet.

I don't think we care in QEMU, so I would be inclined to change the
indexing of the device names in QEMU and start at 0, which would
introduce a discrepancy for the AST2400, AST2500, AST2600 SoC.

Let's see what the other maintainers have to say.

Thanks,

C.

Re: [PATCH v0 2/2] aspeed: fix hardcode boot address 0


On 2/5/24 10:14, Jamin Lin wrote:

In the previous design of QEMU model for ASPEED SOCs, it set the boot
address at 0 which was the hardcode setting for ast10x0, ast2600,
ast2500 and ast2400.

According to the design of ast2700, it has bootmcu which is used for
executing SPL and initialize DRAM, then, CPUs(cortex-a35)
execute u-boot, kernel and rofs. QEMU will only support CPU(cortex-a35)
parts and the boot address is "0x4" for ast2700.


On the previous SoC, the ASPEED_DEV_SPI_BOOT region is an alias, at 0x0,
to the FMC CE0 region, mapped at 0x2000.

Is 0x4 (or 0x4000 ?) the address for FMC CE0 region on the
ast2700 ? or an alias ?

What is the cortex-a35 reset address ?

It would help to also introduce a basic skeleton of the ast2700 SoC.

Anyhow, this change makes sense. Could you please respin and also
remove ASPEED_SOC_SPI_BOOT_ADDR. ?

Thanks,

C.


Therefore, fixed hardcode boot address 0.

Signed-off-by: Troy Lee 
Signed-off-by: Jamin Lin 
---
  hw/arm/aspeed.c | 4 +++-
  1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/hw/arm/aspeed.c b/hw/arm/aspeed.c
index 218b81298e..82a92e8142 100644
--- a/hw/arm/aspeed.c
+++ b/hw/arm/aspeed.c
@@ -289,12 +289,14 @@ static void aspeed_install_boot_rom(AspeedMachineState 
*bmc, BlockBackend *blk,
  uint64_t rom_size)
  {
  AspeedSoCState *soc = bmc->soc;
+AspeedSoCClass *sc = ASPEED_SOC_GET_CLASS(soc);
  
  memory_region_init_rom(>boot_rom, NULL, "aspeed.boot_rom", rom_size,

 _abort);
  memory_region_add_subregion_overlap(>spi_boot_container, 0,
  >boot_rom, 1);
-write_boot_rom(blk, ASPEED_SOC_SPI_BOOT_ADDR, rom_size, _abort);
+write_boot_rom(blk, sc->memmap[ASPEED_DEV_SPI_BOOT],
+   rom_size, _abort);
  }
  
  void aspeed_board_init_flashes(AspeedSMCState *s, const char *flashtype,

Re: [PATCH v0 1/2] aspeed: support uart controller both 0 and 1 base


Hello Jamin,

On 2/5/24 10:14, Jamin Lin wrote:

According to the design of ASPEED SOCS, the uart controller
is 1 base for ast10x0, ast2600, ast2500 and ast2400.

However, the uart controller is 0 base for ast2700.
To support uart controller both 0 and 1 base,
adds uarts_base parameter in AspeedSoCClass
and set the default uart controller 1 base
for ast10x0, ast2600, ast2500 and ast2400.


The board definition can set 'amc->uart_default' to choose a different
default serial port for the console, or use the "bmc-console" machine
option . Isn't it enough ? May be I am misunderstanding the need.

To clarify,

ASPEED_DEV_UART1 is in the first serial port on the boards.

I think we chose to start the indexing at 1 because the Aspeed QEMU
modeling began first with the UART model (console) and for simplicity,
we copied the definitions of the device tree from Linux :

serial0 = 
serial1 = 
serial2 = 
serial3 = 
serial4 = 
serial5 = 

We replicated this indexing starting at 1 to nearly all device models :
 
ASPEED_DEV_UART1 - 13

ASPEED_DEV_SPI1 -2
ASPEED_DEV_EHCI1 -2
ASPEED_DEV_TIMER1 - 8
ASPEED_DEV_ETH1 -4
ASPEED_DEV_MII1 - 4
ASPEED_DEV_JTAG0 - 1  <--- !!
ASPEED_DEV_FSI1 - 2

I don't know what would be ASPEED_DEV_UART0 in this context.

May be you could send a simplified AST2700 SoC model with definitions
of a minimum address space and IRQ space ?

Or you could change the indexing to start at 0 if you prefer. Just be
careful with the aspeed_set/get_bmc_console routines if you choose to.

Thanks,

C.



 From datasheet description
ast2700:
Base Address of UART0 = 0x14c33000
ast1030:
Base Address of UART1 = 0x7e783000
ast2600:
Base Address of UART1 = 0x1E78 3000
ast2500:
Base Address of UART1 = 0x1E78 3000

Signed-off-by: Troy Lee 
Signed-off-by: Jamin Lin 
---
  hw/arm/aspeed.c | 8 +---
  hw/arm/aspeed_ast10x0.c | 1 +
  hw/arm/aspeed_ast2400.c | 2 ++
  hw/arm/aspeed_ast2600.c | 1 +
  hw/arm/aspeed_soc_common.c  | 4 ++--
  include/hw/arm/aspeed_soc.h | 1 +
  6 files changed, 12 insertions(+), 5 deletions(-)

diff --git a/hw/arm/aspeed.c b/hw/arm/aspeed.c
index 09b1e823ba..218b81298e 100644
--- a/hw/arm/aspeed.c
+++ b/hw/arm/aspeed.c
@@ -342,7 +342,7 @@ static void connect_serial_hds_to_uarts(AspeedMachineState 
*bmc)
  int uart_chosen = bmc->uart_chosen ? bmc->uart_chosen : amc->uart_default;
  
  aspeed_soc_uart_set_chr(s, uart_chosen, serial_hd(0));

-for (int i = 1, uart = ASPEED_DEV_UART1; i < sc->uarts_num; i++, uart++) {
+for (int i = 1, uart = sc->uarts_base; i < sc->uarts_num; i++, uart++) {
  if (uart == uart_chosen) {
  continue;
  }
@@ -1092,9 +1092,11 @@ static char *aspeed_get_bmc_console(Object *obj, Error 
**errp)
  {
  AspeedMachineState *bmc = ASPEED_MACHINE(obj);
  AspeedMachineClass *amc = ASPEED_MACHINE_GET_CLASS(bmc);
+AspeedSoCClass *sc = ASPEED_SOC_CLASS(obj);
+
  int uart_chosen = bmc->uart_chosen ? bmc->uart_chosen : amc->uart_default;
  
-return g_strdup_printf("uart%d", uart_chosen - ASPEED_DEV_UART1 + 1);

+return g_strdup_printf("uart%d", uart_chosen - sc->uarts_base + 1);
  }
  
  static void aspeed_set_bmc_console(Object *obj, const char *value, Error **errp)

@@ -1114,7 +1116,7 @@ static void aspeed_set_bmc_console(Object *obj, const 
char *value, Error **errp)
  error_setg(errp, "\"uart\" should be in range [1 - %d]", 
sc->uarts_num);
  return;
  }
-bmc->uart_chosen = ASPEED_DEV_UART1 + val - 1;
+bmc->uart_chosen = sc->uarts_base + val - 1;
  }
  
  static void aspeed_machine_class_props_init(ObjectClass *oc)

diff --git a/hw/arm/aspeed_ast10x0.c b/hw/arm/aspeed_ast10x0.c
index c3b5116a6a..2634e0f654 100644
--- a/hw/arm/aspeed_ast10x0.c
+++ b/hw/arm/aspeed_ast10x0.c
@@ -436,6 +436,7 @@ static void aspeed_soc_ast1030_class_init(ObjectClass 
*klass, void *data)
  sc->wdts_num = 4;
  sc->macs_num = 1;
  sc->uarts_num = 13;
+sc->uarts_base = ASPEED_DEV_UART1;
  sc->irqmap = aspeed_soc_ast1030_irqmap;
  sc->memmap = aspeed_soc_ast1030_memmap;
  sc->num_cpus = 1;
diff --git a/hw/arm/aspeed_ast2400.c b/hw/arm/aspeed_ast2400.c
index 8829561bb6..95da85fee0 100644
--- a/hw/arm/aspeed_ast2400.c
+++ b/hw/arm/aspeed_ast2400.c
@@ -523,6 +523,7 @@ static void aspeed_soc_ast2400_class_init(ObjectClass *oc, 
void *data)
  sc->wdts_num = 2;
  sc->macs_num = 2;
  sc->uarts_num= 5;
+sc->uarts_base   = ASPEED_DEV_UART1;
  sc->irqmap   = aspeed_soc_ast2400_irqmap;
  sc->memmap   = aspeed_soc_ast2400_memmap;
  sc->num_cpus = 1;
@@ -551,6 +552,7 @@ static void aspeed_soc_ast2500_class_init(ObjectClass *oc, 
void *data)
  sc->wdts_num = 3;
  sc->macs_num = 2;
  sc->uarts_num= 5;
+sc->uarts_base   = ASPEED_DEV_UART1;
  sc->irqmap   = aspeed_soc_ast2500_irqmap;
  sc->memmap   =

Re: [PATCH v2 1/3] virtio-iommu: Add an option to define the input range width


On 2/5/24 10:14, Cédric Le Goater wrote:

On 2/1/24 17:32, Eric Auger wrote:

aw-bits is a new option that allows to set the bit width of
the input address range. This value will be used as a default for
the device config input_range.end. By default it is set to 64 bits
which is the current value.

Signed-off-by: Eric Auger 

---

v1 -> v2:
- Check the aw-bits value is within [32,64]
---
  include/hw/virtio/virtio-iommu.h | 1 +
  hw/virtio/virtio-iommu.c | 7 ++-
  2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/include/hw/virtio/virtio-iommu.h b/include/hw/virtio/virtio-iommu.h
index 781ebaea8f..5fbe4677c2 100644
--- a/include/hw/virtio/virtio-iommu.h
+++ b/include/hw/virtio/virtio-iommu.h
@@ -66,6 +66,7 @@ struct VirtIOIOMMU {
  bool boot_bypass;
  Notifier machine_done;
  bool granule_frozen;
+    uint8_t aw_bits;
  };
  #endif
diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c
index ec2ba11d1d..7870bdbeee 100644
--- a/hw/virtio/virtio-iommu.c
+++ b/hw/virtio/virtio-iommu.c
@@ -1314,7 +1314,11 @@ static void virtio_iommu_device_realize(DeviceState 
*dev, Error **errp)
   */
  s->config.bypass = s->boot_bypass;
  s->config.page_size_mask = qemu_real_host_page_mask();
-    s->config.input_range.end = UINT64_MAX;
+    if (s->aw_bits < 32 || s->aw_bits > 64) {
+    error_setg(errp, "aw-bits must be within [32,64]");
+    }
+    s->config.input_range.end =
+    s->aw_bits == 64 ? UINT64_MAX : BIT_ULL(s->aw_bits) - 1;



This could be simplified :

   s->config.input_range.end = BIT_ULL(s->aw_bits) - 1;


Forget that. We would need an int128.

Thanks,

C.





Anyhow,


Reviewed-by: Cédric Le Goater 

Thanks,

C.




  s->config.domain_range.end = UINT32_MAX;
  s->config.probe_size = VIOMMU_PROBE_SIZE;
@@ -1525,6 +1529,7 @@ static Property virtio_iommu_properties[] = {
  DEFINE_PROP_LINK("primary-bus", VirtIOIOMMU, primary_bus,
   TYPE_PCI_BUS, PCIBus *),
  DEFINE_PROP_BOOL("boot-bypass", VirtIOIOMMU, boot_bypass, true),
+    DEFINE_PROP_UINT8("aw-bits", VirtIOIOMMU, aw_bits, 64),
  DEFINE_PROP_END_OF_LIST(),
  };

Re: [PATCH 2/2] migration: Fix return-path thread exit


On 2/5/24 04:37, Peter Xu wrote:

On Fri, Feb 02, 2024 at 12:11:09PM -0300, Fabiano Rosas wrote:

Cédric Le Goater  writes:


On 2/2/24 15:42, Fabiano Rosas wrote:

Cédric Le Goater  writes:


In case of error, close_return_path_on_source() can perform a shutdown
to exit the return-path thread.  However, in migrate_fd_cleanup(),
'to_dst_file' is closed before calling close_return_path_on_source()
and the shutdown fails, leaving the source and destination waiting for
an event to occur.


At close_return_path_on_source, qemu_file_shutdown() and checking
ms->to_dst_file are done under the qemu_file_lock, so how could
migrate_fd_cleanup() have cleared the pointer but the ms->to_dst_file
check have passed?


This is not a locking issue, it's much simpler. migrate_fd_cleanup()
clears the ms->to_dst_file pointer and closes the QEMUFile and then
calls close_return_path_on_source() which then tries to use resources
which are not available anymore.


I'm missing something here. Which resources? I assume you're talking
about this:

 WITH_QEMU_LOCK_GUARD(>qemu_file_lock) {
 if (ms->to_dst_file && ms->rp_state.from_dst_file &&
 qemu_file_get_error(ms->to_dst_file)) {
 qemu_file_shutdown(ms->rp_state.from_dst_file);
 }
 }

How do we get past the 'if (ms->to_dst_file)'?


We don't; migrate_fd_cleanup() will release ms->to_dst_file, then call
close_return_path_on_source(), found that to_dst_file==NULL and then skip
the shutdown().

One other option might be that we do close_return_path_on_source() before
the chunk of releasing to_dst_file.

This "two qemufiles share the same ioc" issue had bitten us before IIRC,
and the only concern of that workaround is we keep postponing resolution of
the real issue, then we keep getting bitten by it..

Maybe we can wait a few days to see if Dan can join the conversation and if
we can reach a consensus on a complete solution.  Otherwise I think we can
still work this around, but maybe that'll require a comment block
explaining the bits after such movement.


yes. The series should have been sent with an RFC.

I changed PATCH 1 to use migrate_has_error() instead of
qemu_file_get_error(ms->to_dst_file). I will keep PATCH 2 as it is for
the time being and wait for more feedback.

The prereq series adds an Error** argument to the .save_setup() and
.log_global*() handlers. I should send this week.

Thanks,

C.







Thanks,

Re: [PATCH v2 3/3] hw: Set virtio-iommu aw-bits default value on pc_q35_9.0 and arm virt


On 2/1/24 17:32, Eric Auger wrote:

Currently the default input range can extend to 64 bits. On x86,
when the virtio-iommu protects vfio devices, the physical iommu
may support only 39 bits. Let's set the default to 39, as done
for the intel-iommu. On ARM we set 48b as a default (matching
SMMUv3 SMMU_IDR5.VAX == 0).

We use hw_compat_8_2 to handle the compatibility for machines
before 9.0 which used to have a virtio-iommu default input range
of 64 bits.

Of course if aw-bits is set from the command line, the default
is overridden.

Signed-off-by: Eric Auger 

---

v1 -> v2:
- set aw-bits to 48b on ARM
- use hw_compat_8_2 to handle the compat for older machines
   which used 64b as a default
---
  hw/arm/virt.c| 6 ++
  hw/core/machine.c| 5 -
  hw/i386/pc.c | 6 ++
  hw/virtio/virtio-iommu.c | 2 +-
  4 files changed, 17 insertions(+), 2 deletions(-)

diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index e6ead2c5c8..56539f2fc5 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -2718,10 +2718,16 @@ static void 
virt_machine_device_pre_plug_cb(HotplugHandler *hotplug_dev,
  } else if (object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_MD_PCI)) {
  virtio_md_pci_pre_plug(VIRTIO_MD_PCI(dev), MACHINE(hotplug_dev), 
errp);
  } else if (object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_IOMMU_PCI)) {
+uint8_t aw_bits = object_property_get_uint(OBJECT(dev),
+   "aw-bits", NULL);


object_property_get_uint() should not fail. Please use _abort.


  hwaddr db_start = 0, db_end = 0;
  QList *reserved_regions;
  char *resv_prop_str;
  
+if (!aw_bits) {

+qdev_prop_set_uint8(dev, "aw-bits", 48);
+}
+
  if (vms->iommu != VIRT_IOMMU_NONE) {
  error_setg(errp, "virt machine does not support multiple IOMMUs");
  return;
diff --git a/hw/core/machine.c b/hw/core/machine.c
index fb5afdcae4..70ac96954c 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -30,9 +30,12 @@
  #include "exec/confidential-guest-support.h"
  #include "hw/virtio/virtio-pci.h"
  #include "hw/virtio/virtio-net.h"
+#include "hw/virtio/virtio-iommu.h"
  #include "audio/audio.h"
  
-GlobalProperty hw_compat_8_2[] = {};

+GlobalProperty hw_compat_8_2[] = {
+{ TYPE_VIRTIO_IOMMU_PCI, "aw-bits", "64" },
+};
  const size_t hw_compat_8_2_len = G_N_ELEMENTS(hw_compat_8_2);
  
  GlobalProperty hw_compat_8_1[] = {

diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index 803244e5cc..0e2bcb4840 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -1458,6 +1458,8 @@ static void pc_machine_device_pre_plug_cb(HotplugHandler 
*hotplug_dev,
  } else if (object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_MD_PCI)) {
  virtio_md_pci_pre_plug(VIRTIO_MD_PCI(dev), MACHINE(hotplug_dev), 
errp);
  } else if (object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_IOMMU_PCI)) {
+uint8_t aw_bits = object_property_get_uint(OBJECT(dev),
+   "aw-bits", NULL);
  /* Declare the APIC range as the reserved MSI region */
  char *resv_prop_str = g_strdup_printf("0xfee0:0xfeef:%d",
VIRTIO_IOMMU_RESV_MEM_T_MSI);
@@ -1466,6 +1468,10 @@ static void pc_machine_device_pre_plug_cb(HotplugHandler 
*hotplug_dev,
  qlist_append_str(reserved_regions, resv_prop_str);
  qdev_prop_set_array(dev, "reserved-regions", reserved_regions);
  
+if (!aw_bits) {

+qdev_prop_set_uint8(dev, "aw-bits", 39);


May be use VTD_HOST_AW_39BIT instead of 39 ? This would make it
easier to find uses of certain defaults values and would clarify
that the default AW of virtio-iommu is set as intel-iommu.

Thanks,

C.




+}
+
  g_free(resv_prop_str);
  }
  
diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c

index 7870bdbeee..c468e9b13b 100644
--- a/hw/virtio/virtio-iommu.c
+++ b/hw/virtio/virtio-iommu.c
@@ -1529,7 +1529,7 @@ static Property virtio_iommu_properties[] = {
  DEFINE_PROP_LINK("primary-bus", VirtIOIOMMU, primary_bus,
   TYPE_PCI_BUS, PCIBus *),
  DEFINE_PROP_BOOL("boot-bypass", VirtIOIOMMU, boot_bypass, true),
-DEFINE_PROP_UINT8("aw-bits", VirtIOIOMMU, aw_bits, 64),
+DEFINE_PROP_UINT8("aw-bits", VirtIOIOMMU, aw_bits, 0),
  DEFINE_PROP_END_OF_LIST(),
  };

Re: [PATCH v2 2/3] virtio-iommu: Trace domain range limits as unsigned int


On 2/1/24 17:32, Eric Auger wrote:

Use %u format to trace domain_range limits.

Signed-off-by: Eric Auger 



Reviewed-by: Cédric Le Goater 

Thanks,

C.



---
  hw/virtio/trace-events | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events
index 77905d1994..2350849fbd 100644
--- a/hw/virtio/trace-events
+++ b/hw/virtio/trace-events
@@ -111,7 +111,7 @@ virtio_iommu_device_reset(void) "reset!"
  virtio_iommu_system_reset(void) "system reset!"
  virtio_iommu_get_features(uint64_t features) "device supports 
features=0x%"PRIx64
  virtio_iommu_device_status(uint8_t status) "driver status = %d"
-virtio_iommu_get_config(uint64_t page_size_mask, uint64_t start, uint64_t end, uint32_t domain_start, uint32_t 
domain_end, uint32_t probe_size, uint8_t bypass) "page_size_mask=0x%"PRIx64" input range 
start=0x%"PRIx64" input range end=0x%"PRIx64" domain range start=%d domain range end=%d 
probe_size=0x%x bypass=0x%x"
+virtio_iommu_get_config(uint64_t page_size_mask, uint64_t start, uint64_t end, uint32_t domain_start, uint32_t 
domain_end, uint32_t probe_size, uint8_t bypass) "page_size_mask=0x%"PRIx64" input range 
start=0x%"PRIx64" input range end=0x%"PRIx64" domain range start=%u domain range end=%u 
probe_size=0x%x bypass=0x%x"
  virtio_iommu_set_config(uint8_t bypass) "bypass=0x%x"
  virtio_iommu_attach(uint32_t domain_id, uint32_t ep_id) "domain=%d 
endpoint=%d"
  virtio_iommu_detach(uint32_t domain_id, uint32_t ep_id) "domain=%d 
endpoint=%d"

Re: [PATCH v2 1/3] virtio-iommu: Add an option to define the input range width


On 2/1/24 17:32, Eric Auger wrote:

aw-bits is a new option that allows to set the bit width of
the input address range. This value will be used as a default for
the device config input_range.end. By default it is set to 64 bits
which is the current value.

Signed-off-by: Eric Auger 

---

v1 -> v2:
- Check the aw-bits value is within [32,64]
---
  include/hw/virtio/virtio-iommu.h | 1 +
  hw/virtio/virtio-iommu.c | 7 ++-
  2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/include/hw/virtio/virtio-iommu.h b/include/hw/virtio/virtio-iommu.h
index 781ebaea8f..5fbe4677c2 100644
--- a/include/hw/virtio/virtio-iommu.h
+++ b/include/hw/virtio/virtio-iommu.h
@@ -66,6 +66,7 @@ struct VirtIOIOMMU {
  bool boot_bypass;
  Notifier machine_done;
  bool granule_frozen;
+uint8_t aw_bits;
  };
  
  #endif

diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c
index ec2ba11d1d..7870bdbeee 100644
--- a/hw/virtio/virtio-iommu.c
+++ b/hw/virtio/virtio-iommu.c
@@ -1314,7 +1314,11 @@ static void virtio_iommu_device_realize(DeviceState 
*dev, Error **errp)
   */
  s->config.bypass = s->boot_bypass;
  s->config.page_size_mask = qemu_real_host_page_mask();
-s->config.input_range.end = UINT64_MAX;
+if (s->aw_bits < 32 || s->aw_bits > 64) {
+error_setg(errp, "aw-bits must be within [32,64]");
+}
+s->config.input_range.end =
+s->aw_bits == 64 ? UINT64_MAX : BIT_ULL(s->aw_bits) - 1;



This could be simplified :

  s->config.input_range.end = BIT_ULL(s->aw_bits) - 1;

Anyhow,


Reviewed-by: Cédric Le Goater 

Thanks,

C.




  s->config.domain_range.end = UINT32_MAX;
  s->config.probe_size = VIOMMU_PROBE_SIZE;
  
@@ -1525,6 +1529,7 @@ static Property virtio_iommu_properties[] = {

  DEFINE_PROP_LINK("primary-bus", VirtIOIOMMU, primary_bus,
   TYPE_PCI_BUS, PCIBus *),
  DEFINE_PROP_BOOL("boot-bypass", VirtIOIOMMU, boot_bypass, true),
+DEFINE_PROP_UINT8("aw-bits", VirtIOIOMMU, aw_bits, 64),
  DEFINE_PROP_END_OF_LIST(),
  };

Re: [PATCH 2/2] migration: Fix return-path thread exit

2024-02-02 Thread Cédric Le Goater


On 2/2/24 15:42, Fabiano Rosas wrote:

Cédric Le Goater  writes:


In case of error, close_return_path_on_source() can perform a shutdown
to exit the return-path thread.  However, in migrate_fd_cleanup(),
'to_dst_file' is closed before calling close_return_path_on_source()
and the shutdown fails, leaving the source and destination waiting for
an event to occur.


At close_return_path_on_source, qemu_file_shutdown() and checking
ms->to_dst_file are done under the qemu_file_lock, so how could
migrate_fd_cleanup() have cleared the pointer but the ms->to_dst_file
check have passed?


This is not a locking issue, it's much simpler. migrate_fd_cleanup()
clears the ms->to_dst_file pointer and closes the QEMUFile and then
calls close_return_path_on_source() which then tries to use resources
which are not available anymore.

Thanks,

C.








Close the file after calling close_return_path_on_source() so that the
shutdown succeeds and the return-path thread exits.

Signed-off-by: Cédric Le Goater 
---
  migration/migration.c | 12 +---
  1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/migration/migration.c b/migration/migration.c
index 
2c3362235c7651c11d581f3c3639571f1f9636ef..1e0b6acaedc272e8ce26ad40be2c42177f5fd14e
 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -1314,6 +1314,7 @@ void migrate_set_state(int *state, int old_state, int 
new_state)
  static void migrate_fd_cleanup(MigrationState *s)
  {
  int file_error = 0;
+QEMUFile *tmp = NULL;
  
  g_free(s->hostname);

  s->hostname = NULL;
@@ -1323,8 +1324,6 @@ static void migrate_fd_cleanup(MigrationState *s)
  qemu_savevm_state_cleanup();
  
  if (s->to_dst_file) {

-QEMUFile *tmp;
-
  trace_migrate_fd_cleanup();
  bql_unlock();
  if (s->migration_thread_running) {
@@ -1344,15 +1343,14 @@ static void migrate_fd_cleanup(MigrationState *s)
   * critical section won't block for long.
   */
  migration_ioc_unregister_yank_from_file(tmp);
-qemu_fclose(tmp);
  }
  
-/*

- * We already cleaned up to_dst_file, so errors from the return
- * path might be due to that, ignore them.
- */
  close_return_path_on_source(s, file_error);
  
+if (tmp) {

+qemu_fclose(tmp);
+}
+
  assert(!migration_is_active(s));
  
  if (s->state == MIGRATION_STATUS_CANCELLING) {

Re: [PATCH 1/2] migration: Add a file_error argument to close_return_path_on_source()

2024-02-02 Thread Cédric Le Goater


On 2/2/24 15:30, Fabiano Rosas wrote:

Cédric Le Goater  writes:


close_return_path_on_source() retrieves the migration error from the
QEMUFile '->to_dst_file' to know if a shutdown is required to exit
the return-path thread. However, in migrate_fd_cleanup(), '->to_dst_file'
is cleaned up before calling close_return_path_on_source() and the
shutdown is never performed, leaving the source and destination
waiting for an event to occur.


Isn't this just missing qemu_file_shutdown() at migrate_fd_cleanup?

 if (s->to_dst_file) {
 ...
 migration_ioc_unregister_yank_from_file(tmp);
+   qemu_file_shutdown(tmp);
 qemu_fclose(tmp);
 }



That would make the return-path thread exit indeed. It should not
be necessary when there are no errors though and this is done
outside of the close_return_path_on_source() helper. There could
be side effects.


I took into account Peter's comment and replaced the changes of
PATCH 1 with :

@@ -2372,8 +2372,7 @@ static bool close_return_path_on_source(
  * cause it to unblock if it's stuck waiting for the destination.
  */
 WITH_QEMU_LOCK_GUARD(>qemu_file_lock) {
-if (ms->to_dst_file && ms->rp_state.from_dst_file &&
-qemu_file_get_error(ms->to_dst_file)) {
+if (migrate_has_error(ms) && ms->rp_state.from_dst_file) {
 qemu_file_shutdown(ms->rp_state.from_dst_file);
 }
 }

Nevertheless, we need to qemu_file_shutdown() correctly the socket
for this to work and the problem seems more complex than just moving
code as I did in PATCH 2.

Thanks,

C.

Re: [PATCH 0/2] migration: Fix return-path thread exit

2024-02-02 Thread Cédric Le Goater


Hello Peter,


Today, close_return_path_on_source() can perform a shutdown to exit
the return-path thread if an error occurred. However, migrate_fd_cleanup()
does cleanups too early and the shutdown in close_return_path_on_source()
fails, leaving the source and destination waiting for an event to occur.

This little series tries to fix that. Comments welcome !


One thing I do agree is that relying on qemu_file_get_error(to_dst_file) in
close_return_path_on_source() is weird: IMHO we have better way to detect
"whether the migration has error" now, which is migrate_has_error().


ok. migrate_has_error() looks safe to use in that case. It works fine
with all the prereq VFIO cleanups (that I didn't send yet) and errors
in the setup of dirty tracking are reported correctly to the migration
core subsystem.


For this specific issue, I think one long standing issue that might be
relevant is we have two QEMUFile (from_dst_file, to_dst_file) that share
the same QIOChannel now.  Logically the two QEMUFile should be able to be
managed separately, say, close() of to_dst_file shouldn't affect the other.

However I don't think it's the case now, as qemu_fclose(to_dst_file) will
do qio_channel_close() already, which means there will be a side effect to
the other QEMUFile that its backing IOC is already closed.

Is this the issue we're facing?  


Yes. The socket is closed before calling close_return_path_on_source()
and ms->rp_state.from_dst_file becomes invalid, the shutdown silently
fails (we should maybe report error in qemu_file_shutdown()) and the
return-path thread does not exit.


IOW, the close() of to_dst_file will not
properly kick the other thread who is blocked at reading from_dst_file,
while the shutdown() will kick it out?


Yes, that's how I understand the comment :

/*
 * If this is a normal exit then the destination will send a SHUT
 * and the rp_thread will exit, however if there's an error we
 * need to cause it to exit. shutdown(2), if we have it, will
 * cause it to unblock if it's stuck waiting for the destination.
 */


If so, not sure whether we can somehow delay the real qio_channel_close()
until the last user releases it? IOW, conditionally close() the channel in
qio_channel_finalize(), if the channel is still open?  Would that make
sense?

It's the first time that I look at this code :/ I can't tell. Here is
the closing section :

qemu_mutex_unlock(>qemu_file_lock);
/*
 * Close the file handle without the lock to make sure the
 * critical section won't block for long.
 */
migration_ioc_unregister_yank_from_file(tmp);
qemu_fclose(tmp);
}


Thanks,

C.

[PATCH 2/2] migration: Fix return-path thread exit

In case of error, close_return_path_on_source() can perform a shutdown
to exit the return-path thread.  However, in migrate_fd_cleanup(),
'to_dst_file' is closed before calling close_return_path_on_source()
and the shutdown fails, leaving the source and destination waiting for
an event to occur.

Close the file after calling close_return_path_on_source() so that the
shutdown succeeds and the return-path thread exits.

Signed-off-by: Cédric Le Goater 
---
 migration/migration.c | 12 +---
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/migration/migration.c b/migration/migration.c
index 
2c3362235c7651c11d581f3c3639571f1f9636ef..1e0b6acaedc272e8ce26ad40be2c42177f5fd14e
 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -1314,6 +1314,7 @@ void migrate_set_state(int *state, int old_state, int 
new_state)
 static void migrate_fd_cleanup(MigrationState *s)
 {
 int file_error = 0;
+QEMUFile *tmp = NULL;
 
 g_free(s->hostname);
 s->hostname = NULL;
@@ -1323,8 +1324,6 @@ static void migrate_fd_cleanup(MigrationState *s)
 qemu_savevm_state_cleanup();
 
 if (s->to_dst_file) {
-QEMUFile *tmp;
-
 trace_migrate_fd_cleanup();
 bql_unlock();
 if (s->migration_thread_running) {
@@ -1344,15 +1343,14 @@ static void migrate_fd_cleanup(MigrationState *s)
  * critical section won't block for long.
  */
 migration_ioc_unregister_yank_from_file(tmp);
-qemu_fclose(tmp);
 }
 
-/*
- * We already cleaned up to_dst_file, so errors from the return
- * path might be due to that, ignore them.
- */
 close_return_path_on_source(s, file_error);
 
+if (tmp) {
+qemu_fclose(tmp);
+}
+
 assert(!migration_is_active(s));
 
 if (s->state == MIGRATION_STATUS_CANCELLING) {
-- 
2.43.0

[PATCH 1/2] migration: Add a file_error argument to close_return_path_on_source()

close_return_path_on_source() retrieves the migration error from the
QEMUFile '->to_dst_file' to know if a shutdown is required to exit
the return-path thread. However, in migrate_fd_cleanup(), '->to_dst_file'
is cleaned up before calling close_return_path_on_source() and the
shutdown is never performed, leaving the source and destination
waiting for an event to occur.

Cache the file error in a temporary variable and pass it to
close_return_path_on_source() to avoid relying on '->to_dst_file'.

Signed-off-by: Cédric Le Goater 
---
 migration/migration.c | 21 +
 1 file changed, 13 insertions(+), 8 deletions(-)

diff --git a/migration/migration.c b/migration/migration.c
index 
d5f705ceef4c925589aa49335969672c0d761fa2..2c3362235c7651c11d581f3c3639571f1f9636ef
 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -100,7 +100,7 @@ static int migration_maybe_pause(MigrationState *s,
  int *current_active_state,
  int new_state);
 static void migrate_fd_cancel(MigrationState *s);
-static bool close_return_path_on_source(MigrationState *s);
+static bool close_return_path_on_source(MigrationState *s, int eror);
 
 static void migration_downtime_start(MigrationState *s)
 {
@@ -1313,6 +1313,8 @@ void migrate_set_state(int *state, int old_state, int 
new_state)
 
 static void migrate_fd_cleanup(MigrationState *s)
 {
+int file_error = 0;
+
 g_free(s->hostname);
 s->hostname = NULL;
 json_writer_free(s->vmdesc);
@@ -1333,6 +1335,7 @@ static void migrate_fd_cleanup(MigrationState *s)
 
 multifd_save_cleanup();
 qemu_mutex_lock(>qemu_file_lock);
+file_error = qemu_file_get_error(s->to_dst_file);
 tmp = s->to_dst_file;
 s->to_dst_file = NULL;
 qemu_mutex_unlock(>qemu_file_lock);
@@ -1348,7 +1351,7 @@ static void migrate_fd_cleanup(MigrationState *s)
  * We already cleaned up to_dst_file, so errors from the return
  * path might be due to that, ignore them.
  */
-close_return_path_on_source(s);
+close_return_path_on_source(s, file_error);
 
 assert(!migration_is_active(s));
 
@@ -2357,7 +2360,7 @@ static int open_return_path_on_source(MigrationState *ms)
 }
 
 /* Return true if error detected, or false otherwise */
-static bool close_return_path_on_source(MigrationState *ms)
+static bool close_return_path_on_source(MigrationState *ms, int file_error)
 {
 if (!ms->rp_state.rp_thread_created) {
 return false;
@@ -2372,8 +2375,7 @@ static bool close_return_path_on_source(MigrationState *ms)
  * cause it to unblock if it's stuck waiting for the destination.
  */
 WITH_QEMU_LOCK_GUARD(&ms->qemu_file_lock) {
-if (ms->to_dst_file && ms->rp_state.from_dst_file &&
-qemu_file_get_error(ms->to_dst_file)) {
+if (file_error && ms->rp_state.from_dst_file) {
 qemu_file_shutdown(ms->rp_state.from_dst_file);
 }
 }
@@ -2707,6 +2709,7 @@ static void migration_completion(MigrationState *s)
 {
 int ret = 0;
 int current_active_state = s->state;
+int file_error = qemu_file_get_error(s->to_dst_file);
 
 if (s->state == MIGRATION_STATUS_ACTIVE) {
 ret = migration_completion_precopy(s, &current_active_state);
@@ -2720,11 +2723,11 @@ static void migration_completion(MigrationState *s)
 goto fail;
 }
 
-if (close_return_path_on_source(s)) {
+if (close_return_path_on_source(s, file_error)) {
 goto fail;
 }
 
-if (qemu_file_get_error(s->to_dst_file)) {
+if (file_error) {
 trace_migration_completion_file_err();
 goto fail;
 }
@@ -2861,6 +2864,7 @@ static MigThrError postcopy_pause(MigrationState *s)
 
 while (true) {
 QEMUFile *file;
+int file_error;
 
 /*
  * Current channel is possibly broken. Release it.  Note that this is
@@ -2874,6 +2878,7 @@ static MigThrError postcopy_pause(MigrationState *s)
 assert(s->to_dst_file);
 migration_ioc_unregister_yank_from_file(s->to_dst_file);
 qemu_mutex_lock(&s->qemu_file_lock);
+file_error = qemu_file_get_error(s->to_dst_file);
 file = s->to_dst_file;
 s->to_dst_file = NULL;
 qemu_mutex_unlock(&s->qemu_file_lock);
@@ -2886,7 +2891,7 @@ static MigThrError postcopy_pause(MigrationState *s)
  * path and just wait for the thread to finish. It will be
  * re-created when we resume.
  */
-close_return_path_on_source(s);
+close_return_path_on_source(s, file_error);
 
 migrate_set_state(&s->state, s->state,
   MIGRATION_STATUS_POSTCOPY_PAUSED);
-- 
2.43.0

[PATCH 0/2] migration: Fix return-path thread exit

Hello,

Today, close_return_path_on_source() can perform a shutdown to exit
the return-path thread if an error occurred. However, migrate_fd_cleanup()
does cleanups too early and the shutdown in close_return_path_on_source()
fails, leaving the source and destination waiting for an event to occur.

This little series tries to fix that. Comments welcome!

Thanks,

C. 

Cédric Le Goater (2):
  migration: Add a file_error argument to close_return_path_on_source()
  migration: Fix return-path thread exit

 migration/migration.c | 33 ++---
 1 file changed, 18 insertions(+), 15 deletions(-)

-- 
2.43.0

Re: [PATCH v4 22/47] hw/arm/aspeed: use qemu_configure_nic_device()


On 1/26/24 18:24, David Woodhouse wrote:

From: David Woodhouse 

Signed-off-by: David Woodhouse 
Acked-by: Cédric Le Goater 


and

Reviewed-by: Cédric Le Goater 

Thanks,

C.



---
  hw/arm/aspeed.c | 9 -
  1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/hw/arm/aspeed.c b/hw/arm/aspeed.c
index cc59176563..bed5e4f40b 100644
--- a/hw/arm/aspeed.c
+++ b/hw/arm/aspeed.c
@@ -356,7 +356,6 @@ static void aspeed_machine_init(MachineState *machine)
  AspeedMachineClass *amc = ASPEED_MACHINE_GET_CLASS(machine);
  AspeedSoCClass *sc;
  int i;
-NICInfo *nd = &nd_table[0];
  
  bmc->soc = ASPEED_SOC(object_new(amc->soc_name));

  object_property_add_child(OBJECT(machine), "soc", OBJECT(bmc->soc));
@@ -371,10 +370,10 @@ static void aspeed_machine_init(MachineState *machine)
   &error_fatal);
  
  for (i = 0; i < sc->macs_num; i++) {

-if ((amc->macs_mask & (1 << i)) && nd->used) {
-qemu_check_nic_model(nd, TYPE_FTGMAC100);
-qdev_set_nic_properties(DEVICE(&bmc->soc->ftgmac100[i]), nd);
-nd++;
+if ((amc->macs_mask & (1 << i)) &&
+!qemu_configure_nic_device(DEVICE(&bmc->soc->ftgmac100[i]),
+   true, NULL)) {
+break; /* No configs left; stop asking */
  }
  }

[PULL 06/17] hw/arm/aspeed: Check for CPU types in machine_run_board_init()